forked from Shreeshrii/tess5train-fonts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcount_chars.py
executable file
·45 lines (38 loc) · 1.05 KB
/
count_chars.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Source: https://github.com/cmroughan/kraken_generated-data/blob/master/tools/count_chars.py
import getopt
import sys
import unicodedata
from collections import Counter
def main(argv):
    """Print how often each named Unicode character occurs in a text file.

    One line is written to stdout per distinct character, in code-point
    order, formatted as ``<count> \\t <char> <UNICODE NAME>``. Characters
    that have no Unicode name (e.g. newline, tab and other control
    characters) are omitted from the listing.

    Args:
        argv: Command-line arguments without the program name. Accepts
            ``-h``/``--help`` and one positional text-file path; if several
            paths are given, the last one is used.

    Raises:
        SystemExit: With status 2 on an unknown option or a missing file
            argument, and with a zero status after printing help.
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    usage = 'USAGE: count_chars.py <txt_file>'
    try:
        # "help" must be registered as a long option, otherwise --help is
        # rejected by getopt and never reaches the check below.
        opts, args = getopt.getopt(argv, "h", ["help"])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, _ in opts:
        if opt in ('-h', '--help'):
            print(usage)
            sys.exit()
    if not args:
        # Without a path there is nothing to count; show usage instead of
        # failing later on open('').
        print(usage)
        sys.exit(2)
    txt_file = args[-1]  # the last positional argument wins

    # Decode explicitly as UTF-8 so results do not depend on the locale.
    with open(txt_file, encoding="utf-8") as in_file:
        counts = Counter(in_file.read())

    for char in sorted(counts):
        name = unicodedata.name(char, None)
        if name is None:
            # Unnamed characters (mostly control characters) are skipped.
            continue
        try:
            print(counts[char], '\t', char, name)
        except UnicodeEncodeError:
            # stdout cannot represent this character; keep listing the rest.
            continue
# Script entry point: hand main() the command-line arguments, minus the
# program name in sys.argv[0].
if __name__ == '__main__':
    main(sys.argv[1:])