forked from ddhira123/Stop-Words-List
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.py
38 lines (33 loc) · 1.05 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import os
from alphabet_detector import AlphabetDetector
def repair_double_and_rearrange(file, filename):
    """Rewrite ``./list/<filename>`` with stripped, unique, sorted lines.

    Every line of *file* is read, leading/trailing whitespace is stripped,
    duplicate entries are dropped, and the remaining entries are sorted
    with the default string ordering and written one per line.

    Args:
        file: Open text-mode file object; only ``readlines()`` is used.
        filename: Name of the output file inside the ``./list/`` directory.
    """
    # Read everything before opening the output: runner() passes a handle
    # to the very file that is about to be truncated for writing.
    unique_words = {line.strip() for line in file.readlines()}
    ordered = sorted(unique_words)

    # The context manager guarantees the handle is flushed and closed even
    # if a write fails part-way through.
    with open("./list/" + filename, "w", encoding='utf-8') as out:
        out.writelines(word + "\n" for word in ordered)
def repair_case(file, filename):
    """Lower-case selected lines of *file* and rewrite ``./list/<filename>``.

    A line is lower-cased when ``AlphabetDetector.is_latin`` accepts it and
    it is not already lower-case; every other line is written back
    unchanged. Lines keep whatever line ending they were read with.

    Args:
        file: Open text-mode file object; only ``readlines()`` is used.
        filename: Name of the output file inside the ``./list/`` directory.

    Returns:
        Always ``True``.
    """
    lines = file.readlines()
    detector = AlphabetDetector()

    # Build the full result before touching the output file, so a failure
    # during detection cannot leave a truncated, half-written list behind.
    normalised = [
        line.lower() if detector.is_latin(line) and not line.islower() else line
        for line in lines
    ]

    # Close the handle deterministically instead of relying on garbage
    # collection to flush the data to disk.
    with open("./list/" + filename, "w", encoding='utf-8') as out:
        out.writelines(normalised)
    return True
def runner():
    """Normalise every file found in the ``list`` directory, in place.

    Each file is first passed through ``repair_case`` and then through
    ``repair_double_and_rearrange``.

    Returns:
        The string ``"PASS"`` once every file has been processed.
    """
    for filename in os.listdir('list'):
        path = "./list/" + filename

        # Each pass rewrites the file, so the second pass needs a fresh
        # handle to see the output of the first. The context managers make
        # sure the handles are released even if a repair step raises.
        with open(path, encoding='utf-8') as file:
            repair_case(file, filename)
        with open(path, encoding='utf-8') as file:
            repair_double_and_rearrange(file, filename)
    return "PASS"
# Module-level call: every file under ./list is rewritten both when this
# file is executed as a script and whenever the module is imported.
runner()