-
Notifications
You must be signed in to change notification settings - Fork 1
/
myWordcloud.py
71 lines (56 loc) · 2.28 KB
/
myWordcloud.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# coding=utf-8
import os
try:
from wordcloud import WordCloud
except ImportError:
import platform
if platform.system() == 'Windows':
print('WordCloud will not run, as Microsoft Visual C++ 14.0 is required to build it on Windows..!')
exit(-1)
from supportFuncs import stopWords
import csv
def show_wordcloud(stop_words, data, title=None, dynamic_datasets_path=''):
    """Render a word cloud for *data* and save it as a PNG image.

    The image is written to
    ``<dynamic_datasets_path>/Resources/images/<title>.png``.

    Args:
        stop_words: Words to leave out of the cloud; forwarded to
            ``WordCloud`` as its ``stopwords`` argument.
        data: Source text. It is converted with ``str()`` before being
            handed to the generator.
        title: Name shown in the progress message and used as the image
            file name (without extension). Falls back to ``'wordcloud'``
            when omitted.
        dynamic_datasets_path: Base directory under which the
            ``Resources/images`` folder lives. The default ``''`` makes the
            path relative to the current working directory.
    """
    # The signature advertises ``title`` as optional, yet ``None`` used to
    # break the string concatenations below with a TypeError.
    if title is None:
        title = 'wordcloud'

    print('Creating WordCloud "' + title + '" image...')

    images_dir = os.path.join(dynamic_datasets_path, 'Resources', 'images')
    # Create the output folder up front so saving does not fail when the
    # folder has not been created yet.
    os.makedirs(images_dir, exist_ok=True)
    image_location = os.path.join(images_dir, title + '.png')

    WordCloud(
        background_color='black',
        stopwords=stop_words,
        max_words=200,
        max_font_size=40,
        scale=5,
        random_state=1,  # fixed seed passed through to the generator
    ).generate(str(data)).to_file(image_location)
def my_wordcloud(stop_words, dynamic_datasets_path):
    """Create one word-cloud image per article category of the training set.

    Reads ``<dynamic_datasets_path>/Resources/datasets/train_set.csv``
    (UTF-8, tab-delimited, ``|`` as quote character), groups the "Content"
    column by the "Category" column and calls ``show_wordcloud`` once for
    each of the categories Business, Politics, Football, Film and
    Technology, in that order. Rows of any other category are ignored.

    Args:
        stop_words: Words to exclude; forwarded to ``show_wordcloud``.
        dynamic_datasets_path: Base directory that contains the
            ``Resources`` folder.

    Raises:
        KeyError: If the CSV has no "Category" column, or no "Content"
            column for a row of one of the known categories.
    """
    print('Running myWordcloud...\n')

    # Tuple order defines the order in which the images are generated.
    categories = ('Business', 'Politics', 'Football', 'Film', 'Technology')
    articles_by_category = {category: [] for category in categories}

    location_train = os.path.join(dynamic_datasets_path, 'Resources', 'datasets', 'train_set.csv')
    # newline='' lets the csv module do its own line-ending handling, as the
    # csv documentation recommends for files passed to a reader.
    with open(location_train, mode='r', encoding="utf8", newline='') as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t', quotechar='|')
        for row in reader:
            articles = articles_by_category.get(row["Category"])
            if articles is not None:
                articles.append(row["Content"])

    for category in categories:
        articles = articles_by_category[category]
        if not articles:
            # Nothing to plot for a category that has no articles.
            print('No articles found for category "' + category + '", skipping its WordCloud.')
            continue
        # Join with a space: plain concatenation fused the last word of one
        # article with the first word of the next, skewing word counts.
        show_wordcloud(stop_words, ' '.join(articles), category, dynamic_datasets_path)

    print('myWordcloud finished!\n')
# Script entry point: build the word clouds when this file is executed directly.
if __name__ == '__main__':
    # An empty base path makes the Resources folder resolve relative to the
    # current working directory.
    dynamic_datasets_path = ''
    stop_words = stopWords.get_stop_words()
    my_wordcloud(stop_words, dynamic_datasets_path)
    exit()