-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalysis.py
More file actions
38 lines (29 loc) · 1.04 KB
/
analysis.py
File metadata and controls
38 lines (29 loc) · 1.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# -*- encoding:utf-8 -*-
import csv
import sys
# Python 2 compatibility shim: force the interpreter-wide default string
# encoding to UTF-8. `reload` is a builtin only on Python 2; on Python 3 the
# name does not exist, the lookup raises NameError, and the shim is skipped
# (Python 3 already defaults to UTF-8 for str).
try:
    reload(sys)  # re-exposes sys.setdefaultencoding, which site.py removes
    sys.setdefaultencoding('utf-8')
except NameError:
    # Running on Python 3: nothing to do. Catching only NameError keeps
    # KeyboardInterrupt/SystemExit and genuine errors from being swallowed.
    pass
from textrank4zh import TextRank4Keyword, TextRank4Sentence
# Extract keywords and key phrases from the `content` column of the scraped
# CSV using TextRank, and print them to stdout.
tr4w = TextRank4Keyword()

# Read the CSV explicitly as UTF-8 so the result does not depend on the
# platform's locale encoding; 'utf-8-sig' also strips a leading BOM if the
# file has one, which would otherwise corrupt the first header name.
# newline='' is what the csv module requires so that newlines embedded in
# quoted fields are parsed correctly.
with open('./downloads/result/chatgpt_hot.csv', 'r',
          encoding='utf-8-sig', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    # Skip rows whose `content` is empty or missing (DictReader fills short
    # rows with None), which would otherwise make str.join raise TypeError.
    # A missing `content` column still raises KeyError.
    column_values = [row['content'] for row in reader if row['content']]

# One document, one CSV row per line.
content = '\n'.join(column_values)

# In Python 2, text must be a UTF-8 encoded str or a unicode object;
# in Python 3 it must be UTF-8 encoded bytes or a str.
tr4w.analyze(text=content, lower=True, window=2)

print('关键词:')
for item in tr4w.get_keywords(50, word_min_len=2):
    print(item.word, item.weight)

print()
print('关键短语:')
for phrase in tr4w.get_keyphrases(keywords_num=20, min_occur_num=2):
    print(phrase)
# Disabled: extractive summary (key sentences) of the same text.
# tr4s = TextRank4Sentence()
# tr4s.analyze(text=content, lower=True, source='all_filters')
# print()
# print('摘要:')
# for item in tr4s.get_key_sentences(num=3):
#     print(item.index, item.weight, item.sentence)  # index: position of the sentence in the text; weight: its score