-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathNER1_with_table.py
105 lines (82 loc) · 2.86 KB
/
NER1_with_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 17 10:15:59 2018
@author: mayur
"""
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 12 11:24:59 2018
@author: mayur
"""
from SPARQLWrapper import SPARQLWrapper, JSON
import requests
import urllib.parse
from nltk.corpus import stopwords
import pandas as pd
# One-row tally table: the first column carries the row label 'Count', and
# every other column is a type name whose counter starts at zero.
_type_labels = ('person', 'place', 'animal', 'city', 'country',
                'organisation', 'Food')
_initial_row = {'Entities': ['Count']}
for _label in _type_labels:
    _initial_row[_label] = [0]
entities = pd.DataFrame(_initial_row)
## initial constants
# DBpedia Spotlight "annotate" URL template; the three placeholders are
# filled in when REQUEST is built below.
BASE_URL = 'http://api.dbpedia-spotlight.org/en/annotate?text={text}&confidence={confidence}&support={support}'
# Values substituted into the confidence/support query parameters.
CONFIDENCE = '0.2'
SUPPORT = '10'

# NOTE(review): both original `Text =` assignments had no right-hand side,
# which is a syntax error -- the passage to annotate appears to be missing
# from the file. An empty placeholder keeps the script parseable; put the
# input text here before running.
Text = ''

# Remove English stop words before sending the text for annotation. The stop
# word list is loaded once into a set instead of being re-read for every
# word; the membership test is case-sensitive, exactly as before.
Text = Text.split()
_ENGLISH_STOPWORDS = set(stopwords.words('english'))
Text1 = [word for word in Text if word not in _ENGLISH_STOPWORDS]
TEXT = ' '.join(Text1)

# URL-encode the filtered text and assemble the final request URL.
REQUEST = BASE_URL.format(
    text=urllib.parse.quote_plus(TEXT),
    confidence=CONFIDENCE,
    support=SUPPORT,
)
# Ask the service for a JSON reply.
HEADERS = {'Accept': 'application/json'}
# Client for the DBpedia SPARQL endpoint; it is queried once per URI later on.
sparql = SPARQLWrapper("http://dbpedia.org/sparql")

# Send the annotation request built above.
r = requests.get(url=REQUEST, headers=HEADERS)
# Surface HTTP errors as such instead of as a JSON-decoding or key error.
r.raise_for_status()
response = r.json()

# The reply may carry no 'Resources' key (e.g. when nothing was annotated);
# treat that as "no resources" rather than crashing with a KeyError.
resources = response.get('Resources', [])

# URI of every annotated resource, in the order the service returned them.
all_urls = [res['@URI'] for res in resources]
# Query text placed before and after the VALUES clause. For the given subject
# the query selects English rdfs:label values of its rdf:type classes, capped
# at 6 rows. ?rank is selected but never bound in the WHERE pattern.
_TYPE_QUERY_HEAD = """PREFIX vrank:<http://purl.org/voc/vrank#>
SELECT DISTINCT ?l ?rank
FROM <http://dbpedia.org>
FROM <http://people.aifb.kit.edu/ath/#DBpedia_PageRank>
WHERE {
VALUES (?s) {"""
_TYPE_QUERY_TAIL = """ }
?s rdf:type ?p .
?p rdfs:label ?l.
FILTER (lang(?l) = 'en')
} limit 6
"""

# x[k] is the list of type labels returned for all_urls[k].
x = []
# The return format does not change between queries, so set it once.
sparql.setReturnFormat(JSON)
for uri in all_urls:
    # Bind the URI as the single row of the VALUES clause.
    values = '(<{0}>)'.format(uri)
    sparql.setQuery(_TYPE_QUERY_HEAD + values + _TYPE_QUERY_TAIL)
    results = sparql.query().convert()
    x.append([binding['l']['value']
              for binding in results["results"]["bindings"]])
# Surface form (the text span as annotated) of every resource, in the same
# order as the label lists collected in x.
item = [res['@surfaceForm'] for res in resources]

# Pair each surface form with its label list; when a surface form occurs more
# than once, the labels of its last occurrence are the ones kept.
mainlist = dict(zip(item, x))

# Print every surface form with its labels, followed by blank lines.
for surface_form, labels in mainlist.items():
    print(surface_form, ':', labels)
    print('\n')
# Tally types: for every surface form, add one to each table column whose
# name appears among that form's labels.
m = 0  # row of the table that receives the counts; never changed below
# Skip the first column (the 'Entities' label, given the column order the
# table is created with); all remaining columns are counters.
type_columns = entities.columns[1:]
for surface_form, labels in mainlist.items():
    print(surface_form)
    for column in type_columns:
        if column in labels:
            entities.loc[m, column] += 1