-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathword2vec-api.py
executable file
·127 lines (108 loc) · 4.59 KB
/
word2vec-api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
'''
Simple web service wrapping a Word2Vec as implemented in Gensim
Example call: curl "http://127.0.0.1:5000/word2vec/n_similarity?ws1=Sushi&ws1=Shop&ws2=Japanese&ws2=Restaurant"
@TODO: Add more methods
@TODO: Add command line parameter: path to the trained model
@TODO: Add command line parameters: host and port
'''
from flask import Flask, request, jsonify
from flask.ext.restful import Resource, Api, reqparse
from gensim.models.word2vec import Word2Vec as w
from gensim import utils, matutils
from numpy import exp, dot, zeros, outer, random, dtype, get_include, float32 as REAL,\
uint32, seterr, array, uint8, vstack, argsort, fromstring, sqrt, newaxis, ndarray, empty, sum as np_sum
import cPickle
import argparse
import base64
import sys
# Module-level request parser.
# NOTE(review): every Resource.get() in this file that parses arguments builds
# its own local RequestParser, so this instance looks unused here -- confirm
# before removing.
parser = reqparse.RequestParser()
def filter_words(words):
    """Keep only the words that are present in the loaded model's vocabulary.

    Returns None when `words` is None; otherwise a new list holding the
    in-vocabulary words in their original order.
    """
    if words is None:
        return None
    kept = []
    for candidate in words:
        # `model` is the module-level Word2Vec instance set at startup.
        if candidate in model.vocab:
            kept.append(candidate)
    return kept
class N_Similarity(Resource):
    """GET endpoint returning the similarity between two sets of words."""

    def get(self):
        """Read the repeated `ws1` / `ws2` request arguments, drop words
        that are not in the model vocabulary, and return
        `model.n_similarity` of the two remaining lists."""
        request_parser = reqparse.RequestParser()
        for arg_name, message in (('ws1', "Word set 1 cannot be blank!"),
                                  ('ws2', "Word set 2 cannot be blank!")):
            # action='append' collects every occurrence of the argument.
            request_parser.add_argument(arg_name, type=str, required=True,
                                        help=message, action='append')
        parsed = request_parser.parse_args()
        first_set = filter_words(parsed['ws1'])
        second_set = filter_words(parsed['ws2'])
        return model.n_similarity(first_set, second_set)
class Similarity(Resource):
    """GET endpoint returning the similarity between two single words."""

    def get(self):
        """Read the required `w1` and `w2` request arguments and return
        `model.similarity` for that pair."""
        request_parser = reqparse.RequestParser()
        for arg_name, message in (('w1', "Word 1 cannot be blank!"),
                                  ('w2', "Word 2 cannot be blank!")):
            request_parser.add_argument(arg_name, type=str, required=True,
                                        help=message)
        parsed = request_parser.parse_args()
        first_word, second_word = parsed['w1'], parsed['w2']
        return model.similarity(first_word, second_word)
class MostSimilar(Resource):
    """GET endpoint returning the words most similar to a positive/negative
    word combination, via `model.most_similar_cosmul`."""

    def get(self):
        """Handle a most-similar query.

        Request arguments (all optional):
          positive -- repeated; words contributing positively.
          negative -- repeated; words contributing negatively.
          topn     -- int; number of results (10 when omitted).

        Words missing from the model vocabulary are dropped by
        `filter_words`. Returns whatever `most_similar_cosmul` returns, or
        None if the model call raises (the error is printed).
        """
        request_parser = reqparse.RequestParser()
        request_parser.add_argument('positive', type=str, required=False,
                                    help="Positive words.", action='append')
        request_parser.add_argument('negative', type=str, required=False,
                                    help="Negative words.", action='append')
        request_parser.add_argument('topn', type=int, required=False,
                                    help="Number of results.")
        parsed = request_parser.parse_args()

        # filter_words returns None for an absent argument; normalise to [].
        pos = filter_words(parsed.get('positive')) or []
        neg = filter_words(parsed.get('negative')) or []
        topn = parsed.get('topn')
        if topn is None:
            topn = 10

        print("positive: " + str(pos) + " negative: " + str(neg) + " topn: " + str(topn))
        try:
            return model.most_similar_cosmul(positive=pos, negative=neg, topn=topn)
        except Exception as e:
            # Best-effort endpoint: report the failure and answer with None,
            # as the Model resource does. The previous handler printed `res`
            # here, which is unbound whenever the call above raised.
            print(e)
            return None
class Model(Resource):
    """GET endpoint returning the model's entry for one word,
    base64-encoded."""

    def get(self):
        """Look up the required `word` request argument in the model and
        return the base64 encoding of the result. On any error the
        exception is printed and None is returned."""
        request_parser = reqparse.RequestParser()
        request_parser.add_argument('word', type=str, required=True,
                                    help="word to query.")
        parsed = request_parser.parse_args()
        try:
            vector = model[parsed['word']]
            return base64.b64encode(vector)
        except Exception as e:
            print(e)
            return
class ModelWordSet(Resource):
    """GET endpoint returning the model's word set as a base64-encoded
    pickle."""

    def get(self):
        """Build a set from `model.index2word`, pickle it, and return the
        base64 encoding. On any error the exception is printed and None is
        returned."""
        try:
            vocabulary = set(model.index2word)
            pickled = cPickle.dumps(vocabulary)
            return base64.b64encode(pickled)
        except Exception as e:
            print(e)
            return
# Flask application object, and the Flask-RESTful Api wrapper on which the
# Resource classes are registered in the __main__ section.
app = Flask(__name__)
api = Api(app)
@app.errorhandler(404)
def pageNotFound(error):
    """Answer unknown URLs with a plain-text message.

    The status code is returned explicitly: a bare string from an error
    handler would be sent with status 200.
    """
    return "page not found", 404
@app.errorhandler(500)
def raiseError(error):
    """Answer unhandled server errors with the error's text and status 500.

    The handler may receive an ordinary exception instance, which is not a
    valid response object, so it is rendered to a string instead of being
    returned as-is.
    """
    return str(error), 500
if __name__ == '__main__':
    # Script entry point: parse the command line, load the model into the
    # module-level name `model` (read by the resources and filter_words),
    # register the REST routes under the chosen path prefix, and start the
    # Flask development server.

    #----------- Parsing Arguments ---------------
    p = argparse.ArgumentParser()
    p.add_argument("--model", help="Path to the trained model")
    p.add_argument("--binary", help="Specifies the loaded model is binary")
    p.add_argument("--host", help="Host name (default: localhost)")
    p.add_argument("--port", help="Port (default: 5000)")
    p.add_argument("--path", help="Path (default: /word2vec)")
    args = p.parse_args()

    # Empty or missing values fall back to the defaults below.
    model_path = args.model or "/home/fox/xavier_corpus/word2vec/sgns-50-tra.model"
    # NOTE(review): `binary` is parsed but not used by the load call below --
    # confirm whether --binary is still meant to select a different loader.
    binary = bool(args.binary)
    host = args.host or "localhost"
    path = args.path or "/word2vec"
    port = int(args.port) if args.port else 5000

    if not args.model:
        # Only a hint: execution continues with the default model path.
        print("Usage: word2vec-api.py --model path/to/the/model [--host host --port 1234]")

    # Assignment at module level already binds the global `model`.
    model = w.load(model_path)

    api.add_resource(N_Similarity, path + '/n_similarity')
    api.add_resource(Similarity, path + '/similarity')
    api.add_resource(MostSimilar, path + '/most_similar')
    api.add_resource(Model, path + '/model')
    # Use the configurable prefix like the other routes; with the default
    # --path this is still /word2vec/model_word_set.
    api.add_resource(ModelWordSet, path + '/model_word_set')
    app.run(host=host, port=port)