-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmodel.py
39 lines (37 loc) · 1.54 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import io
import numpy as np
from config import *
import tensorflow as tf
import keras
from keras.models import Sequential, Model
from keras.layers import Embedding, Bidirectional, LSTM, Dense, TimeDistributed
from keras.optimizers import Adam
import crf
from crf import CRF
def _load_pretrained_matrix(glove_path, vocab_size, embedding_dim, word_index):
    """Build the initial embedding matrix from a GloVe-style text file.

    Each non-blank line of the file is expected to hold a token followed by
    its whitespace-separated vector components. Only tokens present in
    ``word_index`` are parsed, since no other vector is ever looked up.

    Args:
        glove_path: Path of the UTF-8 text file with the pretrained vectors.
        vocab_size: Number of rows of the returned matrix.
        embedding_dim: Number of columns of the returned matrix.
        word_index: Mapping from word to its integer row index.

    Returns:
        A ``(vocab_size, embedding_dim)`` float array. Rows of words without
        a pretrained vector (and of indices never assigned) stay all-zero.
    """
    embeddings_matrix = np.zeros((vocab_size, embedding_dim))
    with io.open(glove_path, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            if not values:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            row = word_index.get(values[0])
            # Valid rows are 0 .. vocab_size - 1; an index equal to
            # vocab_size would be out of bounds, hence the strict upper bound.
            if row is None or row >= vocab_size:
                continue
            # NOTE(review): raises ValueError if the file's vector length
            # differs from embedding_dim -- same as the previous behaviour.
            embeddings_matrix[row] = np.asarray(values[1:], dtype='float64')
    return embeddings_matrix


def create_model(vocab_size, max_length, embedding_dim, word_index, tag_index,
                 glove_path='glove.6B.100d.txt'):
    """Create and compile the Embedding -> BiLSTM -> Dense -> CRF tagger.

    The embedding layer is initialised from pretrained vectors read from
    ``glove_path``. The model summary is printed as a side effect.

    Args:
        vocab_size: Size of the embedding vocabulary (rows of the matrix).
        max_length: Input sequence length passed to the embedding layer.
        embedding_dim: Width of the word vectors; also used as the number of
            LSTM units per direction.
        word_index: Mapping from word to integer index. Words whose index is
            not below ``vocab_size`` are ignored.
        tag_index: Collection of tags; only its length is used, as the number
            of output classes.
        glove_path: Path of the pretrained vector file. Defaults to the file
            name that was previously hard-coded.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    embeddings_matrix = _load_pretrained_matrix(
        glove_path, vocab_size, embedding_dim, word_index)
    num_tags = len(tag_index)

    model = Sequential()
    # mask_zero=True makes index 0 a masked value for downstream layers
    # -- presumably the padding index; confirm against the tokenizer.
    model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim,
                        input_length=max_length, weights=[embeddings_matrix],
                        mask_zero=True))
    model.add(Bidirectional(LSTM(units=embedding_dim, return_sequences=True,
                                 recurrent_dropout=0.01)))
    model.add(TimeDistributed(Dense(num_tags)))

    # Named crf_layer (not crf) so the imported `crf` module is not shadowed.
    crf_layer = CRF(num_tags, sparse_target=True)
    model.add(crf_layer)
    model.compile(optimizer='adam', loss=crf_layer.loss,
                  metrics=[crf_layer.accuracy])
    model.summary()
    return model