import numpy as np
import tensorflow as tf
from q2_initialization import xavier_weight_init
+ from q1_softmax import softmax
import data_utils.utils as du
import data_utils.ner as ner
from utils import data_iterator
@@ -22,8 +23,8 @@ class Config(object):
    batch_size = 64
    label_size = 5
    hidden_size = 100
-   max_epochs = 24
-   early_stopping = 2
+   max_epochs = 1  # 24
+   early_stopping = 3
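+   # early_stopping counts epochs since the best validation loss; training
+   # halts once that gap exceeds this value (see the loop in test_NER below).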
    dropout = 0.9
    lr = 0.001
    l2 = 0.001
@@ -84,15 +85,20 @@ def add_placeholders(self):
          type tf.float32

    Add these placeholders to self as the instance variables
-
+
      self.input_placeholder
      self.labels_placeholder
      self.dropout_placeholder

    (Don't change the variable names)
    """
    ### YOUR CODE HERE
-   raise NotImplementedError
+   self.input_placeholder = tf.placeholder(
+       tf.int32, (None, self.config.window_size), name="input_placeholder")
+   self.labels_placeholder = tf.placeholder(
+       tf.float32, (None, self.config.label_size), name="labels_placeholder")
+   self.dropout_placeholder = tf.placeholder(tf.float32, name="dropout_placeholder")
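+   # Note: the dropout placeholder is a scalar keep-probability, so evaluation
+   # can feed 1.0 to disable dropout without rebuilding the graph.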
    ### END YOUR CODE

def create_feed_dict(self, input_batch, dropout, label_batch=None):
@@ -109,15 +115,26 @@ def create_feed_dict(self, input_batch, dropout, label_batch=None):
    Hint: The keys for the feed_dict should be a subset of the placeholder
          tensors created in add_placeholders.
    Hint: When label_batch is None, don't add a labels entry to the feed_dict.
-
+
    Args:
      input_batch: A batch of input data.
      label_batch: A batch of label data.
    Returns:
      feed_dict: The feed dictionary mapping from placeholders to values.
    """
    ### YOUR CODE HERE
-   raise NotImplementedError
+   if label_batch is not None:
+       feed_dict = {
+           self.input_placeholder: input_batch,
+           self.labels_placeholder: label_batch,
+           self.dropout_placeholder: dropout,
+       }
+   else:
+       feed_dict = {
+           self.input_placeholder: input_batch,
+           self.dropout_placeholder: dropout,
+       }
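+   # Usage sketch (hypothetical batch names): at train time,
+   #   feed = self.create_feed_dict(x_batch, self.config.dropout, y_batch)
+   # at inference, omit the labels and disable dropout:
+   #   feed = self.create_feed_dict(x_batch, 1.0)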
    ### END YOUR CODE
    return feed_dict
@@ -148,7 +165,13 @@ def add_embedding(self):
    # The embedding lookup is currently only implemented for the CPU
    with tf.device('/cpu:0'):
        ### YOUR CODE HERE
-       raise NotImplementedError
+       # Init the embeddings to random values between -1 and 1 for our chosen
+       # vocabulary (len(self.wv)) and our embed_size (50) from the configuration.
+       embeddings = tf.Variable(
+           tf.random_uniform([len(self.wv), self.config.embed_size], -1, 1))
+       window = tf.reshape(
+           tf.nn.embedding_lookup(embeddings, self.input_placeholder),
+           (-1, self.config.window_size * self.config.embed_size))
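+       # Shape note: embedding_lookup returns (batch, window_size, embed_size);
+       # the reshape flattens each window into one feature row of length
+       # window_size * embed_size.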
        ### END YOUR CODE
    return window
@@ -180,9 +203,48 @@ def add_model(self, window):
      output: tf.Tensor of shape (batch_size, label_size)
    """
    ### YOUR CODE HERE
-   raise NotImplementedError
+   xavier_initializer = xavier_weight_init()
+
+   def weight_init(name, shape):
+       # Wrap the Xavier-initialized tensor in a Variable so the weights are
+       # trainable (assuming the q2_initialization helper returns a plain tensor).
+       return tf.Variable(xavier_initializer(shape), name=name)
+
+   def bias_init(name, shape):
+       return tf.get_variable(name, shape, tf.float32)
+
+   with tf.variable_scope("hidden_layer"):
+       weights = weight_init("weights",
+                             (self.config.window_size * self.config.embed_size,
+                              self.config.hidden_size))
+       biases = bias_init("biases", self.config.hidden_size)
+       # L2-regularize the underlying weights (before dropout touches them).
+       tf.add_to_collection("total_loss",
+                            self.config.l2 * tf.reduce_sum(weights ** 2) / 2)
+       # Drop out the weights; use the placeholder rather than config.dropout
+       # so evaluation can feed a keep probability of 1.0.
+       dropped = tf.nn.dropout(weights, self.dropout_placeholder,
+                               name="hidden_weights_dropout")
+       # tanh nonlinearity (the draft's "using sigmoid?" question left this
+       # open; tanh is the conventional choice for this window model).
+       hidden_out = tf.tanh(tf.matmul(window, dropped) + biases)
+
+   with tf.variable_scope("answer_layer"):
+       weights = weight_init("weights",
+                             (self.config.hidden_size, self.config.label_size))
+       biases = bias_init("biases", self.config.label_size)
+       tf.add_to_collection("total_loss",
+                            self.config.l2 * tf.reduce_sum(weights ** 2) / 2)
+       dropped = tf.nn.dropout(weights, self.dropout_placeholder,
+                               name="answer_weights_dropout")
+       # The softmax is applied later, in add_loss_op, on these logits.
+       output = tf.matmul(hidden_out, dropped) + biases
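+   # Shape sketch: window (batch, window_size * embed_size) -> hidden
+   # (batch, hidden_size) -> output logits (batch, label_size).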
245
+
184
246
### END YOUR CODE
185
- return output
247
+ return output
186
248
187
249
def add_loss_op (self , y ):
188
250
"""Adds cross_entropy_loss ops to the computational graph.
@@ -195,7 +257,14 @@ def add_loss_op(self, y):
      loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
-   raise NotImplementedError
+   # Mean cross-entropy over the batch, plus the L2 terms that add_model
+   # stored in the "total_loss" collection.
+   loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
+       labels=self.labels_placeholder, logits=y))
+   # get_collection returns a list of tensors; tf.add_n sums them into a scalar.
+   loss += tf.add_n(tf.get_collection("total_loss"))
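+   # Note: softmax_cross_entropy_with_logits expects unscaled logits, so no
+   # softmax should be applied to y beforehand.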
    ### END YOUR CODE
    return loss
@@ -204,7 +273,7 @@ def add_training_op(self, loss):
    Creates an optimizer and applies the gradients to all trainable variables.
    The Op returned by this function is what must be passed to the
-   `sess.run()` call to cause the model to train. See
+   `sess.run()` call to cause the model to train. See

    https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer
@@ -219,7 +288,9 @@ def add_training_op(self, loss):
      train_op: The Op for training.
    """
    ### YOUR CODE HERE
-   raise NotImplementedError
+   optimizer = tf.train.AdamOptimizer(self.config.lr)
+   train_op = optimizer.minimize(loss)
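+   # Design note: AdamOptimizer adapts per-parameter step sizes, so config.lr
+   # acts as an initial learning rate rather than a fixed SGD step.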
    ### END YOUR CODE
    return train_op
@@ -252,6 +323,7 @@ def run_epoch(self, session, input_data, input_labels,
            data_iterator(orig_X, orig_y, batch_size=self.config.batch_size,
                          label_size=self.config.label_size, shuffle=shuffle)):
        feed = self.create_feed_dict(input_batch=x, dropout=dp, label_batch=y)
+
        loss, total_correct, _ = session.run(
            [self.loss, self.correct_predictions, self.train_op],
            feed_dict=feed)
@@ -333,7 +405,7 @@ def test_NER():
    with tf.Graph().as_default():
        model = NERModel(config)

-       init = tf.initialize_all_variables()
+       init = tf.global_variables_initializer()
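+       # tf.initialize_all_variables was deprecated in favor of
+       # tf.global_variables_initializer (TensorFlow 0.12+), hence the swap.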
        saver = tf.train.Saver()

        with tf.Session() as session:
@@ -346,7 +418,7 @@ def test_NER():
            start = time.time()
            ###
            train_loss, train_acc = model.run_epoch(session, model.X_train,
-                                                   model.y_train)
+                                                   model.y_train, verbose=100)
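+           # Assumption: verbose is the stride (in batches) at which run_epoch
+           # prints training progress.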
            val_loss, predictions = model.predict(session, model.X_dev, model.y_dev)
            print 'Training loss: {}'.format(train_loss)
            print 'Training acc: {}'.format(train_acc)
@@ -356,15 +428,16 @@ def test_NER():
                best_val_epoch = epoch
                if not os.path.exists("./weights"):
                    os.makedirs("./weights")
-
+
                saver.save(session, './weights/ner.weights')
            if epoch - best_val_epoch > config.early_stopping:
+               print("Stopping early: no validation improvement since epoch "
+                     "{0} ({1} epochs ago)".format(best_val_epoch, epoch - best_val_epoch))
                break
            ###
        confusion = calculate_confusion(config, predictions, model.y_dev)
        print_confusion(confusion, model.num_to_tag)
        print 'Total time: {}'.format(time.time() - start)
-
+
        saver.restore(session, './weights/ner.weights')
        print 'Test'
        print '=-=-='