
Commit 33582a7

Merge pull request dmitrinesterenko#4 from dmitrinesterenko/assignment/2-NER-in-tf
Assignment/2 ner in tf
2 parents 38f0c73 + ffbc970 commit 33582a7

9 files changed: 46681 additions & 21 deletions

assignment2/README.md

Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
# TODO

- [ ] Add regularization
- [ ] Add dropout

# Running time comparisons with AWS p2 instance
## 1 Epoch
Training loss: 0.402585595846
Training acc: 0.874025763551
Validation loss: 0.291980147362

[[41939   151   105   219   345]
 [  403  1439    45   132    75]
 [  485    53   612    70    48]
 [  886   153    57   843   153]
 [  948    81    18   147  1955]]
Tag: O - P 0.9391 / R 0.9808
Tag: LOC - P 0.7666 / R 0.6872
Tag: MISC - P 0.7312 / R 0.4826
Tag: ORG - P 0.5974 / R 0.4030
Tag: PER - P 0.7589 / R 0.6208
Total time: 105.241132975
Test
=-=-=

## Another restart gives a better total time

[[42050   129    99   228   253]
 [  214  1683    23   120    54]
 [  244    35   905    56    28]
 [  586   100    40  1269    97]
 [  513    39    11    99  2487]]
Tag: O - P 0.9643 / R 0.9834
Tag: LOC - P 0.8474 / R 0.8037
Tag: MISC - P 0.8395 / R 0.7137
Tag: ORG - P 0.7161 / R 0.6066
Tag: PER - P 0.8520 / R 0.7898
Total time: 85.1370520592
Epoch 5
Training loss: 0.0791734457016
Training acc: 0.974315026446
Validation loss: 0.22555449605

## A test run with dropout and regularization
Epoch 0
Training loss: 0.446270912886
Training acc: 0.867474376415
Validation loss: 0.322937309742

[[41772   167   226   276   318]
 [  311  1389    56   197   141]
 [  366    46   737    83    36]
 [  853   114    76   849   200]
 [  957    80    27   211  1874]]
Tag: O - P 0.9438 / R 0.9769
Tag: LOC - P 0.7734 / R 0.6633
Tag: MISC - P 0.6569 / R 0.5812
Tag: ORG - P 0.5254 / R 0.4058
Tag: PER - P 0.7295 / R 0.5951
Total time: 152.228859901
Test
=-=-=
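Each block of "Tag: ... - P ... / R ..." lines above follows directly from the confusion matrix printed just before it: rows are the gold tags and columns the predicted tags, in the order O, LOC, MISC, ORG, PER. A minimal sketch (mine, not part of the commit) that recomputes the first block from its matrix:

```python
import numpy as np

tags = ["O", "LOC", "MISC", "ORG", "PER"]
confusion = np.array([
    [41939,   151,   105,   219,   345],
    [  403,  1439,    45,   132,    75],
    [  485,    53,   612,    70,    48],
    [  886,   153,    57,   843,   153],
    [  948,    81,    18,   147,  1955],
], dtype=float)

for i, tag in enumerate(tags):
    precision = confusion[i, i] / confusion[:, i].sum()  # correct / all predicted as this tag
    recall = confusion[i, i] / confusion[i, :].sum()      # correct / all gold occurrences of this tag
    print("Tag: {0} - P {1:.4f} / R {2:.4f}".format(tag, precision, recall))
```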

assignment2/q2_NER.py

Lines changed: 89 additions & 16 deletions
@@ -6,6 +6,7 @@
 import numpy as np
 import tensorflow as tf
 from q2_initialization import xavier_weight_init
+from q1_softmax import softmax
 import data_utils.utils as du
 import data_utils.ner as ner
 from utils import data_iterator
@@ -22,8 +23,8 @@ class Config(object):
   batch_size = 64
   label_size = 5
   hidden_size = 100
-  max_epochs = 24
-  early_stopping = 2
+  max_epochs = 1 #24
+  early_stopping = 3
   dropout = 0.9
   lr = 0.001
   l2 = 0.001
@@ -84,15 +85,20 @@ def add_placeholders(self):
                          type tf.float32

     Add these placeholders to self as the instance variables
-
+
       self.input_placeholder
       self.labels_placeholder
       self.dropout_placeholder

     (Don't change the variable names)
     """
     ### YOUR CODE HERE
-    raise NotImplementedError
+    self.input_placeholder = tf.placeholder(tf.int32,(None, self.config.window_size),name="input_placeholder")
+    self.labels_placeholder = tf.placeholder(tf.float32,(None, \
+      self.config.label_size),name="labels_placeholder")
+    self.dropout_placeholder = tf.placeholder(tf.float32,name="dropout_placeholder")
+
+
     ### END YOUR CODE

   def create_feed_dict(self, input_batch, dropout, label_batch=None):
@@ -109,15 +115,26 @@ def create_feed_dict(self, input_batch, dropout, label_batch=None):
     Hint: The keys for the feed_dict should be a subset of the placeholder
           tensors created in add_placeholders.
     Hint: When label_batch is None, don't add a labels entry to the feed_dict.
-
+
     Args:
       input_batch: A batch of input data.
       label_batch: A batch of label data.
     Returns:
       feed_dict: The feed dictionary mapping from placeholders to values.
     """
     ### YOUR CODE HERE
-    raise NotImplementedError
+
+    if label_batch is not None:
+      feed_dict = {
+        self.input_placeholder: input_batch,
+        self.labels_placeholder: label_batch,
+        self.dropout_placeholder: dropout
+      }
+    else:
+      feed_dict = {
+        self.input_placeholder: input_batch,
+        self.dropout_placeholder: dropout
+      }
     ### END YOUR CODE
     return feed_dict
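Together, add_placeholders and create_feed_dict implement the usual TF1 input pattern that run_epoch relies on: declare the graph inputs once, then bind numpy batches to them at each session.run call. A standalone sketch of that interaction with made-up sizes (window_size=3, label_size=5, batch of 4), not taken from the commit:

```python
import numpy as np
import tensorflow as tf

input_placeholder = tf.placeholder(tf.int32, (None, 3), name="input_placeholder")
labels_placeholder = tf.placeholder(tf.float32, (None, 5), name="labels_placeholder")
dropout_placeholder = tf.placeholder(tf.float32, name="dropout_placeholder")

x = np.random.randint(0, 100, size=(4, 3))      # word indices, one window per row
y = np.eye(5)[np.random.randint(0, 5, size=4)]  # one-hot labels

feed_dict = {
    input_placeholder: x,
    labels_placeholder: y,       # omitted at prediction time, as in create_feed_dict
    dropout_placeholder: 0.9,    # keep probability, mirroring Config.dropout
}
# any op built on these placeholders can now be evaluated with
# session.run(op, feed_dict=feed_dict)
```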

@@ -148,7 +165,13 @@ def add_embedding(self):
     # The embedding lookup is currently only implemented for the CPU
     with tf.device('/cpu:0'):
       ### YOUR CODE HERE
-      raise NotImplementedError
+      # init the embeddings to random values between -1 and 1 for our chosen
+      # vocabulary (len(self.wv)) and for our embed_size (50) from the configuration
+      embeddings = tf.Variable(tf.random_uniform([len(self.wv), \
+        self.config.embed_size], -1, 1))
+
+      window = tf.reshape(tf.nn.embedding_lookup(embeddings, \
+        self.input_placeholder), (-1, self.config.window_size * self.config.embed_size))
       ### END YOUR CODE
     return window
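What the lookup and reshape above produce: tf.nn.embedding_lookup gathers one embedding row per index, giving a (batch, window_size, embed_size) tensor, and the reshape concatenates each window's vectors into a single feature row. A toy-sized sketch (vocabulary 10, embed_size 4, window_size 3; not the commit's code):

```python
import numpy as np
import tensorflow as tf

embeddings = tf.Variable(tf.random_uniform([10, 4], -1, 1))   # 10 words, embed_size 4
input_placeholder = tf.placeholder(tf.int32, (None, 3))       # window_size 3

# lookup -> (batch, 3, 4); reshape -> (batch, 12)
window = tf.reshape(tf.nn.embedding_lookup(embeddings, input_placeholder), (-1, 3 * 4))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(window, feed_dict={input_placeholder: np.array([[1, 5, 7]])})
    print(out.shape)  # (1, 12)
```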

@@ -180,9 +203,48 @@ def add_model(self, window):
       output: tf.Tensor of shape (batch_size, label_size)
     """
     ### YOUR CODE HERE
-    raise NotImplementedError
+    xavier_initializer = xavier_weight_init()
+
+    def weight_init(name, shape):
+      #weight = tf.get_variable(name, shape, tf.float32)
+      weight = xavier_initializer(shape)
+      return weight
+
+    def bias_init(name, shape):
+      return tf.get_variable(name, shape, \
+        tf.float32) #, initializer=tf.random_normal_initializer)
+
+    with tf.variable_scope("hidden_layer"):
+
+      weights = weight_init("weights", \
+        (self.config.window_size * self.config.embed_size, self.config.hidden_size))
+      biases = bias_init("biases", self.config.hidden_size)
+      # using sigmoid?
+      # Dropout our weights
+      weights = tf.nn.dropout(weights, self.config.dropout, name="hidden_weights_dropout")
+      hidden_out = tf.add(tf.matmul(window, weights), biases)
+      weight_regularization = tf.get_variable("weight_regularization", 1)
+      weight_regularization = self.config.l2 * tf.reduce_sum((weights**2))/2
+      tf.add_to_collection("total_loss", weight_regularization)
+      print("Reg 1")
+      print(tf.get_collection("total_loss"))
+    with tf.variable_scope("answer_layer"):
+      weights = weight_init("weights", \
+        (self.config.hidden_size, self.config.label_size))
+      biases = bias_init("biases", self.config.label_size)
+      output = tf.get_variable("output", (self.config.window_size, self.config.label_size))
+      # Dropout our weights
+      weights = tf.nn.dropout(weights, self.config.dropout, name="hidden_weights_dropout")
+      # the softmax is applied later on based on the results of this model
+      output = tf.add(tf.matmul(hidden_out, weights), biases)
+      weight_regularization = tf.get_variable("weight_regularization", 1)
+      weight_regularization = self.config.l2 * tf.reduce_sum((weights**2))/2
+      tf.add_to_collection("total_loss", weight_regularization)
+      print("Reg 2")
+      print(tf.get_collection("total_loss"))
+
     ### END YOUR CODE
-    return output
+    return output

   def add_loss_op(self, y):
     """Adds cross_entropy_loss ops to the computational graph.
@@ -195,7 +257,14 @@ def add_loss_op(self, y):
       loss: A 0-d tensor (scalar)
     """
     ### YOUR CODE HERE
-    raise NotImplementedError
+    # TODO add the two weight regularizations from the model. Use variable
+    # scope to get their values
+    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.labels_placeholder,
+                                                                   logits=y))
+    loss += tf.get_collection("total_loss")
+    print("Reg total")
+    print(tf.get_collection("total_loss"))
+    tf.add_to_collection("total_loss", loss)
     ### END YOUR CODE
     return loss
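Worth flagging in this hunk: `loss += tf.get_collection("total_loss")` adds the whole collection, a Python list of tensors, to the scalar cross-entropy rather than summing its elements, and the combined loss is then appended back into the same collection it reads from. The usual pattern keeps the regularization terms in their own collection and sums them with tf.add_n; a minimal sketch under that assumption (the collection name and helper signature are mine, not the commit's):

```python
import tensorflow as tf

def add_loss_op(labels, logits, reg_collection="regularization"):
    # mean cross-entropy over the batch
    data_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
    reg_terms = tf.get_collection(reg_collection)   # list of scalar L2 tensors
    if reg_terms:
        data_loss += tf.add_n(reg_terms)            # sum of scalars stays a 0-d tensor
    return data_loss
```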

@@ -204,7 +273,7 @@ def add_training_op(self, loss):

     Creates an optimizer and applies the gradients to all trainable variables.
     The Op returned by this function is what must be passed to the
-    `sess.run()` call to cause the model to train. See 
+    `sess.run()` call to cause the model to train. See

     https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer

@@ -219,7 +288,9 @@ def add_training_op(self, loss):
       train_op: The Op for training.
     """
     ### YOUR CODE HERE
-    raise NotImplementedError
+
+    adamOptimizer = tf.train.AdamOptimizer(self.config.lr)
+    train_op = adamOptimizer.minimize(loss)
     ### END YOUR CODE
     return train_op
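minimize(loss) is shorthand for the optimizer's two steps, compute_gradients and apply_gradients. A toy standalone sketch of the same training op (illustrative only, not from the commit):

```python
import tensorflow as tf

lr = 0.001                 # Config.lr
x = tf.Variable(5.0)
loss = tf.square(x)        # toy loss

optimizer = tf.train.AdamOptimizer(lr)
grads_and_vars = optimizer.compute_gradients(loss)    # what minimize() does first
train_op = optimizer.apply_gradients(grads_and_vars)  # then this

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(100):
        sess.run(train_op)
    print(sess.run(x))  # slightly below the initial 5.0
```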

@@ -252,6 +323,7 @@ def run_epoch(self, session, input_data, input_labels,
           data_iterator(orig_X, orig_y, batch_size=self.config.batch_size,
                         label_size=self.config.label_size, shuffle=shuffle)):
       feed = self.create_feed_dict(input_batch=x, dropout=dp, label_batch=y)
+
       loss, total_correct, _ = session.run(
           [self.loss, self.correct_predictions, self.train_op],
           feed_dict=feed)

@@ -333,7 +405,7 @@ def test_NER():
   with tf.Graph().as_default():
     model = NERModel(config)

-    init = tf.initialize_all_variables()
+    init = tf.global_variables_initializer()
     saver = tf.train.Saver()

     with tf.Session() as session:

@@ -346,7 +418,7 @@ def test_NER():
         start = time.time()
         ###
         train_loss, train_acc = model.run_epoch(session, model.X_train,
-                                                model.y_train)
+                                                model.y_train, verbose=100)
         val_loss, predictions = model.predict(session, model.X_dev, model.y_dev)
         print 'Training loss: {}'.format(train_loss)
         print 'Training acc: {}'.format(train_acc)

@@ -356,15 +428,16 @@ def test_NER():
           best_val_epoch = epoch
           if not os.path.exists("./weights"):
             os.makedirs("./weights")
-
+
           saver.save(session, './weights/ner.weights')
         if epoch - best_val_epoch > config.early_stopping:
+          print("I am stopping early. {0} - {1} = {2}".format(epoch, best_val_epoch, epoch - best_val_epoch))
           break
         ###
         confusion = calculate_confusion(config, predictions, model.y_dev)
         print_confusion(confusion, model.num_to_tag)
         print 'Total time: {}'.format(time.time() - start)
-
+
         saver.restore(session, './weights/ner.weights')
         print 'Test'
         print '=-=-='
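The print added in the last hunk exposes the early-stopping bookkeeping: the best validation epoch is remembered (and its weights saved), and training stops once the gap to the current epoch exceeds Config.early_stopping. A standalone sketch of that logic with fabricated validation losses (illustrative only, not the commit's code):

```python
best_val_loss = float("inf")
best_val_epoch = 0
early_stopping = 3   # as set in Config by this commit

for epoch, val_loss in enumerate([0.32, 0.29, 0.30, 0.31, 0.33, 0.34, 0.35]):
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_val_epoch = epoch   # in q2_NER.py the weights are saved at this point
    if epoch - best_val_epoch > early_stopping:
        print("I am stopping early. {0} - {1} = {2}".format(
            epoch, best_val_epoch, epoch - best_val_epoch))
        break
```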

assignment2/q2_initialization.py

Lines changed: 26 additions & 5 deletions
@@ -24,7 +24,12 @@ def _xavier_initializer(shape, **kwargs):
       out: tf.Tensor of specified shape sampled from Xavier distribution.
     """
     ### YOUR CODE HERE
-    raise NotImplementedError
+    #import pdb; pdb.set_trace()
+    eta = tf.sqrt(6.0) / tf.sqrt(tf.to_float(sum(shape)))
+    with tf.variable_scope(''.join(map(str, shape))):
+      out = tf.get_variable("xavier_weights", shape, \
+        initializer=tf.random_uniform_initializer(-eta, eta))
+      tf.get_variable_scope().reuse_variables()
     ### END YOUR CODE
     return out
   # Returns defined initializer function.
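The bound used above is the Xavier/Glorot uniform limit, eta = sqrt(6 / sum(shape)), which for a 2-D weight matrix is sqrt(6 / (fan_in + fan_out)). A small standalone check that sampled values stay inside [-eta, eta] (mine, separate from the test added below):

```python
import numpy as np
import tensorflow as tf

shape = (100, 200)
eta = np.sqrt(6.0 / sum(shape))   # sqrt(6 / (fan_in + fan_out)) ~= 0.1414

weights = tf.get_variable("xavier_check", shape,
                          initializer=tf.random_uniform_initializer(-eta, eta))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    values = sess.run(weights)
    print(np.abs(values).max() <= eta)  # True: every entry lies inside [-eta, eta]
```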
@@ -46,16 +51,32 @@ def test_initialization_basic():
   print "Basic (non-exhaustive) Xavier initialization tests pass\n"

 def test_initialization():
-  """ 
+  """
   Use this space to test your Xavier initialization code by running:
-      python q1_initialization.py 
+      python q1_initialization.py
   This function will not be called by the autograder, nor will
   your tests be graded.
   """
   print "Running your tests..."
   ### YOUR CODE HERE
-  raise NotImplementedError
-  ### END YOUR CODE
+  with tf.variable_scope("init_tests"):
+    xavier_initializer = xavier_weight_init()
+    shape = (1, 2, 3)
+    xavier_mat = xavier_initializer(shape)
+    print(xavier_mat)
+    shape = (10000, 20, 30)
+    large_mat = xavier_initializer(shape)
+    init = tf.initialize_all_variables()
+    sess = tf.Session()
+    sess.run(init)
+    print(sess.run(xavier_mat))
+    print(sess.run(large_mat))
+
+
+
+
+  ### END YOUR CODE

 if __name__ == "__main__":
   test_initialization_basic()
+  test_initialization()

0 commit comments
