# day2-2.py
import numpy as np
import gzip
import pickle
import cntk as C
from cntk.layers import Dense, Dropout, BatchNormalization
from os.path import expanduser, join


def load_data(filepath: str):
    # mnist.pkl.gz holds (train, validation, test) tuples; the validation
    # split is used as the test set here and the final tuple is discarded.
    with gzip.open(filepath, 'rb') as f:
        (x_train, y_train), (x_test, y_test), __ = pickle.load(f, encoding='latin-1')
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train = x_train.reshape((len(x_train), 28, 28))
    x_test = x_test.reshape((len(x_test), 28, 28))
    y_train = ohe_labels(y_train, 10)
    y_test = ohe_labels(y_test, 10)
    print(f"Shape of train data {x_train.shape} and truth {y_train.shape}")
    print(f"Shape of test data {x_test.shape} and truth {y_test.shape}")
    return x_train, x_test, y_train, y_test


def ohe_labels(a, nb_class):
    # One-hot encode an integer label vector into an (N, nb_class) float32 matrix.
    tmp = np.zeros((a.shape[0], nb_class))
    tmp[np.arange(a.shape[0]), a] = 1
    return tmp.astype(np.float32)
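
# For example (a quick illustration): ohe_labels(np.array([1, 3]), 5) returns
#   [[0., 1., 0., 0., 0.],
#    [0., 0., 0., 1., 0.]]
# as a float32 array.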

home = expanduser("~")
dataset_file_path = join(home, "OneDrive/Work/SAFORO Internal Training/Deep learning with CNTK/datasets/mnist/mnist.pkl.gz")
x_train, x_test, y_train, y_test = load_data(dataset_file_path)
total_number_of_samples = x_train.shape[0]
mini_batch_size = 16
total_train_batches = int(total_number_of_samples / mini_batch_size)
total_nb_test_samples = int(x_test.shape[0] / 10)  # use only a tenth of the test set to keep evaluation quick
total_test_batches = int(total_nb_test_samples / mini_batch_size)
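# Assuming the standard mnist.pkl.gz splits (50,000 train / 10,000 validation),
# this gives 50000 / 16 = 3125 training batches and 10000 / 10 / 16 = 62 test
# batches (integer division, so a few trailing samples per split are dropped).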
# ==================================================
# ---------------- MODEL BUILDING ------------------
# ==================================================
# Only CNTK Computation Nodes to be used from here on
image_tensor = C.input_variable((28, 28), name="image")
ground_truth_tensor = C.input_variable(10, name="ground_truth")
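# Note: input_variable shapes describe one sample only; CNTK adds the dynamic
# batch axis automatically, so it never appears in the declared shape.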
# A dropout layer encourages the model not to learn spurious relationships, i.e. not to overfit
dropped_image = Dropout(0.3)(image_tensor)
hidden_layer_output = Dense(shape=(16, ), activation=C.tanh)(dropped_image)
batch_normed_hidden = BatchNormalization()(hidden_layer_output)
dropped_hidden = Dropout(0.1)(batch_normed_hidden)
# activation is None because the loss function already applies softmax!
output_tensor = Dense(shape=(10, ), activation=None)(dropped_hidden)
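# Optional sanity check of the architecture, if desired, via CNTK's logging helper:
#   C.logging.log_number_of_parameters(output_tensor)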
loss = C.cross_entropy_with_softmax(output_tensor, ground_truth_tensor) # Used to learn parameters
metric = C.classification_error(output_tensor, ground_truth_tensor) # Not used to learn parameters
# Always a good idea to add some regularisation to your optimiser
lr = [0.001] * 5 + [0.0001] * 10 + [0.00005]
lr_schedule = C.learning_parameter_schedule(lr, minibatch_size=mini_batch_size, epoch_size=100)
adam = C.adam(output_tensor.parameters, lr_schedule, momentum=0.912, l2_regularization_weight=0.001)  # optimisation scheme with L2 weight decay
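# With epoch_size=100, the schedule advances one list entry per 100 samples
# seen: 0.001 for the first 500 samples, 0.0001 for the next 1,000, and
# 0.00005 for the remainder of training.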
pp = C.logging.ProgressPrinter(freq=0, log_to_file="mnist_dense_log.txt")  # freq=0: log with geometrically increasing frequency (1, 2, 4, ...)
trainer = C.Trainer(output_tensor, (loss, metric), [adam], progress_writers=pp)
# ==================================================
# ------------------- TRAINING ---------------------
# ==================================================
# an epoch is commonly defined as one full sweep over the training set
nb_epochs = 5
for e in range(nb_epochs):
    for mb_idx in range(total_train_batches):
        lbound, ubound = mb_idx * mini_batch_size, (mb_idx + 1) * mini_batch_size
        data_x = x_train[lbound: ubound, ...]  # first dimension is the batch axis
        data_y = y_train[lbound: ubound, ...]
        trainer.train_minibatch({image_tensor: data_x,
                                 ground_truth_tensor: data_y})
        # print(f"loss: {trainer.previous_minibatch_loss_average:.4f}, metric: {trainer.previous_minibatch_evaluation_average}")
    trainer.summarize_training_progress()
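    # NOTE: minibatches are drawn in the same order every epoch. Shuffling the
    # training set between epochs usually helps; a minimal sketch (an addition,
    # not part of the original script):
    #   perm = np.random.permutation(total_number_of_samples)
    #   x_train, y_train = x_train[perm], y_train[perm]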

for test_idx in range(total_test_batches):
    lbound, ubound = test_idx * mini_batch_size, (test_idx + 1) * mini_batch_size
    trainer.test_minibatch({image_tensor: x_test[lbound:ubound, ...],
                            ground_truth_tensor: y_test[lbound:ubound, ...]})
trainer.summarize_test_progress()
output_tensor.save("mnist_dense.model")
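
# The saved model can be reloaded later for inference; a minimal sketch
# (argument handling shown as an assumption, untested here):
#   model = C.Function.load("mnist_dense.model")
#   probs = C.softmax(model).eval({model.arguments[0]: x_test[:1]})
#   print(np.argmax(probs))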