# day2-4-2.py
import numpy as np
import cntk as C
from cntk.layers import Dense
from os.path import expanduser, join
import pandas as pd
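# Dataset: assumed to be the Kaggle credit-card fraud CSV (creditcard.csv),
# with PCA-transformed features plus Time, Amount and a binary Class label
# (1 = fraud). Only 'Class' is used here, to split normal from abnormal rows.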
def load_creditcardfraud(data_filepath):
    """Load the credit-card fraud CSV and split it into normal/abnormal feature arrays."""
    df = pd.read_csv(data_filepath)
    df = df.drop(['Time'], axis=1)  # elapsed-seconds counter, not a useful feature here
    df_normal = df[df['Class'] == 0]    # Class 0: legitimate transactions
    df_abnormal = df[df['Class'] == 1]  # Class 1: fraudulent transactions
    data_df_normal = df_normal.drop(['Class'], axis=1)
    data_df_abnormal = df_abnormal.drop(['Class'], axis=1)
    # CNTK expects C-contiguous float32 arrays
    normal_x = np.ascontiguousarray(data_df_normal.values, dtype=np.float32)
    abnormal_x = np.ascontiguousarray(data_df_abnormal.values, dtype=np.float32)
    print(f"There are {normal_x.shape[0]} normal and {abnormal_x.shape[0]} abnormal samples")
    return normal_x, abnormal_x
home = expanduser("~")
dataset_file_path = join(home, "OneDrive/Work/SAFORO Internal Training/Deep learning with CNTK/datasets/creditcard.csv")
normal_x, abnormal_x = load_creditcardfraud(dataset_file_path)
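# Note: 'Amount' sits on a much larger scale than the PCA features, and
# standardising often stabilises autoencoder training. A minimal optional
# sketch (an assumption, not in the original script; statistics from the
# normal set only, to avoid leakage):
#   mu, sigma = normal_x.mean(axis=0), normal_x.std(axis=0) + 1e-8
#   normal_x, abnormal_x = (normal_x - mu) / sigma, (abnormal_x - mu) / sigma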
total_number_of_samples = normal_x.shape[0]
mini_batch_size = 8
total_train_batches = int(total_number_of_samples / mini_batch_size)
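# Integer division drops the final partial batch (at most 7 leftover rows here)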
# ==================================================
# ---------------- MODEL BUILDING ------------------
# ==================================================
# Only CNTK Computation Nodes to be used from here on
input_tensor = C.input_variable(29, name="input_tensor")
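# 29 features per transaction: the 31 CSV columns minus 'Time' and 'Class'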
# Encoder: progressively compress the 29 input features toward a narrow code
encode1 = Dense(shape=(20, ), activation=C.relu)(input_tensor)
encode2 = Dense(shape=(15, ), activation=C.relu)(encode1)
encode3 = Dense(shape=(8, ), activation=C.relu)(encode2)
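# The 8-unit bottleneck forces the network to learn a compressed code of
# normal traffic only; fraud should reconstruct poorly and thus score high.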
# Decoder mirrors the encoder; the output layer below is linear (activation=None)
# because we reconstruct real-valued features and train with squared error
decode1 = Dense(shape=(15,), activation=C.relu)(encode3)
decode2 = Dense(shape=(20,), activation=C.relu)(decode1)
decoded_tensor = Dense(shape=(29,), activation=None, name='reconstructed')(decode2)
anomaly_scorer = C.squared_error(decoded_tensor, input_tensor, name="anomaly_score")
loss = C.squared_error(decoded_tensor, input_tensor) # Used to learn parameters
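# anomaly_scorer and loss are the same squared-error expression (summed over
# the feature axis); the named copy is kept so the score node can be located
# by name after the saved model is reloaded.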
# Always a good idea to add some regularisation to your optimiser
# Learning-rate schedule: each list entry applies for epoch_size (=100) samples
lr = [0.001] * 5 + [0.0001] * 10 + [0.00005]
lr_schedule = C.learning_parameter_schedule(lr, minibatch_size=mini_batch_size, epoch_size=100)
adam = C.adam(decoded_tensor.parameters, lr_schedule, momentum=0.912,
              l2_regularization_weight=0.001)  # optimisation scheme
pp = C.logging.ProgressPrinter(freq=10, log_to_file="autoencoder_creditcard_log.txt")
trainer = C.Trainer(decoded_tensor, (loss, ), [adam], progress_writers=pp)
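# Trainer wiring: (loss, ) supplies a training loss and no separate evaluation
# metric; the ProgressPrinter aggregates and logs every 10 minibatches to file.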
# ==================================================
# ==================================================
# An epoch is commonly defined as one full sweep over the training set
nb_epochs = 5
for e in range(nb_epochs):
    for mb_idx in range(total_train_batches):
        lbound, ubound = mb_idx * mini_batch_size, (mb_idx + 1) * mini_batch_size
        data_x = normal_x[lbound: ubound, ...]  # first dimension is the batch axis
        trainer.train_minibatch({input_tensor: data_x})
        # print(f"loss: {trainer.previous_minibatch_loss_average:.4f}")
    trainer.summarize_training_progress()
anomaly_scorer.save("autoencoder_creditcard.model")
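# A minimal reload sketch (an assumption, not in the original script):
#   loaded = C.load_model("autoencoder_creditcard.model")
#   scores = loaded.eval({loaded.arguments[0]: abnormal_x[:5]})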
# Evaluation: score a handful of fraudulent (abnormal) samples from the dataset
anomaly_scores = anomaly_scorer.eval({input_tensor: abnormal_x[:5]})
print(f"Anomaly scores on the first five abnormal samples: {anomaly_scores}")