"""Build the text file lists consumed by the data generators.

For every environment / label device / data part, the per-frame label array is
loaded from ``<main_dir>/labels/<env>/<device><part>.npy`` and one line per
frame is appended to two list files:

* ``hand_head_all_test.txt``: ``<hand image> <head image> <label>``
* ``hand_all_test.txt``:      ``<hand image> <label>``
"""
import os
import numpy as np

main_dir = './resize_data/frames/'
envs = ['house', 'lab', 'office']

label_name = 'labels/'
label_device = ['obj_left', 'obj_right']
# Head-only variant: label_device = ['obj_left', 'obj_left']

data_name = 'train/'
data_part = ['1', '2', '3', '4']
# Test-split variant: data_name = 'test/' with data_part = ['4', '5', '6', '7', '8']
data_device = ['Lhand', 'Rhand']
# Head-only variant: data_device = ['head', 'head'].  The entry is duplicated so
# that the amount of head data equals Lhand + Rhand.
#
# Note on head data: the left-hand label is arbitrarily used as the head label.
# The head label is not used by the parallel structure, so the choice does not
# matter.

# NOTE(review): the active configuration reads the 'train/' split but the
# default output names below end in "_test.txt" -- confirm this is intended.


def write_file_lists(root=main_dir,
                     pair_list="hand_head_all_test.txt",
                     hand_list="hand_all_test.txt"):
    """Append one line per labelled frame to the two list files.

    Args:
        root: dataset root, joined by plain string concatenation, so it must
            end with '/'.
        pair_list: output file receiving "<hand> <head> <label>" lines.
        hand_list: output file receiving "<hand> <label>" lines.

    Returns:
        Number of frames written (each frame yields one line in each file).
    """
    total = 0
    # Open both outputs once instead of once per frame; append mode is kept so
    # repeated runs keep extending the same files as before.
    with open(pair_list, "a") as pair_file, open(hand_list, "a") as hand_file:
        for env in envs:
            for idx, device in enumerate(label_device):
                for part in data_part:
                    # Part '4' is only processed for the 'lab' environment.
                    if env != 'lab' and part == '4':
                        continue

                    label_path = root + label_name + env + '/' + device + part + '.npy'
                    label_array = np.load(label_path)
                    print('now reading %s' % label_path)

                    frame_dir = root + data_name + env + '/' + part + '/'
                    for i, label in enumerate(label_array):
                        # Frame files are numbered from 1.
                        image_name = 'Image' + str(i + 1) + '.png'
                        f_dir = frame_dir + data_device[idx] + '/' + image_name
                        f_head_dir = frame_dir + 'head' + '/' + image_name
                        cores_label = str(int(label))

                        pair_file.write(f_dir + ' ' + f_head_dir + ' ' + cores_label + '\n')
                        hand_file.write(f_dir + ' ' + cores_label + '\n')
                        total += 1
    return total


if __name__ == "__main__":
    total_train_num = write_file_lists()
    print('wrote %d samples' % total_train_num)
class AlexNet(object):
    """TensorFlow graph for AlexNet with a configurable number of classes.

    The graph is built on construction. ``fc6`` (activations of the sixth
    layer, before dropout) and ``fc8`` (unscaled class scores) are exposed as
    attributes. Layer variables are created through ``tf.get_variable`` under
    fixed scope names, so a second instance in the same graph reuses the
    variables of the first one.
    """

    def __init__(self, x, keep_prob, num_classes, skip_layer,
                 weights_path='DEFAULT'):
        """Store the configuration and build the graph.

        Args:
            x: input image tensor fed to the first convolution.
            keep_prob: keep probability handed to ``tf.nn.dropout`` after
                fc6 and fc7.
            num_classes: number of output units of fc8.
            skip_layer: names of layers that ``load_initial_weights`` must
                NOT overwrite with pretrained values.
            weights_path: path of the pretrained ``.npy`` file; the string
                'DEFAULT' selects 'bvlc_alexnet.npy'.
        """
        self.X = x
        self.NUM_CLASSES = num_classes
        self.KEEP_PROB = keep_prob
        self.SKIP_LAYER = skip_layer

        if weights_path == 'DEFAULT':
            self.WEIGHTS_PATH = 'bvlc_alexnet.npy'
        else:
            self.WEIGHTS_PATH = weights_path

        # Build the computational graph.
        self.create()

    def create(self):
        """Assemble the eight AlexNet layers on top of ``self.X``."""
        # 1st layer: Conv (w ReLu) -> Pool -> Lrn
        conv1 = conv(self.X, 11, 11, 96, 4, 4, padding='VALID', name='conv1')
        pool1 = max_pool(conv1, 3, 3, 2, 2, padding='VALID', name='pool1')
        norm1 = lrn(pool1, 2, 2e-05, 0.75, name='norm1')

        # 2nd layer: Conv (w ReLu) -> Pool -> Lrn, with 2 groups
        conv2 = conv(norm1, 5, 5, 256, 1, 1, groups=2, name='conv2')
        pool2 = max_pool(conv2, 3, 3, 2, 2, padding='VALID', name='pool2')
        norm2 = lrn(pool2, 2, 2e-05, 0.75, name='norm2')

        # 3rd layer: Conv (w ReLu)
        conv3 = conv(norm2, 3, 3, 384, 1, 1, name='conv3')

        # 4th layer: Conv (w ReLu), split into two groups
        conv4 = conv(conv3, 3, 3, 384, 1, 1, groups=2, name='conv4')

        # 5th layer: Conv (w ReLu) -> Pool, split into two groups
        conv5 = conv(conv4, 3, 3, 256, 1, 1, groups=2, name='conv5')
        pool5 = max_pool(conv5, 3, 3, 2, 2, padding='VALID', name='pool5')

        # 6th layer: Flatten -> FC (w ReLu) -> Dropout.  The flattened size
        # 6*6*256 is hard-coded, so pool5 must produce exactly that many values.
        flattened = tf.reshape(pool5, [-1, 6 * 6 * 256])
        fc6 = fc(flattened, 6 * 6 * 256, 4096, name='fc6')
        dropout6 = dropout(fc6, self.KEEP_PROB)
        # Expose the pre-dropout fc6 activations as a feature output.
        self.fc6 = fc6

        # 7th layer: FC (w ReLu) -> Dropout
        fc7 = fc(dropout6, 4096, 4096, name='fc7')
        dropout7 = dropout(fc7, self.KEEP_PROB)

        # 8th layer: FC returning unscaled activations
        # (for tf.nn.softmax_cross_entropy_with_logits)
        self.fc8 = fc(dropout7, 4096, self.NUM_CLASSES, relu=False, name='fc8')

    def load_initial_weights(self, session):
        """Assign pretrained values to every layer not listed in SKIP_LAYER.

        The weights file from http://www.cs.toronto.edu/~guerzhoy/tf_alexnet/
        stores a dict of lists (``weights['conv1']`` is a list of arrays), not
        a dict of dicts keyed by 'weights'/'biases', so each array is routed
        by its rank: rank-1 arrays are biases, everything else is weights.

        Args:
            session: TensorFlow session used to run the assign ops.
        """
        # The file holds a pickled dict; NumPy >= 1.16.3 refuses to unpickle
        # unless allow_pickle=True is passed.
        # NOTE: unpickling executes code from the file -- only load weights
        # files obtained from a trusted source.
        weights_dict = np.load(self.WEIGHTS_PATH, encoding='bytes',
                               allow_pickle=True).item()

        for op_name in weights_dict:
            # Layers listed in SKIP_LAYER keep their fresh initialisation.
            if op_name in self.SKIP_LAYER:
                continue

            with tf.variable_scope(op_name, reuse=True):
                for data in weights_dict[op_name]:
                    var_name = 'biases' if len(data.shape) == 1 else 'weights'
                    var = tf.get_variable(var_name, trainable=False)
                    session.run(var.assign(data))


# ---------------------------------------------------------------------------
# Layer helpers used by AlexNet
# ---------------------------------------------------------------------------
def conv(x, filter_height, filter_width, num_filters, stride_y, stride_x, name,
         padding='SAME', groups=1):
    """Convolution + bias + ReLU, optionally split into channel groups.

    Adapted from: https://github.com/ethereon/caffe-tensorflow

    Args:
        x: input tensor; its last dimension is the channel count and must be
            statically known.
        filter_height, filter_width: kernel size.
        num_filters: number of output channels.
        stride_y, stride_x: strides along the two spatial axes.
        name: variable scope holding 'weights' and 'biases'.
        padding: padding mode passed to ``tf.nn.conv2d``.
        groups: number of channel groups convolved independently.

    Returns:
        The ReLU-activated output tensor.
    """
    input_channels = int(x.get_shape()[-1])

    def convolve(inputs, kernel):
        return tf.nn.conv2d(inputs, kernel,
                            strides=[1, stride_y, stride_x, 1],
                            padding=padding)

    with tf.variable_scope(name) as scope:
        try:
            # Floor division: a variable shape must consist of integers, and
            # '/' yields a float under Python 3.
            weights = tf.get_variable(
                'weights',
                shape=[filter_height, filter_width,
                       input_channels // groups, num_filters])
            biases = tf.get_variable('biases', shape=[num_filters])
        except ValueError:
            # The variables already exist (a second network is being built in
            # the same graph): switch the scope to reuse and share them.
            scope.reuse_variables()
            weights = tf.get_variable('weights')
            biases = tf.get_variable('biases')

        if groups == 1:
            conv_out = convolve(x, weights)
        else:
            # Split input and weights along the channel axis, convolve each
            # pair separately and concatenate the results again.
            input_groups = tf.split(value=x, num_or_size_splits=groups, axis=3)
            weight_groups = tf.split(value=weights,
                                     num_or_size_splits=groups, axis=3)
            output_groups = [convolve(i, k)
                             for i, k in zip(input_groups, weight_groups)]
            conv_out = tf.concat(axis=3, values=output_groups)

        # bias_add keeps the shape of its input, so no reshape is needed (the
        # former reshape to the static shape failed for an unknown batch size).
        biased = tf.nn.bias_add(conv_out, biases)

        return tf.nn.relu(biased, name=scope.name)


def fc(x, num_in, num_out, name, relu=True):
    """Fully connected layer ``x @ weights + biases`` with optional ReLU.

    Args:
        x: 2-D input tensor.
        num_in: number of input features.
        num_out: number of output units.
        name: variable scope holding 'weights' and 'biases'.
        relu: apply a ReLU to the result when truthy.

    Returns:
        The (optionally activated) output tensor.
    """
    with tf.variable_scope(name) as scope:
        try:
            weights = tf.get_variable('weights', shape=[num_in, num_out],
                                      trainable=True)
            biases = tf.get_variable('biases', [num_out], trainable=True)
        except ValueError:
            # Variables already exist in this scope: share them.
            scope.reuse_variables()
            weights = tf.get_variable('weights')
            biases = tf.get_variable('biases')

        act = tf.nn.xw_plus_b(x, weights, biases, name=scope.name)

        if relu:
            return tf.nn.relu(act)
        return act


def max_pool(x, filter_height, filter_width, stride_y, stride_x, name,
             padding='SAME'):
    """Max pooling over the two spatial axes."""
    return tf.nn.max_pool(x, ksize=[1, filter_height, filter_width, 1],
                          strides=[1, stride_y, stride_x, 1],
                          padding=padding, name=name)


def lrn(x, radius, alpha, beta, name, bias=1.0):
    """Local response normalization."""
    return tf.nn.local_response_normalization(x, depth_radius=radius,
                                              alpha=alpha, beta=beta,
                                              bias=bias, name=name)


def dropout(x, keep_prob):
    """Dropout; ``keep_prob`` is passed straight to ``tf.nn.dropout``."""
    return tf.nn.dropout(x, keep_prob)
class ImageDataGenerator:
    """Sequential batch loader for (hand image, head image, label) samples.

    The sample list is a text file with one sample per line:
    ``<hand image path> <head image path> <integer label>``.
    """

    def __init__(self, class_list, horizontal_flip=False, shuffle=False,
                 mean=np.array([104., 117., 124.]), scale_size=(227, 227),
                 nb_classes=24):
        """Read the sample list and optionally shuffle it.

        Args:
            class_list: path of the sample list file.
            horizontal_flip: randomly mirror each loaded image (p = 0.5).
            shuffle: shuffle on construction and on every ``reset_pointer``.
            mean: per-channel value subtracted from every image.
            scale_size: (rows, cols) of the returned images.
            nb_classes: width of the one-hot label vectors.
        """
        self.horizontal_flip = horizontal_flip
        self.n_classes = nb_classes
        self.shuffle = shuffle
        self.mean = mean
        self.scale_size = scale_size
        self.pointer = 0

        self.read_class_list(class_list)

        if self.shuffle:
            self.shuffle_data()

    def read_class_list(self, class_list):
        """Parse the sample list into parallel path / label lists."""
        self.hand_images = []
        self.head_images = []
        self.labels = []
        with open(class_list) as f:
            for line in f:
                items = line.split()
                # Tolerate blank lines (e.g. a trailing newline at file end).
                if not items:
                    continue
                self.hand_images.append(items[0])
                self.head_images.append(items[1])
                self.labels.append(int(items[2]))

        # Store total number of samples.
        self.data_size = len(self.labels)

    def shuffle_data(self):
        """Apply one random permutation to all three parallel lists."""
        order = np.random.permutation(len(self.labels))
        self.hand_images = [self.hand_images[i] for i in order]
        self.head_images = [self.head_images[i] for i in order]
        self.labels = [self.labels[i] for i in order]

    def reset_pointer(self):
        """Rewind to the first sample, reshuffling when shuffle is enabled."""
        self.pointer = 0

        if self.shuffle:
            self.shuffle_data()

    def _load_image(self, path):
        """Read one image, optionally flip it, resize it, subtract the mean."""
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Could not read image: %s' % path)

        if self.horizontal_flip and np.random.random() < 0.5:
            img = cv2.flip(img, 1)

        # cv2.resize expects dsize as (width, height) while scale_size is
        # (rows, cols) of the batch buffer, so the two entries are swapped.
        img = cv2.resize(img, (self.scale_size[1], self.scale_size[0]))
        img = img.astype(np.float32)
        img -= self.mean
        return img

    @staticmethod
    def _one_hot(labels, batch_size, n_classes):
        """Return a (batch_size, n_classes) one-hot matrix for ``labels``.

        Rows beyond ``len(labels)`` stay all-zero.
        """
        encoded = np.zeros((batch_size, n_classes))
        rows = np.arange(len(labels))
        encoded[rows, np.asarray(labels, dtype=np.intp)] = 1
        return encoded

    def next_batch(self, batch_size):
        """Load the next ``batch_size`` samples into memory.

        Returns:
            (hand images, head images, one-hot labels). The image arrays have
            shape (batch_size, scale_size[0], scale_size[1], 3). When fewer
            than ``batch_size`` samples remain, the unused trailing rows are
            zero-filled.
        """
        start = self.pointer
        stop = start + batch_size
        hand_paths = self.hand_images[start:stop]
        head_paths = self.head_images[start:stop]
        labels = self.labels[start:stop]

        # Advance the read position.
        self.pointer = stop

        # Zero-initialised so that a short final batch never exposes
        # uninitialised memory.
        shape = [batch_size, self.scale_size[0], self.scale_size[1], 3]
        images_hand = np.zeros(shape)
        images_head = np.zeros(shape)
        for i, (hand_path, head_path) in enumerate(zip(hand_paths, head_paths)):
            images_hand[i] = self._load_image(hand_path)
            images_head[i] = self._load_image(head_path)

        one_hot_labels = self._one_hot(labels, batch_size, self.n_classes)

        return images_hand, images_head, one_hot_labels
class ImageDataGenerator_custom:
    """Sequential batch loader for (image, label) samples.

    The sample list is a text file with one sample per line:
    ``<image path> <integer label>``.
    """

    def __init__(self, class_list, horizontal_flip=False, shuffle=False,
                 mean=np.array([104., 117., 124.]), scale_size=(227, 227),
                 nb_classes=24):
        """Read the sample list and optionally shuffle it.

        Args:
            class_list: path of the sample list file.
            horizontal_flip: randomly mirror each loaded image (p = 0.5).
            shuffle: shuffle on construction and on every ``reset_pointer``.
            mean: per-channel value subtracted from every image.
            scale_size: (rows, cols) of the returned images.
            nb_classes: width of the one-hot label vectors.
        """
        self.horizontal_flip = horizontal_flip
        self.n_classes = nb_classes
        self.shuffle = shuffle
        self.mean = mean
        self.scale_size = scale_size
        self.pointer = 0

        self.read_class_list(class_list)

        if self.shuffle:
            self.shuffle_data()

    def read_class_list(self, class_list):
        """Parse the sample list into parallel path / label lists."""
        self.images = []
        self.labels = []
        with open(class_list) as f:
            for line in f:
                items = line.split()
                # Tolerate blank lines (e.g. a trailing newline at file end).
                if not items:
                    continue
                self.images.append(items[0])
                self.labels.append(int(items[1]))

        # Store total number of samples.
        self.data_size = len(self.labels)

    def shuffle_data(self):
        """Apply one random permutation to both parallel lists."""
        order = np.random.permutation(len(self.labels))
        self.images = [self.images[i] for i in order]
        self.labels = [self.labels[i] for i in order]

    def reset_pointer(self):
        """Rewind to the first sample, reshuffling when shuffle is enabled."""
        self.pointer = 0

        if self.shuffle:
            self.shuffle_data()

    def _load_image(self, path):
        """Read one image, optionally flip it, resize it, subtract the mean."""
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Could not read image: %s' % path)

        if self.horizontal_flip and np.random.random() < 0.5:
            img = cv2.flip(img, 1)

        # cv2.resize expects dsize as (width, height) while scale_size is
        # (rows, cols) of the batch buffer, so the two entries are swapped.
        img = cv2.resize(img, (self.scale_size[1], self.scale_size[0]))
        img = img.astype(np.float32)
        img -= self.mean
        return img

    @staticmethod
    def _one_hot(labels, batch_size, n_classes):
        """Return a (batch_size, n_classes) one-hot matrix for ``labels``.

        Rows beyond ``len(labels)`` stay all-zero.
        """
        encoded = np.zeros((batch_size, n_classes))
        rows = np.arange(len(labels))
        encoded[rows, np.asarray(labels, dtype=np.intp)] = 1
        return encoded

    def next_batch_non2string(self, batch_size):
        """Load the next ``batch_size`` samples into memory.

        Returns:
            (images, one-hot labels). ``images`` has shape
            (batch_size, scale_size[0], scale_size[1], 3). When fewer than
            ``batch_size`` samples remain, the unused trailing rows are
            zero-filled.
        """
        start = self.pointer
        stop = start + batch_size
        paths = self.images[start:stop]
        labels = self.labels[start:stop]

        # Advance the read position.
        self.pointer = stop

        # Zero-initialised so that a short final batch never exposes
        # uninitialised memory.
        images = np.zeros([batch_size, self.scale_size[0], self.scale_size[1], 3])
        for i, path in enumerate(paths):
            images[i] = self._load_image(path)

        one_hot_labels = self._one_hot(labels, batch_size, self.n_classes)

        return images, one_hot_labels
# Full search grid, kept for reference:
# optimizers = ['Adam','GD']
# learning_rates = [0.1,0.01,0.001,0.0001]
# num_epochses = [10,40,100]
# batch_sizes = [16,32,64]
# dropout_rates = [0.1,0.3,0.5,0.7,1.0]

all_choice_loss = []

# How often (in training steps) the tf.summary data is written to disk.
display_step = 1

# Path for tf.summary.FileWriter and to store model checkpoints.
filewriter_path = "C:/Users/GG3BE2/Desktop/winnie/cedl_2017fall/"
checkpoint_path = "C:/Users/GG3BE2/Desktop/winnie/cedl_2017fall/"


def _make_optimizer(opt, learning_rate):
    """Return the TensorFlow optimizer selected by ``opt`` ('Adam' or 'GD')."""
    if opt == 'Adam':
        return tf.train.AdamOptimizer(learning_rate)
    if opt == 'GD':
        return tf.train.GradientDescentOptimizer(learning_rate)
    # Fail loudly instead of printing and hitting a NameError later on.
    raise ValueError('Unknown optimizer: %r' % (opt,))


def run_experiment(opt, learning_rate, num_epochs, batch_size, dropout_rate):
    """Train and validate the two-tower network for one hyper-parameter set.

    Two AlexNet towers (hand image, head image) are built in a fresh graph,
    their fc6 activations are concatenated and fed to ``SimpleNN``, whose
    ``fc8`` output is used as the class score.

    Args:
        opt: optimizer name, 'Adam' or 'GD'.
        learning_rate: optimizer learning rate.
        num_epochs: number of passes over the training list.
        batch_size: samples per batch; also fixes the image placeholder shape.
        dropout_rate: value fed to the ``keep_prob`` placeholder while
            training.  NOTE(review): despite its name it is used as the KEEP
            probability (0.1 keeps only 10% of the units) -- confirm intent.

    Raises:
        ValueError: unknown optimizer, or a data list shorter than one batch.
    """
    tf.reset_default_graph()

    print('opt = %s , lr = %s , epoch = %d , bt_s = %d , dp = %s ' % (
        opt, str(learning_rate), num_epochs, batch_size, str(dropout_rate)))

    # Create the checkpoint directory (including parents) if it is missing.
    if not os.path.isdir(checkpoint_path):
        os.makedirs(checkpoint_path)

    # TF placeholders for graph input and output.
    x = tf.placeholder(tf.float32, [batch_size, 227, 227, 3])
    y = tf.placeholder(tf.float32, [None, num_classes])
    head_x = tf.placeholder(tf.float32, [batch_size, 227, 227, 3])
    keep_prob = tf.placeholder(tf.float32)

    # Hand tower and head tower.
    model = AlexNet(x, keep_prob, num_classes, train_layers)
    model_parall2 = AlexNet(head_x, keep_prob, num_classes, train_layers)

    # Concatenate the fc6 features of both towers and classify them.
    score_parall1 = model.fc6
    score_parall2 = model_parall2.fc6
    model_LastTwo = SimpleNN(tf.concat([score_parall1, score_parall2], 1),
                             keep_prob, num_classes)
    score = model_LastTwo.fc8

    # Variables of the layers we want to train, selected by top-level scope.
    # NOTE(review): SimpleNN's variables are only trained if their scope names
    # appear in train_layers -- verify against simplenn.py.
    var_list = [v for v in tf.trainable_variables()
                if v.name.split('/')[0] in train_layers]

    # Op for calculating the loss.
    with tf.name_scope("cross_ent"):
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=score, labels=y))

    # Train op.
    with tf.name_scope("train"):
        gradients = tf.gradients(loss, var_list)
        gradients = list(zip(gradients, var_list))
        optimizer = _make_optimizer(opt, learning_rate)
        train_op = optimizer.apply_gradients(grads_and_vars=gradients)

    # Add the loss to summary.
    tf.summary.scalar('cross_entropy', loss)

    # Evaluation op: accuracy of the model.
    with tf.name_scope("accuracy"):
        print('score = ', score)
        print('score-shape = ', score.get_shape())
        correct_pred = tf.equal(tf.argmax(score, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Add the accuracy to the summary.
    tf.summary.scalar('accuracy', accuracy)

    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(filewriter_path)

    # Separate data generators for the training and validation set.
    train_generator = ImageDataGenerator(train_file,
                                         horizontal_flip=True, shuffle=True)
    val_generator = ImageDataGenerator(val_file, shuffle=False)

    # Number of full batches per epoch.  Plain Python ints: the former int16
    # cast overflowed for more than 32767 batches.
    train_batches_per_epoch = int(train_generator.data_size // batch_size)
    val_batches_per_epoch = int(val_generator.data_size // batch_size)
    if train_batches_per_epoch < 1 or val_batches_per_epoch < 1:
        raise ValueError(
            'batch_size %d exceeds the size of the training or validation list'
            % batch_size)

    try:
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            # Add the model graph to TensorBoard.
            writer.add_graph(sess.graph)

            # Load the pretrained weights into the layers that are not trained.
            model.load_initial_weights(sess)

            print("{} Start training...".format(datetime.now()))
            print("{} Open Tensorboard at --logdir {}".format(datetime.now(),
                                                              filewriter_path))

            for epoch in range(num_epochs):
                print("{} Epoch number: {}".format(datetime.now(), epoch + 1))

                # One epoch visits every full batch once (the former
                # `while step < train_batches_per_epoch` skipped one batch).
                for step in range(1, train_batches_per_epoch + 1):
                    batch_xs, batch_head_xs, batch_ys = \
                        train_generator.next_batch(batch_size)

                    sess.run([train_op, loss],
                             feed_dict={x: batch_xs,
                                        head_x: batch_head_xs,
                                        y: batch_ys,
                                        keep_prob: dropout_rate})

                    # Write a summary for the current batch.
                    if step % display_step == 0:
                        s = sess.run(merged_summary,
                                     feed_dict={x: batch_xs,
                                                head_x: batch_head_xs,
                                                y: batch_ys,
                                                keep_prob: 1.})
                        writer.add_summary(
                            s, epoch * train_batches_per_epoch + step)

                # Training accuracy is measured on the last batch of the epoch.
                train_accuracy = sess.run(accuracy,
                                          feed_dict={x: batch_xs,
                                                     head_x: batch_head_xs,
                                                     y: batch_ys,
                                                     keep_prob: 1.})
                print(' %d th epoch, training accuracy %g' % (epoch, train_accuracy))

                # Validate the model on the entire validation set.
                print("{} Start validation".format(datetime.now()))
                test_acc = 0.
                test_count = 0
                for _ in range(val_batches_per_epoch):
                    batch_tx, batch_head_tx, batch_ty = \
                        val_generator.next_batch(batch_size)

                    acc = sess.run(accuracy, feed_dict={x: batch_tx,
                                                        head_x: batch_head_tx,
                                                        y: batch_ty,
                                                        keep_prob: 1.})
                    test_acc += acc
                    test_count += 1
                test_acc /= test_count
                print("{} Validation Accuracy = {:.4f}".format(datetime.now(),
                                                               test_acc))

                # Rewind both generators for the next epoch.
                val_generator.reset_pointer()
                train_generator.reset_pointer()

                # NOTE: model checkpointing is currently disabled, so nothing
                # is written to checkpoint_path here.
    finally:
        # Flush pending summaries and release the event file.
        writer.close()


for num_epochs in num_epochses:
    for opt in optimizers:
        for learning_rate in learning_rates:
            for batch_size in batch_sizes:
                for dropout_rate in dropout_rates:
                    run_experiment(opt, learning_rate, num_epochs,
                                   batch_size, dropout_rate)
# Augmentation settings shared by all training batches.
datagen = ImageDataGenerator(featurewise_center=False,
                             samplewise_center=False,
                             featurewise_std_normalization=False,
                             samplewise_std_normalization=False,
                             zca_whitening=False,
                             rotation_range=13.,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             shear_range=0.1,
                             zoom_range=0.1,
                             channel_shift_range=0.,
                             fill_mode='nearest',
                             cval=0.,
                             horizontal_flip=True,
                             vertical_flip=False)


def image_augmentation(X_train, Y_train, batch_size, datagen=datagen):
    """Return one randomly augmented batch drawn from (X_train, Y_train).

    Args:
        X_train: image batch.
        Y_train: labels matching ``X_train``.
        batch_size: size of the batch requested from ``datagen.flow``.
        datagen: Keras ``ImageDataGenerator`` performing the augmentation.

    Returns:
        The first (images, labels) batch produced by ``datagen.flow``.
    """
    datagen.fit(X_train)

    # Draw a single batch with real-time data augmentation.
    return datagen.flow(X_train, Y_train, batch_size=batch_size).next()


def read_as_img(paths, batch_size, scale_size=(277, 277), mean=None):
    """Load the images at ``paths`` into one array, resized and mean-centred.

    Args:
        paths: sequence of image file paths (at most ``batch_size`` of them).
        batch_size: number of rows of the returned array; rows beyond
            ``len(paths)`` are zero-filled.
        scale_size: (rows, cols) of the returned images.
            NOTE(review): the default 277 may be a typo for 227 -- confirm.
        mean: per-channel value subtracted from each image; defaults to
            [104., 117., 124.].

    Returns:
        Array of shape (batch_size, scale_size[0], scale_size[1], 3).

    Raises:
        IOError: if an image cannot be read.
    """
    if mean is None:
        mean = np.array([104., 117., 124.])

    # Zero-initialised so unused rows never contain uninitialised memory.
    images = np.zeros([batch_size, scale_size[0], scale_size[1], 3])
    for i, path in enumerate(paths):
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Could not read image: %s' % path)

        # cv2.resize expects dsize as (width, height); scale_size is (rows, cols).
        img = cv2.resize(img, (scale_size[1], scale_size[0]))
        img = img.astype(np.float32)
        img -= mean

        images[i] = img
    return images


def onehot(labels, n_classes, batch_size):
    """Return a (batch_size, n_classes) one-hot matrix for integer ``labels``.

    Rows beyond ``len(labels)`` stay all-zero.
    """
    one_hot_labels = np.zeros((batch_size, n_classes))
    rows = np.arange(len(labels))
    one_hot_labels[rows, np.asarray(labels, dtype=np.intp)] = 1
    return one_hot_labels


def DataGenerator(X, y, bt_size, n_classes, data_aug=True):
    """Endlessly yield (image batch, one-hot label batch) tuples.

    The data is reshuffled at the start of every pass and only full batches
    are produced (the remainder of ``len(X) / bt_size`` is dropped).

    Args:
        X: NumPy array of image paths.
        y: NumPy array of integer labels aligned with ``X``.
        bt_size: batch size.
        n_classes: width of the one-hot label vectors.
        data_aug: augment the batch with ``image_augmentation`` when True,
            otherwise pass it through ``preprocess_input``.
    """
    while True:
        p = np.random.permutation(len(X))
        X, y = X[p], y[p]
        bt_index = len(X) // bt_size
        for i in range(bt_index):
            X_batch = X[i * bt_size:(i + 1) * bt_size]
            y_batch = y[i * bt_size:(i + 1) * bt_size]
            X_batch = read_as_img(X_batch, bt_size)
            y_batch = onehot(y_batch, n_classes, bt_size)

            # NOTE(review): the two branches scale the pixels differently --
            # augmented batches stay mean-subtracted while the other branch
            # additionally applies preprocess_input.  Confirm this is intended.
            if data_aug:
                X_batch, y_batch = image_augmentation(X_batch, y_batch, bt_size)
            else:
                X_batch = preprocess_input(X_batch)
            yield (X_batch, y_batch)
directory +train_val_file = './hand_all_train.txt' +test_file = './hand_all_test.txt' + +# Specify class#, epoch#, bt-size +n_classes = 24 +epochs_toplayer = 5 +epochs_InceptionAndToplayer = 10 +batch_size = 16 + +# Create the base pre-trained model +base_model = InceptionV3(weights='imagenet', include_top=False) + +# Add a global spatial average pooling layer +x = base_model.output +x = GlobalAveragePooling2D()(x) +x = Dropout(0.5)(x) +x = Dense(256, activation='relu')(x) +predictions = Dense(n_classes, activation='softmax')(x) + +# The model we are going train +model = Model(inputs=base_model.input, outputs=predictions) + +# Freeze all convolutional InceptionV3 layers and train only top layers which we randomly initialize +for layer in base_model.layers: + layer.trainable = False + +# Compile the model +model.compile(optimizer='rmsprop', loss='categorical_crossentropy',metrics=['accuracy'])#'categorical_crossentropy') + + +# Initalize the data generator seperately for the training and validation set +train_val_generator = ImageDataGenerator_custom(train_val_file) +test_generator = ImageDataGenerator_custom(test_file, shuffle = False) + +train_val_img = np.asarray(train_val_generator.images) +train_val_label = np.asarray(train_val_generator.labels) + +X_train, X_val, y_train, y_val = train_test_split(train_val_img, train_val_label, test_size=0.33) + +# print('train_x length = ',len(X_train)) +# print('val_x length = ',len(X_val)) + +# Train the model on the new data for a few epochs +checkpointer = ModelCheckpoint(filepath='./best_fstlayer_model.hdf5', verbose=1, save_best_only=True) +model.fit_generator (DataGenerator(X_train,y_train,batch_size,n_classes), + steps_per_epoch=len(X_train) // batch_size, + epochs=epochs_toplayer, + validation_data=DataGenerator(X_val,y_val,batch_size,n_classes,False), + nb_val_samples=len(X_val) // batch_size, + callbacks=[checkpointer]) + +print('finish finetune on the top layer!') + + +# load model trained on top-layers +# model = 
load_model('best_model_epo30.hdf5') + + +# Start fine-tuning convolutional layers from inception V3. +# freeze the bottom 249 layers and train the remaining top layers. +for layer in model.layers[:249]: + layer.trainable = False +for layer in model.layers[249:]: + layer.trainable = True + +# recompile model +from keras.optimizers import SGD +model.compile(optimizer=SGD(lr=0.001, momentum=0.9), loss='categorical_crossentropy',metrics=['accuracy'])#'categorical_crossentropy') + + +checkpointer = ModelCheckpoint(filepath='./best_model_epo40.hdf5', verbose=1, save_best_only=True) +reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, + patience=3, min_lr=0.00001) +history = model.fit_generator(DataGenerator(X_train,y_train,batch_size,n_classes), + steps_per_epoch=len(X_train) // batch_size, + epochs=epochs_InceptionAndToplayer, + validation_data=DataGenerator(X_val,y_val,batch_size,n_classes), + validation_steps=len(X_val) // batch_size, + callbacks=[checkpointer,reduce_lr])#LossHistory()]) + +model = load_model('best_model_epo40.hdf5') +print('load best model for testing') + +x_test_len = len(test_generator.images) +x_test_lst = np.asarray(test_generator.images) +x_test = read_as_img(x_test_lst,x_test_len) +y_test = onehot(np.asarray(test_generator.labels),n_classes,x_test_len) + +batch_size = 50 + +y_pre = model.predict(x_test,batch_size=batch_size,verbose=1) +# y_pre = np.argmax(y_pre[0]) + + +# y_test_tmp = np.argmax(y_test,axis=1) +# y_pre_tmp = np.argmax(y_pre,axis=1) +# acc_n = 0 +# for i in range(len(y_pre_tmp)): +# if y_pre_tmp[i] == y_test_tmp[i]: +# acc_n += 1 +# print('acc = ',acc_n/len(y_pre_tmp)) + +## Draw Precision Recall Curve +from sklearn.metrics import average_precision_score +from sklearn.metrics import precision_recall_curve +import matplotlib.pyplot as plt +from itertools import cycle +from matplotlib import colors as mcolors + +# For each class +precision = dict() +recall = dict() +average_precision = dict() +for i in range(n_classes): + 
precision[i], recall[i], _ = precision_recall_curve(y_test[:, i], + y_pre[:, i]) + average_precision[i] = average_precision_score(y_test[:, i], y_pre[:, i]) + +# A "micro-average": quantifying score on all classes jointly +precision["micro"], recall["micro"], _ = precision_recall_curve(y_test.ravel(), + y_pre.ravel()) +average_precision["micro"] = average_precision_score(y_test, y_pre, + average="micro") + + +# Plot Precision-Recall curve for each class and iso-f1 curves +# setup plot details +colors_dict = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS) +color_lst = [] +for key in colors_dict: + color_lst.append(key) + +# colors = cycle(['navy', 'turquoise', 'darkorange', 'cornflowerblue', 'teal','navy', 'turquoise', 'darkorange', 'cornflowerblue', 'teal','navy', 'turquoise', 'darkorange', 'cornflowerblue', 'teal','navy', 'turquoise', 'darkorange', 'cornflowerblue', 'teal','navy', 'turquoise', 'darkorange', 'cornflowerblue' ]) +colors = cycle(color_lst[10:34]) + +plt.figure(figsize=(10, 8)) +f_scores = np.linspace(0.2, 0.8, num=4) +lines = [] +labels = [] +for f_score in f_scores: + x = np.linspace(0.01, 1) + y = f_score * x / (2 * x - f_score) + l, = plt.plot(x[y >= 0], y[y >= 0], color='gray', alpha=0.2) + plt.annotate('f1={0:0.1f}'.format(f_score), xy=(0.9, y[45] + 0.02)) + +lines.append(l) +labels.append('iso-f1 curves') +l, = plt.plot(recall["micro"], precision["micro"], color='gold', lw=2) +lines.append(l) +labels.append('micro-average Precision-recall (area = {0:0.2f})' + ''.format(average_precision["micro"])) + +for i, color in zip(range(n_classes), colors): + l, = plt.plot(recall[i], precision[i], color=color, lw=2) + lines.append(l) + labels.append('Precision-recall for class {0} (area = {1:0.2f})' + ''.format(i, average_precision[i])) + +fig = plt.gcf() +fig.subplots_adjust(bottom=0.25) +plt.xlim([0.0, 1.0]) +plt.ylim([0.0, 1.05]) +plt.xlabel('Recall') +plt.ylabel('Precision') +plt.title('Extension of Precision-Recall curve to multi-class') 
+plt.legend(lines, labels, loc=(0.5, -.38), prop=dict(size=14)) + +plt.show() + +# loss, acc = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=0) +# print('\nTesting loss: {}, acc: {}\n'.format(loss, acc)) + +## list all data in history +# print(history.history.keys()) +# # summarize history for accuracy +# plt.plot(history.history['acc']) +# plt.plot(history.history['val_acc']) +# plt.title('model accuracy') +# plt.ylabel('accuracy') +# plt.xlabel('epoch') +# plt.legend(['train', 'test'], loc='upper left') +# plt.show() +# # summarize history for loss +# plt.plot(history.history['loss']) +# plt.plot(history.history['val_loss']) +# plt.title('model loss') +# plt.ylabel('loss') +# plt.xlabel('epoch') +# plt.legend(['train', 'test'], loc='upper left') +# plt.show() + + diff --git a/results/2-stream.png b/results/2-stream.png new file mode 100644 index 0000000..f25f1d4 Binary files /dev/null and b/results/2-stream.png differ diff --git a/results/InceptionV3.png b/results/InceptionV3.png new file mode 100644 index 0000000..abc6096 Binary files /dev/null and b/results/InceptionV3.png differ diff --git a/results/index.md b/results/index.md index 96ce61c..b206eae 100644 --- a/results/index.md +++ b/results/index.md @@ -1,47 +1,142 @@ -# Your Name (id) - -#Project 5: Deep Classification +# 洪彗庭 106061532 CEDL-hw1 ## Overview -The project is related to -> quote +In this homework we implement 2 models on doing object-classification, the highest accuracy we reach is **0.6667** in InceptionV3 model. We will go through details on preprocessing data, model structure and some tricks on training in the following. Since I am still not pretty familiar with tensorfow, I choose to use keras on the second model implementation to speed up the develop process. 
+ +* 2-streams [Alexnet](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf) implementation in tensorflow: [finetune_AlexNet.py](../finetune_AlexNet.py) +* 1-stream [InceptionV3](https://arxiv.org/pdf/1512.00567.pdf) implementation in keras: [finetune_InceptionV3.py](../finetune_InceptionV3.py) + +I mainly modified the code from [here](https://kratzert.github.io/2017/02/24/finetuning-alexnet-with-tensorflow.html) for Alexnet and [here](https://keras.io/applications/) for InceptionV3. + +##### Other files: +* [simplenn.py](../simplenn.py): for building 2 simple fc layers. +* [PreprocessData.py](../PreprocessData.py): preprosess data and label directory as we need. +* [datagenerator_non2string.py](../datagenerator_non2string.py), [datagenerator.py](../datagenerator.py): preprocess of data +* [alexnet.py](../alexnet.py): model of AlexNet + +## Results +| Model | Accuracy(10th epoch) | Accuracy(40th epoch) | +|-------|----------|----------| +| Alexnet- 2-streams (hand + head, object label)| 0.5658 | 0.6136 | +| InceptionV3- 1-stream (hand, object label)| 0.6549 | **0.6667** | +### Precision Recall Curve +
+
+
+Therefore, I simply concatenate the outputs of the two streams (hand, head) from fc6 and add 2 simple fully-connected layers on top of the result.
+In the Discussion part we will discuss the results of 1-stream versus 2-streams in more detail.
+
+2. **InceptionV3**:
+The InceptionV3 model was proposed in 2015 and achieves a 3.5% top-5 error rate on the ImageNet dataset. It improves on several parts of InceptionV2, for example through the idea of factorizing convolutions. Compared with AlexNet, the Inception model uses fewer parameters but uses them more efficiently. The structure looks as follows:
+
+I didn't modify the InceptionV3 model itself; I just added 2 fully-connected layers on top of it.
+
+### Training Detail
+1. **AlexNet**:
+I didn't finetune all layers, since doing so can lose the advantage of loading pretrained parameters into the network. I **freeze the bottom 2 layers** (i.e. conv1, conv2), finetune conv3 through fc6, and also finetune the 2 additional layers I add on top of the concatenated result.
+2. **InceptionV3**:
+**First**: finetune the 2 layers I add above the whole structure (i.e. **freeze all layers in the InceptionV3**)
+**Second**: finetune the 2 layers I add on top together with the top 2 inception blocks of InceptionV3 (i.e. **freeze the bottom 249 layers**)
+This two-step schedule avoids a problem: the layers we add are randomly initialized and therefore start out very bad, so finetuning them directly together with the pretrained InceptionV3 weights is prone to ruining those pretrained parameters. Also, the second round of finetuning converges more easily, since the 2 top layers, which are initially quite poor, have already been trained.
+
+
+## Discussion
+1. Preprocessing of data (shuffling is important!)
+Initially I divided the train/val data in the wrong way: I didn't shuffle the data before splitting it into the training set and the validation set. The results for non-shuffled and shuffled data are as follows:
+
+| Best-val-loss / Best-val-Acc | non-shuffle | shuffle |
+|-------|----------|----------|
+| Model-InceptionV3| 1.7323 / 0.6119 | 0.1381 / 0.9579 |
+
+The data we get this time consists of sequential frames of videos, so the drawback of un-shuffled data is especially obvious in this task.
+
+2. One stream v.s. Two streams
+
+| Accuracy | 1-stream | 2-streams |
+|-------|----------|----------|
+| Model-AlexNet| 0.4175 | 0.5658 |
+
+One thing to note is that I can't confirm that the 2-streams result will definitely beat the 1-stream result, since the settings of 1-stream and 2-streams are as follows:
+
+| | learning-rate | finetune-layers | epoch | batch-size |
+|-------|----------|----------|--------|------|
+| 1-stream| 0.001 | fc7, fc8 | 10 | 128 |
+| 2-streams| 0.001 | conv3, conv4, conv5, fc6, fc7, fc8 | 10 | 16 |
-## Installation
-* Other required packages.
-* How to compile from source?
+The settings of the two are a little bit different (finetune-layers and batch-size), so I am not 100 percent sure that 2-streams is better than 1-stream.
-### Results
+## Acknowledgement
+Thanks to [Frederik Kratzert](https://kratzert.github.io/2017/02/24/finetuning-alexnet-with-tensorflow.html) for the awesome tutorial on finetuning AlexNet.
+Thanks also to [Zayn Liu](https://github.com/refu0523) for giving me so much advice on finishing this assignment.
-
-
-
-
-
- |
-
-
-
-
-
- |
-