diff --git a/data_input.py b/data_input.py new file mode 100644 index 0000000..1588ea9 --- /dev/null +++ b/data_input.py @@ -0,0 +1,214 @@ +# Coder: Wenxin Xu +# Github: https://github.com/wenxinxu/ResNeXt-in-tensorflow +# ============================================================================== +import tarfile +from six.moves import urllib +import sys +import numpy as np +import cPickle +import os +from os import listdir +from os.path import isfile, join +import random +import skimage.io as io +import skimage.transform +import tensorflow as tf + +IMG_RAW_WIDTH = 1920 +IMG_RAW_HEIGHT = 1080 + +IMG_TMP_WIDTH = IMG_RAW_WIDTH / 2 # temporal size saved in tfrecord +IMG_TMP_HEIGHT = IMG_RAW_HEIGHT / 2 + +IMG_TEST_WIDTH = 224 +IMG_TEST_HEIGHT = 224 + +IMG_WIDTH = 224 +IMG_HEIGHT = 224 + +IMG_DEPTH = 3 + +NUM_FA_CLASS = 2 +NUM_GES_CLASS = 13 +NUM_OBJ_CLASS = 24 + +TRAIN_EPOCH_SIZE = 14992 +TEST_EPOCH_SIZE = 12776 + +def whitening_image(image_np, mode='test'): + ''' + Performs per_image_whitening + :param image_np: a 4D numpy array representing a batch of images + :return: the image numpy array after whitened + ''' + for i in range(len(image_np)): + mean = np.mean(image_np[i, ...]) + # Use adjusted standard deviation here, in case the std == 0. + if mode is 'test': + std = np.max([np.std(image_np[i, ...]), 1.0/np.sqrt(IMG_TEST_HEIGHT * IMG_TEST_WIDTH * IMG_DEPTH)]) + else: + std = np.max([np.std(image_np[i, ...]), 1.0/np.sqrt(IMG_HEIGHT * IMG_WIDTH * IMG_DEPTH)]) + image_np[i,...] = (image_np[i, ...] - mean) / std + return image_np + +def read_path_and_label(train_or_test_folder): + ''' + input: 'train' or 'test'. Specify which folder want to read + output: (string, string, float, float, float) + [(hand_path, head_path, FA_label, ges_label, obj_label), + (hand_path, head_path, FA_label, ges_label, obj_label), + ... 
+ (hand_path, head_path, FA_label, ges_label, obj_label)] + ''' + def find_num_files(location, cur_folder_idx): + ''' + location: 'house', 'lab', 'office' + cur_folder_idx: current folder index + train_or_test_folder: choose train or test folder + ''' + + current_path = root_path + '/' + location + '/' + cur_folder_idx + '/Lhand/' + num_files = len([f for f in listdir(current_path) if isfile(join(current_path, f))]) + + return num_files + + def read_labels(location, cur_folder_idx, left_or_right, offset): + ''' + location: 'house', 'lab', 'office' + cur_folder_idx: current folder index + left_or_right: left or right hand + offset: the offset of cur_folder_idx + ''' + + # root_path = '/Disk2/cedl/handcam/labels' # @ AI + root_path = './dataset/labels' # @ my PC + + current_path = root_path + '/' + location + '/' + post_fix = left_or_right + str(offset + cur_folder_idx) + '.npy' + + label_fa = np.load(current_path + 'FA_' + post_fix) + label_ges = np.load(current_path + 'ges_' + post_fix) + label_obj = np.load(current_path + 'obj_' + post_fix) + + return label_fa, label_ges, label_obj + + + location_list = ['house', 'lab', 'office'] + num_folders_per_location = [3, 4, 3] + hand_list = [('Lhand', 'left'), ('Rhand', 'right')] + + imgs_hand_path_list = [] + imgs_head_path_list = [] + labels_fa = [] + labels_ges = [] + labels_obj = [] + + + # root_path = '/Disk2/cedl/handcam/frames/' + train_or_test_folder # @ AI + root_path = './dataset/frames/' + train_or_test_folder # @ my PC + + for location, num_folders in zip(location_list, num_folders_per_location): + for i in xrange(num_folders): + num_files = find_num_files(location, str(i+1)) + for which_hand, L_or_R in hand_list: + for j in xrange(num_files): + # hand + current_path = root_path + '/' + location + '/' + str(i+1) + '/' + which_hand + '/' + imgs_hand_path_list.extend([current_path + 'Image' + str(j+1) + '.png']) + # head + current_path = root_path + '/' + location + '/' + str(i+1) + '/head/' + imgs_head_path_list.extend([current_path + 'Image' + str(j+1) + '.png']) + # Labels + # offset: label file idx. + # 0 for training data; num_folders_per_location for testing data + if train_or_test_folder is 'train': + offset = 0 + elif train_or_test_folder is 'test': + offset = num_folders + label_fa, label_ges, label_obj = read_labels(location, i+1, L_or_R, offset) + labels_fa.extend(label_fa) + labels_ges.extend(label_ges) + labels_obj.extend(label_obj) + + example = zip(imgs_hand_path_list, imgs_head_path_list, labels_fa, labels_ges, labels_obj) + example = random.sample(example, len(example)) # shuffle the list + + return example + +def read_in_imgs(imgs_path_list, mode): + """ + This function reads all training or validation data, and returns the + images as numpy arrays + :param address_list: a list of paths of image files + :return: concatenated numpy array of data. 
Data are in 4D arrays: [num_images, + image_height, image_width, image_depth] + """ + + if mode is 'test': + height = IMG_TEST_HEIGHT + width = IMG_TEST_WIDTH + else: # for valid or train + height = IMG_HEIGHT + width = IMG_WIDTH + + images = np.array([]).reshape([0, height, width, IMG_DEPTH]) + + for imgs_path in imgs_path_list: + img = io.imread(imgs_path) + img = skimage.transform.resize(img, [height, width], order=3, mode='reflect') + if mode is 'train': + img = horizontal_flip(image=img, axis=1) # 50% chance to flip the image when training + img = np.reshape(img, [1, height, width, IMG_DEPTH]) + # Concatenate along axis 0 by default + images = np.concatenate((images, img)) + + return images + + +def tfrecords_maker(example, file_name = 'training'): + + def _bytes_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + def _int64_feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + + tfrecords_filename = file_name + '_data.tfrecords' + + writer = tf.python_io.TFRecordWriter(tfrecords_filename) + + i = 0 + for img_hand_path, img_head_path, label_fa, label_ges, label_obj in example: + + img_hand = np.array(io.imread(img_hand_path)) + img_head = np.array(io.imread(img_head_path)) + + # half the image size to save storage + img_hand = skimage.transform.resize(img_hand, [IMG_TMP_HEIGHT, IMG_TMP_WIDTH], order=3, mode='reflect') + img_head = skimage.transform.resize(img_head, [IMG_TMP_HEIGHT, IMG_TMP_WIDTH], order=3, mode='reflect') + + img_hand = img_hand * 255.0 + img_head = img_head * 255.0 + img_hand = img_hand.astype(np.uint8) + img_head = img_head.astype(np.uint8) + + image_hand_raw = img_hand.tostring() + image_head_raw = img_head.tostring() + + _example = tf.train.Example(features=tf.train.Features(feature={ + 'image_hand_raw': _bytes_feature(image_hand_raw), + 'image_head_raw': _bytes_feature(image_head_raw), + 'label_fa': _int64_feature(int(label_fa)), + 'label_ges': _int64_feature(int(label_ges)), + 'label_obj': _int64_feature(int(label_obj))})) + + writer.write(_example.SerializeToString()) + i = i + 1 + if i % 50 ==0: + print '%d / %d' % (i, TRAIN_EPOCH_SIZE) + writer.close() + +if __name__ == '__main__': + # To save the training data to tfrecord format + train_data_list = read_path_and_label('train') + tfrecords_maker(train_data_list, 'training') \ No newline at end of file diff --git a/dataset/placeholder b/dataset/placeholder new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/dataset/placeholder @@ -0,0 +1 @@ + diff --git a/hyper_parameters.py b/hyper_parameters.py new file mode 100644 index 0000000..b90348c --- /dev/null +++ b/hyper_parameters.py @@ -0,0 +1,83 @@ +# Coder: Wenxin Xu +# Github: https://github.com/wenxinxu/ResNeXt-in-tensorflow +# ============================================================================== +import tensorflow as tf + +FLAGS = tf.app.flags.FLAGS + +tf.app.flags.DEFINE_string('mode', 'test', '''Specify that the main code is for training or testing''') + +## The following flags define hyper-parameters that specifically characterize ResNeXt +tf.app.flags.DEFINE_integer('cardinality', 3, '''Cadinality, number of paths in each block''') +tf.app.flags.DEFINE_integer('block_unit_depth', 32, '''the depth(# filters) of each split. 
64 for cifar10 +in Figure 7 of the paper''') +tf.app.flags.DEFINE_integer('num_fc_units', 256, '''Number of neurons in the fc layer''') +tf.app.flags.DEFINE_string('bottleneck_implementation', 'b', '''To use Figure 3b or 3c to +implement''') + + +## The following flags are related to save paths, tensorboard outputs and screen outputs + +tf.app.flags.DEFINE_string('version', 'GG123', '''A version number defining the directory to +save +logs and checkpoints''') +tf.app.flags.DEFINE_integer('report_freq', 200, '''Steps takes to output errors on the screen +and write summaries''') +tf.app.flags.DEFINE_integer('save_freq', 200, '''Steps takes to save the current ckpt''') +tf.app.flags.DEFINE_integer('max_to_keep', 400, '''Max # ckpt to keep''') +tf.app.flags.DEFINE_float('train_ema_decay', 0.95, '''The decay factor of the train error's +moving average shown on tensorboard''') + +## The following flags define hyper-parameters regards training + +tf.app.flags.DEFINE_integer('train_steps', 80000, '''Total steps that you want to train''') +tf.app.flags.DEFINE_boolean('is_full_validation', False, '''Validation w/ full validation set or +a random batch''') +tf.app.flags.DEFINE_integer('train_batch_size', 10, '''Train batch size''') +tf.app.flags.DEFINE_integer('validation_batch_size', 72, '''Validation batch size, must be multiplier of 24 ''') +tf.app.flags.DEFINE_integer('test_batch_size', 20, '''Test batch size''') + + +# tf.app.flags.DEFINE_float('init_lr', 0.001, '''Initial learning rate''') +# tf.app.flags.DEFINE_float('lr_decay_factor', 0.001, '''How much to decay the learning rate each +# time''') +tf.app.flags.DEFINE_float('k', 0.4, '''k * loss_ges + (1-k) * loss_obj''') +tf.app.flags.DEFINE_float('init_lr', 0.001, '''Initial learning rate''') +tf.app.flags.DEFINE_float('lr_decay_factor', 1, '''How much to decay the learning rate each +time''') + +## The following flags define hyper-parameters modifying the training network +tf.app.flags.DEFINE_integer('num_resnext_blocks', 3, '''How many blocks do you want, +total layers = 3n + 2, the paper used n=3, 29 layers, as demo''') +tf.app.flags.DEFINE_float('weight_decay', 5e-4, '''scale for l2 regularization''') + + +## The following flags are related to data-augmentation + +tf.app.flags.DEFINE_integer('padding_size', 2, '''In data augmentation, layers of zero padding on +each side of the image''') + + +## If you want to load a checkpoint and continue training + +tf.app.flags.DEFINE_boolean('is_use_ckpt', True, '''Whether to load a checkpoint and continue +training''') + +tf.app.flags.DEFINE_string('ckpt_path', 'logs_oh,mfc_ges+obj_ver2_c=3_d=32_n=3_lr=0.001_lrd=1_wd=0.0005_k=0.4/model.ckpt-4600', '''Checkpoint +directory to restore to continue TRAIN''') + +tf.app.flags.DEFINE_string('test_ckpt_path', 'logs_GG123_c=3_d=32_n=3_lr=0.001_lrd=1_wd=0.0005_k=0.4/model.ckpt-30600', '''Checkpoint +directory to restore to TEST''') + + +lr_curve_file_name = 'c='+str(FLAGS.cardinality) + '_'\ + 'd='+str(FLAGS.block_unit_depth) + '_'\ + 'n='+str(FLAGS.num_resnext_blocks) + '_'\ + 'lr='+str(FLAGS.init_lr) + '_'\ + 'lrd='+str(FLAGS.lr_decay_factor) + '_'\ + 'wd='+str(FLAGS.weight_decay) + '_'\ + 'k='+str(FLAGS.k) +lr_curve_file_name = FLAGS.version + '_' + lr_curve_file_name +train_dir = 'logs_' + lr_curve_file_name + '/' + + diff --git a/main.py b/main.py new file mode 100644 index 0000000..9e4c3f8 --- /dev/null +++ b/main.py @@ -0,0 +1,675 @@ +# Coder: Wenxin Xu +# Github: https://github.com/wenxinxu/ResNeXt-in-tensorflow +# 
============================================================================== +from data_input import * +from resNeXt import * +from datetime import datetime +import time +import pandas as pd +import skimage.io as io +import skimage.transform +# from data_io import * + +class Train(object): + ''' + This Object is responsible for all the training and validation process + ''' + def __init__(self): + # Set up all the placeholders + self.placeholders() + + + def placeholders(self): + ''' + There are five placeholders in total. + image_placeholder and label_placeholder are for train images and labels + vali_image_placeholder and vali_label_placeholder are for validation imgaes and labels + lr_placeholder is for learning rate. Feed in learning rate each time of training + implements learning rate decay easily + ''' + self.image_placeholder = tf.placeholder(dtype=tf.float32, + shape=[FLAGS.train_batch_size, IMG_HEIGHT, + IMG_WIDTH, IMG_DEPTH]) + self.label_placeholder_ges= tf.placeholder(dtype=tf.int32, shape=[FLAGS.train_batch_size]) + self.label_placeholder_obj = tf.placeholder(dtype=tf.int32, shape=[FLAGS.train_batch_size]) + + self.vali_image_placeholder = tf.placeholder(dtype=tf.float32, shape=[FLAGS.validation_batch_size, + IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH]) + self.vali_label_placeholder_ges = tf.placeholder(dtype=tf.int32, shape=[FLAGS.validation_batch_size]) + self.vali_label_placeholder_obj = tf.placeholder(dtype=tf.int32, shape=[FLAGS.validation_batch_size]) + + self.lr_placeholder = tf.placeholder(dtype=tf.float32, shape=[]) + + + + def build_train_validation_graph(self): + ''' + This function builds the train graph and validation graph at the same time. + + ''' + global_step = tf.Variable(0, trainable=False) + validation_step = tf.Variable(0, trainable=False) + + # Logits of training data and valiation data come from the same graph. The inference of + # validation data share all the weights with train data. 
This is implemented by passing + # reuse=True to the variable scopes of train graph + logits_ges, logits_obj = inference(self.image_placeholder, FLAGS.num_resnext_blocks, reuse=False) + vali_logits_ges, vali_logits_obj = inference(self.vali_image_placeholder, FLAGS.num_resnext_blocks, reuse=True) + + # The following codes calculate the train loss, which is consist of the + # softmax cross entropy and the relularization loss + regu_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) + + + self.loss_ges = self.loss(logits_ges, self.label_placeholder_ges, 'train_ges') + self.loss_obj = self.loss(logits_obj, self.label_placeholder_obj, 'train_obj') + loss = FLAGS.k * self.loss_ges + (1.0-FLAGS.k) * self.loss_obj + self.full_loss = tf.add_n([loss] + regu_losses) + + predictions_obj = tf.nn.softmax(logits_obj) + self.train_top1_error_obj = self.top_k_error(predictions_obj, self.label_placeholder_obj, 1) + predictions_ges = tf.nn.softmax(logits_ges) + self.train_top1_error_ges = self.top_k_error(predictions_ges, self.label_placeholder_ges, 1) + + # Validation loss + self.vali_loss_ges = self.loss(vali_logits_ges, self.vali_label_placeholder_ges, 'val_ges') + self.vali_loss_obj= self.loss(vali_logits_obj, self.vali_label_placeholder_obj, 'val_obj') + self.vali_loss = FLAGS.k * self.vali_loss_ges + (1.0-FLAGS.k) * self.vali_loss_obj + + vali_predictions_obj = tf.nn.softmax(vali_logits_obj) + self.vali_top1_error_obj = self.top_k_error(vali_predictions_obj, self.vali_label_placeholder_obj, 1) + vali_predictions_ges = tf.nn.softmax(vali_logits_ges) + self.vali_top1_error_ges = self.top_k_error(vali_predictions_ges, self.vali_label_placeholder_ges, 1) + + self.train_op, self.train_ema_op = self.train_operation(global_step, self.full_loss, + self.train_top1_error_obj) + self.val_op = self.validation_op(validation_step, self.vali_top1_error_obj, self.vali_loss) + + def train(self): + ''' + This is the main function for training + ''' + + # Build the graph for train and validation + self.build_train_validation_graph() + + # Initialize a saver to save checkpoints. Merge all summaries, so we can run all + # summarizing operations by running summary_op. Initialize a new session + saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.max_to_keep) + summary_op = tf.summary.merge_all() + init = tf.global_variables_initializer() + config = tf.ConfigProto() + config.gpu_options.allow_growth = True + sess = tf.Session(config=config) + + # If you want to load from a checkpoint + if FLAGS.is_use_ckpt is True: + saver.restore(sess, FLAGS.ckpt_path) + print 'Restored from checkpoint...' 
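+            # The starting step is recovered from the checkpoint filename suffix (e.g. 'model.ckpt-4600' -> step 4600).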
+ starting_step = int(FLAGS.ckpt_path.split('-')[-1]) + print 'Starting step = %d' % starting_step + else: + sess.run(init) + starting_step = 0 + + # This summary writer object helps write summaries on tensorboard + summary_writer = tf.summary.FileWriter(train_dir, sess.graph) + lr_curve_file_name = 'c='+str(FLAGS.cardinality) + '_'\ + 'd='+str(FLAGS.block_unit_depth) + '_'\ + 'n='+str(FLAGS.num_resnext_blocks) + '_'\ + 'lr='+str(FLAGS.init_lr) + '_'\ + 'lrd='+str(FLAGS.lr_decay_factor) + '_'\ + 'wd='+str(FLAGS.weight_decay) + '_'\ + 'k='+str(FLAGS.k) + lr_curve_file_name = FLAGS.version + '_' + lr_curve_file_name + + # These lists are used to save a csv file at last + step_list = [] + train_error_list_ges = [] + train_error_list_obj = [] + val_error_list_ges = [] + val_error_list_obj = [] + + train_loss_list_ges = [] + train_loss_list_obj = [] + val_loss_list_ges = [] + val_loss_list_obj = [] + + # Prepare the validation batch data + print 'Prepare the validation batch data...' + print '----------------------------' + vali_data_list = read_path_and_label('test') # load all paths to validation images into the memory + validation_batch_hand, validation_batch_head, validation_batch_label_ges, validation_batch_label_ges, validation_batch_label_obj = \ + self.generate_data_batch_for_vali(vali_data_list, FLAGS.validation_batch_size) + + print 'Start training...' + print '----------------------------' + + # Define the procedure of tfRecord data -> tensor data + tfrecords_filename = 'training_data.tfrecords' + filename_queue = tf.train.string_input_producer([tfrecords_filename], num_epochs=None) + # Define the procedure of getting a batch of data + # (Even when reading in multiple threads, share the filename queue.) + images_hand, images_head, labels_fa, labels_ges, labels_obj = self.read_and_decode(filename_queue) + + coord = tf.train.Coordinator() + threads = tf.train.start_queue_runners(sess=sess, coord=coord) + + for step in xrange(starting_step, FLAGS.train_steps): + + # Get a batch of data. (tensor data -> numpy data) + train_batch_hand, train_batch_label_obj, train_batch_label_ges = sess.run([images_hand, labels_obj, labels_ges]) + + # lr decay + current_lr = float(FLAGS.init_lr) * np.power(float(FLAGS.lr_decay_factor), (float(step) / float(FLAGS.train_steps))) + if step % FLAGS.report_freq == 0: + print 'Learning rate decayed to %.8f'%current_lr + + # Want to validate once before training. 
You may check the theoretical validation + # loss first + if step % FLAGS.report_freq == 0: + _, validation_error_value_ges, validation_error_value_obj, validation_loss_value, valid_loss_ges, valid_loss_obj = sess.run([self.val_op, self.vali_top1_error_ges, self.vali_top1_error_obj, self.vali_loss, self.vali_loss_ges, self.vali_loss_obj], + {self.image_placeholder: train_batch_hand, # train_batch_data + self.label_placeholder_obj: train_batch_label_obj, # train_batch_labels + self.label_placeholder_ges: train_batch_label_ges, # train_batch_labels + self.vali_image_placeholder: validation_batch_hand, # validation_batch_data + self.vali_label_placeholder_obj: validation_batch_label_obj, # validation_batch_labels + self.vali_label_placeholder_ges: validation_batch_label_ges, # validation_batch_labels + self.lr_placeholder: current_lr}) + + valid_loss_ges = valid_loss_ges * FLAGS.k + valid_loss_obj = valid_loss_obj * (1.0-FLAGS.k) + + val_error_list_ges.append(validation_error_value_ges) + val_error_list_obj.append(validation_error_value_obj) + val_loss_list_ges.append(valid_loss_ges) + val_loss_list_obj.append(valid_loss_obj) + + start_time = time.time() + + _, _, train_loss_value, train_error_value_ges, train_error_value_obj, train_loss_ges, train_loss_obj = sess.run([self.train_op, self.train_ema_op, + self.full_loss, self.train_top1_error_ges, self.train_top1_error_obj, self.loss_ges, self.loss_obj], + {self.image_placeholder: train_batch_hand, # train_batch_data + self.label_placeholder_obj: train_batch_label_obj, # train_batch_labels + self.label_placeholder_ges: train_batch_label_ges, # train_batch_labels + self.vali_image_placeholder: validation_batch_hand, # validation_batch_data + self.vali_label_placeholder_obj: validation_batch_label_obj, # validation_batch_labels + self.vali_label_placeholder_ges: validation_batch_label_ges, # validation_batch_labels + self.lr_placeholder: current_lr}) + train_loss_ges = train_loss_ges * FLAGS.k + train_loss_obj = train_loss_obj * (1.0-FLAGS.k) + + duration = time.time() - start_time + + + if step % FLAGS.report_freq == 0: + summary_str = sess.run(summary_op, {self.image_placeholder: train_batch_hand, # train_batch_data + self.label_placeholder_obj: train_batch_label_obj, # train_batch_labels + self.label_placeholder_ges: train_batch_label_ges, # train_batch_labels + self.vali_image_placeholder: validation_batch_hand, # validation_batch_data + self.vali_label_placeholder_obj: validation_batch_label_obj, # validation_batch_labels + self.vali_label_placeholder_ges: validation_batch_label_ges, # validation_batch_labels + self.lr_placeholder: current_lr}) + summary_writer.add_summary(summary_str, step) + + num_examples_per_step = FLAGS.train_batch_size + examples_per_sec = num_examples_per_step / duration + sec_per_batch = float(duration) + + format_str = ('%s: step %d, total_loss = %.4f, obj_loss = %.4f, ges_loss = %.4f (%.1f examples/sec; %.3f ' 'sec/batch)') + print format_str % (datetime.now(), step, train_loss_value, train_loss_obj, train_loss_ges, examples_per_sec, + sec_per_batch) + print 'Train top1 error-> OBJ: %.2f'%train_error_value_obj, ', GES: %.2f'%train_error_value_ges + print 'Validation top1 error-> OBJ: %.2f'%validation_error_value_obj, ', GES: %.2f'%validation_error_value_ges + print 'Validation total_loss = %.4f, obj_loss = %.4f, ges_loss = %.4f'%(validation_loss_value, valid_loss_obj, valid_loss_ges) + print '----------------------------' + + step_list.append(step) + train_error_list_ges.append(train_error_value_ges) + 
train_error_list_obj.append(train_error_value_obj) + train_loss_list_ges.append(train_loss_ges) + train_loss_list_obj.append(train_loss_obj) + + # Save checkpoints every FLAGS.save_freq steps + if step % FLAGS.save_freq == 0 or (step + 1) == FLAGS.train_steps: + checkpoint_path = os.path.join(train_dir, 'model.ckpt') + saver.save(sess, checkpoint_path, global_step=step) + + df = pd.DataFrame(data={'step':step_list, + 'train_error_obj':train_error_list_obj, + 'validation_error_obj': val_error_list_obj, + 'train_error_ges':train_error_list_ges, + 'validation_error_ges': val_error_list_ges, + 'train_loss_ges':train_loss_list_ges, + 'train_loss_obj':train_loss_list_obj, + 'validation_loss_ges':val_loss_list_ges, + 'validation_loss_obj':val_loss_list_obj}) + df.to_csv(train_dir + lr_curve_file_name + '_error.csv') + coord.request_stop() + coord.join(threads) + return + + + def test(self, test_image_array, REUSE=False): + ''' + This function is used to evaluate the test data. Please finish pre-precessing in advance + :param test_image_array: 4D numpy array with shape [num_test_images, img_height, img_width, + img_depth] + :return: the softmax probability with shape [num_test_images, num_labels] + ''' + num_test_images = len(test_image_array) + num_batches = num_test_images // FLAGS.test_batch_size + remain_images = num_test_images % FLAGS.test_batch_size + print '%i test batches in total...' % num_batches + + # Create the test image and labels placeholders + self.test_image_placeholder = tf.placeholder(dtype=tf.float32, shape=[FLAGS.test_batch_size, + IMG_TEST_HEIGHT, IMG_TEST_WIDTH, IMG_DEPTH]) + + # Build the test graph + logits_ges, logits_obj = inference(self.test_image_placeholder, FLAGS.num_resnext_blocks, reuse=REUSE) + predictions = tf.nn.softmax(logits_obj) + + # Initialize a new session and restore a checkpoint + saver = tf.train.Saver(tf.global_variables()) + config = tf.ConfigProto() + config.gpu_options.allow_growth = True + sess = tf.Session(config=config) + + saver.restore(sess, FLAGS.test_ckpt_path) + print 'Model restored from ', FLAGS.test_ckpt_path + + prediction_array = np.array([]).reshape(-1, NUM_OBJ_CLASS) + # Test by batches + for step in range(num_batches): + if step % 10 == 0: + print '%i batches finished!' %step + offset = step * FLAGS.test_batch_size + test_image_batch = test_image_array[offset:offset+FLAGS.test_batch_size, ...] + + batch_prediction_array = sess.run(predictions, + feed_dict={self.test_image_placeholder: test_image_batch}) + + prediction_array = np.concatenate((prediction_array, batch_prediction_array)) + + # If test_batch_size is not a divisor of num_test_images + if remain_images != 0: + self.test_image_placeholder = tf.placeholder(dtype=tf.float32, shape=[remain_images, + IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH]) + # Build the test graph + logits_ges, logits_obj = inference(self.test_image_placeholder, FLAGS.num_resnext_blocks, reuse=False) + predictions = tf.nn.softmax(logits_obj) + + test_image_batch = test_image_array[-remain_images:, ...] 
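+            # Feed the leftover images (fewer than test_batch_size) through the rebuilt graph as one final partial batch.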
+ + batch_prediction_array = sess.run(predictions, feed_dict={ + self.test_image_placeholder: test_image_batch}) + + prediction_array = np.concatenate((prediction_array, batch_prediction_array)) + + return prediction_array + + def loss(self, logits, labels, task): + ''' + Calculate the cross entropy loss given logits and true labels + :param logits: 2D tensor with shape [batch_size, num_labels] + :param labels: 1D tensor with shape [batch_size] + :param task: obj or fa or ges + :return: loss tensor with shape [1] + ''' + labels = tf.cast(labels, tf.int64) + cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels, + name='cross_entropy_per_example_' + task) + cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy_' + task) + return cross_entropy_mean + + def top_k_error(self, predictions, labels, k): + ''' + Calculate the top-k error + :param predictions: 2D tensor with shape [batch_size, num_labels] + :param labels: 1D tensor with shape [batch_size, 1] + :param k: int + :return: tensor with shape [1] + ''' + batch_size = predictions.get_shape().as_list()[0] + in_top1 = tf.to_float(tf.nn.in_top_k(predictions, labels, k=1)) + num_correct = tf.reduce_sum(in_top1) + return (batch_size - num_correct) / float(batch_size) + + def read_and_decode(self, filename_queue, mode='train'): + ''' + [For queue loading used] + Read and decode tfrecord data + ''' + reader = tf.TFRecordReader() + + _, serialized_example = reader.read(filename_queue) + + features = tf.parse_single_example( + serialized_example, + # Defaults are not specified since both keys are required. + features={ + 'image_hand_raw': tf.FixedLenFeature([], tf.string), + 'image_head_raw': tf.FixedLenFeature([], tf.string), + 'label_fa': tf.FixedLenFeature([], tf.int64), + 'label_ges': tf.FixedLenFeature([], tf.int64), + 'label_obj': tf.FixedLenFeature([], tf.int64) + }) + + # Convert from a scalar string tensor (whose single string has + # length mnist.IMAGE_PIXELS) to a uint8 tensor with shape + # [mnist.IMAGE_PIXELS]. + image_hand = tf.decode_raw(features['image_hand_raw'], tf.uint8) + image_head = tf.decode_raw(features['image_head_raw'], tf.uint8) + + label_fa = tf.cast(features['label_fa'], tf.int32) + label_ges = tf.cast(features['label_ges'], tf.int32) + label_obj = tf.cast(features['label_obj'], tf.int32) + + Image_shape = tf.stack([IMG_TMP_HEIGHT, IMG_TMP_WIDTH, IMG_DEPTH]) + image_hand = tf.reshape(image_hand, Image_shape) + image_head = tf.reshape(image_head, Image_shape) + + img_width = 256 + img_height = 256 + resized_image_hand = tf.image.resize_images(image_hand, + tf.cast([img_height, img_width], tf.int32)) + resized_image_head = tf.image.resize_images(image_head, + tf.cast([img_height, img_width], tf.int32)) + + # Flip an image at 50% possibility and random crop + if mode is 'train': + resized_image_hand = tf.image.random_flip_left_right(resized_image_hand) + resized_image_head = tf.image.random_flip_left_right(resized_image_head) + resized_image_hand = tf.random_crop(resized_image_hand, [IMG_HEIGHT, IMG_WIDTH, 3]) + resized_image_head = tf.random_crop(resized_image_head, [IMG_HEIGHT, IMG_WIDTH, 3]) + elif mode is 'test': + resized_image_hand = tf.image.crop_to_bounding_box(resized_image_hand, 16, 16, 224, 224) + resized_image_head = tf.image.crop_to_bounding_box(resized_image_head, 16, 16, 224, 224) + + # Linearly scales image to have zero mean and unit norm. 
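+        # (tf.image.per_image_standardization subtracts each image's mean and divides by its adjusted standard deviation.)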
+ resized_image_hand = tf.image.per_image_standardization(resized_image_hand) + resized_image_head = tf.image.per_image_standardization(resized_image_head) + + if mode is 'train': + images_hand, images_head, labels_fa, labels_ges, labels_obj = \ + tf.train.shuffle_batch([resized_image_hand, resized_image_head, label_fa, label_ges, label_obj], + batch_size=FLAGS.train_batch_size, + capacity=100, + num_threads=4, + min_after_dequeue=10) + elif mode is 'test': + images_hand, images_head, labels_fa, labels_ges, labels_obj = \ + tf.train.batch([resized_image_hand, resized_image_head, label_fa, label_ges, label_obj], + batch_size=FLAGS.test_batch_size, + capacity=150, + num_threads=4) + + return images_hand, images_head, labels_fa, labels_ges, labels_obj + + def generate_data_batch(self, data_list, batch_size, mode, offset = 0): + ''' + [For queue loading NOT used] + This function helps generate a batch of train data, and horizontally flip + and whiten them at the same time + :param train_data: 4D numpy array + :param train_labels: 1D numpy array + :param batch_size: int + :param mode: string. Indicate the data_list is for train, valid or test + :return: augmented train batch data and labels. 4D numpy array and 1D numpy array + ''' + if mode is 'test': + batch_data = data_list[offset:offset+batch_size] + else: + offset = np.random.choice(len(data_list) - batch_size, 1)[0] # randomly choosed offset + batch_data = data_list[offset:offset+batch_size] + + batch_path_hand = [ele[0] for ele in batch_data] + batch_path_head = [ele[1] for ele in batch_data] + + batch_hand_imgs = read_in_imgs(batch_path_hand, mode) + batch_head_imgs = read_in_imgs(batch_path_head, mode) + + batch_hand_imgs = whitening_image(batch_hand_imgs, mode) + batch_head_imgs = whitening_image(batch_head_imgs, mode) + + batch_label_fa = [ele[2] for ele in batch_data] + batch_label_ges = [ele[3] for ele in batch_data] + batch_label_obj = [ele[4] for ele in batch_data] + + return batch_hand_imgs, batch_head_imgs, batch_label_fa, batch_label_ges, batch_label_obj + + def generate_data_batch_for_vali(self, data_list, batch_size): + ''' + [For queue loading NOT used] + This function helps generate a batch of validation data, and horizontally flip and whiten them at the same time. + Find (batch_size/24) samples per class for validation. + :param train_data: 4D numpy array + :param train_labels: 1D numpy array + :param batch_size: int + :param mode: string. Indicate the data_list is for train, valid or test + :return: augmented train batch data and labels. 
4D numpy array and 1D numpy array + ''' + assert batch_size % 24 == 0 + + batch_data = [] + + one_data = data_list[0] + batch_data.append(one_data) + + j = 1 + while (1): + one_data = data_list[j] + single_label = one_data[4] + batch_label_obj = [ele[4] for ele in batch_data] + if batch_label_obj.count(single_label) < batch_size/24: + batch_data.append(one_data) + if len(batch_data) == batch_size: + break + j += 1 + + batch_path_hand = [ele[0] for ele in batch_data] + batch_path_head = [ele[1] for ele in batch_data] + mode = 'valid' + batch_hand_imgs = read_in_imgs(batch_path_hand, mode) + batch_head_imgs = read_in_imgs(batch_path_head, mode) + + batch_hand_imgs = whitening_image(batch_hand_imgs, mode) + batch_head_imgs = whitening_image(batch_head_imgs, mode) + + batch_label_fa = [ele[2] for ele in batch_data] + batch_label_ges = [ele[3] for ele in batch_data] + batch_label_obj = [ele[4] for ele in batch_data] + + return batch_hand_imgs, batch_head_imgs, batch_label_fa, batch_label_ges, batch_label_obj + + + def train_operation(self, global_step, total_loss, top1_error): + ''' + Defines train operations + :param global_step: tensor variable with shape [1] + :param total_loss: tensor with shape [1] + :param top1_error: tensor with shape [1] + :return: two operations. Running train_op will do optimization once. Running train_ema_op + will generate the moving average of train error and train loss for tensorboard + ''' + # Add train_loss, current learning rate and train error into the tensorboard summary ops + tf.summary.scalar('learning_rate', self.lr_placeholder) + tf.summary.scalar('train_loss', total_loss) + tf.summary.scalar('train_top1_error', top1_error) + + # The ema object help calculate the moving average of train loss and train error + ema = tf.train.ExponentialMovingAverage(FLAGS.train_ema_decay, global_step) + train_ema_op = ema.apply([total_loss, top1_error]) + tf.summary.scalar('train_top1_error_avg', ema.average(top1_error)) + tf.summary.scalar('train_loss_avg', ema.average(total_loss)) + + # opt = tf.train.MomentumOptimizer(learning_rate=self.lr_placeholder, momentum=0.9) + opt = tf.train.AdamOptimizer(learning_rate=self.lr_placeholder) + train_op = opt.minimize(total_loss, global_step=global_step) + return train_op, train_ema_op + + + def validation_op(self, validation_step, top1_error, loss): + ''' + Defines validation operations + :param validation_step: tensor with shape [1] + :param top1_error: tensor with shape [1] + :param loss: tensor with shape [1] + :return: validation operation + ''' + + # This ema object help calculate the moving average of validation loss and error + + # ema with decay = 0.0 won't average things at all. This returns the original error + ema = tf.train.ExponentialMovingAverage(0.0, validation_step) + ema2 = tf.train.ExponentialMovingAverage(0.95, validation_step) + + + val_op = tf.group(validation_step.assign_add(1), ema.apply([top1_error, loss]), + ema2.apply([top1_error, loss])) + top1_error_val = ema.average(top1_error) + top1_error_avg = ema2.average(top1_error) + loss_val = ema.average(loss) + loss_val_avg = ema2.average(loss) + + # Summarize these values on tensorboard + tf.summary.scalar('val_top1_error', top1_error_val) + tf.summary.scalar('val_top1_error_avg', top1_error_avg) + tf.summary.scalar('val_loss', loss_val) + tf.summary.scalar('val_loss_avg', loss_val_avg) + return val_op + + def testing(self): + # Read testing data + BUFFER_SIZE = 100 + test_data_list = read_path_and_label('test') + print 'Prepare the testing batch data...' 
+ print '----------------------------' + + NUMBER_OF_BUFFER = len(test_data_list) / BUFFER_SIZE + REMINDER = len(test_data_list) % BUFFER_SIZE + + prediction_array = np.array([]).reshape(-1, NUM_OBJ_CLASS) + label_array = np.array([]).reshape(-1) + + reuse = False + cnt = 0.0 + for i in xrange(NUMBER_OF_BUFFER+1): + if i == NUMBER_OF_BUFFER: + if REMINDER == 0: + break + else: + offset = NUMBER_OF_BUFFER * BUFFER_SIZE + batch_size = REMINDER + else: + offset = i * BUFFER_SIZE + batch_size = BUFFER_SIZE + + if i>0: + reuse = True + + print 'current step:%d, total step:%d'%(i, NUMBER_OF_BUFFER) + + test_batch_hand, _, _, _, test_batch_label_obj = \ + self.generate_data_batch(test_data_list, batch_size, 'test', offset) + + print 'batch is ready' + # Start the testing session + prob_map = self.test(test_batch_hand, reuse) + prediction = np.argmax(prob_map, axis=1) + for pred, label in zip(prediction, test_batch_label_obj): + if int(pred)==int(label): + cnt = cnt + 1.0 + print float(cnt) / float(BUFFER_SIZE) / float(i+1) + prediction_array = np.concatenate((prediction_array, batch_prediction_array)) + label_array = np.concatenate((label_array, test_batch_label_obj)) + + np.save('map', prediction_array) + np.save('label', label_array) + + accuracy = float(cnt) / float(len(test_data_list)) + return accuracy + + def parallel_testing(self): + ''' + This function is used to evaluate the test data. + ''' + + # Read testing data + test_data_list = read_path_and_label('test') + print 'Prepare the testing batch data...' + print '----------------------------' + + NUMBER_OF_BATCH = len(test_data_list) / FLAGS.test_batch_size + REMINDER = len(test_data_list) % FLAGS.test_batch_size + print '%i test batches in total...' % NUMBER_OF_BATCH + + # Create the test image and labels placeholders + self.test_image_placeholder = tf.placeholder(dtype=tf.float32, shape=[FLAGS.test_batch_size, + IMG_TEST_HEIGHT, IMG_TEST_WIDTH, IMG_DEPTH]) + # Build the test graph + _, logits_obj = inference(self.test_image_placeholder, FLAGS.num_resnext_blocks, reuse=False) + predictions = tf.nn.softmax(logits_obj) + + # Initialize a new session and restore a checkpoint + saver = tf.train.Saver(tf.global_variables()) + config = tf.ConfigProto() + config.gpu_options.allow_growth = True + sess = tf.Session(config=config) + + saver.restore(sess, FLAGS.test_ckpt_path) + print 'Model restored from ', FLAGS.test_ckpt_path + + # Define the procedure of tfRecord data -> tensor data + tfrecords_filename = 'testing_data.tfrecords' + filename_queue = tf.train.string_input_producer([tfrecords_filename], num_epochs=None) + # Define the procedure of getting a batch of data + # (Even when reading in multiple threads, share the filename queue.) + images_hand, images_head, labels_fa, labels_ges, labels_obj = self.read_and_decode(filename_queue, 'test') + + coord = tf.train.Coordinator() + threads = tf.train.start_queue_runners(sess=sess, coord=coord) + + prediction_array = np.array([]).reshape(-1, NUM_OBJ_CLASS) + label_array = np.array([]).reshape(-1) + + # Test by batches + for step in range(NUMBER_OF_BATCH): + if step % 10 == 0: + print '%i batches finished!' 
%step + test_batch_hand, test_batch_label_obj = sess.run([images_hand, labels_obj]) + test_batch_label_obj = np.reshape(test_batch_label_obj, [FLAGS.test_batch_size]) + + batch_prediction_array = sess.run(predictions, + feed_dict={self.test_image_placeholder: test_batch_hand}) + + prediction_array = np.concatenate((prediction_array, batch_prediction_array)) + label_array = np.concatenate((label_array, test_batch_label_obj)) + + coord.request_stop() + coord.join(threads) + + np.save('map', prediction_array) + np.save('label', label_array) + + prediction_array = np.argmax(prediction_array, axis=1) + cnt = 0.0 + for pred, label in zip(prediction_array, label_array): + if int(pred)==int(label): + cnt = cnt + 1.0 + accuracy = float(cnt) / float(len(prediction_array)) + return accuracy + +# Initialize the Train object +train = Train() + +print 'MODE: ' + FLAGS.mode +'ing' + +if FLAGS.mode == 'test': + # print train.parallel_testing() # if testing tfRecord is avalable + print train.testing() # if is not avalable +else: + # Start the training session + train.train() + diff --git a/plotPRcurve.py b/plotPRcurve.py new file mode 100644 index 0000000..95bc007 --- /dev/null +++ b/plotPRcurve.py @@ -0,0 +1,76 @@ +from sklearn.metrics import precision_recall_curve +from sklearn.metrics import average_precision_score +from itertools import cycle +import matplotlib +matplotlib.rcParams['backend'] = "Qt4Agg" # to enable the plt.show() +import matplotlib.pyplot as plt +import numpy as np + + +n_classes = 24 + +x = np.load('map.npy') +y_tmp = np.load('label.npy') +y = np.zeros(x.shape, dtype=float) + +# convert to one-hot vector +for i in range(y.shape[0]): + idx = y_tmp[i] + idx = int(idx) + y[i, idx] = 1.0 + +# For each class +precision = dict() +recall = dict() +average_precision = dict() +for i in range(n_classes): + precision[i], recall[i], _ = precision_recall_curve(y[:, i], + x[:, i]) + average_precision[i] = average_precision_score(y[:, i], x[:, i]) + +# A "micro-average": quantifying score on all classes jointly +precision["micro"], recall["micro"], _ = precision_recall_curve(y.ravel(), + x.ravel()) +average_precision["micro"] = average_precision_score(y, x, + average="micro") +print('Average precision score, micro-averaged over all classes: {0:0.2f}' + .format(average_precision["micro"])) + + +# setup plot details +colors = cycle(['navy', 'turquoise', 'darkorange', 'cornflowerblue', 'teal']) + +plt.figure(figsize=(8, 7)) +f_scores = np.linspace(0.2, 0.8, num=4) +lines = [] +labels = [] +for f_score in f_scores: + x = np.linspace(0.01, 1) + y = f_score * x / (2 * x - f_score) + l, = plt.plot(x[y >= 0], y[y >= 0], color='gray', alpha=0.2) + plt.annotate('f1={0:0.1f}'.format(f_score), xy=(0.9, y[45] + 0.02)) + +lines.append(l) +labels.append('iso-f1 curves') +l, = plt.plot(recall["micro"], precision["micro"], color='gold', lw=2) +lines.append(l) +labels.append('micro-average Precision-recall (area = {0:0.2f})' + ''.format(average_precision["micro"])) + +for i, color in zip(range(n_classes), colors): + l, = plt.plot(recall[i], precision[i], color=color, lw=2) + lines.append(l) + labels.append('Precision-recall for class {0} (area = {1:0.2f})' + ''.format(i, average_precision[i])) + +fig = plt.gcf() +fig.subplots_adjust(bottom=0.25) +plt.xlim([0.0, 1.0]) +plt.ylim([0.0, 1.05]) +plt.xlabel('Recall') +plt.ylabel('Precision') +plt.title('Extension of Precision-Recall curve to multi-class') +plt.legend(lines, labels, loc=(1.0, 0.3), prop=dict(size=7)) + + +plt.show() \ No newline at end of file diff --git 
a/resNeXt.py b/resNeXt.py new file mode 100644 index 0000000..fe14c48 --- /dev/null +++ b/resNeXt.py @@ -0,0 +1,316 @@ +# Coder: Wenxin Xu +# Source paper: https://arxiv.org/abs/1611.05431 +# Github: https://github.com/wenxinxu/ResNeXt-in-tensorflow +# ============================================================================== +''' +This is main body of the ResNext structure +''' + +import numpy as np +from hyper_parameters import * +from data_input import IMG_WIDTH, IMG_HEIGHT, NUM_FA_CLASS, NUM_GES_CLASS, NUM_OBJ_CLASS + +BN_EPSILON = 0.001 + +def activation_summary(x): + ''' + Add histogram and sparsity summaries of a tensor to tensorboard + :param x: A Tensor + :return: Add histogram summary and scalar summary of the sparsity of the tensor + ''' + tensor_name = x.op.name + tf.summary.histogram(tensor_name + '/activations', x) + tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x)) + + +def create_variables(name, shape, initializer=tf.contrib.layers.xavier_initializer(), is_fc_layer=False): + ''' + Create a variable with tf.get_variable() + :param name: A string. The name of the new variable + :param shape: A list of dimensions + :param initializer: User Xavier as default. + :param is_fc_layer: Want to create fc layer variable? May use different weight_decay for fc + layers. + :return: The created variable + ''' + + ## TODO: to allow different weight decay to fully connected layer and conv layer + if is_fc_layer is True: + regularizer = tf.contrib.layers.l2_regularizer(scale=FLAGS.weight_decay) + else: + regularizer = tf.contrib.layers.l2_regularizer(scale=FLAGS.weight_decay) + + new_variables = tf.get_variable(name=name, shape=shape, initializer=initializer, + regularizer=regularizer) + return new_variables + + +def fc_layer(input_layer, num_labels, NAME): + ''' + Generate the output layer + :param input_layer: 2D tensor + :param num_labels: int. How many output labels in total? (10 for cifar10 and 100 for cifar100) + :return: output layer Y = WX + B + ''' + input_dim = input_layer.get_shape().as_list()[-1] + fc_w = create_variables(name=NAME + '_weights', shape=[input_dim, num_labels], is_fc_layer=True, + initializer=tf.uniform_unit_scaling_initializer(factor=1.0)) + fc_b = create_variables(name=NAME + '_bias', shape=[num_labels], initializer=tf.zeros_initializer()) + + fc_h = tf.matmul(input_layer, fc_w) + fc_b + return fc_h + + +def batch_normalization_layer(input_layer, dimension): + ''' + Helper function to do batch normalziation + :param input_layer: 4D tensor + :param dimension: input_layer.get_shape().as_list()[-1]. The depth of the 4D tensor + :return: the 4D tensor after being normalized + ''' + mean, variance = tf.nn.moments(input_layer, axes=[0, 1, 2]) + beta = tf.get_variable('beta', dimension, tf.float32, + initializer=tf.constant_initializer(0.0, tf.float32)) + gamma = tf.get_variable('gamma', dimension, tf.float32, + initializer=tf.constant_initializer(1.0, tf.float32)) + bn_layer = tf.nn.batch_normalization(input_layer, mean, variance, beta, gamma, BN_EPSILON) + + return bn_layer + + +def conv_bn_relu_layer(input_layer, filter_shape, stride, relu=True): + ''' + A helper function to conv, batch normalize and relu the input tensor sequentially + :param input_layer: 4D tensor + :param filter_shape: list. [filter_height, filter_width, filter_depth, filter_number] + :param stride: stride size for conv + :param relu: boolean. Relu after BN? + :return: 4D tensor. 
Y = Relu(batch_normalize(conv(X))) + ''' + + out_channel = filter_shape[-1] + filter = create_variables(name='conv', shape=filter_shape) + + conv_layer = tf.nn.conv2d(input_layer, filter, strides=[1, stride, stride, 1], padding='SAME') + bn_layer = batch_normalization_layer(conv_layer, out_channel) + + if relu is True: + output = tf.nn.relu(bn_layer) + else: + output = bn_layer + return output + + +def split(input_layer, stride): + ''' + The split structure in Figure 3b of the paper. It takes an input tensor. Conv it by [1, 1, + 64] filter, and then conv the result by [3, 3, 64]. Return the + final resulted tensor, which is in shape of [batch_size, input_height, input_width, 64] + + :param input_layer: 4D tensor in shape of [batch_size, input_height, input_width, + input_channel] + :param stride: int. 1 or 2. If want to shrink the image size, then stride = 2 + :return: 4D tensor in shape of [batch_size, input_height, input_width, input_channel/64] + ''' + + input_channel = input_layer.get_shape().as_list()[-1] + num_filter = FLAGS.block_unit_depth + # according to Figure 7, they used 64 as # filters for all cifar10 task + + with tf.variable_scope('bneck_reduce_size'): + conv = conv_bn_relu_layer(input_layer, filter_shape=[1, 1, input_channel, num_filter], + stride=stride) + with tf.variable_scope('bneck_conv'): + conv = conv_bn_relu_layer(conv, filter_shape=[3, 3, num_filter, num_filter], stride=1) + + return conv + + +def bottleneck_b(input_layer, stride): + ''' + The bottleneck strucutre in Figure 3b. Concatenates all the splits + :param input_layer: 4D tensor in shape of [batch_size, input_height, input_width, + input_channel] + :param stride: int. 1 or 2. If want to shrink the image size, then stride = 2 + :return: 4D tensor in shape of [batch_size, output_height, output_width, output_channel] + ''' + split_list = [] + for i in range(FLAGS.cardinality): + with tf.variable_scope('split_%i'%i): + splits = split(input_layer=input_layer, stride=stride) + split_list.append(splits) + + # Concatenate splits and check the dimension + concat_bottleneck = tf.concat(values=split_list, axis=3, name='concat') + + return concat_bottleneck + + +def bottleneck_c1(input_layer, stride): + ''' + The bottleneck strucutre in Figure 3c. Grouped convolutions + :param input_layer: 4D tensor in shape of [batch_size, input_height, input_width, + input_channel] + :param stride: int. 1 or 2. If want to shrink the image size, then stride = 2 + :return: 4D tensor in shape of [batch_size, output_height, output_width, output_channel] + ''' + input_channel = input_layer.get_shape().as_list()[-1] + bottleneck_depth = FLAGS.block_unit_depth + with tf.variable_scope('bottleneck_c_l1'): + l1 = conv_bn_relu_layer(input_layer=input_layer, + filter_shape=[1, 1, input_channel, bottleneck_depth], + stride=stride) + with tf.variable_scope('group_conv'): + filter = create_variables(name='depthwise_filter', shape=[3, 3, bottleneck_depth, FLAGS.cardinality]) + l2 = tf.nn.depthwise_conv2d(input=l1, + filter=filter, + strides=[1, 1, 1, 1], + padding='SAME') + return l2 + + +def bottleneck_c(input_layer, stride): + ''' + The bottleneck strucutre in Figure 3c. Grouped convolutions + :param input_layer: 4D tensor in shape of [batch_size, input_height, input_width, + input_channel] + :param stride: int. 1 or 2. 
If want to shrink the image size, then stride = 2 + :return: 4D tensor in shape of [batch_size, output_height, output_width, output_channel] + ''' + input_channel = input_layer.get_shape().as_list()[-1] + bottleneck_depth = FLAGS.block_unit_depth * FLAGS.cardinality + with tf.variable_scope('bottleneck_c_l1'): + l1 = conv_bn_relu_layer(input_layer=input_layer, + filter_shape=[1, 1, input_channel, bottleneck_depth], + stride=stride) + with tf.variable_scope('group_conv'): + filter = create_variables(name='depthwise_filter', shape=[3, 3, bottleneck_depth, FLAGS.cardinality]) + l2 = conv_bn_relu_layer(input_layer=l1, + filter_shape=[3, 3, bottleneck_depth, bottleneck_depth], + stride=1) + return l2 + + +def resnext_block(input_layer, output_channel): + ''' + The block structure in Figure 3b. Takes a 4D tensor as input layer and splits, concatenates + the tensor and restores the depth. Finally adds the identity and ReLu. + :param input_layer: 4D tensor in shape of [batch_size, input_height, input_width, + input_channel] + :param output_channel: int, the number of channels of the output + :return: 4D tensor in shape of [batch_size, output_height, output_width, output_channel] + ''' + input_channel = input_layer.get_shape().as_list()[-1] + + # When it's time to "shrink" the image size, we use stride = 2 + if input_channel * 2 == output_channel: + increase_dim = True + stride = 2 + elif input_channel == output_channel: + increase_dim = False + stride = 1 + else: + raise ValueError('Output and input channel does not match in residual blocks!!!') + + if FLAGS.bottleneck_implementation == 'b': + concat_bottleneck = bottleneck_b(input_layer, stride) + else: + assert FLAGS.bottleneck_implementation == 'c' + concat_bottleneck = bottleneck_c(input_layer, stride) + + bottleneck_depth = concat_bottleneck.get_shape().as_list()[-1] + assert bottleneck_depth == FLAGS.block_unit_depth * FLAGS.cardinality + + # Restore the dimension. Without relu here + restore = conv_bn_relu_layer(input_layer=concat_bottleneck, + filter_shape=[1, 1, bottleneck_depth, output_channel], + stride=1, relu=False) + + # When the channels of input layer and conv2 does not match, we add zero pads to increase the + # depth of input layers + if increase_dim is True: + pooled_input = tf.nn.avg_pool(input_layer, ksize=[1, 2, 2, 1], + strides=[1, 2, 2, 1], padding='VALID') + padded_input = tf.pad(pooled_input, [[0, 0], [0, 0], [0, 0], [input_channel // 2, + input_channel // 2]]) + else: + padded_input = input_layer + + # According to section 4 of the paper, relu is played after adding the identity. + output = tf.nn.relu(restore + padded_input) + + return output + + +def inference(input_tensor_batch, n, reuse): + ''' + The main function that defines the ResNeXt. total layers = 1 + 3n + 3n + 3n +1 = 9n + 2 + :param input_tensor_batch: 4D tensor + :param n: num_resnext_blocks. The paper used n=3, 29 layers as demo + :param reuse: To build train graph, reuse=False. To build validation graph and share weights + with train graph, resue=True + :return: last layer in the network. 
Not softmax-ed + ''' + layers = [] + with tf.variable_scope('conv0', reuse=reuse): + conv0 = conv_bn_relu_layer(input_tensor_batch, [3, 3, 3, 64], 1) + activation_summary(conv0) + layers.append(conv0) + + for i in range(n): + with tf.variable_scope('conv1_%d' %i, reuse=reuse): + conv1 = resnext_block(layers[-1], 64) + activation_summary(conv1) + layers.append(conv1) + + for i in range(n): + with tf.variable_scope('conv2_%d' %i, reuse=reuse): + conv2 = resnext_block(layers[-1], 128) + activation_summary(conv2) + layers.append(conv2) + + for i in range(n): + with tf.variable_scope('conv3_%d' %i, reuse=reuse): + conv3 = resnext_block(layers[-1], 256) + layers.append(conv3) + # Insure that the shape is as expected + # assert conv3.get_shape().as_list()[1:] == [8, 8, 256] + # assert conv3.get_shape().as_list()[1:] == [IMG_HEIGHT/4, IMG_WIDTH/4, 256] + + with tf.variable_scope('fc', reuse=reuse): + ''' + 256 -> FLAGS.num_fc_units -> NUM_GES_CLASS + -> FLAGS.num_fc_units -> NUM_OBJ_CLASS + ''' + global_pool = tf.reduce_mean(layers[-1], [1, 2]) + assert global_pool.get_shape().as_list()[-1:] == [256] + + fc_obj_layer = fc_layer(global_pool, FLAGS.num_fc_units, NAME='fc1_obj') + fc_obj_layer = tf.nn.relu(fc_obj_layer) + layers.append(fc_obj_layer) + output_obj = fc_layer(fc_obj_layer, NUM_OBJ_CLASS, NAME='fc2_obj') + + fc_ges_layer = fc_layer(global_pool, FLAGS.num_fc_units, NAME='fc1_ges') + fc_ges_layer = tf.nn.relu(fc_ges_layer) + layers.append(fc_ges_layer) + output_ges = fc_layer(fc_ges_layer, NUM_GES_CLASS, NAME='fc2_ges') + + layers.append(output_obj) + layers.append(output_ges) + + return layers[-1], layers[-2] + + +def test_graph(train_dir='logs'): + ''' + Run this function to look at the graph structure on tensorboard. A fast way! + :param train_dir: + ''' + input_tensor = tf.constant(np.ones([128, 32, 32, 3]), dtype=tf.float32) + result = inference(input_tensor, FLAGS.num_resnext_blocks, reuse=False) + init = tf.initialize_all_variables() + sess = tf.Session() + sess.run(init) + summary_writer = tf.train.SummaryWriter(train_dir, sess.graph) + +# test_graph() \ No newline at end of file diff --git a/results/arch.png b/results/arch.png new file mode 100644 index 0000000..fb3764b Binary files /dev/null and b/results/arch.png differ diff --git a/results/figure_1-1.png b/results/figure_1-1.png new file mode 100644 index 0000000..7a5d46c Binary files /dev/null and b/results/figure_1-1.png differ diff --git a/results/figure_1-2.png b/results/figure_1-2.png new file mode 100644 index 0000000..81c28b3 Binary files /dev/null and b/results/figure_1-2.png differ diff --git a/results/index.md b/results/index.md index 96ce61c..5df0775 100644 --- a/results/index.md +++ b/results/index.md @@ -1,47 +1,74 @@ -# Your Name (id) - -#Project 5: Deep Classification +# CEDL2017 HW1 Report: Deep Classification (105061585) ## Overview -The project is related to -> quote - +The project is related to classification using Tensorflow and modified from the code of [wenxinxu/ResNeXt-in-tensorflow](https://github.com/wenxinxu/ResNeXt-in-tensorflow) +- `main.py`: Do training or testing. +- `data_input.py`: Processing data io. +- `hyper_parameters.py`: Set up the hyper parameters. +- `resNeXt.py`: Define the [ResNext](https://arxiv.org/pdf/1611.05431.pdf) model. +- `plotPRcurve.py`: Plot the precision and recall curve. ## Implementation -1. One - * item - * item -2. Two +### Model Architectures +This project use the [ResNext](https://arxiv.org/pdf/1611.05431.pdf) as the backbone model. 
+Fully connected layers are appended on top for each classification task. The input is the hand image only.
+
+![](./arch.png)
+The full model is trained from scratch.
+### Loss
+The model jointly minimizes a weighted sum of the cross-entropy losses of the two tasks.
+Either the FA or the gesture label is chosen to act as the auxiliary loss. A small numeric sketch is given after the installation steps below.
 ```
-Code highlights
+loss = k * loss_fa_or_ges + (1-k) * loss_obj
 ```
+### Other details
+* Random left-right flip of the image with 50% probability during training
+* Random crop to size W*H
+* Image whitening: linearly scales each image to have zero mean and unit norm.
+
 ## Installation
-* Other required packages.
-* How to compile from source?
+* [pandas](http://pandas.pydata.org/)
+* [scikit-image](http://scikit-image.org/docs/dev/install.html)

-### Results

+1. Download this project code.
+2. Download the [dataset](https://drive.google.com/drive/folders/0BwCy2boZhfdBdXdFWnEtNWJYRzQ) (`frames/` and `labels/`) and place the two folders under this project's `dataset/` folder.
+3. Convert the training data to tfRecord format to speed up training (takes about 46 GB of disk space): `python data_input.py`
+4. To train from scratch: `python main.py --mode=train --version='model_1'`. The training logs, checkpoints, and error .csv file are saved in a folder named `logs_$version`.
+5. To continue training from a checkpoint:
+* set the **is_use_ckpt** flag in `hyper_parameters.py` to **True**
+* set the **ckpt_path** flag in `hyper_parameters.py` to your checkpoint path (ex: 'logs_oh,mfc_c=2_d=64_n=2_lr=0.1_lrd=0.0004_wd=0.0007_k=0.5/model.ckpt-39800')
+* then run `python main.py --mode=train`
+6. To test:
+* set the **test_ckpt_path** flag in `hyper_parameters.py` to your checkpoint path (ex: 'logs_onlyhand_c=3_b=15/model.ckpt-39999')
+* then run `python main.py --mode=test`
+7. To plot the PR curve: after testing finishes, run `python plotPRcurve.py`
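A small numeric sketch of the weighted two-task loss from the Loss section above (illustrative only: the `cross_entropy` helper and the toy numbers are invented here; the actual code in `main.py` uses `tf.nn.sparse_softmax_cross_entropy_with_logits` on logits):

```python
import numpy as np

def cross_entropy(probs, labels):
    # mean negative log-probability of the true class
    return -np.mean(np.log(probs[np.arange(len(labels)), labels] + 1e-12))

# toy batch of 2 samples: 13 gesture classes, 24 object classes (uniform probabilities)
probs_ges = np.full((2, 13), 1.0 / 13)
probs_obj = np.full((2, 24), 1.0 / 24)
labels_ges = np.array([0, 5])
labels_obj = np.array([3, 20])

k = 0.4  # corresponds to FLAGS.k in hyper_parameters.py
loss = k * cross_entropy(probs_ges, labels_ges) + (1 - k) * cross_entropy(probs_obj, labels_obj)
print('combined loss = %.4f' % loss)  # ~ 0.4*log(13) + 0.6*log(24), about 2.93
```

A larger `k` weights the auxiliary (FA or gesture) loss more heavily; the runs reported below use k between 0.4 and 0.5.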
- - - - -
- - - - -
+| Label | Hyper-para | Accuracy | +|-------|----------|----------| +|obj + FA| `W=192_H=108_m=128_c=4_d=32_n=3_lr=0.05_lrd=0.004_wd=0.0007_k=0.5`| 0.512 | +|obj + FA| `W=192_H=108_m=128_c=2_d=64_n=2_lr=0.05_lrd=0.0004_wd=0.0007_k=0.5`| 0.529 | +|obj + ges| `W=192_H=108_m=128_c=4_d=32_n=3_lr=0.001_lrd=1_wd=0.0005_k=0.5`| 0.548 | +|obj + ges| `W=224_H=224_m=256_c=3_d=32_n=3_lr=0.001_lrd=1_wd=0.0005_k=0.4`| 0.562 | +obj + ges W=192_H=108_m_128_c=4_d=32_n=3_lr=0.001_lrd=1_wd=0.0005_k=0.5: +![](./figure_1-1.png) +obj + ges W=224_H=224_m=256_c=3_d=32_n=3_lr=0.001_lrd=1_wd=0.0005_k=0.4: +![](./figure_1-2.png)