Changes from all commits
58 commits
03b9f20
Add files via upload
x7177214 Sep 28, 2017
6fb9f5e
Add files via upload
x7177214 Oct 1, 2017
21db5d0
Add files via upload
x7177214 Oct 12, 2017
e9692d4
Update index.md
x7177214 Oct 12, 2017
a9e36ef
Update index.md
x7177214 Oct 12, 2017
b868f11
Update index.md
x7177214 Oct 12, 2017
5464758
Update index.md
x7177214 Oct 12, 2017
890abb1
Add files via upload
x7177214 Oct 12, 2017
7db8210
Update index.md
x7177214 Oct 12, 2017
95ecb74
Update index.md
x7177214 Oct 12, 2017
dcdb7a1
Add files via upload
x7177214 Oct 12, 2017
479f6b6
Update index.md
x7177214 Oct 12, 2017
cd94eb5
Update index.md
x7177214 Oct 12, 2017
e0531c3
Add files via upload
x7177214 Oct 12, 2017
e18ebdc
Add files via upload
x7177214 Oct 12, 2017
670fbe7
Update index.md
x7177214 Oct 12, 2017
7f9ebb8
Update index.md
x7177214 Oct 12, 2017
fcb27bd
Update index.md
x7177214 Oct 12, 2017
67fd4c3
Update index.md
x7177214 Oct 12, 2017
0b49972
Update index.md
x7177214 Oct 12, 2017
9d436a1
Update index.md
x7177214 Oct 12, 2017
91c3d59
Update index.md
x7177214 Oct 12, 2017
2a477f4
Delete cifar10_train_tfRe.py
x7177214 Oct 12, 2017
2e4487f
Delete cifar10_input_tfRe.py
x7177214 Oct 12, 2017
84c6219
Delete cifar10_input.py
x7177214 Oct 12, 2017
e475bba
Delete cifar10_train.py
x7177214 Oct 12, 2017
f22da92
Add files via upload
x7177214 Oct 12, 2017
342f96e
Create placeholder
x7177214 Oct 12, 2017
c0ff9cc
Update index.md
x7177214 Oct 12, 2017
8434fc2
Update index.md
x7177214 Oct 12, 2017
3fdea4e
Update index.md
x7177214 Oct 12, 2017
2d4fa64
Update index.md
x7177214 Oct 12, 2017
99b4c2f
Update index.md
x7177214 Oct 12, 2017
2f25a79
Update index.md
x7177214 Oct 12, 2017
d861ec3
Add files via upload
x7177214 Oct 12, 2017
da9adef
Update index.md
x7177214 Oct 12, 2017
59fe77e
Update index.md
x7177214 Oct 12, 2017
f4c4e73
Update index.md
x7177214 Oct 12, 2017
830f0e0
Add files via upload
x7177214 Oct 13, 2017
6b34c70
Add files via upload
x7177214 Oct 13, 2017
be51631
Add files via upload
x7177214 Oct 13, 2017
5b771be
Update index.md
x7177214 Oct 13, 2017
222fbde
Update index.md
x7177214 Oct 13, 2017
d5f868b
Update index.md
x7177214 Oct 13, 2017
bd8dffb
Update index.md
x7177214 Oct 13, 2017
e645894
Update index.md
x7177214 Oct 13, 2017
1b01665
Update index.md
x7177214 Oct 13, 2017
e94bdaf
Update index.md
x7177214 Oct 13, 2017
25703e4
Add files via upload
x7177214 Oct 13, 2017
42feb33
Add files via upload
x7177214 Oct 13, 2017
4a731d6
Delete figure_1-.png
x7177214 Oct 13, 2017
7ed8a7c
Add files via upload
x7177214 Oct 13, 2017
87ba931
Add files via upload
x7177214 Oct 13, 2017
002875c
Delete figure_1-.png
x7177214 Oct 13, 2017
829f335
Add files via upload
x7177214 Oct 13, 2017
a1cb9b5
Delete figure_1-.png
x7177214 Oct 13, 2017
8352e1b
Add files via upload
x7177214 Oct 13, 2017
c361434
Update index.md
x7177214 Oct 13, 2017
214 changes: 214 additions & 0 deletions data_input.py
@@ -0,0 +1,214 @@
# Coder: Wenxin Xu
# Github: https://github.com/wenxinxu/ResNeXt-in-tensorflow
# ==============================================================================
import tarfile
from six.moves import urllib
import sys
import numpy as np
import cPickle
import os
from os import listdir
from os.path import isfile, join
import random
import skimage.io as io
import skimage.transform
import tensorflow as tf

IMG_RAW_WIDTH = 1920
IMG_RAW_HEIGHT = 1080

IMG_TMP_WIDTH = IMG_RAW_WIDTH // 2  # temporary size saved in the tfrecord
IMG_TMP_HEIGHT = IMG_RAW_HEIGHT // 2

IMG_TEST_WIDTH = 224
IMG_TEST_HEIGHT = 224

IMG_WIDTH = 224
IMG_HEIGHT = 224

IMG_DEPTH = 3

NUM_FA_CLASS = 2
NUM_GES_CLASS = 13
NUM_OBJ_CLASS = 24

TRAIN_EPOCH_SIZE = 14992
TEST_EPOCH_SIZE = 12776

def whitening_image(image_np, mode='test'):
    '''
    Performs per-image whitening.
    :param image_np: a 4D numpy array representing a batch of images
    :param mode: 'test' or 'train'; selects the image size used for the adjusted std
    :return: the whitened image numpy array
    '''
for i in range(len(image_np)):
mean = np.mean(image_np[i, ...])
# Use adjusted standard deviation here, in case the std == 0.
        if mode == 'test':
std = np.max([np.std(image_np[i, ...]), 1.0/np.sqrt(IMG_TEST_HEIGHT * IMG_TEST_WIDTH * IMG_DEPTH)])
else:
std = np.max([np.std(image_np[i, ...]), 1.0/np.sqrt(IMG_HEIGHT * IMG_WIDTH * IMG_DEPTH)])
image_np[i,...] = (image_np[i, ...] - mean) / std
return image_np

def read_path_and_label(train_or_test_folder):
'''
    input: 'train' or 'test'. Specifies which folder to read from.
output: (string, string, float, float, float)
[(hand_path, head_path, FA_label, ges_label, obj_label),
(hand_path, head_path, FA_label, ges_label, obj_label),
...
(hand_path, head_path, FA_label, ges_label, obj_label)]
'''
    def find_num_files(location, cur_folder_idx):
        '''
        location: 'house', 'lab', 'office'
        cur_folder_idx: current folder index, as a string
        The train/test split is taken from root_path in the enclosing scope.
        '''

current_path = root_path + '/' + location + '/' + cur_folder_idx + '/Lhand/'
num_files = len([f for f in listdir(current_path) if isfile(join(current_path, f))])

return num_files

def read_labels(location, cur_folder_idx, left_or_right, offset):
'''
location: 'house', 'lab', 'office'
cur_folder_idx: current folder index
left_or_right: left or right hand
        offset: index offset added to cur_folder_idx to select the label file (0 for train, num_folders for test)
'''

# root_path = '/Disk2/cedl/handcam/labels' # @ AI
root_path = './dataset/labels' # @ my PC

current_path = root_path + '/' + location + '/'
post_fix = left_or_right + str(offset + cur_folder_idx) + '.npy'

label_fa = np.load(current_path + 'FA_' + post_fix)
label_ges = np.load(current_path + 'ges_' + post_fix)
label_obj = np.load(current_path + 'obj_' + post_fix)

return label_fa, label_ges, label_obj


location_list = ['house', 'lab', 'office']
num_folders_per_location = [3, 4, 3]
hand_list = [('Lhand', 'left'), ('Rhand', 'right')]

imgs_hand_path_list = []
imgs_head_path_list = []
labels_fa = []
labels_ges = []
labels_obj = []


# root_path = '/Disk2/cedl/handcam/frames/' + train_or_test_folder # @ AI
root_path = './dataset/frames/' + train_or_test_folder # @ my PC

for location, num_folders in zip(location_list, num_folders_per_location):
for i in xrange(num_folders):
num_files = find_num_files(location, str(i+1))
for which_hand, L_or_R in hand_list:
for j in xrange(num_files):
# hand
current_path = root_path + '/' + location + '/' + str(i+1) + '/' + which_hand + '/'
imgs_hand_path_list.extend([current_path + 'Image' + str(j+1) + '.png'])
# head
current_path = root_path + '/' + location + '/' + str(i+1) + '/head/'
imgs_head_path_list.extend([current_path + 'Image' + str(j+1) + '.png'])
# Labels
# offset: label file idx.
# 0 for training data; num_folders_per_location for testing data
                if train_or_test_folder == 'train':
                    offset = 0
                elif train_or_test_folder == 'test':
                    offset = num_folders
label_fa, label_ges, label_obj = read_labels(location, i+1, L_or_R, offset)
labels_fa.extend(label_fa)
labels_ges.extend(label_ges)
labels_obj.extend(label_obj)

example = zip(imgs_hand_path_list, imgs_head_path_list, labels_fa, labels_ges, labels_obj)
example = random.sample(example, len(example)) # shuffle the list

return example

def horizontal_flip(image, axis):
    # Assumed minimal helper (not defined elsewhere in this file):
    # flip the image along the given axis with a 50% chance.
    if random.random() < 0.5:
        image = np.flip(image, axis=axis)
    return image


def read_in_imgs(imgs_path_list, mode):
    """
    Reads all training or validation data and returns the images as numpy arrays.
    :param imgs_path_list: a list of paths of image files
    :param mode: 'train', 'valid' or 'test'; selects the target size and whether to augment
    :return: a 4D numpy array of shape [num_images, image_height, image_width, image_depth]
    """

    if mode == 'test':
height = IMG_TEST_HEIGHT
width = IMG_TEST_WIDTH
else: # for valid or train
height = IMG_HEIGHT
width = IMG_WIDTH

images = np.array([]).reshape([0, height, width, IMG_DEPTH])

for imgs_path in imgs_path_list:
img = io.imread(imgs_path)
img = skimage.transform.resize(img, [height, width], order=3, mode='reflect')
        if mode == 'train':
img = horizontal_flip(image=img, axis=1) # 50% chance to flip the image when training
img = np.reshape(img, [1, height, width, IMG_DEPTH])
# Concatenate along axis 0 by default
images = np.concatenate((images, img))

return images


def tfrecords_maker(example, file_name = 'training'):

def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

tfrecords_filename = file_name + '_data.tfrecords'

writer = tf.python_io.TFRecordWriter(tfrecords_filename)

i = 0
for img_hand_path, img_head_path, label_fa, label_ges, label_obj in example:

img_hand = np.array(io.imread(img_hand_path))
img_head = np.array(io.imread(img_head_path))

# half the image size to save storage
img_hand = skimage.transform.resize(img_hand, [IMG_TMP_HEIGHT, IMG_TMP_WIDTH], order=3, mode='reflect')
img_head = skimage.transform.resize(img_head, [IMG_TMP_HEIGHT, IMG_TMP_WIDTH], order=3, mode='reflect')

img_hand = img_hand * 255.0
img_head = img_head * 255.0
img_hand = img_hand.astype(np.uint8)
img_head = img_head.astype(np.uint8)

image_hand_raw = img_hand.tostring()
image_head_raw = img_head.tostring()

_example = tf.train.Example(features=tf.train.Features(feature={
'image_hand_raw': _bytes_feature(image_hand_raw),
'image_head_raw': _bytes_feature(image_head_raw),
'label_fa': _int64_feature(int(label_fa)),
'label_ges': _int64_feature(int(label_ges)),
'label_obj': _int64_feature(int(label_obj))}))

writer.write(_example.SerializeToString())
i = i + 1
        if i % 50 == 0:
            print '%d / %d' % (i, TRAIN_EPOCH_SIZE)
writer.close()

if __name__ == '__main__':
# To save the training data to tfrecord format
train_data_list = read_path_and_label('train')
tfrecords_maker(train_data_list, 'training')
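
For reference, the snippet below is a minimal sketch (not part of this diff) of how the records written by tfrecords_maker() could be read back for a quick sanity check. The feature keys, the uint8 encoding and the half-resolution image shape follow the writer above, and 'training_data.tfrecords' is simply the writer's default output file name.

import numpy as np
import tensorflow as tf

IMG_TMP_WIDTH = 1920 // 2
IMG_TMP_HEIGHT = 1080 // 2
IMG_DEPTH = 3

# Iterate over the serialized tf.train.Example protos in the file.
for serialized in tf.python_io.tf_record_iterator('training_data.tfrecords'):
    ex = tf.train.Example()
    ex.ParseFromString(serialized)
    feat = ex.features.feature

    # Raw bytes back to an H x W x C uint8 image (same shape the writer saved).
    img_hand = np.frombuffer(feat['image_hand_raw'].bytes_list.value[0], dtype=np.uint8)
    img_hand = img_hand.reshape([IMG_TMP_HEIGHT, IMG_TMP_WIDTH, IMG_DEPTH])

    label_fa = feat['label_fa'].int64_list.value[0]
    label_ges = feat['label_ges'].int64_list.value[0]
    label_obj = feat['label_obj'].int64_list.value[0]

    print img_hand.shape, label_fa, label_ges, label_obj
    break  # inspect only the first record
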
1 change: 1 addition & 0 deletions dataset/placeholder
@@ -0,0 +1 @@

83 changes: 83 additions & 0 deletions hyper_parameters.py
@@ -0,0 +1,83 @@
# Coder: Wenxin Xu
# Github: https://github.com/wenxinxu/ResNeXt-in-tensorflow
# ==============================================================================
import tensorflow as tf

FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string('mode', 'test', '''Specify whether the main code is for training or testing''')

## The following flags define hyper-parameters that specifically characterize ResNeXt
tf.app.flags.DEFINE_integer('cardinality', 3, '''Cardinality, the number of paths in each block''')
tf.app.flags.DEFINE_integer('block_unit_depth', 32, '''The depth (# filters) of each split; 64 for cifar10 in Figure 7 of the paper''')
tf.app.flags.DEFINE_integer('num_fc_units', 256, '''Number of neurons in the fc layer''')
tf.app.flags.DEFINE_string('bottleneck_implementation', 'b', '''Whether to use Figure 3b or 3c of the paper to implement the bottleneck''')


## The following flags are related to save paths, tensorboard outputs and screen outputs

tf.app.flags.DEFINE_string('version', 'GG123', '''A version number defining the directory to save logs and checkpoints''')
tf.app.flags.DEFINE_integer('report_freq', 200, '''Number of steps between printing errors to the screen and writing summaries''')
tf.app.flags.DEFINE_integer('save_freq', 200, '''Number of steps between saving the current checkpoint''')
tf.app.flags.DEFINE_integer('max_to_keep', 400, '''Max # ckpt to keep''')
tf.app.flags.DEFINE_float('train_ema_decay', 0.95, '''The decay factor of the train error's
moving average shown on tensorboard''')

## The following flags define hyper-parameters regarding training

tf.app.flags.DEFINE_integer('train_steps', 80000, '''Total steps that you want to train''')
tf.app.flags.DEFINE_boolean('is_full_validation', False, '''Validation w/ full validation set or
a random batch''')
tf.app.flags.DEFINE_integer('train_batch_size', 10, '''Train batch size''')
tf.app.flags.DEFINE_integer('validation_batch_size', 72, '''Validation batch size, must be a multiple of 24''')
tf.app.flags.DEFINE_integer('test_batch_size', 20, '''Test batch size''')


# tf.app.flags.DEFINE_float('init_lr', 0.001, '''Initial learning rate''')
# tf.app.flags.DEFINE_float('lr_decay_factor', 0.001, '''How much to decay the learning rate each
# time''')
tf.app.flags.DEFINE_float('k', 0.4, '''k * loss_ges + (1-k) * loss_obj''')
tf.app.flags.DEFINE_float('init_lr', 0.001, '''Initial learning rate''')
tf.app.flags.DEFINE_float('lr_decay_factor', 1, '''How much to decay the learning rate each
time''')

## The following flags define hyper-parameters modifying the training network
tf.app.flags.DEFINE_integer('num_resnext_blocks', 3, '''How many blocks you want; total layers = 3n + 2 (the paper used n=3, 29 layers, as a demo)''')
tf.app.flags.DEFINE_float('weight_decay', 5e-4, '''scale for l2 regularization''')


## The following flags are related to data-augmentation

tf.app.flags.DEFINE_integer('padding_size', 2, '''In data augmentation, layers of zero padding on
each side of the image''')


## If you want to load a checkpoint and continue training

tf.app.flags.DEFINE_boolean('is_use_ckpt', True, '''Whether to load a checkpoint and continue
training''')

tf.app.flags.DEFINE_string('ckpt_path', 'logs_oh,mfc_ges+obj_ver2_c=3_d=32_n=3_lr=0.001_lrd=1_wd=0.0005_k=0.4/model.ckpt-4600', '''Checkpoint path to restore from to continue TRAINING''')

tf.app.flags.DEFINE_string('test_ckpt_path', 'logs_GG123_c=3_d=32_n=3_lr=0.001_lrd=1_wd=0.0005_k=0.4/model.ckpt-30600', '''Checkpoint path to restore from for TESTING''')


lr_curve_file_name = 'c='+str(FLAGS.cardinality) + '_'\
'd='+str(FLAGS.block_unit_depth) + '_'\
'n='+str(FLAGS.num_resnext_blocks) + '_'\
'lr='+str(FLAGS.init_lr) + '_'\
'lrd='+str(FLAGS.lr_decay_factor) + '_'\
'wd='+str(FLAGS.weight_decay) + '_'\
'k='+str(FLAGS.k)
lr_curve_file_name = FLAGS.version + '_' + lr_curve_file_name
train_dir = 'logs_' + lr_curve_file_name + '/'
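
For context, a minimal usage sketch (assuming a separate training or testing script, which is not part of this diff) of how the flags and the derived train_dir defined above would typically be consumed:

# Hypothetical consumer script (not part of this PR); the module name
# 'hyper_parameters' matches this file.
from hyper_parameters import FLAGS, train_dir

if __name__ == '__main__':
    # Command-line overrides such as --mode=train or --init_lr=0.01 are
    # picked up by tf.app.flags when FLAGS is accessed.
    print 'mode:', FLAGS.mode
    print 'cardinality:', FLAGS.cardinality
    print 'loss weighting k:', FLAGS.k
    print 'logs and checkpoints go to:', train_dir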

