diff --git a/config.yaml b/config.yaml
index 46c6632..4a37794 100644
--- a/config.yaml
+++ b/config.yaml
@@ -1,8 +1,8 @@
-run-title: "SoftmaxMnist"
+run-title: "TripletMnist"

 paths:
-  save: "Models/13"
-  load: "Models/11/model.78-0.0392.h5"
+  save: "Models/14"
+  load: ""

 train:
   lr: 0.001
@@ -10,8 +10,9 @@ train:
   epochs: 100
   batch-size: 400
+  k_batch: 40

-  loss: "categorical-crossentropy"
+  loss: "semi-hard-triplet-loss"
   alpha: 0.2
   beta: 0.1
   scale: 64
@@ -21,6 +22,7 @@ train:
   patience: 5
   min_lr: 1.0e-5
+  shuffle: False
   resume: False

 data:
@@ -28,5 +30,5 @@ data:
   imchannel: 1
   num_classes: 10
-  val_split: 0.1
-
+  samples_per_id: 6000
+  val_split: 0.1
\ No newline at end of file
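The new `k_batch` key, together with `batch-size`, encodes the P×K sampling scheme that semi-hard triplet mining expects: every 400-sample batch should decompose into P = 400 / 40 = 10 identity blocks of K = 40 samples each. A minimal sketch of that arithmetic, with illustrative variable names that are not from the repo:

```python
# P x K batch arithmetic implied by the config above (illustrative only).
batch_size = 400   # train: batch-size
k_batch = 40       # train: k_batch -- K samples drawn per identity
num_classes = 10   # data: num_classes

p_batch = batch_size // k_batch  # P = 10 identities represented in every batch
assert p_batch <= num_classes, "a batch cannot cover more identities than exist"
print("each batch = {} identities x {} samples".format(p_batch, k_batch))
```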
diff --git a/data.py b/data.py
index 219aaad..87bebf7 100644
--- a/data.py
+++ b/data.py
@@ -1,3 +1,8 @@
+from __future__ import print_function
+
+import os
+import yaml
+import argparse
 import numpy as np
 from keras.utils import np_utils
@@ -9,25 +14,18 @@ def __init__(self, config, one_hot = False):
         self.one_hot = one_hot

     def load(self):
-        (X_train, self.y_train), (X_test, self.y_test) = mnist.load_data()
-
-        self.input_shape = (-1, self.config["imsize"], self.config["imsize"], self.config["imchannel"])
-        X_train = np.reshape(X_train, self.input_shape)
-        X_test = np.reshape(X_test, self.input_shape)
+        (X_data, self.y_data), (X_test, self.y_test) = mnist.load_data()

-        self.mean = np.mean(X_train, axis=0)
-        self.std = np.std(X_train, axis=0)
-        self.std = (self.std==0) * 1e-16 + self.std
+        self.input_shape = (-1, self.config["data"]["imsize"], self.config["data"]["imsize"], self.config["data"]["imchannel"])
+        self.X_data = np.reshape(X_data, self.input_shape)
+        self.X_test = np.reshape(X_test, self.input_shape)

-        self.X_train = self.preprocess(X_train)
-        self.X_test = self.preprocess(X_test)
-
         if self.one_hot:
-            self.y_train = np_utils.to_categorical(self.y_train, self.config["num_classes"])
-            self.y_test = np_utils.to_categorical(self.y_test, self.config["num_classes"])
+            self.y_data = np_utils.to_categorical(self.y_data, self.config["data"]["num_classes"])
+            self.y_test = np_utils.to_categorical(self.y_test, self.config["data"]["num_classes"])

-        self.num_train = int(self.y_train.shape[0] * (1-self.config["val_split"]))
-        self.num_val = int(self.y_train.shape[0] * (self.config["val_split"]))
+        self.num_train = int(self.y_data.shape[0] * (1-self.config["data"]["val_split"]))
+        self.num_val = int(self.y_data.shape[0] * (self.config["data"]["val_split"]))
         self.num_test = self.y_test.shape[0]
@@ -39,22 +37,39 @@ def preprocess(self, data):

     def order_data_triplet_loss(self):
         data = {}
-
-        for label in range(self.config["num_classes"]):
-            mask = self.y_train==label
+        samples_per_id = self.config["data"]["samples_per_id"]
+        for label in range(self.config["data"]["num_classes"]):
+            mask = self.y_data==label
             data[label] = [i for i, x in enumerate(mask) if x]
+            if len(data[label]) < samples_per_id:
+                data[label].extend(np.random.choice(data[label], samples_per_id - len(data[label]), replace=False))
+            data[label] = data[label][:samples_per_id]

-        p_batch = self.config["batch-size"] // self.config["k_batch"]
-        k_batch = self.config["k_batch"]
-
-        X_train, y_train = [], []
-        for i in range(p_batch):
+        k_batch = self.config["train"]["k_batch"]
+        X_data, y_data = [], []
+        for i in range(samples_per_id // k_batch):
             for label in data:
-                X_train.extend(self.X_train[data[label][i*k_batch:(i+1)*k_batch]])
-                y_train += [label] * k_batch
+                X_data.extend(self.X_data[data[label][i*k_batch:(i+1)*k_batch]])
+                y_data += [label] * k_batch
+
+        self.X_data = np.array(X_data)
+        self.y_data = np.array(y_data)
+
+    def split_data(self):
+        self.X_train = self.X_data[:self.num_train]
+        self.y_train = self.y_data[:self.num_train]
+
+        self.X_val = self.X_data[self.num_train:]
+        self.y_val = self.y_data[self.num_train:]
+
+        self.mean = np.mean(self.X_train, axis=0)
+        self.std = np.std(self.X_train, axis=0)
+        self.std = (self.std==0) * 1e-16 + self.std

-        self.X_train = X_train
-        self.y_train = y_train
+        self.X_train = self.preprocess(self.X_train)
+        self.X_val = self.preprocess(self.X_val)
+        self.X_test = self.preprocess(self.X_test)
+        del self.X_data, self.y_data

     def get_random_batch(self, k = 100):
         X_batch, y_batch = [], []
@@ -65,3 +80,43 @@
         X_batch = np.reshape(X_batch, self.input_shape)

         return X_batch, np.array(y_batch)
+
+class DataGenerator(object):
+    def __init__(self, config):
+        self.shuffle = config["train"]["shuffle"]
+        self.batch_size = config["train"]["batch-size"]
+
+    def generate(self, X, y):
+        ''' Generates batches of samples '''
+        # Infinite loop
+        while 1:
+            # Generate order of exploration of dataset
+            indexes = self.__get_exploration_order(len(y))
+            # Generate batches
+            batches = np.arange(len(indexes)//self.batch_size)
+            np.random.shuffle(batches)
+
+            for batch in batches:
+                # Find list of ids
+                batch_indices = indexes[batch*self.batch_size:(batch+1)*self.batch_size]
+                yield X[batch_indices], y[batch_indices]
+
+    def __get_exploration_order(self, data_size):
+        ''' Generates order of exploration '''
+        idxs = np.arange(data_size)
+        if self.shuffle:
+            np.random.shuffle(idxs)
+        return idxs
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Model Parameters')
+    parser.add_argument('-c', '--config', type=str, default="config.yaml", help='path of config file')
+    args = parser.parse_args()
+
+    with open(args.config, 'r') as file:
+        config = yaml.load(file)
+
+    dataloader = DataLoader(config)
+    dataloader.load()
+    dataloader.order_data_triplet_loss()
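`order_data_triplet_loss` pads each class's index list up to `samples_per_id` and rewrites the data as contiguous blocks of `k_batch` same-identity samples; with `shuffle: False`, `DataGenerator` permutes whole batches but never the samples inside one, so the block layout the triplet loss relies on survives batching. A hypothetical smoke test for that invariant (not part of the patch; assumes the config.yaml above):

```python
# Hypothetical smoke test (not in the repo): with shuffle: False, every
# k_batch-sized slice of a yielded batch should carry exactly one identity.
import yaml
from data import DataLoader, DataGenerator

with open("config.yaml") as f:
    config = yaml.safe_load(f)

loader = DataLoader(config)   # one_hot defaults to False: triplet path needs raw labels
loader.load()
loader.order_data_triplet_loss()
loader.split_data()

gen = DataGenerator(config).generate(loader.X_train, loader.y_train)
_, y = next(gen)
k = config["train"]["k_batch"]
assert all(len(set(y[i:i + k])) == 1 for i in range(0, len(y), k))
```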
diff --git a/loss.py b/loss.py
index 6b99964..b3095f9 100644
--- a/loss.py
+++ b/loss.py
@@ -3,7 +3,7 @@
 import tensorflow as tf
 from keras import backend as K

-def __anchor_center_loss(embeddings, margin, batch_size = 240, k = 4):
+def __anchor_center_loss(embeddings, margin, batch_size = 400, k = 40):
     """Computes the anchor-center loss

     Minimizes intra-class distances. Assumes embeddings are ordered
diff --git a/train.py b/train.py
index 1f66d59..49c4ce4 100644
--- a/train.py
+++ b/train.py
@@ -11,7 +11,7 @@
 from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, TensorBoard

 from loss import *
-from data import DataLoader
+from data import DataLoader, DataGenerator
 from model import get_model, simple_resnet

 def get_loss_function(func):
@@ -40,30 +40,53 @@ def get_loss_function(func):
     with open(os.path.join(paths["save"], config["run-title"] + ".yaml"), 'w') as outfile:
         yaml.dump(config, outfile)

-    dataloader = DataLoader(data, train["loss"]=="categorical-crossentropy")
+    print("[INFO] Loading Data")
+    dataloader = DataLoader(config, train["loss"]=="categorical-crossentropy")
     dataloader.load()
+    if train["loss"] in ["intra-enhanced-triplet-loss", "semi-hard-triplet-loss"]:
+        print("[INFO] Ordering Data")
+        dataloader.order_data_triplet_loss()
+    dataloader.split_data()
+    print("[INFO] Creating Generators")
+    train_gen = DataGenerator(config).generate(dataloader.X_train, dataloader.y_train)
+    val_gen = DataGenerator(config).generate(dataloader.X_val, dataloader.y_val)
+
+    print("[INFO] Building Model")
     input_shape = (data["imsize"], data["imsize"], data["imchannel"])
     model = get_model(input_shape, config, top=train["loss"]=="categorical-crossentropy")
     # model = simple_resnet(input_shape)

     if train["resume"]:
+        print("[INFO] Loading Weights")
         model.load_weights(paths["load"], by_name=True)

     metric = large_margin_cos_acc(train) if train["loss"]=="large-margin-cosine-loss" else 'acc'
     loss_func = get_loss_function(train["loss"])
     optim = getattr(optimizers, train["optim"])(train["lr"])
-    model.compile(loss=loss_func, optimizer=optim, metrics=[metric])
+    model.compile(loss=loss_func, optimizer=optim, metrics=[])

     reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=train["lr_reduce_factor"], patience=train["patience"], min_lr=train["min_lr"])
     checkpoint = ModelCheckpoint(os.path.join(paths["save"],"model.{epoch:02d}-{val_loss:.4f}.h5"), monitor='val_loss', save_best_only=True, mode='min')
     tensorboard = TensorBoard(log_dir=os.path.join('./Graph',config["run-title"]), histogram_freq=0, write_graph=True, write_images=True)

-    model.fit(dataloader.X_train, dataloader.y_train,
-        epochs=train["epochs"],
-        batch_size=train["batch-size"],
-        verbose=1,
-        shuffle=True,
-        validation_split=data["val_split"],
+    print("[INFO] Start Training")
+    model.fit_generator(
+        generator = train_gen,
+        steps_per_epoch = dataloader.num_train//train["batch-size"],
+        validation_data = val_gen,
+        validation_steps = dataloader.num_val//train["batch-size"],
+        shuffle = False,
+        workers = 0,
+        epochs = train["epochs"],
         callbacks=[checkpoint, reduce_lr, tensorboard]
-    )
\ No newline at end of file
+    )
+
+    # model.fit(dataloader.X_train, dataloader.y_train,
+    #     epochs=train["epochs"],
+    #     batch_size=train["batch-size"],
+    #     verbose=1,
+    #     shuffle=train["shuffle"],
+    #     validation_split=data["val_split"],
+    #     callbacks=[checkpoint, reduce_lr, tensorboard]
+    # )
\ No newline at end of file
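The switch to `fit_generator` keeps the carefully ordered batches intact: `workers = 0` runs the generator in the main process and `shuffle = False` leaves all batch ordering to `DataGenerator`. The `semi-hard-triplet-loss` function itself is not shown in this diff; for TF 1.x-era Keras, a wrapper around TensorFlow's built-in semi-hard miner commonly looked like the sketch below (a guess at what `get_loss_function` returns, not the repo's actual code):

```python
# Plausible shape of the "semi-hard-triplet-loss" used above -- a guess based
# on the TF 1.x contrib API of the period, NOT the repo's actual loss.py code.
import tensorflow as tf
from keras import backend as K

def semi_hard_triplet_loss(margin):
    def loss(y_true, y_pred):
        # y_true carries integer identity labels; y_pred holds the embeddings
        labels = K.flatten(y_true)
        return tf.contrib.losses.metric_learning.triplet_semihard_loss(
            labels=labels, embeddings=y_pred, margin=margin)
    return loss
```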
diff --git a/visualize.py b/visualize.py
index 353d46b..398c21b 100644
--- a/visualize.py
+++ b/visualize.py
@@ -18,7 +18,7 @@ def scatter(x, labels, config):
     plt.switch_backend('agg')

     fig, ax = plt.subplots()
-    ax.scatter(x[:,0], x[:,1], lw=0, s=40, alpha=0.3, c=palette[labels.astype(np.int)])
+    ax.scatter(x[:,0], x[:,1], lw=0, s=40, alpha=0.1, c=palette[labels.astype(np.int)])

     for idx in range(config["data"]["num_classes"]):
         xtext, ytext = np.median(x[labels == idx, :], axis=0)
@@ -51,8 +51,6 @@ def scatter(x, labels, config):
     #embeddings = X_batch.reshape(-1, 784)
     embeddings = model.predict(X_batch, batch_size=config["train"]["batch-size"], verbose=1)

-    tsne = TSNE(n_components=2, perplexity=30, verbose=1)
+    tsne = TSNE(n_components=2, perplexity=30, verbose=1, n_iter=5000)
     tsne_embeds = tsne.fit_transform(embeddings)

-    scatter(tsne_embeds, y_batch, config)
-
-
+    scatter(tsne_embeds, y_batch, config)
\ No newline at end of file
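The raised `n_iter` gives t-SNE five times scikit-learn's default of 1000 optimization steps, and the lower alpha keeps dense, well-separated triplet clusters readable. For reference, the shapes flowing through this path reduce to the following (standalone sketch on random data, not repo code):

```python
# Standalone shape check for the t-SNE path above, using random stand-in data
# (illustrative only; the real embeddings come from model.predict).
import numpy as np
from sklearn.manifold import TSNE

embeddings = np.random.randn(1000, 64).astype(np.float32)  # stand-in for model output
labels = np.random.randint(0, 10, size=1000)               # stand-in for y_batch
tsne = TSNE(n_components=2, perplexity=30, verbose=1, n_iter=5000)
points = tsne.fit_transform(embeddings)                    # (1000, 2), fed to scatter()
```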