added triplet-softmax
BKHMSI committed Oct 10, 2018
1 parent cbdd0e3 commit bed5b43
Showing 6 changed files with 83 additions and 40 deletions.
32 changes: 28 additions & 4 deletions ReadMe.md
@@ -1,11 +1,35 @@
# Discriminating Power of Different Loss Functions on MNIST
The purpose of this repository is to explore the discriminating power of different loss functions beyond traditional softmax on the MNIST dataset.
The purpose of this repository is to explore the discriminating power of different loss functions for classification, beyond the traditional softmax loss, on the MNIST dataset. The idea behind maximum discriminating capability is to maximize inter-class variance while minimizing intra-class variance.

## Categorical Cross-Entropy
## Evaluation Setup
I used the MNIST dataset provided by ```keras.datasets.mnist```, splitting the training data into 10% for validation (6,000) and the rest for training (54,000). I saved the model with the lowest validation loss after ~100 epochs and evaluated it on the provided testing set (10,000). The generated embeddings were then reduced to two dimensions for visualization using t-SNE, run for 5,000 iterations with a perplexity of 30. The aim of this visualization is to determine the relative discriminating power of the loss functions by analyzing the inter- and intra-class variance of the clusters formed.

## Results
The different loss functions used are mathematically defined below, along with the results of each on the testing set.
First, let's visualize the t-SNE embeddings of the raw pixel data to establish a baseline for how the data is distributed before any clustering work is done.

### Raw Pixels


### Categorical Cross-Entropy (Softmax)
TBD

## Semi-Hard Triplet Loss
### Semi-Hard Triplet Loss
TBD
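
A sketch of the standard semi-hard triplet formulation (following FaceNet), where $f$ is the embedding network, $(a, p, n)$ denote anchor/positive/negative, and the margin $\alpha$ comes from `alpha` in `config.yaml`:

```latex
L_{triplet} = \sum_{i=1}^{N} \max\Big( \big\|f(x_i^a) - f(x_i^p)\big\|_2^2 - \big\|f(x_i^a) - f(x_i^n)\big\|_2^2 + \alpha,\; 0 \Big)
```

Semi-hard mining picks negatives that are farther from the anchor than the positive but still violate the margin.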

## Large Margin Cosine Loss
### Large Margin Cosine Loss
TBD
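
As a sketch, the standard large margin cosine loss (CosFace) formulation — here $s$ maps to `scale` in `config.yaml`, while the exact config key for the cosine margin $m$ is an assumption:

```latex
L_{lmc} = \frac{1}{N} \sum_{i=1}^{N} -\log \frac{e^{s\,(\cos\theta_{y_i,i} - m)}}{e^{s\,(\cos\theta_{y_i,i} - m)} + \sum_{j \neq y_i} e^{s\,\cos\theta_{j,i}}}
```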

### Intra-Enhanced Triplet Loss
TBD

### Semi-Hard Triplet Loss + Softmax
TBD
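
A sketch of the combined objective as wired up in `train.py` below, where the cross-entropy term is weighted by `lambda_2` from `config.yaml` (0.1 in this commit):

```latex
L_{total} = L_{triplet} + \lambda_2 \, L_{softmax}
```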

### A-Softmax
TBD

### Contrastive Loss
TBD

> In any experiment using triplet loss, the data was ordered such that each mini-batch contains exactly k samples of each class, as in the sketch below.
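
A minimal sketch of that ordering under the config above (`batch-size: 400`, `k_batch: 40`, 10 classes, so each batch holds exactly 40 samples of every digit); `order_for_triplets` is a hypothetical helper, not the repository's exact implementation:

```python
import numpy as np

def order_for_triplets(X, y, num_classes=10, k=40):
    """Interleave samples so every consecutive block of num_classes * k
    items contains exactly k samples of each class."""
    per_class = [np.where(y == c)[0] for c in range(num_classes)]
    n_batches = min(len(idx) for idx in per_class) // k
    order = []
    for b in range(n_batches):
        for idx in per_class:
            order.extend(idx[b * k:(b + 1) * k])
    order = np.array(order)
    return X[order], y[order]
```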
20 changes: 13 additions & 7 deletions config.yaml
@@ -1,8 +1,8 @@
run-title: "TripletMnist"
run-title: "Triplet-SoftmaxMnist"

paths:
save: "Models/14"
load: ""
save: "Models/23"
load: "Models/23/model.13-0.0036.h5"

train:
lr: 0.001
@@ -12,17 +12,19 @@ train:
batch-size: 400
k_batch: 40

loss: "semi-hard-triplet-loss"
loss: "triplet-softmax"
alpha: 0.2
beta: 0.1
scale: 64
lambda_1: 0.5
lambda_2: 0.1
scale: 30
reg_lambda: 0.01

lr_reduce_factor: 0.5
patience: 5
min_lr: 1.0e-5

shuffle: False
shuffle: True
resume: False

data:
@@ -31,4 +33,8 @@ data:
num_classes: 10

samples_per_id: 6000
val_split: 0.1
val_split: 0.1

tsne:
n_iter: 5000
perplexity: 30
37 changes: 25 additions & 12 deletions data.py
@@ -28,6 +28,20 @@ def load(self):
self.num_val = int(self.y_data.shape[0] * (self.config["data"]["val_split"]))
self.num_test = self.y_test.shape[0]

if self.config["train"]["loss"] in ["intra-enhanced-triplet-loss", "semi-hard-triplet-loss"]:
print("[INFO] Ordering Data")
self.order_data_triplet_loss()

self.split_data()

self.mean = np.mean(self.X_train, axis=0)
self.std = np.std(self.X_train, axis=0)
self.std = (self.std==0) * 1e-16 + self.std  # replace zero std (constant pixels) to avoid division by zero

self.X_train = self.preprocess(self.X_train)
self.X_val = self.preprocess(self.X_val)
self.X_test = self.preprocess(self.X_test)


def preprocess(self, data):
data = data.astype('float32')
@@ -62,18 +76,11 @@ def split_data(self):
self.X_val = self.X_data[self.num_train:]
self.y_val = self.y_data[self.num_train:]

self.mean = np.mean(self.X_train, axis=0)
self.std = np.std(self.X_train, axis=0)
self.std = (self.std==0) * 1e-16 + self.std

self.X_train = self.preprocess(self.X_train)
self.X_val = self.preprocess(self.X_val)
self.X_test = self.preprocess(self.X_test)
del self.X_data, self.y_data

def get_random_batch(self, k = 100):
X_batch, y_batch = [], []
for label in range(self.config["num_classes"]):
for label in range(self.config["data"]["num_classes"]):
X_mask = self.X_test[self.y_test==label]
X_batch.extend(np.array([X_mask[np.random.choice(len(X_mask), k, replace=False)]]) if k <= len(X_mask) and k >= 0 else X_mask)
y_batch += [label] * k if k <= len(X_mask) and k >= 0 else [label] * len(X_mask)
@@ -85,6 +92,8 @@ class DataGenerator(object):
def __init__(self, config):
self.shuffle = config["train"]["shuffle"]
self.batch_size = config["train"]["batch-size"]
self.loss = config["train"]["loss"]
self.num_classes = config["data"]["num_classes"]

def generate(self, X, y):
''' Generates batches of samples '''
@@ -94,12 +103,17 @@ def generate(self, X, y):
indexes = self.__get_exploration_order(len(y))
# Generate batches
batches = np.arange(len(indexes)//self.batch_size)
np.random.shuffle(batches)
if not self.shuffle: np.random.shuffle(batches)  # samples are class-ordered, so shuffle batch order instead of individual samples

for batch in batches:
# Find list of ids
batch_indecies = indexes[batch*self.batch_size:(batch+1)*self.batch_size]
yield X[batch_indecies], y[batch_indecies]
if self.loss == "triplet-softmax":
y_1 = y[batch_indecies]
y_2 = np_utils.to_categorical(y_1, self.num_classes)
yield X[batch_indecies], [y_1, y_2]
else:
yield X[batch_indecies], y[batch_indecies]

def __get_exploration_order(self, data_size):
''' Generates order of exploration '''
@@ -118,5 +132,4 @@ def __get_exploration_order(self, data_size):
config = yaml.load(file)

dataloader = DataLoader(config)
dataloader.load()
dataloader.order_data_triplet_loss()
dataloader.load()
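
For `triplet-softmax`, the generator yields two targets per batch — integer labels for the triplet loss and one-hot labels for the softmax head — matching the two-output model in `model.py` below. A minimal sketch of that contract (shapes are illustrative):

```python
import numpy as np
from keras.utils import np_utils

X_batch = np.random.rand(400, 28, 28, 1)          # one mini-batch of images
y_batch = np.repeat(np.arange(10), 40)            # 40 samples of each of the 10 classes

# what DataGenerator.generate yields when loss == "triplet-softmax":
y_triplet = y_batch                               # integer labels for the triplet loss
y_softmax = np_utils.to_categorical(y_batch, 10)  # one-hot labels for cross-entropy
batch = (X_batch, [y_triplet, y_softmax])
```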
9 changes: 6 additions & 3 deletions model.py
@@ -31,9 +31,12 @@ def __head(embedding):
return out

x = __body(input_img)
if top: x = __head(x)

model = Model(inputs=input_img, outputs=x)
if config["train"]["loss"] in ["triplet-softmax"] and top:
y = __head(x)
model = Model(inputs=input_img, outputs=[x, y])
else:
if top: x = __head(x)
model = Model(inputs=input_img, outputs=x)
return model

def simple_resnet(input_shape):
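Since the `triplet-softmax` model has two outputs — the embedding (fed to the triplet loss) and the classification head (fed to cross-entropy) — prediction returns one array per output. A hedged usage sketch:

```python
# multi-output predict returns a list of arrays, one per output
embeddings, probs = model.predict(X_batch, batch_size=400)
print(embeddings.shape)  # (400, embedding_dim) — embedding size depends on __body
print(probs.shape)       # (400, 10) — softmax over the 10 digit classes
```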
19 changes: 8 additions & 11 deletions train.py
@@ -16,11 +16,12 @@

def get_loss_function(func):
return {
'large-margin-cosine-loss': large_margin_cos_loss(config["train"]),
'intra-enhanced-triplet-loss': intra_enhanced_triplet_loss(config["train"]),
'semi-hard-triplet-loss': semi_hard_triplet_loss(config["train"]["alpha"]),
'categorical-crossentropy': losses.categorical_crossentropy,
}.get(func, losses.categorical_crossentropy)
'triplet-softmax': ([semi_hard_triplet_loss(config["train"]["alpha"]), 'categorical_crossentropy'], [1, config["train"]["lambda_2"]]),
'large-margin-cosine-loss': (large_margin_cos_loss(config["train"]), None),
'intra-enhanced-triplet-loss': (intra_enhanced_triplet_loss(config["train"]), None),
'semi-hard-triplet-loss': (semi_hard_triplet_loss(config["train"]["alpha"]), None),
'categorical-crossentropy': (losses.categorical_crossentropy, None),
}.get(func, (losses.categorical_crossentropy, None))

if __name__ == "__main__":

@@ -43,10 +44,6 @@ def get_loss_function(func):
print("[INFO] Loading Data")
dataloader = DataLoader(config, train["loss"]=="categorical-crossentropy")
dataloader.load()
if train["loss"] in ["intra-enhanced-triplet-loss", "semi-hard-triplet-loss"]:
print("[INFO] Ordering Data")
dataloader.order_data_triplet_loss()
dataloader.split_data()

print("[INFO] Creating Generators")
train_gen = DataGenerator(config).generate(dataloader.X_train, dataloader.y_train)
@@ -62,9 +59,9 @@ def get_loss_function(func):
model.load_weights(paths["load"], by_name=True)

metric = large_margin_cos_acc(train) if train["loss"]=="large-margin-cosine-loss" else 'acc'
loss_func = get_loss_function(train["loss"])
loss_func, loss_weights = get_loss_function(train["loss"])
optim = getattr(optimizers, train["optim"])(train["lr"])
model.compile(loss=loss_func, optimizer=optim, metrics=[])
model.compile(loss=loss_func, loss_weights=loss_weights, optimizer=optim, metrics=[])

reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=train["lr_reduce_factor"], patience=train["patience"], min_lr=train["min_lr"])
checkpoint = ModelCheckpoint(os.path.join(paths["save"],"model.{epoch:02d}-{val_loss:.4f}.h5"), monitor='val_loss', save_best_only=True, mode='min')
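Keras combines multi-output losses as a weighted sum, so `loss_weights=[1, lambda_2]` trains on `L_triplet + lambda_2 * L_softmax`. A hedged sketch of the fit call that would follow (the exact call is elided from this diff; `fit_generator` is the Keras 2 API):

```python
model.fit_generator(
    train_gen,                                                 # yields (X, [y_int, y_onehot])
    steps_per_epoch=dataloader.num_train // train["batch-size"],
    validation_data=val_gen,
    validation_steps=dataloader.num_val // train["batch-size"],
    epochs=100,                                                # ~100 epochs per the ReadMe
    callbacks=[reduce_lr, checkpoint])
```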
6 changes: 3 additions & 3 deletions visualize.py
@@ -18,7 +18,7 @@ def scatter(x, labels, config):

plt.switch_backend('agg')
fig, ax = plt.subplots()
ax.scatter(x[:,0], x[:,1], lw=0, s=40, alpha=0.1, c=palette[labels.astype(np.int)])
ax.scatter(x[:,0], x[:,1], lw=0, s=40, alpha=0.2, c=palette[labels.astype(np.int)])

for idx in range(config["data"]["num_classes"]):
xtext, ytext = np.median(x[labels == idx, :], axis=0)
@@ -38,7 +38,7 @@ def scatter(x, labels, config):
paths = config["paths"]
data = config["data"]

dataloader = DataLoader(data)
dataloader = DataLoader(config)
dataloader.load()

input_shape = (data["imsize"], data["imsize"], data["imchannel"])
@@ -51,6 +51,6 @@ def scatter(x, labels, config):
#embeddings = X_batch.reshape(-1, 784)
embeddings = model.predict(X_batch, batch_size=config["train"]["batch-size"], verbose=1)

tsne = TSNE(n_components=2, perplexity=30, verbose=1, n_iter=5000)
tsne = TSNE(n_components=2, perplexity=config["tsne"]["perplexity"], verbose=1, n_iter=config["tsne"]["n_iter"])
tsne_embeds = tsne.fit_transform(embeddings)
scatter(tsne_embeds, y_batch, config)
