diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9342dd3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +data/* +*.pth +*.npy +*.pyc diff --git a/code/main.py b/code/main.py new file mode 100644 index 0000000..06cda94 --- /dev/null +++ b/code/main.py @@ -0,0 +1,303 @@ +from __future__ import print_function +from __future__ import division + +import os +import pdb +import csv +import time +import random +import numpy as np +import matplotlib.pyplot as plt + +import torch +import torchvision +import torchvision.transforms as transforms + +from PIL import Image + +from torch.utils.data.dataset import Dataset +from torch.utils.data import DataLoader + +from torch import nn +from torch.autograd import Variable +from torch.optim import Adam + +from resnet import resnet50 # homemade resnet pre-trained models :) +from resnet import resnet34 # homemade resnet pre-trained models :) + +USE_GPU = True + +TRAIN_HEAD_DIRS = ['../data/frames/train/house/1/head/', '../data/frames/train/house/1/head/', + '../data/frames/train/house/2/head/', '../data/frames/train/house/2/head/', + '../data/frames/train/house/3/head/', '../data/frames/train/house/3/head/', + '../data/frames/train/lab/1/head/', '../data/frames/train/lab/1/head/', + '../data/frames/train/lab/2/head/', '../data/frames/train/lab/2/head/', + '../data/frames/train/lab/3/head/', '../data/frames/train/lab/3/head/', + '../data/frames/train/lab/4/head/', '../data/frames/train/lab/4/head/', + '../data/frames/train/office/1/head/', '../data/frames/train/office/1/head/', + '../data/frames/train/office/2/head/', '../data/frames/train/office/2/head/', + '../data/frames/train/office/3/head/', '../data/frames/train/office/3/head/',] + +TRAIN_HAND_DIRS = ['../data/frames/train/house/1/Lhand/', '../data/frames/train/house/1/Rhand/', + '../data/frames/train/house/2/Lhand/', '../data/frames/train/house/2/Rhand/', + '../data/frames/train/house/3/Lhand/', '../data/frames/train/house/3/Rhand/', + '../data/frames/train/lab/1/Lhand/', '../data/frames/train/lab/1/Rhand/', + '../data/frames/train/lab/2/Lhand/', '../data/frames/train/lab/2/Rhand/', + '../data/frames/train/lab/3/Lhand/', '../data/frames/train/lab/3/Rhand/', + '../data/frames/train/lab/4/Lhand/', '../data/frames/train/lab/4/Rhand/', + '../data/frames/train/office/1/Lhand/', '../data/frames/train/office/1/Rhand/', + '../data/frames/train/office/2/Lhand/', '../data/frames/train/office/2/Rhand/', + '../data/frames/train/office/3/Lhand/', '../data/frames/train/office/3/Rhand/',] + +TEST_HEAD_DIRS = ['../data/frames/test/house/1/head/', '../data/frames/test/house/1/head/', + '../data/frames/test/house/2/head/', '../data/frames/test/house/2/head/', + '../data/frames/test/house/3/head/', '../data/frames/test/house/3/head/', + '../data/frames/test/lab/1/head/', '../data/frames/test/lab/1/head/', + '../data/frames/test/lab/2/head/', '../data/frames/test/lab/2/head/', + '../data/frames/test/lab/3/head/', '../data/frames/test/lab/3/head/', + '../data/frames/test/lab/4/head/', '../data/frames/test/lab/4/head/', + '../data/frames/test/office/1/head/', '../data/frames/test/office/1/head/', + '../data/frames/test/office/2/head/', '../data/frames/test/office/2/head/', + '../data/frames/test/office/3/head/', '../data/frames/test/office/3/head/',] + +TEST_HAND_DIRS = ['../data/frames/test/house/1/Lhand/', '../data/frames/test/house/1/Rhand/', + '../data/frames/test/house/2/Lhand/', '../data/frames/test/house/2/Rhand/', + '../data/frames/test/house/3/Lhand/', '../data/frames/test/house/3/Rhand/', + 
'../data/frames/test/lab/1/Lhand/', '../data/frames/test/lab/1/Rhand/', + '../data/frames/test/lab/2/Lhand/', '../data/frames/test/lab/2/Rhand/', + '../data/frames/test/lab/3/Lhand/', '../data/frames/test/lab/3/Rhand/', + '../data/frames/test/lab/4/Lhand/', '../data/frames/test/lab/4/Rhand/', + '../data/frames/test/office/1/Lhand/', '../data/frames/test/office/1/Rhand/', + '../data/frames/test/office/2/Lhand/', '../data/frames/test/office/2/Rhand/', + '../data/frames/test/office/3/Lhand/', '../data/frames/test/office/3/Rhand/',] + +TRAIN_FA_LABELS = ['../data/labels/house/FA_left1.npy', '../data/labels/house/FA_right1.npy', + '../data/labels/house/FA_left2.npy', '../data/labels/house/FA_right2.npy', + '../data/labels/house/FA_left3.npy', '../data/labels/house/FA_right3.npy', + '../data/labels/lab/FA_left1.npy', '../data/labels/lab/FA_right1.npy', + '../data/labels/lab/FA_left2.npy', '../data/labels/lab/FA_right2.npy', + '../data/labels/lab/FA_left3.npy', '../data/labels/lab/FA_right3.npy', + '../data/labels/lab/FA_left4.npy', '../data/labels/lab/FA_right4.npy', + '../data/labels/office/FA_left1.npy', '../data/labels/office/FA_right1.npy', + '../data/labels/office/FA_left2.npy', '../data/labels/office/FA_right2.npy', + '../data/labels/office/FA_left3.npy', '../data/labels/office/FA_right3.npy',] + +TRAIN_OBJ_LABELS = ['../data/labels/house/obj_left1.npy', '../data/labels/house/obj_right1.npy', + '../data/labels/house/obj_left2.npy', '../data/labels/house/obj_right2.npy', + '../data/labels/house/obj_left3.npy', '../data/labels/house/obj_right3.npy', + '../data/labels/lab/obj_left1.npy', '../data/labels/lab/obj_right1.npy', + '../data/labels/lab/obj_left2.npy', '../data/labels/lab/obj_right2.npy', + '../data/labels/lab/obj_left3.npy', '../data/labels/lab/obj_right3.npy', + '../data/labels/lab/obj_left4.npy', '../data/labels/lab/obj_right4.npy', + '../data/labels/office/obj_left1.npy', '../data/labels/office/obj_right1.npy', + '../data/labels/office/obj_left2.npy', '../data/labels/office/obj_right2.npy', + '../data/labels/office/obj_left3.npy', '../data/labels/office/obj_right3.npy',] + +TEST_FA_LABELS = ['../data/labels/house/FA_left4.npy', '../data/labels/house/FA_right4.npy', + '../data/labels/house/FA_left5.npy', '../data/labels/house/FA_right5.npy', + '../data/labels/house/FA_left6.npy', '../data/labels/house/FA_right6.npy', + '../data/labels/lab/FA_left5.npy', '../data/labels/lab/FA_right5.npy', + '../data/labels/lab/FA_left6.npy', '../data/labels/lab/FA_right6.npy', + '../data/labels/lab/FA_left7.npy', '../data/labels/lab/FA_right7.npy', + '../data/labels/lab/FA_left8.npy', '../data/labels/lab/FA_right8.npy', + '../data/labels/office/FA_left4.npy', '../data/labels/office/FA_right4.npy', + '../data/labels/office/FA_left5.npy', '../data/labels/office/FA_right5.npy', + '../data/labels/office/FA_left6.npy', '../data/labels/office/FA_right6.npy',] + +TEST_OBJ_LABELS = ['../data/labels/house/obj_left4.npy', '../data/labels/house/obj_right4.npy', + '../data/labels/house/obj_left5.npy', '../data/labels/house/obj_right5.npy', + '../data/labels/house/obj_left6.npy', '../data/labels/house/obj_right6.npy', + '../data/labels/lab/obj_left5.npy', '../data/labels/lab/obj_right5.npy', + '../data/labels/lab/obj_left6.npy', '../data/labels/lab/obj_right6.npy', + '../data/labels/lab/obj_left7.npy', '../data/labels/lab/obj_right7.npy', + '../data/labels/lab/obj_left8.npy', '../data/labels/lab/obj_right8.npy', + '../data/labels/office/obj_left4.npy', '../data/labels/office/obj_right4.npy', + 
'../data/labels/office/obj_left5.npy', '../data/labels/office/obj_right5.npy', + '../data/labels/office/obj_left6.npy', '../data/labels/office/obj_right6.npy',] + +SCENES = ['house', 'lab', 'office'] + +EPOCH = 300 +BATCH_SIZE = 64 + +MSG_DISPLAY_FREQ = 20 + +LOSS = np.array([]) + +class HandcamDataset: + + def __init__(self, head_dirs, hand_dirs, fa_nps, obj_nps, transform=None): + # check the input params + assert len(head_dirs) == len(hand_dirs) == len(fa_nps) == len(obj_nps) + # retrieve all the filenames + self.data = [] + for (head_dir, hand_dir, fa_np, obj_np) in zip(head_dirs, hand_dirs, fa_nps, obj_nps): + fa_labels = np.load(fa_np) + obj_labels = np.load(obj_np) + head_filenames = os.listdir(head_dir) + head_filenames = sorted(head_filenames, + key=lambda pid: int(pid.split('Image')[1].split('.')[0])) + hand_filenames = os.listdir(hand_dir) + hand_filenames = sorted(hand_filenames, + key=lambda pid: int(pid.split('Image')[1].split('.')[0])) + for (head_filename, hand_filename, fa_label, obj_label) in zip(head_filenames, hand_filenames, fa_labels, obj_labels): + self.data.append({'head_filename':head_dir+head_filename, + 'hand_filename':hand_dir+hand_filename, + 'fa_label':fa_label, + 'obj_label':obj_label},) + # store transformation settings + self.transform = transform + + def __getitem__(self, index): + scene = SCENES.index(self.data[index]['head_filename'].split('/')[4]) + head_img = Image.open(self.data[index]['head_filename']) + head_img = head_img.convert('RGB') + head_img = head_img.resize((224, 224), resample=Image.LANCZOS) + hand_img = Image.open(self.data[index]['hand_filename']) + hand_img = hand_img.convert('RGB') + hand_img = hand_img.resize((224, 224), resample=Image.LANCZOS) + if self.transform is not None: + head_img = self.transform(head_img) + hand_img = self.transform(hand_img) + assert isinstance(head_img, torch.FloatTensor) # img must be torch.FloatTensor + assert isinstance(hand_img, torch.FloatTensor) # img must be torch.FloatTensor + fa_label = torch.LongTensor([long(self.data[index]['fa_label'])]) # label must be torch.LongTensor + obj_label = torch.LongTensor([long(self.data[index]['obj_label'])]) # label must be torch.LongTensor + return scene, head_img, hand_img, fa_label, obj_label + + def __len__(self): + return len(self.data) + + +class HanNet(nn.Module): + + def __init__(self, pretrained=False, num_classes=1000): + super(HanNet, self).__init__() + self.headstream = resnet34(pretrained=pretrained) + self.handstream = resnet34(pretrained=pretrained) + self.fc = nn.Linear(512 * 2, num_classes) + + def forward(self, x): + x = torch.cat((self.headstream(x[0]), self.handstream(x[1])), dim=1) + x = self.fc(x) + return x + + +def train(train_loader, model, criterion, optimizer, epoch): + + batch_time = 0.0 + + # switch to train mode + model.train() + + end = time.time() + + running_loss = 0.0 + global LOSS + + for i, (_, head_inputs, hand_inputs, fa_labels, obj_labels) in enumerate(train_loader): + + fa_labels = torch.squeeze(fa_labels, 1) + obj_labels = torch.squeeze(obj_labels, 1) + + if USE_GPU: + head_inputs = Variable(head_inputs).cuda(async=True) + hand_inputs = Variable(hand_inputs).cuda(async=True) + fa_labels = Variable(fa_labels).cuda(async=True) + obj_labels = Variable(obj_labels).cuda(async=True) + else: + head_inputs = Variable(head_inputs) + hand_inputs = Variable(hand_inputs) + fa_labels = Variable(fa_labels) + obj_labels = Variable(obj_labels) + + outputs = model([head_inputs, hand_inputs]) + + loss = criterion(outputs[:, 0:2], fa_labels) + 
criterion(outputs[:, 2:26], obj_labels) + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + running_loss += loss.data[0] + + batch_time += time.time() - end + end = time.time() + + if i%MSG_DISPLAY_FREQ == (MSG_DISPLAY_FREQ-1): + print("Epoch: [{}][{}/{}]\t Loss: {:.8f}\t Time {:.3f}".format(epoch, i+1, len(train_loader), running_loss/MSG_DISPLAY_FREQ, batch_time/MSG_DISPLAY_FREQ)) + LOSS = np.append(LOSS, running_loss/MSG_DISPLAY_FREQ) + running_loss = 0.0 + batch_time = 0.0 + + np.save('loss_{:03}.npy'.format(epoch), LOSS) + + +def test(test_loader, model, epoch): + + # switch to evaluate mode + model.eval() + + confusion_matrix = np.zeros((3, 24, 24)) + + for i, (scenes, head_inputs, hand_inputs, _, obj_labels) in enumerate(test_loader): + + obj_labels = torch.squeeze(obj_labels, 1) + + if USE_GPU: + head_inputs = Variable(head_inputs, volatile=True).cuda(async=True) + hand_inputs = Variable(hand_inputs, volatile=True).cuda(async=True) + obj_labels = Variable(obj_labels, volatile=True).cuda(async=True) + else: + head_inputs = Variable(head_inputs, volatile=True) + hand_inputs = Variable(hand_inputs, volatile=True) + obj_labels = Variable(obj_labels, volatile=True) + + outputs = model([head_inputs, hand_inputs]) + + _, predictions = torch.max(outputs[:, 2:25], 1) + + for j in range(predictions.data.size(0)): + + scene = scenes[j] + prediction = predictions.data[j] + label = obj_labels.data[j] + + confusion_matrix[scene][prediction][label] += 1 + + print("Acc: {:.3}".format(np.sum(np.trace(confusion_matrix, axis1=1, axis2=2))/np.sum(confusion_matrix))) + np.save('cm_{:03}.npy'.format(epoch), confusion_matrix) + +def main(): + + transformations = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ]) + + train_dataset = HandcamDataset(TRAIN_HEAD_DIRS, TRAIN_HAND_DIRS, TRAIN_FA_LABELS, TRAIN_OBJ_LABELS, transformations) + train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2) + + test_dataset = HandcamDataset(TEST_HEAD_DIRS, TEST_HAND_DIRS, TRAIN_FA_LABELS, TRAIN_OBJ_LABELS, transformations) + test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2) + + print("=> using pre-trained model HanNet") + model = HanNet(pretrained=True, num_classes=2+24) + + if USE_GPU: + model = model.cuda() + + if USE_GPU: + criterion = nn.CrossEntropyLoss().cuda() + else: + criterion = nn.CrossEntropyLoss() + + optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) + + for epoch in range(EPOCH): + # train(train_loader, model, criterion, optimizer, epoch) + test(test_loader, model, epoch) + torch.save(model, 'model_{:03}.pth'.format(epoch)) + +if __name__ == '__main__': + main() diff --git a/code/resnet.py b/code/resnet.py new file mode 100644 index 0000000..39f9d5d --- /dev/null +++ b/code/resnet.py @@ -0,0 +1,235 @@ +import pdb +import torch.nn as nn +import math +import torch.utils.model_zoo as model_zoo + +from torch.nn.parameter import Parameter + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152'] + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + + +def 
conv3x3(in_planes, out_planes, stride=1): + "3x3 convolution with padding" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000): + self.inplanes = 64 + super(ResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.avgpool = nn.AvgPool2d(7) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + # x = self.fc(x) + + return x + + # homemade pre-trained model loading function :) + def load_pretrained_model(self, pretrained_state_dict): + + custom_state_dict = self.state_dict() + + for name, param in pretrained_state_dict.items(): + + if name not in custom_state_dict: + raise KeyError("unexpected key '{}' in state_dict".format(name)) + + if isinstance(param, Parameter): + param = param.data + + try: + custom_state_dict[name].copy_(param) + except: + print("skip loading key '{}' due to inconsistent size".format(name)) + + self.load_state_dict(custom_state_dict) + + +def resnet18(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + if pretrained: + model.load_pretrained_model(model_zoo.load_url(model_urls['resnet18'])) + return model + + +def resnet34(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_pretrained_model(model_zoo.load_url(model_urls['resnet34'])) + return model + + +def resnet50(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_pretrained_model(model_zoo.load_url(model_urls['resnet50'])) + return model + + +def resnet101(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + if pretrained: + model.load_pretrained_model(model_zoo.load_url(model_urls['resnet101'])) + return model + + +def resnet152(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + if pretrained: + model.load_pretrained_model(model_zoo.load_url(model_urls['resnet152'])) + return model diff --git a/results/all_cnf.png b/results/all_cnf.png new file mode 100644 index 0000000..92c06c9 Binary files /dev/null and b/results/all_cnf.png differ diff --git a/results/architecture.svg b/results/architecture.svg new file mode 100644 index 0000000..3223a20 --- /dev/null +++ b/results/architecture.svg @@ -0,0 +1,29 @@ + + + + background + + + + + + + Layer 1 + + ResNet50 + + FA + + OBJ + fc + fc + + + frame + + + free/active + + obj cat. 
+ + \ No newline at end of file diff --git a/results/house_cnf.png b/results/house_cnf.png new file mode 100644 index 0000000..65d4652 Binary files /dev/null and b/results/house_cnf.png differ diff --git a/results/index.md index 96ce61c..7eed2a2 100644 --- a/results/index.md +++ b/results/index.md @@ -1,47 +1,58 @@ -# Your Name (id) +# 趙浚宏 (106061611) -#Project 5: Deep Classification +# Project 5: Deep Classification ## Overview -The project is related to -> quote +The project is related to [Recognition from Hand Cameras: +A Revisit with Deep Learning](https://drive.google.com/file/d/0BwCy2boZhfdBM0ZDTV9lZW1rZzg/view). +> We revisit the study of a wrist-mounted camera system (re- +ferred to as HandCam) for recognizing activities of hands. HandCam has two unique properties as compared to egocentric systems [1, 2] (referred +to as HeadCam): (1) it avoids the need to detect hands; (2) it more +consistently observes the activities of hands. By taking advantage of +these properties, we propose a deep-learning-based method to recognize +hand states (free vs. active hands, hand gestures, object categories), and +discover object categories. ## Implementation -1. One - * item - * item -2. Two -``` -Code highlights -``` +* **Network Architecture** + + + ![](architecture.svg) + + +* **Multitask Loss** + + ``` + Loss = \lambda_1 * CrossEntropy(FA_OUTPUT, FA_LABEL) + \lambda_2 * CrossEntropy(OBJ_OUTPUT, OBJ_LABEL) + ``` + +* **Trained 30 epochs** + +* **Adam Optimizer lr=1e-3** ## Installation -* Other required packages. -* How to compile from source? - -### Results
+* numpy 1.13.3 +* pytorch 0.2.0 + +## Results + +* House Scene + + ![](house_cnf.png) + +* Lab Scene + + ![](lab_cnf.png) + +* Office Scene + + ![](office_cnf.png) + + +* All + + ![](all_cnf.png) diff --git a/results/lab_cnf.png b/results/lab_cnf.png new file mode 100644 index 0000000..62779d7 Binary files /dev/null and b/results/lab_cnf.png differ diff --git a/results/office_cnf.png b/results/office_cnf.png new file mode 100644 index 0000000..23fbfdd Binary files /dev/null and b/results/office_cnf.png differ
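For reference, the "Multitask Loss" bullet in the README corresponds directly to the slicing used in `train()` in `code/main.py`: the first two logits feed the free-vs-active (FA) cross-entropy term and the remaining 24 feed the object-category term, with no explicit weights, i.e. lambda_1 = lambda_2 = 1. The snippet below is a minimal, self-contained sketch of that loss using dummy tensors; it assumes a recent PyTorch API (plain tensors rather than the `Variable` wrapper the repository uses, since the code targets pytorch 0.2.0):

```python
import torch
import torch.nn as nn

# Dummy logits for a batch of 4 frames: 2 FA classes + 24 object classes,
# matching the slices outputs[:, 0:2] and outputs[:, 2:26] used in train().
outputs = torch.randn(4, 2 + 24, requires_grad=True)
fa_labels = torch.randint(0, 2, (4,))    # free (0) vs. active (1) hand state
obj_labels = torch.randint(0, 24, (4,))  # object category index

criterion = nn.CrossEntropyLoss()
lambda_1, lambda_2 = 1.0, 1.0            # main.py weights both terms equally

loss = lambda_1 * criterion(outputs[:, 0:2], fa_labels) + \
       lambda_2 * criterion(outputs[:, 2:26], obj_labels)
loss.backward()  # both terms contribute gradients to the shared output tensor
```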
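The confusion-matrix figures in the Results section are rendered from the `cm_XXX.npy` arrays that `test()` saves each epoch: shape `(3, 24, 24)`, indexed as `[scene][prediction][label]`, with overall accuracy printed as the sum of the per-scene traces divided by the total count. Below is one possible way to reduce such an array to per-scene and overall accuracies; `cm_000.npy` is just the epoch-0 file name produced by the `cm_{:03}.npy` pattern, and a reasonably recent NumPy is assumed:

```python
import numpy as np

SCENES = ['house', 'lab', 'office']  # same ordering as in main.py

# One (3, 24, 24) array per epoch, indexed as [scene][prediction][label].
cm = np.load('cm_000.npy')

for s, name in enumerate(SCENES):
    scene_cm = cm[s]
    acc = np.trace(scene_cm) / scene_cm.sum()  # correct / total for this scene
    print('{:<8s} accuracy: {:.3f}'.format(name, acc))

# Same quantity that test() prints: sum of traces over the grand total.
overall = np.trace(cm, axis1=1, axis2=2).sum() / cm.sum()
print('overall accuracy: {:.3f}'.format(overall))
```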