-
Notifications
You must be signed in to change notification settings - Fork 17
/
SemBranch.py
104 lines (78 loc) · 3.16 KB
/
SemBranch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import torch.nn as nn
class BasicBlockSem(nn.Module):
def __init__(self, in_planes, out_planes, kernel_size, stride, padding):
super(BasicBlockSem, self).__init__()
self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, bias=False)
self.bn = nn.BatchNorm2d(out_planes)
self.relu = nn.ReLU(inplace=True)
self.ca = ChannelAttention(out_planes)
def forward(self, x):
out = self.conv(x)
out = self.bn(out)
# Channel Attention Module
out = self.ca(out) * out
out = self.relu(out)
return out
class ChannelAttention(nn.Module):
def __init__(self, in_planes, ratio=16):
super(ChannelAttention, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.max_pool = nn.AdaptiveMaxPool2d(1)
self.fc1 = nn.Conv2d(in_planes, in_planes // 16, 1, bias=False)
self.fc2 = nn.Conv2d(in_planes // 16, in_planes, 1, bias=False)
self.relu1 = nn.ReLU()
self.sigmoid = nn.Sigmoid()
def forward(self, x):
avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
out = avg_out + max_out
return self.sigmoid(out)
class SemBranch(nn.Module):
"""
Generate Model Architecture
"""
def __init__(self, scene_classes, semantic_classes=151):
super(SemBranch, self).__init__()
# Semantic Branch
self.in_block_sem = nn.Sequential(
nn.Conv2d(semantic_classes + 1, 64, kernel_size=7, stride=2, padding=3, bias=False),
nn.BatchNorm2d(64),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)
self.in_block_sem_1 = BasicBlockSem(64, 128, kernel_size=3, stride=2, padding=1)
self.in_block_sem_2 = BasicBlockSem(128, 256, kernel_size=3, stride=2, padding=1)
self.in_block_sem_3 = BasicBlockSem(256, 512, kernel_size=3, stride=2, padding=1)
# Semantic Scene Classification Layers
self.dropout = nn.Dropout(0.3)
self.avgpool = nn.AvgPool2d(7, stride=1)
self.fc_SEM = nn.Linear(512, scene_classes)
# Loss
self.criterion = nn.CrossEntropyLoss()
def forward(self, x, sem):
# Semantic Branch
y = self.in_block_sem(sem)
y2 = self.in_block_sem_1(y)
y3 = self.in_block_sem_2(y2)
y4 = self.in_block_sem_3(y3)
# Semantic Classification Layer
act_sem = self.avgpool(y4)
act_sem = act_sem.view(act_sem.size(0), -1)
act_sem = self.dropout(act_sem)
act_sem = self.fc_SEM(act_sem)
act = act_sem
e5 = y4
act_rgb = act_sem
return act, e5, act_rgb, act_sem
def loss(self, x, target):
"""
Funtion to comput the loss
:param x: Predictions obtained by the network
:param target: Ground-truth scene recognition labels
:return: Loss value
"""
# Check inputs
assert (x.shape[0] == target.shape[0])
# Classification loss
loss = self.criterion(x, target.long())
return loss