
Commit

bin func
prodangp committed Apr 23, 2023
1 parent 3dcbd74 commit 882200a
Showing 5 changed files with 405 additions and 177 deletions.
136 changes: 136 additions & 0 deletions architectures.py
@@ -0,0 +1,136 @@
import torch
import torchaudio
import torch.nn as nn


class SimpleCNN(nn.Module):
    def __init__(self):
        super().__init__()
        conv_layers = []
        self.name = 'SimpleCNN'
        # First convolution block with ReLU and batch norm
        self.conv1 = nn.Conv2d(1, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
        self.relu1 = nn.ReLU()
        self.bn1 = nn.BatchNorm2d(8)
        conv_layers += [self.conv1, self.relu1, self.bn1]

        # Second convolution block
        self.conv2 = nn.Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu2 = nn.ReLU()
        self.bn2 = nn.BatchNorm2d(16)
        conv_layers += [self.conv2, self.relu2, self.bn2]

        # Third convolution block
        self.conv3 = nn.Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu3 = nn.ReLU()
        self.bn3 = nn.BatchNorm2d(32)
        conv_layers += [self.conv3, self.relu3, self.bn3]

        # Fourth convolution block
        self.conv4 = nn.Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        self.relu4 = nn.ReLU()
        self.bn4 = nn.BatchNorm2d(64)
        conv_layers += [self.conv4, self.relu4, self.bn4]

        # Linear classifier
        self.ap = nn.AdaptiveAvgPool2d(output_size=1)
        self.lin = nn.Linear(in_features=64, out_features=8)

        self.conv = nn.Sequential(*conv_layers)

    def forward(self, x):
        y_1 = self.conv1(x)  # keep the first conv layer's output (tensors have clone(), not copy())
        x = self.conv(x)  # full convolutional stack (conv1 is recomputed here)
        x = self.ap(x)
        x = x.view(x.shape[0], -1)
        x = self.lin(x)
        return x, y_1  # y_1: output of the 1st layer
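As a quick sanity check on the shapes — a sketch only; the 64×64 single-channel input below is an assumption, the real spectrogram size depends on the preprocessing:

model = SimpleCNN()
dummy = torch.randn(4, 1, 64, 64)  # (batch, channels, n_mels, n_steps) -- assumed size
logits, y_1 = model(dummy)
print(logits.shape)  # torch.Size([4, 8]): one logit per genre class
print(y_1.shape)     # torch.Size([4, 8, 32, 32]): the stride-2 first conv halves each spatial dim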


class Res2DBlock(nn.Module):
    expansion = 1  # block.expansion is not used here

    def __init__(self, inplanes, planes, stride=1, padding=1):
        super().__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
                               padding=padding, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
                               padding=padding, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = nn.Sequential(
            nn.Conv2d(inplanes, planes, 1, stride, bias=False),
            nn.BatchNorm2d(planes))
        self.stride = stride

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)  # bn1 was defined but never applied
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        identity = self.downsample(x)  # 1x1 projection so shapes match for the addition
        out += identity
        out = self.relu(out)
        return out
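A strided block halves the spatial resolution while the 1x1 downsample branch projects the input to the new channel count, so the residual addition lines up. A minimal sketch with assumed sizes:

block = Res2DBlock(inplanes=16, planes=32, stride=2)
x = torch.randn(1, 16, 56, 56)
print(block(x).shape)  # torch.Size([1, 32, 28, 28])

Note that, unlike torchvision's BasicBlock, the projection here is applied unconditionally, even when stride=1 and inplanes == planes.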


class ResNet(nn.Module):

    def __init__(self, FN=16, num_classes=8, p_dropout=None):
        super().__init__()

        self.FN = FN
        if FN == 128:
            self.name = 'ResNet34-XL'
        elif FN == 64:
            self.name = 'ResNet34-L'
        elif FN == 32:
            self.name = 'ResNet34-M'
        elif FN == 16:
            self.name = 'ResNet34-S'
        else:
            self.name = 'ResNet34'
        layers = [3, 4, 6, 3]  # ResNet-34 layout; only the first two stages are built below
        self.c1 = nn.Conv2d(1, FN, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(FN)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(FN, FN, layers[0])
        self.layer2 = self._make_layer(FN, FN * 2, layers[1], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d(7)
        # layer2 outputs FN*2 channels; pooled to 7x7 this flattens to FN*2*49 = FN*98
        self.fc = nn.Linear(FN * 98, num_classes)
        self.p_dropout = p_dropout
        if p_dropout:
            self.dropout = nn.Dropout(p=p_dropout)

    def _make_layer(self, inplanes, planes, blocks, stride=1):
        layers = []
        layers.append(Res2DBlock(inplanes, planes, stride))

        self.inplanes = planes

        for _ in range(1, blocks):
            layers.append(Res2DBlock(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.c1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        if self.p_dropout:
            x = self.dropout(x)  # note: applied after fc, so dropout acts on the logits

        return x
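Since only the first two of the four ResNet-34 stages are instantiated, the classifier input is layer2's FN*2 channels pooled to 7x7, i.e. FN*2*49 = FN*98 features. A quick shape check — a sketch, with the input size assumed:

model = ResNet(FN=16)  # name: 'ResNet34-S'
x = torch.randn(2, 1, 128, 128)  # assumed single-channel spectrogram batch
print(model(x).shape)  # torch.Size([2, 8]); the adaptive pool makes this input-size independent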


50 changes: 50 additions & 0 deletions dataloader.py
@@ -0,0 +1,50 @@
import os
import torch
import torchaudio
from torch.utils.data import Dataset


class FMA2D_spec(Dataset):
    def __init__(self, data_dir, track_ids, labels_onehot, transforms=True, augment_prob=0.5,
                 max_mask_pct=0.3, n_freq_masks=2, n_time_masks=2):
        self.data_dir = data_dir
        self.filenames = os.listdir(data_dir)
        self.track_ids = track_ids
        self.labels_onehot = labels_onehot
        self.transforms = transforms
        self.augment_prob = augment_prob
        self.max_mask_pct = max_mask_pct
        self.n_freq_masks = n_freq_masks
        self.n_time_masks = n_time_masks

    def __getitem__(self, index):
        tid = self.track_ids[index]
        # load the spectrogram data (use data_dir instead of a hard-coded path)
        spec_path = os.path.join(self.data_dir, '{:06d}.pt'.format(tid))
        try:
            spec = torch.load(spec_path)
        except Exception:
            # fall back to the next track if the file is missing or unreadable;
            # wrap around so the last index cannot run past the end
            return self.__getitem__((index + 1) % len(self))
        if self.transforms and torch.rand(1) < self.augment_prob:
            spec = self.spectro_augment(spec)
        # get label
        label = torch.from_numpy(self.labels_onehot.loc[tid].values).float()
        return spec, label

    def __len__(self):
        return len(self.track_ids)

    def spectro_augment(self, spec):
        _, n_mels, n_steps = spec.shape
        mask_value = spec.mean()
        aug_spec = spec

        freq_mask_param = int(self.max_mask_pct * n_mels)  # mask params are integer bin counts
        for _ in range(self.n_freq_masks):
            aug_spec = torchaudio.transforms.FrequencyMasking(freq_mask_param)(aug_spec, mask_value)

        time_mask_param = int(self.max_mask_pct * n_steps)
        for _ in range(self.n_time_masks):
            aug_spec = torchaudio.transforms.TimeMasking(time_mask_param)(aug_spec, mask_value)

        return aug_spec
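A sketch of wiring the dataset into a loader; track_ids and labels_onehot (a pandas DataFrame indexed by track id, as suggested by labels_onehot.loc[tid]) are assumed to come from the FMA metadata, and the default collate assumes all spectrograms share the same shape:

from torch.utils.data import DataLoader

dataset = FMA2D_spec('./data/spectrograms', track_ids, labels_onehot)
loader = DataLoader(dataset, batch_size=32, shuffle=True)
specs, labels = next(iter(loader))  # specs: (32, 1, n_mels, n_steps); labels: (32, 8)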
181 changes: 4 additions & 177 deletions info_bottleneck.ipynb
@@ -109,50 +109,7 @@
},
{
"cell_type": "code",
"source": [
"class FMA2D_spec(Dataset): \n",
" def __init__(self, data_dir, track_ids,transforms = True, augment_prob = 0.5, max_mask_pct=0.3, n_freq_masks=2, n_time_masks=2):\n",
" self.data_dir = data_dir\n",
" self.filenames = os.listdir(data_dir)\n",
" self.track_ids = track_ids\n",
" self.transforms = transforms\n",
" self.augment_prob = augment_prob\n",
" self.max_mask_pct = max_mask_pct\n",
" self.n_freq_masks = n_freq_masks\n",
" self.n_time_masks = n_time_masks\n",
" \n",
" def __getitem__(self, index):\n",
" tid = self.track_ids[index]\n",
" # load the spectrogram data\n",
" spec_path = os.path.join('./data/spectrograms/'+\"{:06d}\".format(tid)+'.pt')\n",
" try:\n",
" spec = torch.load(spec_path)\n",
" except Exception as e:\n",
" return self.__getitem__(index + 1)\n",
" if self.transforms is True and torch.rand(1) < self.augment_prob:\n",
" spec = self.spectro_augment(spec)\n",
" #get label\n",
" label = torch.from_numpy(labels_onehot.loc[tid].values).float()\n",
" return spec, label\n",
" \n",
" def __len__(self):\n",
" return len(self.track_ids)\n",
" \n",
" def spectro_augment(self, spec):\n",
" _, n_mels, n_steps = spec.shape\n",
" mask_value = spec.mean()\n",
" aug_spec = spec\n",
"\n",
" freq_mask_param = self.max_mask_pct * n_mels\n",
" for _ in range(self.n_freq_masks):\n",
" aug_spec = torchaudio.transforms.FrequencyMasking(freq_mask_param)(aug_spec, mask_value)\n",
"\n",
" time_mask_param = self.max_mask_pct * n_steps\n",
" for _ in range(self.n_time_masks):\n",
" aug_spec = torchaudio.transforms.TimeMasking(time_mask_param)(aug_spec, mask_value)\n",
"\n",
" return aug_spec"
],
"source": [],
"metadata": {
"id": "kt-lfxuskM6F",
"ExecuteTime": {
@@ -165,35 +122,7 @@
},
{
"cell_type": "code",
"source": [
"class Res2DBlock(nn.Module):\n",
" expansion = 1 #we don't use the block.expansion here\n",
"\n",
" def __init__(self, inplanes, planes, stride=1,padding = 1):\n",
" super().__init__()\n",
" self.conv1 = nn.Conv2d(inplanes, planes, kernel_size = 3, stride=stride,\n",
" padding=padding, bias=False)\n",
" self.bn1 = nn.BatchNorm2d(planes)\n",
" self.relu = nn.ReLU(inplace=True)\n",
" self.conv2 = nn.Conv2d(planes, planes, kernel_size = 3, stride=1,\n",
" padding=padding, bias=False)\n",
" self.bn2 = nn.BatchNorm2d(planes)\n",
" self.downsample = nn.Sequential(\n",
" nn.Conv2d(inplanes, planes, 1, stride, bias=False),\n",
" nn.BatchNorm2d(planes))\n",
" self.stride = stride\n",
"\n",
" def forward(self, x):\n",
" identity = x\n",
" out = self.conv1(x)\n",
" out = self.relu(out)\n",
" out = self.conv2(out)\n",
" out = self.bn2(out)\n",
" identity = self.downsample(x)\n",
" out += identity\n",
" out = self.relu(out)\n",
" return out"
],
"source": [],
"metadata": {
"id": "RrwLXIrhjfCz",
"ExecuteTime": {
@@ -206,67 +135,7 @@
},
{
"cell_type": "code",
"source": [
"class ResNet(nn.Module):\n",
"\n",
" def __init__(self, FN=16, num_classes=8, p_dropout=None):\n",
" super().__init__()\n",
" \n",
" self.FN = FN\n",
" if FN == 128:\n",
" self.name = 'ResNet34-XL' \n",
" elif FN == 64:\n",
" self.name = 'ResNet34-L'\n",
" elif FN == 32:\n",
" self.name = 'ResNet34-M'\n",
" elif FN == 16:\n",
" self.name = 'ResNet34-S'\n",
" else:\n",
" self.name ='ResNet34'\n",
" layers = [3, 4, 6, 3]\n",
" self.c1 = nn.Conv2d(1, FN, kernel_size=7, stride=2, padding=3, bias=False)\n",
" self.bn1 = nn.BatchNorm2d(FN)\n",
" self.relu = nn.ReLU(inplace=True)\n",
" self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n",
" self.layer1 = self._make_layer(FN, FN, layers[0])\n",
" self.layer2 = self._make_layer(FN, FN*2, layers[1], stride=2)\n",
" self.avgpool = nn.AdaptiveAvgPool2d(7)\n",
" self.fc = nn.Linear(FN * 98 , num_classes)\n",
" self.p_dropout = p_dropout\n",
" if p_dropout:\n",
" self.dropout = nn.Dropout(p=p_dropout)\n",
"\n",
"\n",
" def _make_layer(self, inplanes, planes, blocks, stride=1):\n",
" layers = []\n",
" layers.append(Res2DBlock(inplanes, planes, stride))\n",
" \n",
" self.inplanes = planes\n",
" \n",
" for _ in range(1, blocks):\n",
" layers.append(Res2DBlock(self.inplanes, planes))\n",
"\n",
" return nn.Sequential(*layers)\n",
" \n",
" \n",
" def forward(self, x):\n",
" x = self.c1(x) \n",
" x = self.bn1(x)\n",
" x = self.relu(x)\n",
" x = self.maxpool(x) \n",
"\n",
" x = self.layer1(x) \n",
" x = self.layer2(x) \n",
"\n",
" x = self.avgpool(x) \n",
" x = torch.flatten(x, 1) \n",
" x = self.fc(x)\n",
" if self.p_dropout:\n",
" x = self.dropout(x)\n",
"\n",
" return x\n",
" "
],
"source": [],
"metadata": {
"id": "l78xEllkjuxF",
"ExecuteTime": {
@@ -279,49 +148,7 @@
},
{
"cell_type": "code",
"source": [
"class SimpleCNN(nn.Module):\n",
" def __init__(self):\n",
" super().__init__()\n",
" conv_layers = []\n",
" self.name = 'SimpleCNN'\n",
" # First Convolution Block with Relu and Batch Norm. Use Kaiming Initialization\n",
" self.conv1 = nn.Conv2d(1, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))\n",
" self.relu1 = nn.ReLU()\n",
" self.bn1 = nn.BatchNorm2d(8)\n",
" conv_layers += [self.conv1, self.relu1, self.bn1]\n",
"\n",
" # Second Convolution Block\n",
" self.conv2 = nn.Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
" self.relu2 = nn.ReLU()\n",
" self.bn2 = nn.BatchNorm2d(16)\n",
" conv_layers += [self.conv2, self.relu2, self.bn2]\n",
"\n",
" # Third Convolution Block\n",
" self.conv3 = nn.Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
" self.relu3 = nn.ReLU()\n",
" self.bn3 = nn.BatchNorm2d(32)\n",
" conv_layers += [self.conv3, self.relu3, self.bn3]\n",
"\n",
" # Fourth Convolution Block\n",
" self.conv4 = nn.Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n",
" self.relu4 = nn.ReLU()\n",
" self.bn4 = nn.BatchNorm2d(64)\n",
" conv_layers += [self.conv4, self.relu4, self.bn4]\n",
"\n",
" # Linear Classifier\n",
" self.ap = nn.AdaptiveAvgPool2d(output_size=1)\n",
" self.lin = nn.Linear(in_features=64, out_features=8)\n",
"\n",
" self.conv = nn.Sequential(*conv_layers)\n",
" \n",
" def forward(self, x):\n",
" x = self.conv(x)\n",
" x = self.ap(x)\n",
" x = x.view(x.shape[0], -1)\n",
" x = self.lin(x)\n",
" return x"
],
"source": [],
"metadata": {
"id": "Neaco9ukifUs",
"ExecuteTime": {
