diff --git a/HW01/zhengquanwei028-HW01.py b/HW01/zhengquanwei028-HW01.py
new file mode 100644
index 00000000..8fe5c222
--- /dev/null
+++ b/HW01/zhengquanwei028-HW01.py
@@ -0,0 +1,276 @@
+# HW1
+# Numerical Operations
+import math
+import numpy as np
+
+# Reading/Writing Data
+import pandas as pd
+import os
+import csv
+os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
+# For Progress Bar
+from tqdm import tqdm
+
+# Pytorch
+import torch
+import torch.nn as nn
+from torch.utils.data import Dataset, DataLoader, random_split
+from torch.optim.lr_scheduler import CosineAnnealingLR, CosineAnnealingWarmRestarts, StepLR, ReduceLROnPlateau
+
+# For plotting learning curve
+from torch.utils.tensorboard import SummaryWriter
+
+# Utility functions
+def same_seed(seed):
+    '''Fixes random number generator seeds for reproducibility.'''
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+
+
+def train_valid_split(data_set, valid_ratio, seed):
+    '''Split provided training data into training set and validation set.'''
+    valid_set_size = int(valid_ratio * len(data_set))
+    train_set_size = len(data_set) - valid_set_size
+    train_set, valid_set = random_split(data_set, [train_set_size, valid_set_size], generator=torch.Generator().manual_seed(seed))
+    return np.array(train_set), np.array(valid_set)
+
+
+def predict(test_loader, model, device):
+    model.eval()  # Set your model to evaluation mode.
+    preds = []
+    for x in tqdm(test_loader):
+        x = x.to(device)
+        with torch.no_grad():
+            pred = model(x)
+            preds.append(pred.detach().cpu())
+    preds = torch.cat(preds, dim=0).numpy()
+    return preds
+
+
+## Data
+
+class COVID19Dataset(Dataset):
+    '''
+    x: Features.
+    y: Targets; if None, do prediction.
+    '''
+    def __init__(self, x, y=None):
+        if y is None:
+            self.y = y
+        else:
+            self.y = torch.FloatTensor(y)
+        self.x = torch.FloatTensor(x)
+
+    def __getitem__(self, idx):
+        if self.y is None:
+            return self.x[idx]
+        else:
+            return self.x[idx], self.y[idx]
+
+    def __len__(self):
+        return len(self.x)
+
+
+## Neural network
+
+class My_Model(nn.Module):
+    def __init__(self, input_dim):
+        super(My_Model, self).__init__()
+        # TODO: modify model's structure, be aware of dimensions.
+        self.layers = nn.Sequential(
+            nn.Linear(input_dim, 64),
+            nn.Dropout(p=0.2),
+            nn.ReLU(),
+            nn.Linear(64, 8),
+            nn.Dropout(p=0.1),
+            nn.LeakyReLU(0.6),
+            nn.Linear(8, 1),
+        )
+
+    def forward(self, x):
+        # x = self.dropout(x)
+        # print('dropout output', x)
+        x = self.layers(x)
+        x = x.squeeze(1)  # (B, 1) -> (B)
+        return x
+
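+# A quick sanity check of the model's output shape (a hedged sketch; the 117
+# below is an illustrative feature count, not a value fixed by this script):
+#
+#   model = My_Model(input_dim=117)
+#   dummy = torch.randn(4, 117)   # a batch of 4 samples
+#   print(model(dummy).shape)     # torch.Size([4])
+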
+config = {
+    'seed': 404,           # Your seed number, you can pick your lucky number. :)
+    'select_all': False,   # Whether to use all features.
+    'valid_ratio': 0.2,    # validation_size = train_size * valid_ratio
+    'n_epochs': 3000,      # Number of epochs.
+    'batch_size': 512,
+    'learning_rate': 1e-3,
+    # 'weight_decay': 1e-3,
+    'weight_decay': 0,
+    'early_stop': 400,     # If model has not improved for this many consecutive epochs, stop training.
+    'save_path': './models/model.ckpt'  # Your model will be saved here.
+}
+
+def select_feat(train_data, valid_data, test_data, select_all=True):
+    '''Selects useful features to perform regression.'''
+    y_train, y_valid = train_data[:, -1], valid_data[:, -1]
+    raw_x_train, raw_x_valid, raw_x_test = train_data[:, :-1], valid_data[:, :-1], test_data
+    print('select_feats', raw_x_train[0])
+    print('select_feat default idx', raw_x_train.shape[1])
+    all_feats = 'id,AL,AK,AZ,AR,CA,CO,CT,FL,GA,ID,IL,IN,IA,KS,KY,LA,MD,MA,MI,MN,MS,MO,NE,NV,NJ,NM,NY,NC,OH,OK,OR,RI,SC,TX,UT,VA,WA,cli,ili,hh_cmnty_cli,nohh_cmnty_cli,wearing_mask,travel_outside_state,work_outside_home,shop,restaurant,spent_time,large_event,public_transit,anxious,depressed,worried_finances,tested_positive,cli,ili,hh_cmnty_cli,nohh_cmnty_cli,wearing_mask,travel_outside_state,work_outside_home,shop,restaurant,spent_time,large_event,public_transit,anxious,depressed,worried_finances,tested_positive,cli,ili,hh_cmnty_cli,nohh_cmnty_cli,wearing_mask,travel_outside_state,work_outside_home,shop,restaurant,spent_time,large_event,public_transit,anxious,depressed,worried_finances,tested_positive,cli,ili,hh_cmnty_cli,nohh_cmnty_cli,wearing_mask,travel_outside_state,work_outside_home,shop,restaurant,spent_time,large_event,public_transit,anxious,depressed,worried_finances,tested_positive,cli,ili,hh_cmnty_cli,nohh_cmnty_cli,wearing_mask,travel_outside_state,work_outside_home,shop,restaurant,spent_time,large_event,public_transit,anxious,depressed,worried_finances'.split(',')
+    # states_feats = all_feats[1:38]
+    # illness = np.array(['cli','ili','hh_cmnty_cli','nohh_cmnty_cli','tested_positive'])
+    # valid_feats = 'AL,AK,AZ,AR,CA,CO,CT,FL,GA,ID,IL,IN,IA,KS,KY,LA,MD,MA,MI,MN,MS,MO,NE,NV,NJ,NM,NY,NC,OH,OK,OR,RI,SC,TX,UT,VA,WA,cli,ili,hh_cmnty_cli,nohh_cmnty_cli,tested_positive'.split(',')
+    valid_feats = 'cli,ili,hh_cmnty_cli,nohh_cmnty_cli,tested_positive'.split(',')
+
+    valid_idx = []
+    for i, feat in enumerate(all_feats):
+        if feat in valid_feats:
+            valid_idx.append(i)
+    print(valid_idx)
+    if select_all:
+        feat_idx = list(range(raw_x_train.shape[1]))
+    else:
+        # feat_idx = [0, 1, 2, 3, 4]  # TODO: Select suitable feature columns.
+        feat_idx = valid_idx
+    return raw_x_train[:, feat_idx], raw_x_valid[:, feat_idx], raw_x_test[:, feat_idx], y_train, y_valid
+
+
+def trainer(train_loader, valid_loader, model, config, device):
+    criterion = nn.MSELoss(reduction='mean')  # Define your loss function, do not modify this.
+    # Define your optimization algorithm.
+    # TODO: Please check https://pytorch.org/docs/stable/optim.html to get more available algorithms.
+    # TODO: L2 regularization (optimizer(weight decay...) or implement it yourself).
+
+    # optimizer = torch.optim.SGD(model.parameters(), lr=config['learning_rate'], momentum=0.9, weight_decay=config['weight_decay'])
+    # params_dict = [{
+    #     'params': model.layers[0].parameters(),
+    #     'lr': 0.001,
+    # }, {
+    #     'params': model.layers[1].parameters(),
+    #     'lr': 0.01,
+    #     # 'weight_decay': config['weight_decay'],
+    # }]
+    optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'], weight_decay=config['weight_decay'], amsgrad=False)
+    # optimizer = torch.optim.Adam(params_dict)
+    # Cosine annealing
+    # scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1, verbose=True)
+    # scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True, min_lr=1e-7)
+    # scheduler = CosineAnnealingLR(optimizer, T_max=50, eta_min=1e-7, verbose=True)
+    writer = SummaryWriter()  # Writer of TensorBoard.
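+
+    # The TODO above mentions implementing L2 regularization by hand instead of
+    # (or in addition to) the optimizer's weight_decay. A minimal sketch of the
+    # idea, commented out (l2_lambda is an illustrative value, not from this repo):
+    #
+    #   l2_lambda = 1e-3
+    #   l2_penalty = sum(p.pow(2).sum() for p in model.parameters())
+    #   loss = loss + l2_lambda * l2_penalty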
+
+    if not os.path.isdir('./models'):
+        os.mkdir('./models')  # Create directory for saving models.
+
+    n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0
+
+    for epoch in range(n_epochs):
+        model.train()  # Set your model to train mode.
+        loss_record = []
+
+        # tqdm is a package to visualize your training progress.
+        train_pbar = tqdm(train_loader, position=0, leave=True)
+
+        for x, y in train_pbar:
+            optimizer.zero_grad()              # Set gradient to zero.
+            x, y = x.to(device), y.to(device)  # Move your data to device.
+            pred = model(x)
+            loss = criterion(pred, y)
+            loss.backward()                    # Compute gradient (backpropagation).
+            optimizer.step()                   # Update parameters.
+            step += 1
+            loss_record.append(loss.detach().item())
+
+            # Display current epoch number and loss on tqdm progress bar.
+            train_pbar.set_description(f'Epoch [{epoch + 1}/{n_epochs}]')
+            train_pbar.set_postfix({'loss': loss.detach().item()})
+
+        # scheduler.step(epoch)
+        mean_train_loss = sum(loss_record) / len(loss_record)
+        writer.add_scalar('Loss/train', mean_train_loss, step)
+
+        model.eval()  # Set your model to evaluation mode.
+        loss_record = []
+        for x, y in valid_loader:
+            x, y = x.to(device), y.to(device)
+            with torch.no_grad():
+                pred = model(x)
+                loss = criterion(pred, y)
+            loss_record.append(loss.item())
+
+        mean_valid_loss = sum(loss_record) / len(loss_record)
+        print(f'Epoch [{epoch + 1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}')
+        writer.add_scalar('Loss/valid', mean_valid_loss, step)
+
+        if mean_valid_loss < best_loss:
+            best_loss = mean_valid_loss
+            torch.save(model.state_dict(), config['save_path'])  # Save your best model.
+            print('Saving model with loss {:.3f}...'.format(best_loss))
+            early_stop_count = 0
+        else:
+            early_stop_count += 1
+
+        if early_stop_count >= config['early_stop']:
+            print('\nModel is not improving, so we halt the training session.')
+            print('\n Mean valid loss', mean_valid_loss)
+            print('\n Best loss', best_loss)
+            return
+
+
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+# Set seed for reproducibility
+same_seed(config['seed'])
+
+# train_data size: 2699 x 118 (id + 37 states + 16 features x 5 days)
+# test_data size: 1078 x 117 (without last day's positive rate)
+train_data, test_data = pd.read_csv('./data/covid.train.csv').values, pd.read_csv('./data/covid.test.csv').values
+train_data, valid_data = train_valid_split(train_data, config['valid_ratio'], config['seed'])
+
+# Print out the data size.
+print(f"""train_data size: {train_data.shape}
+valid_data size: {valid_data.shape}
+test_data size: {test_data.shape}""")
+
+# Select features
+x_train, x_valid, x_test, y_train, y_valid = select_feat(train_data, valid_data, test_data, config['select_all'])
+
+# Print out the number of features.
+print(f'number of features: {x_train.shape[1]}')
+
+# train_dataset, valid_dataset, test_dataset = COVID19Dataset(x_train, y_train), \
+#                                              COVID19Dataset(x_valid, y_valid), \
+#                                              COVID19Dataset(x_test)
+train_dataset = COVID19Dataset(x_train, y_train)
+valid_dataset = COVID19Dataset(x_valid, y_valid)
+test_dataset = COVID19Dataset(x_test)
+
+# Pytorch data loader loads pytorch dataset into batches.
+train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True)
+valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True)
+test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False, pin_memory=True)
+
+model = My_Model(input_dim=x_train.shape[1]).to(device)  # Put your model and data on the same computation device.
+trainer(train_loader, valid_loader, model, config, device)
+
+def save_pred(preds, file):
+    '''Save predictions to the specified file.'''
+    with open(file, 'w') as fp:
+        writer = csv.writer(fp)
+        writer.writerow(['id', 'tested_positive'])
+        for i, p in enumerate(preds):
+            writer.writerow([i, p])
+
+# Predict on the test set
+model.load_state_dict(torch.load(config['save_path']))
+preds = predict(test_loader, model, device)
+save_pred(preds, 'pred.csv')
+
diff --git a/HW01/zhengquanwei028-kaggle-02.png b/HW01/zhengquanwei028-kaggle-02.png
new file mode 100644
index 00000000..e7d013d2
Binary files /dev/null and b/HW01/zhengquanwei028-kaggle-02.png differ
diff --git a/HW01/zhengquanwei028-kaggle.png b/HW01/zhengquanwei028-kaggle.png
new file mode 100644
index 00000000..9332db05
Binary files /dev/null and b/HW01/zhengquanwei028-kaggle.png differ
diff --git a/HW02/zhengquanwei028-HW02.py b/HW02/zhengquanwei028-HW02.py
new file mode 100644
index 00000000..19364672
--- /dev/null
+++ b/HW02/zhengquanwei028-HW02.py
@@ -0,0 +1,375 @@
+import os
+import random
+import pandas as pd
+import torch
+from tqdm import tqdm
+
+def load_feat(path):
+    feat = torch.load(path)
+    return feat
+
+def shift(x, n):
+    if n < 0:
+        left = x[0].repeat(-n, 1)
+        right = x[:n]
+    elif n > 0:
+        right = x[-1].repeat(n, 1)
+        left = x[n:]
+    else:
+        return x
+
+    return torch.cat((left, right), dim=0)
+
+def concat_feat(x, concat_n):
+    assert concat_n % 2 == 1  # n must be odd
+    if concat_n < 2:
+        return x
+    seq_len, feature_dim = x.size(0), x.size(1)
+    x = x.repeat(1, concat_n)
+    x = x.view(seq_len, concat_n, feature_dim).permute(1, 0, 2)  # concat_n, seq_len, feature_dim
+    mid = (concat_n // 2)
+    for r_idx in range(1, mid + 1):
+        x[mid + r_idx, :] = shift(x[mid + r_idx], r_idx)
+        x[mid - r_idx, :] = shift(x[mid - r_idx], -r_idx)
+
+    return x.permute(1, 0, 2).view(seq_len, concat_n * feature_dim)
+
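+# A toy illustration of what concat_feat produces (a hedged sketch; shapes and
+# values are made up for demonstration). With seq_len=4, feature_dim=1 and
+# concat_n=3, each frame is concatenated with its left/right neighbours, and
+# boundary frames repeat the edge frame:
+#
+#   x = torch.tensor([[1.], [2.], [3.], [4.]])
+#   concat_feat(x, 3)
+#   # tensor([[1., 1., 2.],
+#   #         [1., 2., 3.],
+#   #         [2., 3., 4.],
+#   #         [3., 4., 4.]])   # shape: (seq_len, concat_n * feature_dim)
+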
+def preprocess_data(split, feat_dir, phone_path, concat_nframes, train_ratio=0.8, train_val_seed=1337):
+    class_num = 41  # NOTE: pre-computed, should not need change
+    mode = 'train' if (split == 'train' or split == 'val') else 'test'
+
+    label_dict = {}
+    if mode != 'test':
+        phone_file = open(os.path.join(phone_path, f'{mode}_labels.txt')).readlines()
+
+        for line in phone_file:
+            line = line.strip('\n').split(' ')
+            label_dict[line[0]] = [int(p) for p in line[1:]]
+
+    if split == 'train' or split == 'val':
+        # split training and validation data
+        usage_list = open(os.path.join(phone_path, 'train_split.txt')).readlines()
+        random.seed(train_val_seed)
+        random.shuffle(usage_list)
+        percent = int(len(usage_list) * train_ratio)
+        usage_list = usage_list[:percent] if split == 'train' else usage_list[percent:]
+    elif split == 'test':
+        usage_list = open(os.path.join(phone_path, 'test_split.txt')).readlines()
+    else:
+        raise ValueError('Invalid \'split\' argument for dataset: PhoneDataset!')
+
+    usage_list = [line.strip('\n') for line in usage_list]
+    print('[Dataset] - # phone classes: ' + str(class_num) + ', number of utterances for ' + split + ': ' + str(len(usage_list)))
+
+    max_len = 3000000
+    X = torch.empty(max_len, 39 * concat_nframes)
+    if mode != 'test':
+        y = torch.empty(max_len, dtype=torch.long)
+
+    idx = 0
+    for i, fname in tqdm(enumerate(usage_list)):
+        feat = load_feat(os.path.join(feat_dir, mode, f'{fname}.pt'))
+        cur_len = len(feat)
+        feat = concat_feat(feat, concat_nframes)
+        if mode != 'test':
+            label = torch.LongTensor(label_dict[fname])
+
+        X[idx: idx + cur_len, :] = feat
+        if mode != 'test':
+            y[idx: idx + cur_len] = label
+
+        idx += cur_len
+
+    X = X[:idx, :]
+    if mode != 'test':
+        y = y[:idx]
+
+    print(f'[INFO] {split} set')
+    print(X.shape)
+    if mode != 'test':
+        print(y.shape)
+        return X, y
+    else:
+        return X
+
+
+# dataset
+
+import torch
+from torch.utils.data import Dataset
+from torch.utils.data import DataLoader
+
+
+class LibriDataset(Dataset):
+    def __init__(self, X, y=None):
+        self.data = X
+        if y is not None:
+            self.label = torch.LongTensor(y)
+        else:
+            self.label = None
+
+    def __getitem__(self, idx):
+        if self.label is not None:
+            return self.data[idx], self.label[idx]
+        else:
+            return self.data[idx]
+
+    def __len__(self):
+        return len(self.data)
+
+
+# model
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from torch.autograd import Variable
+from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence
+
+class BasicBlock(nn.Module):
+    def __init__(self, input_dim, output_dim):
+        super(BasicBlock, self).__init__()
+
+        self.block = nn.Sequential(
+            nn.Linear(input_dim, output_dim),
+            nn.ReLU(),
+            nn.BatchNorm1d(output_dim),
+            nn.Dropout(0.25),
+        )
+
+    def forward(self, x):
+        x = self.block(x)
+        return x
+
+
+class Classifier(nn.Module):
+    def __init__(self, input_dim, output_dim=41, hidden_layers=1, hidden_dim=256):
+        super(Classifier, self).__init__()
+
+        self.fc = nn.Sequential(
+            BasicBlock(input_dim, hidden_dim),
+            *[BasicBlock(hidden_dim, hidden_dim) for _ in range(hidden_layers)],
+            nn.Linear(hidden_dim, output_dim)
+        )
+
+    def forward(self, x):
+        x = self.fc(x)
+        return x
+
+# Hyper-parameters
+# config
+# data parameters
+concat_nframes = 17     # the number of frames to concat with; n must be odd (total 2k+1 = n frames)
+train_ratio = 0.8       # the ratio of data used for training; the rest will be used for validation
+
+# training parameters
+seed = 1024             # random seed
+batch_size = 2048       # batch size
+num_epoch = 50          # the number of training epochs
+learning_rate = 0.0001  # learning rate
+model_path = './model.ckpt'  # the path where the checkpoint will be saved
+
+# model parameters
+input_dim = 39 * concat_nframes  # the input dim of the model; you should not change the value
+hidden_layers = 12      # the number of hidden layers
+hidden_dim = 1024       # the hidden dim
+
+# RNN
+
+class RNN_Classifier(nn.Module):
+    def __init__(self, input_dim, output_dim=41, hidden_layers=1, hidden_dim=256):
+        super(RNN_Classifier, self).__init__()
+        self.rnn = nn.RNN(input_size=input_dim, hidden_size=hidden_dim, num_layers=hidden_layers, batch_first=True, dropout=0.25)
+        self.fc = nn.Linear(hidden_dim, output_dim)
+
+    def forward(self, x):
+        # nn.RNN returns (output, h_n); unpacking the hidden state as a
+        # (h_n, c_n) pair only works for LSTMs, so take it directly here.
+        out, h_n = self.rnn(x, None)
+        # Feed the RNN output to the fully-connected layer.
+        out = self.fc(out)
+        return out
+
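+# Note: the commented-out RNN path in the training loop below feeds the model
+# (B, 1, input_dim), i.e. a sequence of length 1, which gives the RNN nothing
+# to recur over. A sketch of a more natural shaping (an assumption, not part of
+# the original; it would require the RNN to be built with input_size=39):
+#
+#   # features: (B, concat_nframes * 39) -> (B, concat_nframes, 39)
+#   x = features.view(-1, concat_nframes, 39)
+#   out, h_n = model.rnn(x)        # out: (B, concat_nframes, hidden_dim)
+#   logits = model.fc(out[:, -1])  # classify from the last frame's state
+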
+# Prepare dataset and model
+
+import gc
+
+# preprocess data
+train_X, train_y = preprocess_data(split='train', feat_dir='./libriphone/feat', phone_path='./libriphone', concat_nframes=concat_nframes, train_ratio=train_ratio)
+val_X, val_y = preprocess_data(split='val', feat_dir='./libriphone/feat', phone_path='./libriphone', concat_nframes=concat_nframes, train_ratio=train_ratio)
+
+# get dataset
+train_set = LibriDataset(train_X, train_y)
+val_set = LibriDataset(val_X, val_y)
+
+# remove raw features to save memory
+del train_X, train_y, val_X, val_y
+gc.collect()
+
+# get dataloader
+train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
+val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
+
+
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+print(f'DEVICE: {device}')
+
+
+import numpy as np
+
+# fix seed
+def same_seeds(seed):
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)
+    np.random.seed(seed)
+    torch.backends.cudnn.benchmark = False
+    torch.backends.cudnn.deterministic = True
+
+# fix random seed
+same_seeds(seed)
+
+# create model, define a loss function, and optimizer
+model = Classifier(input_dim=input_dim, hidden_layers=hidden_layers, hidden_dim=hidden_dim).to(device)
+# model = RNN_Classifier(input_dim=input_dim, hidden_layers=hidden_layers, hidden_dim=hidden_dim).to(device)
+criterion = nn.CrossEntropyLoss()
+optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
+
+
+# training
+
+best_acc = 0.0
+for epoch in range(num_epoch):
+    train_acc = 0.0
+    train_loss = 0.0
+    val_acc = 0.0
+    val_loss = 0.0
+
+    # training
+    model.train()  # set the model to training mode
+    for i, batch in enumerate(tqdm(train_loader)):
+        features, labels = batch
+        features = features.to(device)
+        labels = labels.to(device)
+
+        optimizer.zero_grad()
+        outputs = model(features)
+        # outputs = model(torch.unsqueeze(features, dim=1)).to(device)
+        outputs = torch.squeeze(outputs)
+        loss = criterion(outputs, labels)
+        loss.backward()
+        optimizer.step()
+
+        _, train_pred = torch.max(outputs, 1)  # get the index of the class with the highest probability
+        train_acc += (train_pred.detach() == labels.detach()).sum().item()
+        train_loss += loss.item()
+
+    # validation
+    if len(val_set) > 0:
+        model.eval()  # set the model to evaluation mode
+        with torch.no_grad():
+            for i, batch in enumerate(tqdm(val_loader)):
+                features, labels = batch
+                features = features.to(device)
+                labels = labels.to(device)
+                outputs = model(features)
+                # RNN
+                # outputs = model(torch.unsqueeze(features, dim=1)).to(device)
+                # outputs = torch.squeeze(outputs)
+                loss = criterion(outputs, labels)  # the loss needs the labels as well
+                _, val_pred = torch.max(outputs, 1)
+                val_acc += (val_pred.cpu() == labels.cpu()).sum().item()  # get the index of the class with the highest probability
+                val_loss += loss.item()
+
+            print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f} | Val Acc: {:3.6f} loss: {:3.6f}'.format(
+                epoch + 1, num_epoch, train_acc / len(train_set), train_loss / len(train_loader),
+                val_acc / len(val_set), val_loss / len(val_loader)
+            ))
+
+            # if the model improves, save a checkpoint at this epoch
+            if val_acc > best_acc:
+                best_acc = val_acc
+                torch.save(model.state_dict(), model_path)
+                print('saving model with acc {:.3f}'.format(best_acc / len(val_set)))
+    else:
+        print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f}'.format(
+            epoch + 1, num_epoch, train_acc / len(train_set), train_loss / len(train_loader)
+        ))
+
+# if not validating, save the last epoch
+if len(val_set) == 0:
+    torch.save(model.state_dict(), model_path)
+    print('saving model at last epoch')
+
+
+# save params
+
+print('===== Parameters =====')
+print('concat_nframes: ', concat_nframes)
+print('seed: ', seed)
+print('batch_size: ', batch_size)
+print('num_epoch: ', num_epoch)
+print('learning_rate: ', learning_rate)
+print('input_dim: ', input_dim)
+print('hidden_layers: ', hidden_layers)
+print('hidden_dim: ', hidden_dim)
+print('===== end =====')
+
+
+del train_loader, val_loader
+gc.collect()
+
+# Testing
+
+# load data
+test_X = preprocess_data(split='test', feat_dir='./libriphone/feat', phone_path='./libriphone', concat_nframes=concat_nframes)
+test_set = LibriDataset(test_X, None)
+test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
+
+
+# load model
+model = Classifier(input_dim=input_dim, hidden_layers=hidden_layers, hidden_dim=hidden_dim).to(device)
+# model = RNN_Classifier(input_dim=input_dim, hidden_layers=hidden_layers, hidden_dim=hidden_dim).to(device)
+# model = torch.nn.DataParallel(model)
+model.load_state_dict(torch.load(model_path))
+
+
+# Make prediction.
+
+test_acc = 0.0
+test_lengths = 0
+pred = np.array([], dtype=np.int32)
+
+model.eval()
+with torch.no_grad():
+    for i, batch in enumerate(tqdm(test_loader)):
+        features = batch
+        features = features.to(device)
+        outputs = model(features)
+        # outputs = model(torch.unsqueeze(features, dim=1)).to(device)
+        # _, test_pred = torch.max(outputs, 1)  # get the index of the class with the highest probability
+        _, test_pred = torch.max(torch.squeeze(outputs), 1)
+        pred = np.concatenate((pred, test_pred.cpu().numpy()), axis=0)
+
+
+with open('prediction.csv', 'w') as f:
+    f.write('Id,Class\n')
+    for i, y in enumerate(pred):
+        f.write('{},{}\n'.format(i, y))
+
diff --git a/HW03/HW03.py b/HW03/HW03.py
new file mode 100644
index 00000000..fafd0a54
--- /dev/null
+++ b/HW03/HW03.py
@@ -0,0 +1,470 @@
+import torchvision.utils
+
+_exp_name = "sample"
+
+# Import necessary packages.
+import numpy as np
+import pandas as pd
+import torch
+import os
+import torch.nn as nn
+import torchvision.transforms as transforms
+import torchvision.transforms.functional as functional
+from PIL import Image
+# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
+from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
+from torchvision.datasets import DatasetFolder, VisionDataset
+from torchvision.models import resnet18, vgg16
+
+# This is for the progress bar.
+from tqdm.auto import tqdm
+import random
+import matplotlib.pyplot as plt
+import sys
+
+myseed = 2333  # set a random seed for reproducibility
+torch.backends.cudnn.deterministic = True
+torch.backends.cudnn.benchmark = False
+np.random.seed(myseed)
+torch.manual_seed(myseed)
+if torch.cuda.is_available():
+    torch.cuda.manual_seed_all(myseed)
+
+# Normally, we don't need augmentations in testing and validation.
+# All we need here is to resize the PIL image and transform it into a Tensor.
+test_tfm = transforms.Compose([
+    transforms.Resize((128, 128)),
+    transforms.ToTensor(),
+])
+# normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
+normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+# img_grid = torchvision.utils.make_grid()
+
+def imshow(imgs):
+    # imgs = imgs / 2 + 0.5
+    plt.imshow(imgs)
+    plt.show()
+
+# However, it is also possible to use augmentation in the testing phase.
+# You may use train_tfm to produce a variety of images and then test using ensemble methods.
+# train_tfm = transforms.Compose([
+#     # Resize the image into a fixed shape (height = width = 128)
+#     transforms.RandomResizedCrop(256, scale=(0.08, 1.0), ratio=(3./4, 4./3)),
+#     # Flip the image horizontally
+#     transforms.RandomHorizontalFlip(p=0.5),
+#
+#     transforms.Resize((128, 128)),
+#     # You may add some transforms here.
+#     # ToTensor() should be the last one of the transforms.
+#     transforms.ToTensor(),
+#     # normalize,
+# ])
+
+train_tfm = transforms.Compose([
+    transforms.AutoAugment(transforms.AutoAugmentPolicy.IMAGENET),
+    # Flip the image horizontally
+    transforms.RandomHorizontalFlip(p=0.5),
+    # Rotate the image
+    transforms.RandomRotation(degrees=(0, 180)),
+    transforms.Resize((128, 128)),
+    transforms.ToTensor(),
+    # normalize,
+])
+
+
+class FoodDataset(Dataset):
+
+    def __init__(self, path, tfm=test_tfm, files=None):
+        super().__init__()
+        self.path = path
+        self.files = sorted([os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")])
+        if files is not None:
+            self.files = files
+        print(f"One {path} sample", self.files[0])
+        self.transform = tfm
+
+    def __len__(self):
+        return len(self.files)
+
+    def __getitem__(self, idx):
+        fname = self.files[idx]
+        im = Image.open(fname)
+        im = self.transform(im)
+        # im = self.data[idx]
+        try:
+            label = int(fname.split("/")[-1].split("_")[0])
+        except ValueError:
+            label = -1  # test has no label
+        return im, label
+
+
+class Classifier(nn.Module):
+    def __init__(self):
+        super(Classifier, self).__init__()
+        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
+        # torch.nn.MaxPool2d(kernel_size, stride, padding)
+        # input dimensions: [3, 128, 128]
+        self.cnn = nn.Sequential(
+            nn.Conv2d(3, 64, 3, 1, 1),     # [64, 128, 128]
+            nn.BatchNorm2d(64),
+            nn.ReLU(),
+            nn.MaxPool2d(2, 2, 0),         # [64, 64, 64]
+
+            nn.Conv2d(64, 128, 3, 1, 1),   # [128, 64, 64]
+            nn.BatchNorm2d(128),
+            nn.ReLU(),
+            nn.MaxPool2d(2, 2, 0),         # [128, 32, 32]
+
+            nn.Conv2d(128, 256, 3, 1, 1),  # [256, 32, 32]
+            nn.BatchNorm2d(256),
+            nn.ReLU(),
+            nn.MaxPool2d(2, 2, 0),         # [256, 16, 16]
+
+            nn.Conv2d(256, 512, 3, 1, 1),  # [512, 16, 16]
+            nn.BatchNorm2d(512),
+            nn.ReLU(),
+            nn.MaxPool2d(2, 2, 0),         # [512, 8, 8]
+
+            nn.Conv2d(512, 512, 3, 1, 1),  # [512, 8, 8]
+            nn.BatchNorm2d(512),
+            nn.ReLU(),
+            nn.MaxPool2d(2, 2, 0),         # [512, 4, 4]
+        )
+        self.fc = nn.Sequential(
+            nn.Linear(512 * 4 * 4, 1024),
+            nn.ReLU(),
+            nn.Linear(1024, 512),
+            nn.ReLU(),
+            nn.Linear(512, 11)
+        )
+
+    def forward(self, x):
+        out = self.cnn(x)
+        out = out.view(out.size()[0], -1)
+        return self.fc(out)
+
+
+# ResNet
+class Residual_Network(nn.Module):
+    def __init__(self):
+        super(Residual_Network, self).__init__()
+
+        self.cnn_layer1 = nn.Sequential(
+            nn.Conv2d(3, 64, 3, 1, 1),
+            nn.BatchNorm2d(64),
+        )
+
+        self.cnn_layer2 = nn.Sequential(
+            nn.Conv2d(64, 64, 3, 1, 1),
+            nn.BatchNorm2d(64),
+        )
+
+        self.cnn_layer3 = nn.Sequential(
+            nn.Conv2d(64, 128, 3, 2, 1),
+            nn.BatchNorm2d(128),
+        )
+
+        self.cnn_layer4 = nn.Sequential(
+            nn.Conv2d(128, 128, 3, 1, 1),
+            nn.BatchNorm2d(128),
+        )
+        self.cnn_layer5 = nn.Sequential(
+            nn.Conv2d(128, 256, 3, 2, 1),
+            nn.BatchNorm2d(256),
+        )
+        self.cnn_layer6 = nn.Sequential(
+            nn.Conv2d(256, 256, 3, 1, 1),
+            nn.BatchNorm2d(256),
+        )
+        self.fc_layer = nn.Sequential(
+            nn.Linear(256 * 32 * 32, 256),
+            nn.ReLU(),
+            nn.Linear(256, 11)
+        )
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        # input (x): [batch_size, 3, 128, 128]
+        # output: [batch_size, 11]
+
+        # Extract features by convolutional layers.
+        x1 = self.cnn_layer1(x)
+        x1 = self.relu(x1)
+
+        x2 = self.cnn_layer2(x1)
+        x2 = self.relu(x2)
+
+        x3 = self.cnn_layer3(x2)
+        x3 = self.relu(x3)
+
+        x4 = self.cnn_layer4(x3)
+        x4 = self.relu(x4)
+
+        x5 = self.cnn_layer5(x4)
+        x5 = self.relu(x5)
+
+        x6 = self.cnn_layer6(x5)
+        x6 = self.relu(x6)
+
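+        # Note: despite its name, this network has no skip connections yet; each
+        # cnn_layer output is used directly. A hedged sketch of what a residual
+        # add would look like for the same-shape layers (not part of the original):
+        #
+        #   x2 = self.cnn_layer2(x1)
+        #   x2 = self.relu(x2 + x1)   # add the input back before the activation
+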
+        # The extracted feature map must be flattened before going to fully-connected layers.
+        xout = x6.flatten(1)
+
+        # The features are transformed by fully-connected layers to obtain the final logits.
+        xout = self.fc_layer(xout)
+        return xout
+
+
+batch_size = 128
+_dataset_dir = "./food11"
+# Construct datasets.
+# The argument "loader" tells how torchvision reads the data.
+train_set = FoodDataset(os.path.join(_dataset_dir, "training"), tfm=train_tfm)
+train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
+valid_set = FoodDataset(os.path.join(_dataset_dir, "validation"), tfm=test_tfm)
+valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
+
+# "cuda" only when GPUs are available.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# The number of training epochs and patience.
+n_epochs = 50
+patience = 300  # If no improvement in 'patience' epochs, early stop.
+lr = 0.0003
+weight_decay = 1e-5
+# Initialize a model, and put it on the device specified.
+# model = Classifier().to(device)
+
+# model = Residual_Network().to(device)
+model = vgg16(num_classes=11).to(device)      # 11 food classes
+model2 = resnet18(num_classes=11).to(device)  # 11 food classes
+
+# For the classification task, we use cross-entropy as the measurement of performance.
+criterion = nn.CrossEntropyLoss()
+criterion2 = nn.CrossEntropyLoss()
+
+# Initialize optimizer; you may fine-tune some hyperparameters such as learning rate on your own.
+optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
+optimizer2 = torch.optim.Adam(model2.parameters(), lr=lr, weight_decay=weight_decay)
+
+# Ensemble weights for the two models
+weight_1 = 0.4
+weight_2 = 0.6
+
+# Initialize trackers; these are not parameters and should not be changed.
+stale = 0
+best_acc = 0
+
+
+# Hyper-parameters
+print('-- Training parameters --')
+print('seed %d' % myseed)
+print('batch_size %d' % batch_size)
+print('n_epochs %d' % n_epochs)
+print('patience %d' % patience)
+print('lr %f' % lr)
+print('weight_decay %f' % weight_decay)
+print('-------------')
+for epoch in range(n_epochs):
+
+    # ---------- Training ----------
+    # Make sure the models are in train mode before training.
+    model.train()
+    model2.train()
+    # These are used to record information in training.
+    train_loss = []
+    train_accs = []
+    train_loss_2 = []
+    train_accs_2 = []
+    # Accuracy of the model ensemble
+    ensemble_accs = []
+    for batch in tqdm(train_loader):
+        # A batch consists of image data and corresponding labels.
+        imgs, labels = batch
+        # imgs = imgs.half()
+        # print(imgs.shape, labels.shape)
+        # for im in imgs:
+        #     img_2 = functional.to_pil_image(im)
+        #     plt.imshow(img_2)
+        #     plt.show()
+        # sys.exit(0)
+
+        # Forward the data. (Make sure data and model are on the same device.)
+        logits = model(imgs.to(device))
+        logits2 = model2(imgs.to(device))
+        ensemble_logits = logits * weight_1 + logits2 * weight_2
+        # Calculate the cross-entropy loss.
+        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
+        loss = criterion(logits, labels.to(device))
+        loss2 = criterion2(logits2, labels.to(device))
+        # Gradients stored in the parameters in the previous step should be cleared out first.
+        optimizer.zero_grad()
+        optimizer2.zero_grad()
+        # Compute the gradients for parameters.
+        loss.backward()
+        loss2.backward()
+        # Clip the gradient norms for stable training.
+        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
+        grad_norm2 = nn.utils.clip_grad_norm_(model2.parameters(), max_norm=10)
+
+        # Update the parameters with computed gradients.
+        optimizer.step()
+        optimizer2.step()
+        # Compute the accuracy for the current batch.
+        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
+        acc2 = (logits2.argmax(dim=-1) == labels.to(device)).float().mean()
+        ensemble_acc = (ensemble_logits.argmax(dim=-1) == labels.to(device)).float().mean()
+        # Record the loss and accuracy.
+        train_loss.append(loss.item())
+        train_accs.append(acc)
+        train_loss_2.append(loss2.item())
+        train_accs_2.append(acc2)
+        ensemble_accs.append(ensemble_acc)
+    train_loss = sum(train_loss) / len(train_loss)
+    train_acc = sum(train_accs) / len(train_accs)
+    train_loss_2 = sum(train_loss_2) / len(train_loss_2)
+    train_accs_2 = sum(train_accs_2) / len(train_accs_2)
+    ensemble_accs = sum(ensemble_accs) / len(ensemble_accs)
+    # Print the information.
+    print(f"[ Train VGG16 | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")
+    print(f"[ Train Resnet18 | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss_2:.5f}, acc = {train_accs_2:.5f}")
+    print(f"[ Train Ensemble | {epoch + 1:03d}/{n_epochs:03d} ] acc = {ensemble_accs:.5f}")
+    # ---------- Validation ----------
+    # Make sure the models are in eval mode so that modules like dropout are disabled and work normally.
+    model.eval()
+    model2.eval()
+    # These are used to record information in validation.
+    valid_loss = []
+    valid_accs = []
+    valid_loss_2 = []
+    valid_accs_2 = []
+    # Accuracy of the model ensemble
+    ensemble_accs = []
+    # Iterate the validation set by batches.
+    for batch in tqdm(valid_loader):
+        # A batch consists of image data and corresponding labels.
+        imgs, labels = batch
+        # imgs = imgs.half()
+
+        # We don't need gradients in validation.
+        # Using torch.no_grad() accelerates the forward process.
+        with torch.no_grad():
+            logits = model(imgs.to(device))
+            logits2 = model2(imgs.to(device))
+            ensemble_logits = logits * weight_1 + logits2 * weight_2
+        # We can still compute the loss (but not the gradient).
+        loss = criterion(logits, labels.to(device))
+        loss2 = criterion2(logits2, labels.to(device))
+        # Compute the accuracy for the current batch.
+        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
+        acc2 = (logits2.argmax(dim=-1) == labels.to(device)).float().mean()
+        ensemble_acc = (ensemble_logits.argmax(dim=-1) == labels.to(device)).float().mean()
+        # Record the loss and accuracy.
+        valid_loss.append(loss.item())
+        valid_accs.append(acc)
+        valid_loss_2.append(loss2.item())
+        valid_accs_2.append(acc2)
+        ensemble_accs.append(ensemble_acc)
+        # break
+
+    # The average loss and accuracy for the entire validation set is the average of the recorded values.
+    valid_loss = sum(valid_loss) / len(valid_loss)
+    valid_acc = sum(valid_accs) / len(valid_accs)
+    valid_loss_2 = sum(valid_loss_2) / len(valid_loss_2)
+    valid_accs_2 = sum(valid_accs_2) / len(valid_accs_2)
+    ensemble_accs = sum(ensemble_accs) / len(ensemble_accs)
+
+    # Print the information.
+ print(f"[ Valid VGG16 | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}") + print(f"[ Valid Resnet18 | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss_2:.5f}, acc = {valid_accs_2:.5f}") + print(f"[ Train Ensemble | {epoch + 1:03d}/{n_epochs:03d} ] loss = {ensemble_accs:.5f}, acc = {ensemble_accs:.5f}") + + # update logs + if valid_acc > best_acc: + with open(f"./{_exp_name}_log.txt", "a"): + print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best") + else: + with open(f"./{_exp_name}_log.txt", "a"): + print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}") + + # save models + if valid_acc > best_acc: + print(f"Best model found at epoch {epoch}, saving model") + torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error + best_acc = valid_acc + stale = 0 + else: + stale += 1 + if stale > patience: + print(f"No improvment {patience} consecutive epochs, early stopping") + break + + # ensemble update logs + if ensemble_accs > best_acc: + with open(f"./{_exp_name}_log.txt", "a"): + print(f"[ Model_1 Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best") + print(f"[ Model_2 Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss_2:.5f}, acc = {valid_accs_2:.5f} -> best") + print(f"[ Ensemble Model_1 Valid | {epoch + 1:03d}/{n_epochs:03d} ] ensemble acc = {ensemble_accs:.5f} -> best") + else: + with open(f"./{_exp_name}_log.txt", "a"): + print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}") + print(f"[ Model_2 Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss_2:.5f}, acc = {valid_accs_2:.5f} -> best") + # ensemble save models + if ensemble_accs > best_acc: + print(f"Best model found at epoch {epoch}, saving model") + torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error + torch.save(model2.state_dict(), f"{_exp_name}_2_best.ckpt") # only save best to prevent output memory exceed error + best_acc = ensemble_accs + stale = 0 + else: + stale += 1 + if stale > patience: + print(f"No improvment {patience} consecutive epochs, early stopping") + break + +test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm) +test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True) + +# model_best = Classifier().to(device) +# model_best = Residual_Network().to(device) +model_best = vgg16().to(device) +model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt")) +model_best.eval() + +model2_best = resnet18().to(device) +model2_best.load_state_dict(torch.load(f"{_exp_name}_2_best.ckpt")) +model2_best.eval() +prediction = [] +with torch.no_grad(): + for data,_ in test_loader: + test_pred = model_best(data.to(device)) + test_pred_2 = model2_best(data.to(device)) + test_label = np.argmax((test_pred * weight_1 + test_pred_2 * weight_2).cpu().data.numpy(), axis=1) + prediction += test_label.squeeze().tolist() + + +#create test csv +def pad4(i): + return "0"*(4-len(str(i)))+str(i) +df = pd.DataFrame() +df["Id"] = [pad4(i) for i in range(1,len(test_set)+1)] +df["Category"] = prediction +df.to_csv("submission.csv",index = False) + +# 超参数 +print('-- 训练参数 --') +print('seed %d' % myseed) +print('batch_size %d' % batch_size) +print('n_epochs %d' % n_epochs) +print('patience %d' % patience) +print('lr %f' % lr) 
+print('weight_decay %f' % weight_decay)
+print('-------------')
\ No newline at end of file
diff --git a/HW03/note.md b/HW03/note.md
new file mode 100644
index 00000000..2ca5f5d8
--- /dev/null
+++ b/HW03/note.md
@@ -0,0 +1,144 @@
+# HW03 notes
+
+    nn.Conv2d(3, 64, 3, 1, 1)
+    input channels: 3
+    output channels: 64
+    kernel size: 3 * 3
+    stride: 1
+    padding: 1
+
+```python
+torch.nn.Conv2d(in_channels,
+                out_channels,
+                kernel_size,
+                stride=1,
+                padding=0,
+                dilation=1,
+                groups=1,
+                bias=True,
+                padding_mode='zeros',
+                device=None,
+                dtype=None)
+```
+
+# 1. First training run
+
+Model unchanged; changed the image-augmentation pipeline:
+
+```python
+train_tfm = transforms.Compose([
+    transforms.AutoAugment(transforms.AutoAugmentPolicy.IMAGENET),
+    # Flip the image horizontally
+    transforms.RandomHorizontalFlip(p=0.5),
+    # Rotate the image
+    transforms.RandomRotation(degrees=(0, 180)),
+    transforms.Resize((128, 128)),
+    transforms.ToTensor(),
+    # normalize,
+])
+```
+
+Training parameters:
+
+```bash
+-- Training parameters --
+seed 6666
+batch_size 64
+n_epochs 50
+patience 300
+lr 0.000300
+weight_decay 0.000010
+-------------
+```
+
+# 2. Second training run
+
+Increased batch_size and trained for more epochs.
+
+```python
+-- Training parameters --
+seed 6666
+batch_size 128
+n_epochs 100
+patience 300
+lr 0.000300
+weight_decay 0.000010
+-------------
+```
+
+Kaggle score:
+
+```
+Score: 0.76195
+Private score: 0.73282
+```
+
+# 3. Third training run
+
+Changed the model to the sample Residual_Network and reduced batch_size.
+
+```python
+-- Training parameters --
+seed 6666
+batch_size 64
+n_epochs 100
+patience 300
+lr 0.000300
+weight_decay 0.000010
+```
+
+acc = 0.69709 -> best
+
+# 4. Fourth training run
+
+Changed the model to `resnet18`.
+
+```python
+-- Training parameters --
+seed 6666
+batch_size 64
+n_epochs 100
+patience 300
+lr 0.000300
+weight_decay 0.000010
+```
+
+0.72151
+
+# 5. Fifth training run
+
+Kept `resnet18` and trained longer: 200 epochs.
+
+```python
+-- Training parameters --
+seed 6666
+batch_size 64
+n_epochs 200
+patience 300
+lr 0.000300
+weight_decay 0.000010
+```
+
+Test-set accuracy: `0.75332`
+Training-set accuracy: `0.90812`
+
+Kaggle results:
+
+```python
+private_score: 0.77934
+public_score: 0.77988
+```
+
+# 6. Sixth training run
+
+Use model ensembling; a sketch of the idea follows.
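+
+The ensemble in HW03.py combines the two models' logits with fixed weights
+(0.4 for VGG16, 0.6 for ResNet18). A minimal sketch of the idea, assuming
+`model` and `model2` are the two trained classifiers and `imgs` is a batch
+already loaded from the test loader:
+
+```python
+# Weighted soft-voting over logits (sketch; mirrors the HW03.py test loop)
+weight_1, weight_2 = 0.4, 0.6
+with torch.no_grad():
+    logits = model(imgs.to(device))    # (B, 11)
+    logits2 = model2(imgs.to(device))  # (B, 11)
+    ensemble = weight_1 * logits + weight_2 * logits2
+    pred = ensemble.argmax(dim=-1)     # final class per image
+```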