diff --git a/HW01/zhengquanwei028-HW01.py b/HW01/zhengquanwei028-HW01.py
new file mode 100644
index 00000000..8fe5c222
--- /dev/null
+++ b/HW01/zhengquanwei028-HW01.py
@@ -0,0 +1,276 @@
+# HW1
+# Numerical Operations
+import math
+import numpy as np
+
+# Reading/Writing Data
+import pandas as pd
+import os
+import csv
+os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
+# For Progress Bar
+from tqdm import tqdm
+
+# Pytorch
+import torch
+import torch.nn as nn
+from torch.utils.data import Dataset, DataLoader, random_split
+from torch.optim.lr_scheduler import CosineAnnealingLR, CosineAnnealingWarmRestarts, StepLR, ReduceLROnPlateau
+
+# For plotting learning curve
+from torch.utils.tensorboard import SummaryWriter
+
+# Utility functions
+def same_seed(seed):
+    '''Fixes random number generator seeds for reproducibility.'''
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+
+
+def train_valid_split(data_set, valid_ratio, seed):
+    '''Split provided training data into training set and validation set.'''
+    valid_set_size = int(valid_ratio * len(data_set))
+    train_set_size = len(data_set) - valid_set_size
+    train_set, valid_set = random_split(data_set, [train_set_size, valid_set_size], generator=torch.Generator().manual_seed(seed))
+    return np.array(train_set), np.array(valid_set)
+
+
+def predict(test_loader, model, device):
+    model.eval()  # Set your model to evaluation mode.
+    preds = []
+    for x in tqdm(test_loader):
+        x = x.to(device)
+        with torch.no_grad():
+            pred = model(x)
+            preds.append(pred.detach().cpu())
+    preds = torch.cat(preds, dim=0).numpy()
+    return preds
+
+
+## Data
+
+class COVID19Dataset(Dataset):
+    '''
+    x: Features.
+    y: Targets; if None, do prediction.
+    '''
+    def __init__(self, x, y=None):
+        if y is None:
+            self.y = y
+        else:
+            self.y = torch.FloatTensor(y)
+        self.x = torch.FloatTensor(x)
+
+    def __getitem__(self, idx):
+        if self.y is None:
+            return self.x[idx]
+        else:
+            return self.x[idx], self.y[idx]
+
+    def __len__(self):
+        return len(self.x)
+
+
+## Neural network
+
+class My_Model(nn.Module):
+    def __init__(self, input_dim):
+        super(My_Model, self).__init__()
+        # TODO: modify model's structure, be aware of dimensions.
+        self.layers = nn.Sequential(
+            nn.Linear(input_dim, 64),
+            nn.Dropout(p=0.2),
+            nn.ReLU(),
+            nn.Linear(64, 8),
+            nn.Dropout(p=0.1),
+            nn.LeakyReLU(0.6),
+            nn.Linear(8, 1),
+        )
+
+    def forward(self, x):
+        # x = self.dropout(x)
+        # print('dropout output', x)
+        x = self.layers(x)
+        x = x.squeeze(1)  # (B, 1) -> (B)
+        return x
+
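+# A quick sanity check of the model's output shape (a hedged sketch; the 117
+# below is an illustrative feature count, not a value fixed by this script):
+#
+#   model = My_Model(input_dim=117)
+#   dummy = torch.randn(4, 117)   # a batch of 4 samples
+#   print(model(dummy).shape)     # torch.Size([4])
+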
+config = {
+    'seed': 404,           # Your seed number, you can pick your lucky number. :)
+    'select_all': False,   # Whether to use all features.
+    'valid_ratio': 0.2,    # validation_size = train_size * valid_ratio
+    'n_epochs': 3000,      # Number of epochs.
+    'batch_size': 512,
+    'learning_rate': 1e-3,
+    # 'weight_decay': 1e-3,
+    'weight_decay': 0,
+    'early_stop': 400,     # If model has not improved for this many consecutive epochs, stop training.
+    'save_path': './models/model.ckpt'  # Your model will be saved here.
+}
+
+def select_feat(train_data, valid_data, test_data, select_all=True):
+    '''Selects useful features to perform regression.'''
+    y_train, y_valid = train_data[:, -1], valid_data[:, -1]
+    raw_x_train, raw_x_valid, raw_x_test = train_data[:, :-1], valid_data[:, :-1], test_data
+    print('select_feats', raw_x_train[0])
+    print('select_feat default idx', raw_x_train.shape[1])
+    all_feats = 'id,AL,AK,AZ,AR,CA,CO,CT,FL,GA,ID,IL,IN,IA,KS,KY,LA,MD,MA,MI,MN,MS,MO,NE,NV,NJ,NM,NY,NC,OH,OK,OR,RI,SC,TX,UT,VA,WA,cli,ili,hh_cmnty_cli,nohh_cmnty_cli,wearing_mask,travel_outside_state,work_outside_home,shop,restaurant,spent_time,large_event,public_transit,anxious,depressed,worried_finances,tested_positive,cli,ili,hh_cmnty_cli,nohh_cmnty_cli,wearing_mask,travel_outside_state,work_outside_home,shop,restaurant,spent_time,large_event,public_transit,anxious,depressed,worried_finances,tested_positive,cli,ili,hh_cmnty_cli,nohh_cmnty_cli,wearing_mask,travel_outside_state,work_outside_home,shop,restaurant,spent_time,large_event,public_transit,anxious,depressed,worried_finances,tested_positive,cli,ili,hh_cmnty_cli,nohh_cmnty_cli,wearing_mask,travel_outside_state,work_outside_home,shop,restaurant,spent_time,large_event,public_transit,anxious,depressed,worried_finances,tested_positive,cli,ili,hh_cmnty_cli,nohh_cmnty_cli,wearing_mask,travel_outside_state,work_outside_home,shop,restaurant,spent_time,large_event,public_transit,anxious,depressed,worried_finances'.split(',')
+    # states_feats = all_feats[1:38]
+    # illness = np.array(['cli','ili','hh_cmnty_cli','nohh_cmnty_cli','tested_positive'])
+    # valid_feats = 'AL,AK,AZ,AR,CA,CO,CT,FL,GA,ID,IL,IN,IA,KS,KY,LA,MD,MA,MI,MN,MS,MO,NE,NV,NJ,NM,NY,NC,OH,OK,OR,RI,SC,TX,UT,VA,WA,cli,ili,hh_cmnty_cli,nohh_cmnty_cli,tested_positive'.split(',')
+    valid_feats = 'cli,ili,hh_cmnty_cli,nohh_cmnty_cli,tested_positive'.split(',')
+
+    valid_idx = []
+    for i, feat in enumerate(all_feats):
+        if feat in valid_feats:
+            valid_idx.append(i)
+    print(valid_idx)
+    if select_all:
+        feat_idx = list(range(raw_x_train.shape[1]))
+    else:
+        # feat_idx = [0, 1, 2, 3, 4]  # TODO: Select suitable feature columns.
+        feat_idx = valid_idx
+    return raw_x_train[:, feat_idx], raw_x_valid[:, feat_idx], raw_x_test[:, feat_idx], y_train, y_valid
+
+
+def trainer(train_loader, valid_loader, model, config, device):
+    criterion = nn.MSELoss(reduction='mean')  # Define your loss function, do not modify this.
+    # Define your optimization algorithm.
+    # TODO: Please check https://pytorch.org/docs/stable/optim.html to get more available algorithms.
+    # TODO: L2 regularization (optimizer(weight decay...) or implement it yourself).
+
+    # optimizer = torch.optim.SGD(model.parameters(), lr=config['learning_rate'], momentum=0.9, weight_decay=config['weight_decay'])
+    # params_dict = [{
+    #     'params': model.layers[0].parameters(),
+    #     'lr': 0.001,
+    # }, {
+    #     'params': model.layers[1].parameters(),
+    #     'lr': 0.01,
+    #     # 'weight_decay': config['weight_decay'],
+    # }]
+    optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'], weight_decay=config['weight_decay'], amsgrad=False)
+    # optimizer = torch.optim.Adam(params_dict)
+    # Cosine annealing
+    # scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1, verbose=True)
+    # scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True, min_lr=1e-7)
+    # scheduler = CosineAnnealingLR(optimizer, T_max=50, eta_min=1e-7, verbose=True)
+    writer = SummaryWriter()  # Writer of TensorBoard.
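+
+    # The TODO above mentions implementing L2 regularization by hand instead of
+    # (or in addition to) the optimizer's weight_decay. A minimal sketch of the
+    # idea, commented out (l2_lambda is an illustrative value, not from this repo):
+    #
+    #   l2_lambda = 1e-3
+    #   l2_penalty = sum(p.pow(2).sum() for p in model.parameters())
+    #   loss = loss + l2_lambda * l2_penalty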
+
+    if not os.path.isdir('./models'):
+        os.mkdir('./models')  # Create directory for saving models.
+
+    n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0
+
+    for epoch in range(n_epochs):
+        model.train()  # Set your model to train mode.
+        loss_record = []
+
+        # tqdm is a package to visualize your training progress.
+        train_pbar = tqdm(train_loader, position=0, leave=True)
+
+        for x, y in train_pbar:
+            optimizer.zero_grad()              # Set gradient to zero.
+            x, y = x.to(device), y.to(device)  # Move your data to device.
+            pred = model(x)
+            loss = criterion(pred, y)
+            loss.backward()                    # Compute gradient (backpropagation).
+            optimizer.step()                   # Update parameters.
+            step += 1
+            loss_record.append(loss.detach().item())
+
+            # Display current epoch number and loss on tqdm progress bar.
+            train_pbar.set_description(f'Epoch [{epoch + 1}/{n_epochs}]')
+            train_pbar.set_postfix({'loss': loss.detach().item()})
+
+        # scheduler.step(epoch)
+        mean_train_loss = sum(loss_record) / len(loss_record)
+        writer.add_scalar('Loss/train', mean_train_loss, step)
+
+        model.eval()  # Set your model to evaluation mode.
+        loss_record = []
+        for x, y in valid_loader:
+            x, y = x.to(device), y.to(device)
+            with torch.no_grad():
+                pred = model(x)
+                loss = criterion(pred, y)
+            loss_record.append(loss.item())
+
+        mean_valid_loss = sum(loss_record) / len(loss_record)
+        print(f'Epoch [{epoch + 1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}')
+        writer.add_scalar('Loss/valid', mean_valid_loss, step)
+
+        if mean_valid_loss < best_loss:
+            best_loss = mean_valid_loss
+            torch.save(model.state_dict(), config['save_path'])  # Save your best model.
+            print('Saving model with loss {:.3f}...'.format(best_loss))
+            early_stop_count = 0
+        else:
+            early_stop_count += 1
+
+        if early_stop_count >= config['early_stop']:
+            print('\nModel is not improving, so we halt the training session.')
+            print('\n Mean valid loss', mean_valid_loss)
+            print('\n Best loss', best_loss)
+            return
+
+
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+# Set seed for reproducibility
+same_seed(config['seed'])
+
+# train_data size: 2699 x 118 (id + 37 states + 16 features x 5 days)
+# test_data size: 1078 x 117 (without last day's positive rate)
+train_data, test_data = pd.read_csv('./data/covid.train.csv').values, pd.read_csv('./data/covid.test.csv').values
+train_data, valid_data = train_valid_split(train_data, config['valid_ratio'], config['seed'])
+
+# Print out the data size.
+print(f"""train_data size: {train_data.shape}
+valid_data size: {valid_data.shape}
+test_data size: {test_data.shape}""")
+
+# Select features
+x_train, x_valid, x_test, y_train, y_valid = select_feat(train_data, valid_data, test_data, config['select_all'])
+
+# Print out the number of features.
+print(f'number of features: {x_train.shape[1]}')
+
+# train_dataset, valid_dataset, test_dataset = COVID19Dataset(x_train, y_train), \
+#                                              COVID19Dataset(x_valid, y_valid), \
+#                                              COVID19Dataset(x_test)
+train_dataset = COVID19Dataset(x_train, y_train)
+valid_dataset = COVID19Dataset(x_valid, y_valid)
+test_dataset = COVID19Dataset(x_test)
+
+# Pytorch data loader loads pytorch dataset into batches.
+train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True)
+valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True)
+test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False, pin_memory=True)
+
+model = My_Model(input_dim=x_train.shape[1]).to(device)  # Put your model and data on the same computation device.
+trainer(train_loader, valid_loader, model, config, device)
+
+def save_pred(preds, file):
+    '''Save predictions to the specified file.'''
+    with open(file, 'w') as fp:
+        writer = csv.writer(fp)
+        writer.writerow(['id', 'tested_positive'])
+        for i, p in enumerate(preds):
+            writer.writerow([i, p])
+
+# Predict on the test set
+model.load_state_dict(torch.load(config['save_path']))
+preds = predict(test_loader, model, device)
+save_pred(preds, 'pred.csv')
+
diff --git a/HW01/zhengquanwei028-kaggle-02.png b/HW01/zhengquanwei028-kaggle-02.png
new file mode 100644
index 00000000..e7d013d2
Binary files /dev/null and b/HW01/zhengquanwei028-kaggle-02.png differ
diff --git a/HW01/zhengquanwei028-kaggle.png b/HW01/zhengquanwei028-kaggle.png
new file mode 100644
index 00000000..9332db05
Binary files /dev/null and b/HW01/zhengquanwei028-kaggle.png differ
diff --git a/HW02/zhengquanwei028-HW02.py b/HW02/zhengquanwei028-HW02.py
new file mode 100644
index 00000000..19364672
--- /dev/null
+++ b/HW02/zhengquanwei028-HW02.py
@@ -0,0 +1,375 @@
+import os
+import random
+import pandas as pd
+import torch
+from tqdm import tqdm
+
+def load_feat(path):
+    feat = torch.load(path)
+    return feat
+
+def shift(x, n):
+    if n < 0:
+        left = x[0].repeat(-n, 1)
+        right = x[:n]
+    elif n > 0:
+        right = x[-1].repeat(n, 1)
+        left = x[n:]
+    else:
+        return x
+
+    return torch.cat((left, right), dim=0)
+
+def concat_feat(x, concat_n):
+    assert concat_n % 2 == 1  # n must be odd
+    if concat_n < 2:
+        return x
+    seq_len, feature_dim = x.size(0), x.size(1)
+    x = x.repeat(1, concat_n)
+    x = x.view(seq_len, concat_n, feature_dim).permute(1, 0, 2)  # concat_n, seq_len, feature_dim
+    mid = (concat_n // 2)
+    for r_idx in range(1, mid + 1):
+        x[mid + r_idx, :] = shift(x[mid + r_idx], r_idx)
+        x[mid - r_idx, :] = shift(x[mid - r_idx], -r_idx)
+
+    return x.permute(1, 0, 2).view(seq_len, concat_n * feature_dim)
+
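+# A toy illustration of what concat_feat produces (a hedged sketch; shapes and
+# values are made up for demonstration). With seq_len=4, feature_dim=1 and
+# concat_n=3, each frame is concatenated with its left/right neighbours, and
+# boundary frames repeat the edge frame:
+#
+#   x = torch.tensor([[1.], [2.], [3.], [4.]])
+#   concat_feat(x, 3)
+#   # tensor([[1., 1., 2.],
+#   #         [1., 2., 3.],
+#   #         [2., 3., 4.],
+#   #         [3., 4., 4.]])   # shape: (seq_len, concat_n * feature_dim)
+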
+def preprocess_data(split, feat_dir, phone_path, concat_nframes, train_ratio=0.8, train_val_seed=1337):
+    class_num = 41  # NOTE: pre-computed, should not need change
+    mode = 'train' if (split == 'train' or split == 'val') else 'test'
+
+    label_dict = {}
+    if mode != 'test':
+        phone_file = open(os.path.join(phone_path, f'{mode}_labels.txt')).readlines()
+
+        for line in phone_file:
+            line = line.strip('\n').split(' ')
+            label_dict[line[0]] = [int(p) for p in line[1:]]
+
+    if split == 'train' or split == 'val':
+        # split training and validation data
+        usage_list = open(os.path.join(phone_path, 'train_split.txt')).readlines()
+        random.seed(train_val_seed)
+        random.shuffle(usage_list)
+        percent = int(len(usage_list) * train_ratio)
+        usage_list = usage_list[:percent] if split == 'train' else usage_list[percent:]
+    elif split == 'test':
+        usage_list = open(os.path.join(phone_path, 'test_split.txt')).readlines()
+    else:
+        raise ValueError('Invalid \'split\' argument for dataset: PhoneDataset!')
+
+    usage_list = [line.strip('\n') for line in usage_list]
+    print('[Dataset] - # phone classes: ' + str(class_num) + ', number of utterances for ' + split + ': ' + str(len(usage_list)))
+
+    max_len = 3000000
+    X = torch.empty(max_len, 39 * concat_nframes)
+    if mode != 'test':
+        y = torch.empty(max_len, dtype=torch.long)
+
+    idx = 0
+    for i, fname in tqdm(enumerate(usage_list)):
+        feat = load_feat(os.path.join(feat_dir, mode, f'{fname}.pt'))
+        cur_len = len(feat)
+        feat = concat_feat(feat, concat_nframes)
+        if mode != 'test':
+            label = torch.LongTensor(label_dict[fname])
+
+        X[idx: idx + cur_len, :] = feat
+        if mode != 'test':
+            y[idx: idx + cur_len] = label
+
+        idx += cur_len
+
+    X = X[:idx, :]
+    if mode != 'test':
+        y = y[:idx]
+
+    print(f'[INFO] {split} set')
+    print(X.shape)
+    if mode != 'test':
+        print(y.shape)
+        return X, y
+    else:
+        return X
+
+
+# dataset
+
+import torch
+from torch.utils.data import Dataset
+from torch.utils.data import DataLoader
+
+
+class LibriDataset(Dataset):
+    def __init__(self, X, y=None):
+        self.data = X
+        if y is not None:
+            self.label = torch.LongTensor(y)
+        else:
+            self.label = None
+
+    def __getitem__(self, idx):
+        if self.label is not None:
+            return self.data[idx], self.label[idx]
+        else:
+            return self.data[idx]
+
+    def __len__(self):
+        return len(self.data)
+
+
+# model
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from torch.autograd import Variable
+from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence
+
+class BasicBlock(nn.Module):
+    def __init__(self, input_dim, output_dim):
+        super(BasicBlock, self).__init__()
+
+        self.block = nn.Sequential(
+            nn.Linear(input_dim, output_dim),
+            nn.ReLU(),
+            nn.BatchNorm1d(output_dim),
+            nn.Dropout(0.25),
+        )
+
+    def forward(self, x):
+        x = self.block(x)
+        return x
+
+
+class Classifier(nn.Module):
+    def __init__(self, input_dim, output_dim=41, hidden_layers=1, hidden_dim=256):
+        super(Classifier, self).__init__()
+
+        self.fc = nn.Sequential(
+            BasicBlock(input_dim, hidden_dim),
+            *[BasicBlock(hidden_dim, hidden_dim) for _ in range(hidden_layers)],
+            nn.Linear(hidden_dim, output_dim)
+        )
+
+    def forward(self, x):
+        x = self.fc(x)
+        return x
+
+# Hyper-parameters
+# config
+# data parameters
+concat_nframes = 17     # the number of frames to concat with; n must be odd (total 2k+1 = n frames)
+train_ratio = 0.8       # the ratio of data used for training; the rest will be used for validation
+
+# training parameters
+seed = 1024             # random seed
+batch_size = 2048       # batch size
+num_epoch = 50          # the number of training epochs
+learning_rate = 0.0001  # learning rate
+model_path = './model.ckpt'  # the path where the checkpoint will be saved
+
+# model parameters
+input_dim = 39 * concat_nframes  # the input dim of the model; you should not change the value
+hidden_layers = 12      # the number of hidden layers
+hidden_dim = 1024       # the hidden dim
+
+# RNN
+
+class RNN_Classifier(nn.Module):
+    def __init__(self, input_dim, output_dim=41, hidden_layers=1, hidden_dim=256):
+        super(RNN_Classifier, self).__init__()
+        self.rnn = nn.RNN(input_size=input_dim, hidden_size=hidden_dim, num_layers=hidden_layers, batch_first=True, dropout=0.25)
+        self.fc = nn.Linear(hidden_dim, output_dim)
+
+    def forward(self, x):
+        # nn.RNN returns (output, h_n); unpacking the hidden state as a
+        # (h_n, c_n) pair only works for LSTMs, so take it directly here.
+        out, h_n = self.rnn(x, None)
+        # Feed the RNN output to the fully-connected layer.
+        out = self.fc(out)
+        return out
+
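+# Note: the commented-out RNN path in the training loop below feeds the model
+# (B, 1, input_dim), i.e. a sequence of length 1, which gives the RNN nothing
+# to recur over. A sketch of a more natural shaping (an assumption, not part of
+# the original; it would require the RNN to be built with input_size=39):
+#
+#   # features: (B, concat_nframes * 39) -> (B, concat_nframes, 39)
+#   x = features.view(-1, concat_nframes, 39)
+#   out, h_n = model.rnn(x)        # out: (B, concat_nframes, hidden_dim)
+#   logits = model.fc(out[:, -1])  # classify from the last frame's state
+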
+# Prepare dataset and model
+
+import gc
+
+# preprocess data
+train_X, train_y = preprocess_data(split='train', feat_dir='./libriphone/feat', phone_path='./libriphone', concat_nframes=concat_nframes, train_ratio=train_ratio)
+val_X, val_y = preprocess_data(split='val', feat_dir='./libriphone/feat', phone_path='./libriphone', concat_nframes=concat_nframes, train_ratio=train_ratio)
+
+# get dataset
+train_set = LibriDataset(train_X, train_y)
+val_set = LibriDataset(val_X, val_y)
+
+# remove raw features to save memory
+del train_X, train_y, val_X, val_y
+gc.collect()
+
+# get dataloader
+train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
+val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
+
+
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+print(f'DEVICE: {device}')
+
+
+import numpy as np
+
+# fix seed
+def same_seeds(seed):
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)
+    np.random.seed(seed)
+    torch.backends.cudnn.benchmark = False
+    torch.backends.cudnn.deterministic = True
+
+# fix random seed
+same_seeds(seed)
+
+# create model, define a loss function, and optimizer
+model = Classifier(input_dim=input_dim, hidden_layers=hidden_layers, hidden_dim=hidden_dim).to(device)
+# model = RNN_Classifier(input_dim=input_dim, hidden_layers=hidden_layers, hidden_dim=hidden_dim).to(device)
+criterion = nn.CrossEntropyLoss()
+optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
+
+
+# training
+
+best_acc = 0.0
+for epoch in range(num_epoch):
+    train_acc = 0.0
+    train_loss = 0.0
+    val_acc = 0.0
+    val_loss = 0.0
+
+    # training
+    model.train()  # set the model to training mode
+    for i, batch in enumerate(tqdm(train_loader)):
+        features, labels = batch
+        features = features.to(device)
+        labels = labels.to(device)
+
+        optimizer.zero_grad()
+        outputs = model(features)
+        # outputs = model(torch.unsqueeze(features, dim=1)).to(device)
+        outputs = torch.squeeze(outputs)
+        loss = criterion(outputs, labels)
+        loss.backward()
+        optimizer.step()
+
+        _, train_pred = torch.max(outputs, 1)  # get the index of the class with the highest probability
+        train_acc += (train_pred.detach() == labels.detach()).sum().item()
+        train_loss += loss.item()
+
+    # validation
+    if len(val_set) > 0:
+        model.eval()  # set the model to evaluation mode
+        with torch.no_grad():
+            for i, batch in enumerate(tqdm(val_loader)):
+                features, labels = batch
+                features = features.to(device)
+                labels = labels.to(device)
+                outputs = model(features)
+                # RNN
+                # outputs = model(torch.unsqueeze(features, dim=1)).to(device)
+                # outputs = torch.squeeze(outputs)
+                loss = criterion(outputs, labels)  # the loss needs the labels as well
+                _, val_pred = torch.max(outputs, 1)
+                val_acc += (val_pred.cpu() == labels.cpu()).sum().item()  # get the index of the class with the highest probability
+                val_loss += loss.item()
+
+            print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f} | Val Acc: {:3.6f} loss: {:3.6f}'.format(
+                epoch + 1, num_epoch, train_acc / len(train_set), train_loss / len(train_loader),
+                val_acc / len(val_set), val_loss / len(val_loader)
+            ))
+
+            # if the model improves, save a checkpoint at this epoch
+            if val_acc > best_acc:
+                best_acc = val_acc
+                torch.save(model.state_dict(), model_path)
+                print('saving model with acc {:.3f}'.format(best_acc / len(val_set)))
+    else:
+        print('[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f}'.format(
+            epoch + 1, num_epoch, train_acc / len(train_set), train_loss / len(train_loader)
+        ))
+
+# if not validating, save the last epoch
+if len(val_set) == 0:
+    torch.save(model.state_dict(), model_path)
+    print('saving model at last epoch')
+
+
+# save params
+
+print('===== Parameters =====')
+print('concat_nframes: ', concat_nframes)
+print('seed: ', seed)
+print('batch_size: ', batch_size)
+print('num_epoch: ', num_epoch)
+print('learning_rate: ', learning_rate)
+print('input_dim: ', input_dim)
+print('hidden_layers: ', hidden_layers)
+print('hidden_dim: ', hidden_dim)
+print('===== end =====')
+
+
+del train_loader, val_loader
+gc.collect()
+
+# Testing
+
+# load data
+test_X = preprocess_data(split='test', feat_dir='./libriphone/feat', phone_path='./libriphone', concat_nframes=concat_nframes)
+test_set = LibriDataset(test_X, None)
+test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
+
+
+# load model
+model = Classifier(input_dim=input_dim, hidden_layers=hidden_layers, hidden_dim=hidden_dim).to(device)
+# model = RNN_Classifier(input_dim=input_dim, hidden_layers=hidden_layers, hidden_dim=hidden_dim).to(device)
+# model = torch.nn.DataParallel(model)
+model.load_state_dict(torch.load(model_path))
+
+
+# Make prediction.
+
+test_acc = 0.0
+test_lengths = 0
+pred = np.array([], dtype=np.int32)
+
+model.eval()
+with torch.no_grad():
+    for i, batch in enumerate(tqdm(test_loader)):
+        features = batch
+        features = features.to(device)
+        outputs = model(features)
+        # outputs = model(torch.unsqueeze(features, dim=1)).to(device)
+        # _, test_pred = torch.max(outputs, 1)  # get the index of the class with the highest probability
+        _, test_pred = torch.max(torch.squeeze(outputs), 1)
+        pred = np.concatenate((pred, test_pred.cpu().numpy()), axis=0)
+
+
+with open('prediction.csv', 'w') as f:
+    f.write('Id,Class\n')
+    for i, y in enumerate(pred):
+        f.write('{},{}\n'.format(i, y))
+
diff --git a/HW03/HW03.py b/HW03/HW03.py
new file mode 100644
index 00000000..fafd0a54
--- /dev/null
+++ b/HW03/HW03.py
@@ -0,0 +1,470 @@
+import torchvision.utils
+
+_exp_name = "sample"
+
+# Import necessary packages.
+import numpy as np
+import pandas as pd
+import torch
+import os
+import torch.nn as nn
+import torchvision.transforms as transforms
+import torchvision.transforms.functional as functional
+from PIL import Image
+# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
+from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
+from torchvision.datasets import DatasetFolder, VisionDataset
+from torchvision.models import resnet18, vgg16
+
+# This is for the progress bar.
+from tqdm.auto import tqdm
+import random
+import matplotlib.pyplot as plt
+import sys
+
+myseed = 2333  # set a random seed for reproducibility
+torch.backends.cudnn.deterministic = True
+torch.backends.cudnn.benchmark = False
+np.random.seed(myseed)
+torch.manual_seed(myseed)
+if torch.cuda.is_available():
+    torch.cuda.manual_seed_all(myseed)
+
+# Normally, we don't need augmentations in testing and validation.
+# All we need here is to resize the PIL image and transform it into a Tensor.
+test_tfm = transforms.Compose([
+    transforms.Resize((128, 128)),
+    transforms.ToTensor(),
+])
+# normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
+normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+# img_grid = torchvision.utils.make_grid()
+
+def imshow(imgs):
+    # imgs = imgs / 2 + 0.5
+    plt.imshow(imgs)
+    plt.show()
+
+# However, it is also possible to use augmentation in the testing phase.
+# You may use train_tfm to produce a variety of images and then test using ensemble methods.
+# train_tfm = transforms.Compose([
+#     # Resize the image into a fixed shape (height = width = 128)
+#     transforms.RandomResizedCrop(256, scale=(0.08, 1.0), ratio=(3./4, 4./3)),
+#     # Flip the image horizontally
+#     transforms.RandomHorizontalFlip(p=0.5),
+#
+#     transforms.Resize((128, 128)),
+#     # You may add some transforms here.
+#     # ToTensor() should be the last one of the transforms.
+#     transforms.ToTensor(),
+#     # normalize,
+# ])
+
+train_tfm = transforms.Compose([
+    transforms.AutoAugment(transforms.AutoAugmentPolicy.IMAGENET),
+    # Flip the image horizontally
+    transforms.RandomHorizontalFlip(p=0.5),
+    # Rotate the image
+    transforms.RandomRotation(degrees=(0, 180)),
+    transforms.Resize((128, 128)),
+    transforms.ToTensor(),
+    # normalize,
+])
+
+
+class FoodDataset(Dataset):
+
+    def __init__(self, path, tfm=test_tfm, files=None):
+        super().__init__()
+        self.path = path
+        self.files = sorted([os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")])
+        if files is not None:
+            self.files = files
+        print(f"One {path} sample", self.files[0])
+        self.transform = tfm
+
+    def __len__(self):
+        return len(self.files)
+
+    def __getitem__(self, idx):
+        fname = self.files[idx]
+        im = Image.open(fname)
+        im = self.transform(im)
+        # im = self.data[idx]
+        try:
+            label = int(fname.split("/")[-1].split("_")[0])
+        except ValueError:
+            label = -1  # test has no label
+        return im, label
+
+
+class Classifier(nn.Module):
+    def __init__(self):
+        super(Classifier, self).__init__()
+        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
+        # torch.nn.MaxPool2d(kernel_size, stride, padding)
+        # input dimensions: [3, 128, 128]
+        self.cnn = nn.Sequential(
+            nn.Conv2d(3, 64, 3, 1, 1),     # [64, 128, 128]
+            nn.BatchNorm2d(64),
+            nn.ReLU(),
+            nn.MaxPool2d(2, 2, 0),         # [64, 64, 64]
+
+            nn.Conv2d(64, 128, 3, 1, 1),   # [128, 64, 64]
+            nn.BatchNorm2d(128),
+            nn.ReLU(),
+            nn.MaxPool2d(2, 2, 0),         # [128, 32, 32]
+
+            nn.Conv2d(128, 256, 3, 1, 1),  # [256, 32, 32]
+            nn.BatchNorm2d(256),
+            nn.ReLU(),
+            nn.MaxPool2d(2, 2, 0),         # [256, 16, 16]
+
+            nn.Conv2d(256, 512, 3, 1, 1),  # [512, 16, 16]
+            nn.BatchNorm2d(512),
+            nn.ReLU(),
+            nn.MaxPool2d(2, 2, 0),         # [512, 8, 8]
+
+            nn.Conv2d(512, 512, 3, 1, 1),  # [512, 8, 8]
+            nn.BatchNorm2d(512),
+            nn.ReLU(),
+            nn.MaxPool2d(2, 2, 0),         # [512, 4, 4]
+        )
+        self.fc = nn.Sequential(
+            nn.Linear(512 * 4 * 4, 1024),
+            nn.ReLU(),
+            nn.Linear(1024, 512),
+            nn.ReLU(),
+            nn.Linear(512, 11)
+        )
+
+    def forward(self, x):
+        out = self.cnn(x)
+        out = out.view(out.size()[0], -1)
+        return self.fc(out)
+
+
+# ResNet
+class Residual_Network(nn.Module):
+    def __init__(self):
+        super(Residual_Network, self).__init__()
+
+        self.cnn_layer1 = nn.Sequential(
+            nn.Conv2d(3, 64, 3, 1, 1),
+            nn.BatchNorm2d(64),
+        )
+
+        self.cnn_layer2 = nn.Sequential(
+            nn.Conv2d(64, 64, 3, 1, 1),
+            nn.BatchNorm2d(64),
+        )
+
+        self.cnn_layer3 = nn.Sequential(
+            nn.Conv2d(64, 128, 3, 2, 1),
+            nn.BatchNorm2d(128),
+        )
+
+        self.cnn_layer4 = nn.Sequential(
+            nn.Conv2d(128, 128, 3, 1, 1),
+            nn.BatchNorm2d(128),
+        )
+        self.cnn_layer5 = nn.Sequential(
+            nn.Conv2d(128, 256, 3, 2, 1),
+            nn.BatchNorm2d(256),
+        )
+        self.cnn_layer6 = nn.Sequential(
+            nn.Conv2d(256, 256, 3, 1, 1),
+            nn.BatchNorm2d(256),
+        )
+        self.fc_layer = nn.Sequential(
+            nn.Linear(256 * 32 * 32, 256),
+            nn.ReLU(),
+            nn.Linear(256, 11)
+        )
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        # input (x): [batch_size, 3, 128, 128]
+        # output: [batch_size, 11]
+
+        # Extract features by convolutional layers.
+        x1 = self.cnn_layer1(x)
+        x1 = self.relu(x1)
+
+        x2 = self.cnn_layer2(x1)
+        x2 = self.relu(x2)
+
+        x3 = self.cnn_layer3(x2)
+        x3 = self.relu(x3)
+
+        x4 = self.cnn_layer4(x3)
+        x4 = self.relu(x4)
+
+        x5 = self.cnn_layer5(x4)
+        x5 = self.relu(x5)
+
+        x6 = self.cnn_layer6(x5)
+        x6 = self.relu(x6)
+
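+        # Note: despite its name, this network has no skip connections yet; each
+        # cnn_layer output is used directly. A hedged sketch of what a residual
+        # add would look like for the same-shape layers (not part of the original):
+        #
+        #   x2 = self.cnn_layer2(x1)
+        #   x2 = self.relu(x2 + x1)   # add the input back before the activation
+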
+        # The extracted feature map must be flattened before going to fully-connected layers.
+        xout = x6.flatten(1)
+
+        # The features are transformed by fully-connected layers to obtain the final logits.
+        xout = self.fc_layer(xout)
+        return xout
+
+
+batch_size = 128
+_dataset_dir = "./food11"
+# Construct datasets.
+# The argument "loader" tells how torchvision reads the data.
+train_set = FoodDataset(os.path.join(_dataset_dir, "training"), tfm=train_tfm)
+train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
+valid_set = FoodDataset(os.path.join(_dataset_dir, "validation"), tfm=test_tfm)
+valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
+
+# "cuda" only when GPUs are available.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# The number of training epochs and patience.
+n_epochs = 50
+patience = 300  # If no improvement in 'patience' epochs, early stop.
+lr = 0.0003
+weight_decay = 1e-5
+# Initialize a model, and put it on the device specified.
+# model = Classifier().to(device)
+
+# model = Residual_Network().to(device)
+model = vgg16(num_classes=11).to(device)      # 11 food classes
+model2 = resnet18(num_classes=11).to(device)  # 11 food classes
+
+# For the classification task, we use cross-entropy as the measurement of performance.
+criterion = nn.CrossEntropyLoss()
+criterion2 = nn.CrossEntropyLoss()
+
+# Initialize optimizer; you may fine-tune some hyperparameters such as learning rate on your own.
+optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
+optimizer2 = torch.optim.Adam(model2.parameters(), lr=lr, weight_decay=weight_decay)
+
+# Ensemble weights for the two models
+weight_1 = 0.4
+weight_2 = 0.6
+
+# Initialize trackers; these are not parameters and should not be changed.
+stale = 0
+best_acc = 0
+
+
+# Hyper-parameters
+print('-- Training parameters --')
+print('seed %d' % myseed)
+print('batch_size %d' % batch_size)
+print('n_epochs %d' % n_epochs)
+print('patience %d' % patience)
+print('lr %f' % lr)
+print('weight_decay %f' % weight_decay)
+print('-------------')
+for epoch in range(n_epochs):
+
+    # ---------- Training ----------
+    # Make sure the models are in train mode before training.
+    model.train()
+    model2.train()
+    # These are used to record information in training.
+    train_loss = []
+    train_accs = []
+    train_loss_2 = []
+    train_accs_2 = []
+    # Accuracy of the model ensemble
+    ensemble_accs = []
+    for batch in tqdm(train_loader):
+        # A batch consists of image data and corresponding labels.
+        imgs, labels = batch
+        # imgs = imgs.half()
+        # print(imgs.shape, labels.shape)
+        # for im in imgs:
+        #     img_2 = functional.to_pil_image(im)
+        #     plt.imshow(img_2)
+        #     plt.show()
+        # sys.exit(0)
+
+        # Forward the data. (Make sure data and model are on the same device.)
+        logits = model(imgs.to(device))
+        logits2 = model2(imgs.to(device))
+        ensemble_logits = logits * weight_1 + logits2 * weight_2
+        # Calculate the cross-entropy loss.
+        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
+        loss = criterion(logits, labels.to(device))
+        loss2 = criterion2(logits2, labels.to(device))
+        # Gradients stored in the parameters in the previous step should be cleared out first.
+        optimizer.zero_grad()
+        optimizer2.zero_grad()
+        # Compute the gradients for parameters.
+        loss.backward()
+        loss2.backward()
+        # Clip the gradient norms for stable training.
+        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
+        grad_norm2 = nn.utils.clip_grad_norm_(model2.parameters(), max_norm=10)
+
+        # Update the parameters with computed gradients.
+        optimizer.step()
+        optimizer2.step()
+        # Compute the accuracy for the current batch.
+        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
+        acc2 = (logits2.argmax(dim=-1) == labels.to(device)).float().mean()
+        ensemble_acc = (ensemble_logits.argmax(dim=-1) == labels.to(device)).float().mean()
+        # Record the loss and accuracy.
+        train_loss.append(loss.item())
+        train_accs.append(acc)
+        train_loss_2.append(loss2.item())
+        train_accs_2.append(acc2)
+        ensemble_accs.append(ensemble_acc)
+    train_loss = sum(train_loss) / len(train_loss)
+    train_acc = sum(train_accs) / len(train_accs)
+    train_loss_2 = sum(train_loss_2) / len(train_loss_2)
+    train_accs_2 = sum(train_accs_2) / len(train_accs_2)
+    ensemble_accs = sum(ensemble_accs) / len(ensemble_accs)
+    # Print the information.
+    print(f"[ Train VGG16 | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")
+    print(f"[ Train Resnet18 | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss_2:.5f}, acc = {train_accs_2:.5f}")
+    print(f"[ Train Ensemble | {epoch + 1:03d}/{n_epochs:03d} ] acc = {ensemble_accs:.5f}")
+    # ---------- Validation ----------
+    # Make sure the models are in eval mode so that modules like dropout are disabled and work normally.
+    model.eval()
+    model2.eval()
+    # These are used to record information in validation.
+    valid_loss = []
+    valid_accs = []
+    valid_loss_2 = []
+    valid_accs_2 = []
+    # Accuracy of the model ensemble
+    ensemble_accs = []
+    # Iterate the validation set by batches.
+    for batch in tqdm(valid_loader):
+        # A batch consists of image data and corresponding labels.
+        imgs, labels = batch
+        # imgs = imgs.half()
+
+        # We don't need gradients in validation.
+        # Using torch.no_grad() accelerates the forward process.
+        with torch.no_grad():
+            logits = model(imgs.to(device))
+            logits2 = model2(imgs.to(device))
+            ensemble_logits = logits * weight_1 + logits2 * weight_2
+        # We can still compute the loss (but not the gradient).
+        loss = criterion(logits, labels.to(device))
+        loss2 = criterion2(logits2, labels.to(device))
+        # Compute the accuracy for the current batch.
+        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
+        acc2 = (logits2.argmax(dim=-1) == labels.to(device)).float().mean()
+        ensemble_acc = (ensemble_logits.argmax(dim=-1) == labels.to(device)).float().mean()
+        # Record the loss and accuracy.
+        valid_loss.append(loss.item())
+        valid_accs.append(acc)
+        valid_loss_2.append(loss2.item())
+        valid_accs_2.append(acc2)
+        ensemble_accs.append(ensemble_acc)
+        # break
+
+    # The average loss and accuracy for the entire validation set is the average of the recorded values.
+    valid_loss = sum(valid_loss) / len(valid_loss)
+    valid_acc = sum(valid_accs) / len(valid_accs)
+    valid_loss_2 = sum(valid_loss_2) / len(valid_loss_2)
+    valid_accs_2 = sum(valid_accs_2) / len(valid_accs_2)
+    ensemble_accs = sum(ensemble_accs) / len(ensemble_accs)
+
+    # Print the information.
+ print(f"[ Valid VGG16 | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}") + print(f"[ Valid Resnet18 | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss_2:.5f}, acc = {valid_accs_2:.5f}") + print(f"[ Train Ensemble | {epoch + 1:03d}/{n_epochs:03d} ] loss = {ensemble_accs:.5f}, acc = {ensemble_accs:.5f}") + + # update logs + if valid_acc > best_acc: + with open(f"./{_exp_name}_log.txt", "a"): + print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best") + else: + with open(f"./{_exp_name}_log.txt", "a"): + print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}") + + # save models + if valid_acc > best_acc: + print(f"Best model found at epoch {epoch}, saving model") + torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error + best_acc = valid_acc + stale = 0 + else: + stale += 1 + if stale > patience: + print(f"No improvment {patience} consecutive epochs, early stopping") + break + + # ensemble update logs + if ensemble_accs > best_acc: + with open(f"./{_exp_name}_log.txt", "a"): + print(f"[ Model_1 Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best") + print(f"[ Model_2 Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss_2:.5f}, acc = {valid_accs_2:.5f} -> best") + print(f"[ Ensemble Model_1 Valid | {epoch + 1:03d}/{n_epochs:03d} ] ensemble acc = {ensemble_accs:.5f} -> best") + else: + with open(f"./{_exp_name}_log.txt", "a"): + print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}") + print(f"[ Model_2 Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss_2:.5f}, acc = {valid_accs_2:.5f} -> best") + # ensemble save models + if ensemble_accs > best_acc: + print(f"Best model found at epoch {epoch}, saving model") + torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error + torch.save(model2.state_dict(), f"{_exp_name}_2_best.ckpt") # only save best to prevent output memory exceed error + best_acc = ensemble_accs + stale = 0 + else: + stale += 1 + if stale > patience: + print(f"No improvment {patience} consecutive epochs, early stopping") + break + +test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm) +test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True) + +# model_best = Classifier().to(device) +# model_best = Residual_Network().to(device) +model_best = vgg16().to(device) +model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt")) +model_best.eval() + +model2_best = resnet18().to(device) +model2_best.load_state_dict(torch.load(f"{_exp_name}_2_best.ckpt")) +model2_best.eval() +prediction = [] +with torch.no_grad(): + for data,_ in test_loader: + test_pred = model_best(data.to(device)) + test_pred_2 = model2_best(data.to(device)) + test_label = np.argmax((test_pred * weight_1 + test_pred_2 * weight_2).cpu().data.numpy(), axis=1) + prediction += test_label.squeeze().tolist() + + +#create test csv +def pad4(i): + return "0"*(4-len(str(i)))+str(i) +df = pd.DataFrame() +df["Id"] = [pad4(i) for i in range(1,len(test_set)+1)] +df["Category"] = prediction +df.to_csv("submission.csv",index = False) + +# 超参数 +print('-- 训练参数 --') +print('seed %d' % myseed) +print('batch_size %d' % batch_size) +print('n_epochs %d' % n_epochs) +print('patience %d' % patience) +print('lr %f' % lr) 
+print('weight_decay %f' % weight_decay)
+print('-------------')
\ No newline at end of file
diff --git a/HW03/note.md b/HW03/note.md
new file mode 100644
index 00000000..2ca5f5d8
--- /dev/null
+++ b/HW03/note.md
@@ -0,0 +1,144 @@
+# HW03 notes
+
+    nn.Conv2d(3, 64, 3, 1, 1)
+    input channels: 3
+    output channels: 64
+    kernel size: 3 * 3
+    stride: 1
+    padding: 1
+
+```python
+torch.nn.Conv2d(in_channels,
+                out_channels,
+                kernel_size,
+                stride=1,
+                padding=0,
+                dilation=1,
+                groups=1,
+                bias=True,
+                padding_mode='zeros',
+                device=None,
+                dtype=None)
+```
+
+# 1. First training run
+
+Model unchanged; changed the image-augmentation pipeline:
+
+```python
+train_tfm = transforms.Compose([
+    transforms.AutoAugment(transforms.AutoAugmentPolicy.IMAGENET),
+    # Flip the image horizontally
+    transforms.RandomHorizontalFlip(p=0.5),
+    # Rotate the image
+    transforms.RandomRotation(degrees=(0, 180)),
+    transforms.Resize((128, 128)),
+    transforms.ToTensor(),
+    # normalize,
+])
+```
+
+Training parameters:
+
+```bash
+-- Training parameters --
+seed 6666
+batch_size 64
+n_epochs 50
+patience 300
+lr 0.000300
+weight_decay 0.000010
+-------------
+```
+
+# 2. Second training run
+
+Increased batch_size and trained for more epochs.
+
+```python
+-- Training parameters --
+seed 6666
+batch_size 128
+n_epochs 100
+patience 300
+lr 0.000300
+weight_decay 0.000010
+-------------
+```
+
+Kaggle score:
+
+```
+Score: 0.76195
+Private score: 0.73282
+```
+
+# 3. Third training run
+
+Changed the model to the sample Residual_Network and reduced batch_size.
+
+```python
+-- Training parameters --
+seed 6666
+batch_size 64
+n_epochs 100
+patience 300
+lr 0.000300
+weight_decay 0.000010
+```
+
+acc = 0.69709 -> best
+
+# 4. Fourth training run
+
+Changed the model to `resnet18`.
+
+```python
+-- Training parameters --
+seed 6666
+batch_size 64
+n_epochs 100
+patience 300
+lr 0.000300
+weight_decay 0.000010
+```
+
+0.72151
+
+# 5. Fifth training run
+
+Kept `resnet18` and trained longer: 200 epochs.
+
+```python
+-- Training parameters --
+seed 6666
+batch_size 64
+n_epochs 200
+patience 300
+lr 0.000300
+weight_decay 0.000010
+```
+
+Test-set accuracy: `0.75332`
+Training-set accuracy: `0.90812`
+
+Kaggle results:
+
+```python
+private_score: 0.77934
+public_score: 0.77988
+```
+
+# 6. Sixth training run
+
+Use model ensembling; a sketch of the idea follows.
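+
+The ensemble in HW03.py combines the two models' logits with fixed weights
+(0.4 for VGG16, 0.6 for ResNet18). A minimal sketch of the idea, assuming
+`model` and `model2` are the two trained classifiers and `imgs` is a batch
+already loaded from the test loader:
+
+```python
+# Weighted soft-voting over logits (sketch; mirrors the HW03.py test loop)
+weight_1, weight_2 = 0.4, 0.6
+with torch.no_grad():
+    logits = model(imgs.to(device))    # (B, 11)
+    logits2 = model2(imgs.to(device))  # (B, 11)
+    ensemble = weight_1 * logits + weight_2 * logits2
+    pred = ensemble.argmax(dim=-1)     # final class per image
+```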