diff --git a/EduKTM/LBKT/LBKT.py b/EduKTM/LBKT/LBKT.py
index 29846ee..540d0d2 100644
--- a/EduKTM/LBKT/LBKT.py
+++ b/EduKTM/LBKT/LBKT.py
@@ -3,33 +3,37 @@
 from sklearn import metrics
 from sklearn.metrics import mean_squared_error
-
 import logging
 import torch
 import torch.nn as nn
 import numpy as np
-from .model import *
-import math
-import json
-import argparse
+from .model import Recurrent
 from EduKTM import KTM
 from tqdm import tqdm
+
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+

 def compute_auc(all_target, all_pred):
     return metrics.roc_auc_score(all_target, all_pred)
+

 def compute_accuracy(all_target, all_pred):
     all_pred[all_pred > 0.5] = 1.0
     all_pred[all_pred <= 0.5] = 0.0
     return metrics.accuracy_score(all_target, all_pred)
+

 def binary_entropy(target, pred):
-    loss = target * np.log(np.maximum(1e-10, pred)) + (1.0 - target) * np.log(np.maximum(1e-10, 1.0 - pred))
+    loss = target * np.log(np.maximum(1e-10, pred)) \
+        + (1.0 - target) * np.log(np.maximum(1e-10, 1.0 - pred))
     return np.average(loss) * -1.0

-def train_one_epoch(recurrent,optimizer,criterion, batch_size,Topics_all, Resps_all,time_factor_all,attempts_factor_all,hints_factor_all):
+
+def train_one_epoch(recurrent, optimizer, criterion,
+                    batch_size, Topics_all, Resps_all,
+                    time_factor_all, attempts_factor_all, hints_factor_all):
     recurrent.train()
     all_pred = []
     all_target = []
@@ -45,27 +49,29 @@ def train_one_epoch(recurrent,optimizer,criterion, batch_size,Topics_all, Resps_
     for idx in tqdm(range(n)):
         optimizer.zero_grad()
-        Topics = Topics_all[idx * batch_size : (idx + 1) * batch_size,:]
-        Resps = Resps_all[idx * batch_size : (idx + 1) * batch_size, :]
-        time_factor = time_factor_all[idx * batch_size : (idx + 1) * batch_size, :]
-        attempts_factor = attempts_factor_all[idx * batch_size : (idx + 1) * batch_size, :]
-        hints_factor = hints_factor_all[idx * batch_size : (idx + 1) * batch_size, :]
+        Topics = Topics_all[idx * batch_size: (idx + 1) * batch_size, :]
+        Resps = Resps_all[idx * batch_size: (idx + 1) * batch_size, :]
+        time_factor = time_factor_all[idx * batch_size:
+                                      (idx + 1) * batch_size, :]
+        attempts_factor = attempts_factor_all[idx * batch_size:
+                                              (idx + 1) * batch_size, :]
+        hints_factor = hints_factor_all[idx * batch_size:
+                                        (idx + 1) * batch_size, :]

         input_topics = torch.from_numpy(Topics).long().to(device)
         input_resps = torch.from_numpy(Resps).long().to(device)
         input_time_factor = torch.from_numpy(time_factor).float().to(device)
-        input_attempts_factor = torch.from_numpy(attempts_factor).float().to(device)
+        input_attempts_factor = torch.from_numpy(
+            attempts_factor).float().to(device)
         input_hints_factor = torch.from_numpy(hints_factor).float().to(device)

-        y_pred = recurrent(input_topics,input_resps, input_time_factor,input_attempts_factor,input_hints_factor)
+        y_pred = recurrent(input_topics, input_resps, input_time_factor,
+                           input_attempts_factor, input_hints_factor)

         mask = input_topics[:, 1:] > 0
         masked_pred = y_pred[:, 1:][mask]
         masked_truth = input_resps[:, 1:][mask]
-
         loss = criterion(masked_pred, masked_truth.float()).sum()
-
-
         loss.backward()
         optimizer.step()

@@ -74,7 +80,7 @@ def train_one_epoch(recurrent,optimizer,criterion, batch_size,Topics_all, Resps_
         all_pred.append(masked_pred)
         all_target.append(masked_truth)
-    
+
     all_pred = np.concatenate(all_pred, axis=0)
     all_target = np.concatenate(all_target, axis=0)

@@ -82,28 +88,38 @@ def train_one_epoch(recurrent,optimizer,criterion, batch_size,Topics_all, Resps_
     auc = compute_auc(all_target, all_pred)
     acc = compute_accuracy(all_target, all_pred)
-    return loss,auc,acc
+    return loss, auc, acc
+

-def test_one_epoch(recurrent,batch_size,Topics_all, Resps_all,time_factor_all,attempts_factor_all,hints_factor_all):
+def test_one_epoch(recurrent, batch_size, Topics_all, Resps_all,
+                   time_factor_all, attempts_factor_all, hints_factor_all):
     recurrent.eval()
-    all_pred,all_target = [],[]
+    all_pred, all_target = [], []
     n = len(Topics_all) // batch_size

     for idx in range(n):
-        Topics = Topics_all[idx * batch_size : (idx + 1) * batch_size,:]
-        Resps = Resps_all[idx * batch_size : (idx + 1) * batch_size, :]
-        time_factor = time_factor_all[idx * batch_size : (idx + 1) * batch_size, :]
-        attempts_factor = attempts_factor_all[idx * batch_size : (idx + 1) * batch_size, :]
-        hints_factor = hints_factor_all[idx * batch_size : (idx + 1) * batch_size, :]
+        Topics = Topics_all[idx * batch_size:
+                            (idx + 1) * batch_size, :]
+        Resps = Resps_all[idx * batch_size:
+                          (idx + 1) * batch_size, :]
+        time_factor = time_factor_all[idx * batch_size:
+                                      (idx + 1) * batch_size, :]
+        attempts_factor = attempts_factor_all[idx * batch_size:
+                                              (idx + 1) * batch_size, :]
+        hints_factor = hints_factor_all[idx * batch_size:
+                                        (idx + 1) * batch_size, :]

         input_topics = torch.from_numpy(Topics).long().to(device)
         input_resps = torch.from_numpy(Resps).long().to(device)
         input_time_factor = torch.from_numpy(time_factor).float().to(device)
-        input_attempts_factor = torch.from_numpy(attempts_factor).float().to(device)
-        input_hints_factor = torch.from_numpy(hints_factor).float().to(device)
+        input_attempts_factor = torch.from_numpy(attempts_factor)\
+            .float().to(device)
+        input_hints_factor = torch.from_numpy(hints_factor)\
+            .float().to(device)

         with torch.no_grad():
-            y_pred = recurrent(input_topics,input_resps, input_time_factor,input_attempts_factor,input_hints_factor)
-
+            y_pred = recurrent(input_topics, input_resps, input_time_factor,
+                               input_attempts_factor, input_hints_factor)
+
         mask = input_topics[:, 1:] > 0
         masked_pred = y_pred[:, 1:][mask]
         masked_truth = input_resps[:, 1:][mask]
@@ -116,41 +132,52 @@ def test_one_epoch(recurrent,batch_size,Topics_all, Resps_all,time_factor_all,at
     all_pred = np.concatenate(all_pred, axis=0)
     all_target = np.concatenate(all_target, axis=0)
-    
+
     loss = binary_entropy(all_target, all_pred)
     auc = compute_auc(all_target, all_pred)
-    rmse = mean_squared_error(all_target, all_pred,squared = False)
+    rmse = mean_squared_error(all_target, all_pred, squared=False)
     acc = compute_accuracy(all_target, all_pred)
-    
-    return loss,auc,acc,rmse
-    
+
+    return loss, auc, acc, rmse
+
+
 class LBKT(KTM):
-    def __init__(self,num_topics,dim_tp, num_resps,num_units, dropout,dim_hidden,memory_size,BATCH_SIZE,q_matrix):
-        super(LBKT,self).__init__()
+    def __init__(self, num_topics, dim_tp, num_resps, num_units,
+                 dropout, dim_hidden, memory_size, BATCH_SIZE, q_matrix):
+        super(LBKT, self).__init__()
         q_matrix = torch.from_numpy(q_matrix).float().to(device)
-        self.recurrent = Recurrent(num_topics,dim_tp, num_resps,num_units, dropout,dim_hidden,memory_size,BATCH_SIZE,q_matrix).to(device)
+        self.recurrent = Recurrent(num_topics, dim_tp, num_resps, num_units,
+                                   dropout, dim_hidden, memory_size,
+                                   BATCH_SIZE, q_matrix).to(device)
         self.batch_size = BATCH_SIZE

-    def train(self, train_data, test_data, epoch: int, lr, lr_decay_step=1, lr_decay_rate=0.5) -> ...:
-
-        optimizer = torch.optim.Adam(self.recurrent.parameters(), lr=lr, eps=1e-8, betas=(0.1, 0.999), weight_decay=1e-6)
-        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, lr_decay_step, gamma=lr_decay_rate)
+
+    def train(self, train_data, test_data, epoch: int,
+              lr, lr_decay_step=1, lr_decay_rate=0.5) -> ...:
+        optimizer = torch.optim.Adam(self.recurrent.parameters(), lr=lr,
+                                     eps=1e-8, betas=(0.1, 0.999),
+                                     weight_decay=1e-6)
+        scheduler = torch.optim.lr_scheduler.StepLR(
+            optimizer, lr_decay_step, gamma=lr_decay_rate)
         criterion = nn.BCELoss(reduction='none')
-        
+
         best_test_auc = 0
         for idx in range(epoch):
-            train_loss, train_auc, train_acc = train_one_epoch(self.recurrent, optimizer,criterion,self.batch_size,*train_data)
+            train_loss, _, _ = train_one_epoch(self.recurrent,
+                                               optimizer, criterion,
+                                               self.batch_size, *train_data)
             print("[Epoch %d] LogisticLoss: %.6f" % (idx, train_loss))
             scheduler.step()
             if test_data is not None:
-                valid_loss, valid_auc, valid_acc,valid_rmse = self.eval(test_data)
-                print("[Epoch %d] auc: %.6f, accuracy: %.6f, rmse: %.6f" % (idx, valid_auc, valid_acc,valid_rmse))
+                _, valid_auc, valid_acc, valid_rmse = self.eval(test_data)
+                print("[Epoch %d] auc: %.6f, accuracy: %.6f, rmse: %.6f" % (
+                    idx, valid_auc, valid_acc, valid_rmse))
                 if valid_auc > best_test_auc:
                     best_test_auc = valid_auc
         return best_test_auc

     def eval(self, test_data) -> ...:
         self.recurrent.eval()
-        return test_one_epoch(self.recurrent, self.batch_size,*test_data)
+        return test_one_epoch(self.recurrent, self.batch_size, *test_data)

     def save(self, filepath) -> ...:
@@ -159,4 +186,4 @@ def save(self, filepath) -> ...:

     def load(self, filepath) -> ...:
         self.recurrent.load_state_dict(torch.load(filepath))
-        logging.info("load parameters from %s" % filepath)
\ No newline at end of file
+        logging.info("load parameters from %s" % filepath)
diff --git a/EduKTM/LBKT/__init__.py b/EduKTM/LBKT/__init__.py
index 730742c..dd9cb65 100644
--- a/EduKTM/LBKT/__init__.py
+++ b/EduKTM/LBKT/__init__.py
@@ -1,4 +1,4 @@
 # coding: utf-8
 # 2023/11/21 @ xubihan

-from .LBKT import LBKT
\ No newline at end of file
+from .LBKT import LBKT
diff --git a/EduKTM/LBKT/model.py b/EduKTM/LBKT/model.py
index aacb963..580906d 100644
--- a/EduKTM/LBKT/model.py
+++ b/EduKTM/LBKT/model.py
@@ -3,14 +3,13 @@
 import torch
 import torch.nn as nn
-import numpy as np
-import torch.nn.functional as F

 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

+
 class Layer1(nn.Module):
-    def __init__(self,num_units,d = 10,k = 0.3,b = 0.3,name = 'lb'):
-        super(Layer1,self).__init__()
+    def __init__(self, num_units, d=10, k=0.3, b=0.3, name='lb'):
+        super(Layer1, self).__init__()
         self.weight = nn.Parameter(torch.Tensor(2 * num_units, num_units))
         self.bias = nn.Parameter(torch.zeros(1, num_units))
@@ -20,8 +19,8 @@ def __init__(self,num_units,d = 10,k = 0.3,b = 0.3,name = 'lb'):
         self.d = d
         self.k = k
         self.b = b
-        
-    def forward(self,factor,interact_emb,h):
+
+    def forward(self, factor, interact_emb, h):
         k = self.k
         d = self.d
         b = self.b
@@ -30,22 +29,23 @@ def forward(self,factor,interact_emb,h):
         w = torch.cat([h, interact_emb], -1).matmul(self.weight) + self.bias
-        w = F.sigmoid(w * gate)
-
+        w = nn.Sigmoid()(w * gate)
         return w

+
 class LBKTcell(nn.Module):
-    def __init__(self,num_units,memory_size,dim_tp,dropout = 0.2,name = 'lbktcell'):
-        super(LBKTcell,self).__init__()
+    def __init__(self, num_units, memory_size, dim_tp,
+                 dropout=0.2, name='lbktcell'):
+        super(LBKTcell, self).__init__()
         self.num_units = num_units
         self.memory_size = memory_size
         self.dim_tp = dim_tp
-
         self.r = 4
         self.factor_dim = 50
-        self.time_gain = Layer1(self.num_units,name = 'time_gain')
-        self.attempt_gain = Layer1(self.num_units,name = 'attempt_gain')
-        self.hint_gain = Layer1(self.num_units,name = 'hint_gain')
+        self.time_gain = Layer1(self.num_units, name='time_gain')
+        self.attempt_gain = Layer1(self.num_units, name='attempt_gain')
+        self.hint_gain = Layer1(self.num_units, name='hint_gain')

         self.time_weight = nn.Parameter(torch.Tensor(self.r, num_units + 1, num_units))
         nn.init.xavier_normal_(self.time_weight)
@@ -58,63 +58,63 @@ def __init__(self,num_units,memory_size,dim_tp,dropout = 0.2,name = 'lbktcell'):
         self.Wf = nn.Parameter(torch.Tensor(1, self.r))
         nn.init.xavier_normal_(self.Wf)
-        
+
         self.bias = nn.Parameter(torch.Tensor(1, num_units))
         nn.init.xavier_normal_(self.bias)
-
         self.gate3 = nn.Linear(2 * num_units + 3 * self.factor_dim, num_units)
         torch.nn.init.xavier_normal_(self.gate3.weight)
-        
+
         self.dropout = nn.Dropout(dropout)
         self.output_layer = nn.Linear(dim_tp + num_units, num_units)
         torch.nn.init.xavier_normal_(self.output_layer.weight)
-
-
-    def forward(self, interact_emb, correlation_weight, topic_emb, time_factor, attempt_factor, hint_factor, h_pre):
-
-        h_pre_tilde = torch.squeeze(torch.bmm(correlation_weight.unsqueeze(1), h_pre), 1)  #bs *1 * memory_size , bs * memory_size * d_k
-        #predict performance
-        preds = torch.sum(F.sigmoid(self.output_layer(torch.cat([h_pre_tilde,topic_emb],-1))),-1) / self.num_units  #bs
-        #characterize each behavior's effect
-        time_gain = self.time_gain(time_factor,interact_emb,h_pre_tilde)
-        attempt_gain = self.attempt_gain(attempt_factor,interact_emb,h_pre_tilde)
-        hint_gain = self.hint_gain(hint_factor,interact_emb,h_pre_tilde)
-
-        #capture the dependency among different behaviors
-        pad = torch.ones_like(time_factor)  #bs * 1
-        time_gain1 = torch.cat([time_gain,pad],-1)  #bs * num_units + 1
-        attempt_gain1 = torch.cat([attempt_gain,pad],-1)
-        hint_gain1 = torch.cat([hint_gain,pad],-1)
-        fusion_time = torch.matmul(time_gain1,self.time_weight)  #bs * r *num_units: bs * num_units + 1 ,r * num_units + 1 *num_units
-        fusion_attempt = torch.matmul(attempt_gain1,self.attempt_weight)
-        fusion_hint = torch.matmul(hint_gain1,self.hint_weight)
+        self.sig = nn.Sigmoid()
+
+    def forward(self, interact_emb, correlation_weight, topic_emb,
+                time_factor, attempt_factor, hint_factor, h_pre):
+        # bs *1 * memory_size , bs * memory_size * d_k
+        h_pre_tilde = torch.squeeze(torch.bmm(correlation_weight.unsqueeze(1), h_pre), 1)
+        # predict performance
+        preds = torch.sum(self.sig(self.output_layer(torch.cat([h_pre_tilde, topic_emb], -1))),
+                          -1) / self.num_units  # bs
+
+        # characterize each behavior's effect
+        time_gain = self.time_gain(time_factor, interact_emb, h_pre_tilde)
+        attempt_gain = self.attempt_gain(attempt_factor, interact_emb, h_pre_tilde)
+        hint_gain = self.hint_gain(hint_factor, interact_emb, h_pre_tilde)
+
+        # capture the dependency among different behaviors
+        pad = torch.ones_like(time_factor)  # bs * 1
+        time_gain1 = torch.cat([time_gain, pad], -1)  # bs * num_units + 1
+        attempt_gain1 = torch.cat([attempt_gain, pad], -1)
+        hint_gain1 = torch.cat([hint_gain, pad], -1)
+        # bs * r *num_units: bs * num_units + 1 ,r * num_units + 1 *num_units
+        fusion_time = torch.matmul(time_gain1, self.time_weight)
+        fusion_attempt = torch.matmul(attempt_gain1, self.attempt_weight)
+        fusion_hint = torch.matmul(hint_gain1, self.hint_weight)
         fusion_all = fusion_time * fusion_attempt * fusion_hint
-
-        fusion_all = torch.matmul(self.Wf, fusion_all.permute(1,0,2)).squeeze(1) + self.bias  #1 * r, bs * r * num_units -> bs * 1 * num_units -> bs * num_units
+        # 1 * r, bs * r * num_units -> bs * 1 * num_units -> bs * num_units
+        fusion_all = torch.matmul(self.Wf, fusion_all.permute(1, 0, 2)).squeeze(1) + self.bias
         learning_gain = torch.relu(fusion_all)
-        LG = torch.matmul(correlation_weight.unsqueeze(-1),learning_gain.unsqueeze(1))  #bs * memory_size * 1, bs * 1 *num_units -> bs * memory_size * num_units
-
-        #forget effect
-        forget_gate = self.gate3(torch.cat([h_pre,  #bs * memory_size * num_units
-                    interact_emb.unsqueeze(1).repeat(1, self.memory_size,1),
-                    time_factor.unsqueeze(1).repeat(1, self.memory_size, self.factor_dim),
-                    attempt_factor.unsqueeze(1).repeat(1, self.memory_size, self.factor_dim),
-                    hint_factor.unsqueeze(1).repeat(1, self.memory_size, self.factor_dim),
-                    ],-1))
-
+        LG = torch.matmul(correlation_weight.unsqueeze(-1), learning_gain.unsqueeze(1))
+
+        # forget effect
+        forget_gate = self.gate3(torch.cat([h_pre, interact_emb.unsqueeze(1).repeat(1, self.memory_size, 1),
+                                            time_factor.unsqueeze(1).repeat(1, self.memory_size, self.factor_dim),
+                                            attempt_factor.unsqueeze(1).repeat(1, self.memory_size, self.factor_dim),
+                                            hint_factor.unsqueeze(1).repeat(1, self.memory_size, self.factor_dim)], -1))
         LG = self.dropout(LG)
-
-        h = h_pre * F.sigmoid(forget_gate) + LG
+        h = h_pre * self.sig(forget_gate) + LG

-        return preds,h
+        return preds, h


 class Recurrent(nn.Module):
-    def __init__(self, num_topics, dim_tp, num_resps, num_units, dropout,dim_hidden,memory_size,batch_size,q_matrix):
+    def __init__(self, num_topics, dim_tp, num_resps, num_units, dropout,
+                 dim_hidden, memory_size, batch_size, q_matrix):
         super(Recurrent, self).__init__()
-        
+
         self.embedding_topic = nn.Embedding(num_topics + 10, dim_tp)
         torch.nn.init.xavier_normal_(self.embedding_topic.weight)
@@ -125,35 +125,35 @@ def __init__(self, num_topics, dim_tp, num_resps, num_units, dropout,dim_hidden,
         self.num_units = num_units
         self.dim_tp = dim_tp
         self.q_matrix = q_matrix
-        
+
         self.input_layer = nn.Linear(dim_tp + dim_hidden, num_units)
         torch.nn.init.xavier_normal_(self.input_layer.weight)
-
-
-        self.lbkt_cell = LBKTcell(num_units,memory_size,dim_tp,dropout = dropout,name = 'lbkt')
-
-        self.init_h = nn.Parameter(torch.Tensor(memory_size, num_units))
-        nn.init.xavier_normal_(self.init_h)
+        self.lbkt_cell = LBKTcell(num_units, memory_size,
+                                  dim_tp, dropout=dropout, name='lbkt')
+        self.init_h = nn.Parameter(torch.Tensor(memory_size, num_units))
+        nn.init.xavier_normal_(self.init_h)

-    def forward(self,topics, resps, time_factor,attempt_factor,hint_factor):
+    def forward(self, topics, resps, time_factor, attempt_factor, hint_factor):
         batch_size, seq_len = topics.size(0), topics.size(1)
         topic_emb = self.embedding_topic(topics)
         resps_emb = self.embedding_resps(resps)
         correlation_weight = self.q_matrix[topics]
-        acts_emb = torch.relu(self.input_layer(torch.cat([topic_emb,resps_emb],-1)))  #bs * seq_len * num_units
-        
+        acts_emb = torch.relu(self.input_layer(torch.cat([topic_emb, resps_emb], -1)))
+
         time_factor = time_factor.unsqueeze(-1)
         attempt_factor = attempt_factor.unsqueeze(-1)
         hint_factor = hint_factor.unsqueeze(-1)
-        
-        h_init = self.init_h.unsqueeze(0).repeat(batch_size,1,1)
+
+        h_init = self.init_h.unsqueeze(0).repeat(batch_size, 1, 1)
         h_pre = h_init
         preds = torch.zeros(batch_size, seq_len).to(device)
         for t in range(0, seq_len):
-            pred, h = self.lbkt_cell(acts_emb[:,t],correlation_weight[:,t],topic_emb[:,t],time_factor[:,t],attempt_factor[:,t],hint_factor[:,t],h_pre)
+            pred, h = self.lbkt_cell(acts_emb[:, t], correlation_weight[:, t],
+                                     topic_emb[:, t], time_factor[:, t],
+                                     attempt_factor[:, t], hint_factor[:, t], h_pre)
             h_pre = h
             preds[:, t] = pred
diff --git a/examples/LBKT/LBKT.py b/examples/LBKT/LBKT.py
index 966db09..42cfe57 100644
--- a/examples/LBKT/LBKT.py
+++ b/examples/LBKT/LBKT.py
@@ -1,12 +1,10 @@
 # coding: utf-8
 # 2023/11/21 @ xubihan
-
 import numpy as np
 from load_data import DATA
-
-import sys
-sys.path.append('../..')
 from EduKTM import LBKT
+import logging
+

 def generate_q_matrix(path, n_skill, n_problem, gamma=0):
     with open(path, 'r', encoding='utf-8') as f:
@@ -22,12 +20,11 @@ def generate_q_matrix(path, n_skill, n_problem, gamma=0):
 memory_size = n_question + 1
 n_exercises = 17751
-
 seqlen = 100
 dim_tp = 128
 num_resps = 2
 num_units = 128
-dropout = 0.2 
+dropout = 0.2
 dim_hidden = 50
 batch_size = 8
 q_gamma = 0.1
@@ -42,13 +39,13 @@ def generate_q_matrix(path, n_skill, n_problem, gamma=0):
     q_gamma
 )

-import logging
 logging.getLogger().setLevel(logging.INFO)

-lbkt = LBKT(n_exercises,dim_tp, num_resps,num_units, dropout,dim_hidden,memory_size,batch_size,q_matrix)
+lbkt = LBKT(n_exercises, dim_tp, num_resps, num_units, dropout,
+            dim_hidden, memory_size, batch_size, q_matrix)
 lbkt.train(train_data, test_data, epoch=2, lr=0.001)
 lbkt.save("lbkt.params")
 lbkt.load("lbkt.params")
 _, auc, accuracy, rmse = lbkt.eval(test_data)
-print("auc: %.6f, accuracy: %.6f, rmse: %.6f" % (auc, accuracy, rmse))
\ No newline at end of file
+print("auc: %.6f, accuracy: %.6f, rmse: %.6f" % (auc, accuracy, rmse))
diff --git a/examples/LBKT/data_preprocess.ipynb b/examples/LBKT/data_preprocess.ipynb
index cc7b345..42be773 100644
--- a/examples/LBKT/data_preprocess.ipynb
+++ b/examples/LBKT/data_preprocess.ipynb
@@ -1,36 +1,39 @@
 {
- "metadata": {
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.0-final"
-  },
-  "orig_nbformat": 2,
-  "kernelspec": {
-   "name": "python3",
-   "display_name": "Python 3.7.0 64-bit ('torch': conda)",
-   "metadata": {
-    "interpreter": {
-     "hash": "5cf61c3c4f1cc539f64af7db0506ad5641966e8e71ffdd31dff9860ecf37ab71"
-    }
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2,
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "downloader, INFO http://base.ustc.edu.cn/data/ASSISTment/2009_skill_builder_data_corrected.zip is saved as ../../data/2009_skill_builder_data_corrected.zip\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Downloading ../../data/2009_skill_builder_data_corrected.zip 100.00%: 8.66MB | 8.66MB"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "downloader, INFO ../../data/2009_skill_builder_data_corrected.zip is unzip to ../../data/2009_skill_builder_data_corrected\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
   "source": [
    "from EduData import get_data\n",
    "import os\n",
@@ -43,7 +46,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import tqdm\n",
    "\n",
    "data = pd.read_csv(\n",
-    "    file_name ,\n",
-    "    usecols=['user_id', 'problem_id', 'skill_id', 'attempt_count', 'hint_count', 'correct','ms_first_response']\n",
+    "    file_name,\n",
+    "    usecols=['user_id', 'problem_id', 'skill_id', 'attempt_count', 'hint_count', 'correct','ms_first_response'], encoding='utf-8'\n",
    ").dropna(subset=['skill_id', 'problem_id'])\n",
+    "data = data[data['ms_first_response'] > 0]\n",
    "data['time_first_res'] = data['ms_first_response'] / 1000"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "number of users: 4163\n",
+      "number of skills: 123\n",
+      "number of problems: 17716\n"
+     ]
+    }
+   ],
   "source": [
    "skills = data.skill_id.unique().tolist()\n",
    "problems = data.problem_id.unique().tolist()\n",
@@ -79,7 +93,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -93,9 +107,18 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "finish merging\n",
+      "Finish processing time features \n"
+     ]
+    }
+   ],
   "source": [
    "from sklearn.model_selection import train_test_split, KFold\n",
    "from scipy.stats import norm\n",
@@ -122,9 +145,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Finish processing attempt features \n"
+     ]
+    }
+   ],
   "source": [
    "# compute the mean of the attempts\n",
    "question_attempt_stats = train_data.groupby('problem_id')['attempt_count'].mean().reset_index()\n",
@@ -139,9 +170,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Finish processing hint features \n"
+     ]
+    }
+   ],
   "source": [
    "# compute the mean of the hints\n",
    "question_hint_stats = train_data.groupby('problem_id')['hint_count'].agg('mean').reset_index()\n",
@@ -157,9 +196,18 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 23,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "parse student sequence:\t: 100%|█████████████████████████████████████████████████████████████████████████████████████████| 3330/3330 [00:01<00:00, 1689.24it/s]\n",
+      "parse student sequence:\t: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 833/833 [00:00<00:00, 1659.13it/s]\n"
+     ]
+    }
+   ],
   "source": [
    "def parse_all_seq(students):\n",
    "    all_sequences = []\n",
    "    time_factor = seq.time_factor.tolist()\n",
    "    attempt_factor = seq.attempt_factor.tolist()\n",
    "    hint_factor = seq.hint_factor.tolist()\n",
    "\n",
-    "    return s, a, p, time_factor,attempt_factor,hint_factor\n",
+    "    return s, a, p, time_factor, attempt_factor, hint_factor\n",
    "\n",
    "\n",
-    "train_data = np.array(parse_all_seq(train_student_ids))\n",
-    "test_data = np.array(parse_all_seq(test_student_ids))"
+    "train_data = np.array(parse_all_seq(train_student_ids), dtype=object)\n",
+    "test_data = np.array(parse_all_seq(test_student_ids), dtype=object)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 24,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "write data into file: ../../data/2009_skill_builder_data_corrected/train.txt: 100%|█████████████████████████████████████| 3330/3330 [00:00<00:00, 6110.41it/s]\n",
+      "write data into file: ../../data/2009_skill_builder_data_corrected/test.txt: 100%|████████████████████████████████████████| 833/833 [00:00<00:00, 5741.12it/s]\n"
+     ]
+    }
+   ],
   "source": [
    "def sequences2l(sequences, trg_path):\n",
    "    with open(trg_path, 'w', encoding='utf8') as f:\n",
    "sequences2l(test_data, data_path + 'test.txt')"
   ]
  }
- ]
-}
\ No newline at end of file
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/examples/LBKT/load_data.py b/examples/LBKT/load_data.py
index 9977217..ed1a0ab 100644
--- a/examples/LBKT/load_data.py
+++ b/examples/LBKT/load_data.py
@@ -105,4 +105,5 @@ def load_data(self, path):
                 dat = hint_data[j]
                 hint_dataArray[j, :len(dat)] = dat

-        return e_dataArray, a_dataArray, time_dataArray, attempt_dataArray, hint_dataArray
\ No newline at end of file
+        return e_dataArray, a_dataArray, time_dataArray, \
+            attempt_dataArray, hint_dataArray
diff --git a/tests/lbkt/conftest.py b/tests/lbkt/conftest.py
index 2ef32e9..80ee72d 100644
--- a/tests/lbkt/conftest.py
+++ b/tests/lbkt/conftest.py
@@ -10,7 +10,6 @@ def conf():
     batch_size = 16
     n_question = 8
     n_exercise = 32
-
     q_matrix = np.zeros((n_exercise + 1, n_question + 1)) + 0.1
     for row_id in range(n_exercise + 1):
         rand_idx = random.randint(1, n_question)
@@ -18,7 +17,7 @@
     return n_question, n_exercise, q_matrix, batch_size


-@pytest.fixture(scope="package", params=[0, 8])
+@pytest.fixture(scope="package", params=[0, 8, 16])
 def data(conf, request):
     n_question, n_exercise, q_matrix, batch_size = conf
     batch_size += request.param
@@ -33,18 +32,18 @@ def data(conf, request):
         for _ in range(batch_size)
     ]
     time = [
-        [random.uniform(0, 1) for _ in range(seqlen)] 
+        [random.uniform(0, 1) for _ in range(seqlen)]
         for _ in range(batch_size)
     ]
     attempt = [
-        [random.uniform(0, 1) for _ in range(seqlen)] 
+        [random.uniform(0, 1) for _ in range(seqlen)]
         for _ in range(batch_size)
     ]
     hint = [
-        [random.uniform(0, 1) for _ in range(seqlen)] 
+        [random.uniform(0, 1) for _ in range(seqlen)]
         for _ in range(batch_size)
     ]
-
-    data = (np.array(e), np.array(a), np.array(time), np.array(attempt),np.array(hint))
+    data = (np.array(e), np.array(a), np.array(time),
+            np.array(attempt), np.array(hint))

-    return data
\ No newline at end of file
+    return data
diff --git a/tests/lbkt/test_lbkt.py b/tests/lbkt/test_lbkt.py
index 47163d6..0829f77 100644
--- a/tests/lbkt/test_lbkt.py
+++ b/tests/lbkt/test_lbkt.py
@@ -13,8 +13,9 @@ def test_train(data, conf, tmp_path):
     dropout = 0.2
     memory_size = n_question + 1

-    lbkt = LBKT(n_exercise,dim_tp, num_resps,num_units, dropout,dim_hidden,memory_size,batch_size,q_matrix)
-    lbkt.train(data, test_data=data, epoch=2,lr=0.005)
+    lbkt = LBKT(n_exercise, dim_tp, num_resps, num_units,
+                dropout, dim_hidden, memory_size, batch_size, q_matrix)
+    lbkt.train(data, test_data=data, epoch=2, lr=0.001)
     filepath = tmp_path / "lbkt.params"
     lbkt.save(filepath)
-    lbkt.load(filepath)
\ No newline at end of file
+    lbkt.load(filepath)