I used your code with AMP FP16 from PyTorch 1.6. I get good accuracy on the validation set, but the reported training accuracy looks wrong. Do you have any suggestions on how to fix it? @xsacha @cavalleria. Thanks in advance.

This is my log:
```
batch inference time 0.09423589706420898
============================================================
Epoch 23/24 Batch 4000/5563 Training Loss 5.1602 (5.0847) Training Prec@1 44.824 (45.528) Training Prec@5 58.203 (57.886)
============================================================
Current lr 0.0007111800824550257
============================================================
Perform Evaluation on lfw,cfp_fp,agedb_30, and Save Checkpoints...
Epoch 23/24, Evaluation: lfw, Acc: 0.9964999999999999, Best_Threshold: 1.3989999999999998
Epoch 23/24, Evaluation: cfp_fp, Acc: 0.9687142857142856, Best_Threshold: 1.591
Epoch 23/24, Evaluation: agedb_30, Acc: 0.969, Best_Threshold: 1.546
============================================================
============================================================
```
I think `Training Prec@1` and `Training Prec@5` should be near 100. This is my training code:
```python
for inputs, labels in tqdm(iter(train_loader)):
    if LR_SCHEDULER == 'cosine':
        scheduler.step()
    # compute output
    start_time = time.time()
    inputs = inputs.cuda(cfg['GPU'], non_blocking=True)
    labels = labels.cuda(cfg['GPU'], non_blocking=True)
    # ================= FP16 ============================
    with autocast():
        features = backbone(inputs)
        outputs = head(features, labels)
        if cfg['MIXUP'] or cfg['CUTMIX']:
            lossx = mixup_criterion(loss, outputs, labels_a, labels_b, lam)
        else:
            lossx = loss(outputs, labels) if HEAD_NAME != 'CircleLoss' else loss(outputs).mean()
    end_time = time.time()
    duration = end_time - start_time
    if ((batch + 1) % DISP_FREQ == 0) and batch != 0:
        print("batch inference time", duration)

    # compute gradient and do SGD step
    optimizer.zero_grad()
    if USE_APEX:
        # with amp.scale_loss(lossx, optimizer) as scaled_loss:
        #     scaled_loss.backward()
        scaler.scale(lossx).backward()
        scaler.step(optimizer)
        scaler.update()
    else:
        lossx.backward()
        optimizer.step()

    # measure accuracy and record loss
    prec1, prec5 = accuracy(outputs.data, labels, topk=(1, 5)) if HEAD_NAME != 'CircleLoss' else accuracy(features.data, labels, topk=(1, 5))
    losses.update(lossx.data.item(), inputs.size(0))
    top1.update(prec1.data.item(), inputs.size(0))
    top5.update(prec5.data.item(), inputs.size(0))

    # display training loss & acc every DISP_FREQ
    if ((batch + 1) % DISP_FREQ == 0) or batch == 0:
        print("=" * 60)
        print('Epoch {}/{} Batch {}/{}\t'
              'Training Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Training Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
              'Training Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                  epoch + 1, cfg['NUM_EPOCH'], batch + 1, len(train_loader),
                  loss=losses, top1=top1, top5=top5))
        print("=" * 60)
```
And this is my head:

```python
class ArcFace(nn.Module):
    ...
    def forward(self, embbedings, label):
        embbedings = l2_norm(embbedings, axis=1)
        kernel_norm = l2_norm(self.kernel, axis=0)
        cos_theta = torch.mm(embbedings, kernel_norm).clamp(-1, 1)  # for numerical stability
        with torch.no_grad():
            origin_cos = cos_theta.clone()
        target_logit = cos_theta[torch.arange(0, embbedings.size(0)), label].view(-1, 1)
        sin_theta = torch.sqrt(1.0 - torch.pow(target_logit, 2))
        # print(embbedings.dtype, kernel_norm.dtype)
        cos_theta_m = target_logit * self.cos_m - sin_theta * self.sin_m  # cos(target + margin)
        cos_theta_m = cos_theta_m.type(cos_theta.dtype)
        # final_target_logit was not defined in the snippet as posted; the usual
        # hard-example handling (with self.theta and self.sinmm from the elided
        # __init__) is assumed here:
        final_target_logit = torch.where(target_logit > self.theta, cos_theta_m, target_logit - self.sinmm)
        cos_theta.scatter_(1, label.view(-1, 1).long(), final_target_logit)
        output = cos_theta * self.s
        return output
```
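For clarity on the margin line above: `cos_theta_m` is cos(theta + m) computed from the target logit via the angle-addition identity. A quick standalone check (the margin and logit values below are illustrative, not taken from my run):

```python
import math
import torch

# Verify the identity used in the head:
# cos(theta + m) = cos(theta) * cos(m) - sin(theta) * sin(m)
m = 0.5                                    # illustrative ArcFace margin
cos_m, sin_m = math.cos(m), math.sin(m)

target_logit = torch.tensor([0.8])         # illustrative cos(theta) of the true class
sin_theta = torch.sqrt(1.0 - target_logit ** 2)
cos_theta_m = target_logit * cos_m - sin_theta * sin_m

theta = torch.acos(target_logit)
print(cos_theta_m.item())                  # ~0.414
print(torch.cos(theta + m).item())         # ~0.414, matches
```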