diff --git a/README.md b/README.md
index 67baf6f..6a86fb9 100644
--- a/README.md
+++ b/README.md
@@ -110,6 +110,8 @@ https://github.com/user-attachments/assets/e5933d8e-3c8a-400e-870b-4e452f5321d9
 **D‑FINE‑X** | Objects365+COCO | **59.3** | 62M | 12.89ms | 202 | [yml](./configs/dfine/objects365/dfine_hgnetv2_x_obj2coco.yml) | [59.3](https://github.com/Peterande/storage/releases/download/dfinev1.0/dfine_x_obj2coco.pth) | [url](https://raw.githubusercontent.com/Peterande/storage/refs/heads/master/logs/obj2coco/dfine_x_obj2coco_log.txt)

 **We highly recommend that you use the Objects365 pre-trained model for fine-tuning:**
+
+⚠️ **Important**: Objects365 pre-training is mainly beneficial for complex scene understanding. If your categories are very simple, it may lead to overfitting and suboptimal performance.
 🔥 Pretrained Models on Objects365 (Best generalization)

diff --git a/README_cn.md b/README_cn.md
index e5d2d62..a22821c 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -99,6 +99,8 @@ https://github.com/user-attachments/assets/e5933d8e-3c8a-400e-870b-4e452f5321d9

 **我们强烈推荐您使用 Objects365 预训练模型进行微调:**

+⚠️ 重要提醒:通常这种预训练模型对复杂场景的理解非常有用。如果您的类别非常简单,请注意,这可能会导致过拟合和次优性能。
+
 🔥 Objects365 预训练模型(泛化性最好)

 | 模型 | 数据集 | AP5000 | 参数量 | 时延 (ms) | GFLOPs | 配置 | 权重 | 日志 |

diff --git a/README_ja.md b/README_ja.md
index 0126f16..90c22ae 100644
--- a/README_ja.md
+++ b/README_ja.md
@@ -114,6 +114,8 @@ https://github.com/user-attachments/assets/e5933d8e-3c8a-400e-870b-4e452f5321d9

 **微調整のために Objects365 の事前学習モデルを使用することを強くお勧めします:**

+⚠️ 重要なお知らせ:このプリトレインモデルは複雑なシーンの理解に有益ですが、カテゴリが非常に単純な場合、過学習や最適ではない性能につながる可能性がありますので、ご注意ください。
+
 🔥 Objects365で事前トレーニングされたモデル(最良の汎化性能)

 | モデル | データセット | AP5000 | パラメータ数 | レイテンシ | GFLOPs | config | checkpoint | logs |

diff --git a/configs/dfine/custom/dfine_hgnetv2_n_custom.yml b/configs/dfine/custom/dfine_hgnetv2_n_custom.yml
new file mode 100644
index 0000000..d40b488
--- /dev/null
+++ b/configs/dfine/custom/dfine_hgnetv2_n_custom.yml
@@ -0,0 +1,82 @@
+__include__: [
+  '../../dataset/custom_detection.yml',
+  '../../runtime.yml',
+  '../include/dataloader.yml',
+  '../include/optimizer.yml',
+  '../include/dfine_hgnetv2.yml',
+]
+
+output_dir: ./output/dfine_hgnetv2_n_custom
+
+
+DFINE:
+  backbone: HGNetv2
+
+HGNetv2:
+  name: 'B0'
+  return_idx: [2, 3]
+  freeze_at: -1
+  freeze_norm: False
+  use_lab: True
+
+
+HybridEncoder:
+  in_channels: [512, 1024]
+  feat_strides: [16, 32]
+
+  # intra
+  hidden_dim: 128
+  use_encoder_idx: [1]
+  dim_feedforward: 512
+
+  # cross
+  expansion: 0.34
+  depth_mult: 0.5
+
+
+DFINETransformer:
+  feat_channels: [128, 128]
+  feat_strides: [16, 32]
+  hidden_dim: 128
+  dim_feedforward: 512
+  num_levels: 2
+
+  num_layers: 3
+  eval_idx: -1
+
+  num_points: [6, 6]
+
+optimizer:
+  type: AdamW
+  params:
+    -
+      params: '^(?=.*backbone)(?!.*norm|bn).*$'
+      lr: 0.0004
+    -
+      params: '^(?=.*backbone)(?=.*norm|bn).*$'
+      lr: 0.0004
+      weight_decay: 0.
+    -
+      params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+      weight_decay: 0.
+
+  lr: 0.0008
+  betas: [0.9, 0.999]
+  weight_decay: 0.0001
+
+
+# Increase to search for the optimal ema
+epoches: 300
+train_dataloader:
+  total_batch_size: 128
+  dataset:
+    transforms:
+      policy:
+        epoch: 280
+  collate_fn:
+    stop_epoch: 280
+    ema_restart_decay: 0.9999
+    base_size_repeat: ~
+
+val_dataloader:
+  total_batch_size: 256
diff --git a/configs/dfine/custom/objects365/dfine_hgnetv2_l_obj2custom.yml b/configs/dfine/custom/objects365/dfine_hgnetv2_l_obj2custom.yml
index c6a2c21..c8ae894 100644
--- a/configs/dfine/custom/objects365/dfine_hgnetv2_l_obj2custom.yml
+++ b/configs/dfine/custom/objects365/dfine_hgnetv2_l_obj2custom.yml
@@ -1,9 +1,9 @@
 __include__: [
-  '../../dataset/custom_detection.yml',
-  '../../runtime.yml',
-  '../include/dataloader.yml',
-  '../include/optimizer.yml',
-  '../include/dfine_hgnetv2.yml',
+  '../../../dataset/custom_detection.yml',
+  '../../../runtime.yml',
+  '../../include/dataloader.yml',
+  '../../include/optimizer.yml',
+  '../../include/dfine_hgnetv2.yml',
 ]

 output_dir: ./output/dfine_hgnetv2_l_obj2custom
@@ -18,6 +18,7 @@ HGNetv2:
   freeze_stem_only: True
   freeze_at: 0
   freeze_norm: True
+  pretrained: False

 optimizer:
   type: AdamW
diff --git a/configs/dfine/custom/objects365/dfine_hgnetv2_m_obj2custom.yml b/configs/dfine/custom/objects365/dfine_hgnetv2_m_obj2custom.yml
index 78d655c..c1874d4 100644
--- a/configs/dfine/custom/objects365/dfine_hgnetv2_m_obj2custom.yml
+++ b/configs/dfine/custom/objects365/dfine_hgnetv2_m_obj2custom.yml
@@ -1,9 +1,9 @@
 __include__: [
-  '../../dataset/custom_detection.yml',
-  '../../runtime.yml',
-  '../include/dataloader.yml',
-  '../include/optimizer.yml',
-  '../include/dfine_hgnetv2.yml',
+  '../../../dataset/custom_detection.yml',
+  '../../../runtime.yml',
+  '../../include/dataloader.yml',
+  '../../include/optimizer.yml',
+  '../../include/dfine_hgnetv2.yml',
 ]

 output_dir: ./output/dfine_hgnetv2_m_obj2custom
@@ -18,6 +18,7 @@ HGNetv2:
   freeze_at: -1
   freeze_norm: False
   use_lab: True
+  pretrained: False

 DFINETransformer:
   num_layers: 4  # 5 6
diff --git a/configs/dfine/custom/objects365/dfine_hgnetv2_s_obj2custom.yml b/configs/dfine/custom/objects365/dfine_hgnetv2_s_obj2custom.yml
index b834eb1..dcd57c1 100644
--- a/configs/dfine/custom/objects365/dfine_hgnetv2_s_obj2custom.yml
+++ b/configs/dfine/custom/objects365/dfine_hgnetv2_s_obj2custom.yml
@@ -1,9 +1,9 @@
 __include__: [
-  '../../dataset/custom_detection.yml',
-  '../../runtime.yml',
-  '../include/dataloader.yml',
-  '../include/optimizer.yml',
-  '../include/dfine_hgnetv2.yml',
+  '../../../dataset/custom_detection.yml',
+  '../../../runtime.yml',
+  '../../include/dataloader.yml',
+  '../../include/optimizer.yml',
+  '../../include/dfine_hgnetv2.yml',
 ]

 output_dir: ./output/dfine_hgnetv2_s_obj2custom
@@ -18,6 +18,7 @@ HGNetv2:
   freeze_at: -1
   freeze_norm: False
   use_lab: True
+  pretrained: False

 DFINETransformer:
   num_layers: 3  # 4 5 6
diff --git a/configs/dfine/custom/objects365/dfine_hgnetv2_x_obj2custom.yml b/configs/dfine/custom/objects365/dfine_hgnetv2_x_obj2custom.yml
index e884e07..671329a 100644
--- a/configs/dfine/custom/objects365/dfine_hgnetv2_x_obj2custom.yml
+++ b/configs/dfine/custom/objects365/dfine_hgnetv2_x_obj2custom.yml
@@ -1,9 +1,9 @@
 __include__: [
-  '../../dataset/custom_detection.yml',
-  '../../runtime.yml',
-  '../include/dataloader.yml',
-  '../include/optimizer.yml',
-  '../include/dfine_hgnetv2.yml',
+  '../../../dataset/custom_detection.yml',
+  '../../../runtime.yml',
+  '../../include/dataloader.yml',
+  '../../include/optimizer.yml',
+  '../../include/dfine_hgnetv2.yml',
 ]

 output_dir: ./output/dfine_hgnetv2_x_obj2custom
@@ -18,6 +18,7 @@ HGNetv2:
   freeze_stem_only: True
   freeze_at: 0
   freeze_norm: True
+  pretrained: False

 HybridEncoder:
   # intra
diff --git a/configs/dfine/include/dataloader.yml b/configs/dfine/include/dataloader.yml
index ee0ff70..3f7c67c 100644
--- a/configs/dfine/include/dataloader.yml
+++ b/configs/dfine/include/dataloader.yml
@@ -24,7 +24,7 @@ train_dataloader:
       stop_epoch: 72  # epoch in [72, ~) stop `multiscales`

   shuffle: True
-  total_batch_size: 32  # total batch size equals to 32 s(4 * 8)
+  total_batch_size: 32  # total batch size equals to 32 (4 * 8)
   num_workers: 4

diff --git a/src/core/_config.py b/src/core/_config.py
index a393f41..e194b3f 100644
--- a/src/core/_config.py
+++ b/src/core/_config.py
@@ -151,7 +151,8 @@ def val_dataloader(self) -> DataLoader:
             num_workers=self.num_workers,
             drop_last=False,
             collate_fn=self.collate_fn,
-            shuffle=self.val_shuffle)
+            shuffle=self.val_shuffle,
+            persistent_workers=True)
         loader.shuffle = self.val_shuffle
         self._val_dataloader = loader

diff --git a/src/data/dataset/coco_dataset.py b/src/data/dataset/coco_dataset.py
index f8ce50a..83202e7 100644
--- a/src/data/dataset/coco_dataset.py
+++ b/src/data/dataset/coco_dataset.py
@@ -19,6 +19,7 @@

 torchvision.disable_beta_transforms_warning()
 faster_coco_eval.init_as_pycocotools()
+Image.MAX_IMAGE_PIXELS = None

 __all__ = ['CocoDetection']
@@ -50,7 +51,6 @@ def load_item(self, idx):

         if self.remap_mscoco_category:
             image, target = self.prepare(image, target, category2label=mscoco_category2label)
-            # image, target = self.prepare(image, target, category2label=self.category2label)
         else:
             image, target = self.prepare(image, target)
diff --git a/src/solver/det_solver.py b/src/solver/det_solver.py
index bae9cc0..8d4a9e1 100644
--- a/src/solver/det_solver.py
+++ b/src/solver/det_solver.py
@@ -44,7 +44,8 @@ def fit(self, ):
                     best_stat[k] = test_stats[k][0]
                     top1 = test_stats[k][0]
             print(f'best_stat: {best_stat}')
-
+
+        best_stat_print = best_stat.copy()
         start_time = time.time()
         start_epoch = self.last_epoch + 1
         for epoch in range(start_epoch, args.epoches):
@@ -109,13 +110,26 @@
                 else:
                     best_stat['epoch'] = epoch
                     best_stat[k] = test_stats[k][0]
-
+
+                if best_stat[k] > top1:
+                    best_stat_print['epoch'] = epoch
+                    top1 = best_stat[k]
+                    if self.output_dir:
+                        if epoch >= self.train_dataloader.collate_fn.stop_epoch:
+                            dist_utils.save_on_master(self.state_dict(), self.output_dir / 'best_stg2.pth')
+                        else:
+                            dist_utils.save_on_master(self.state_dict(), self.output_dir / 'best_stg1.pth')
+
+                best_stat_print[k] = max(best_stat[k], top1)
+                print(f'best_stat: {best_stat_print}')  # global best
+
                 if best_stat['epoch'] == epoch and self.output_dir:
                     if epoch >= self.train_dataloader.collate_fn.stop_epoch:
                         if test_stats[k][0] > top1:
                             top1 = test_stats[k][0]
                             dist_utils.save_on_master(self.state_dict(), self.output_dir / 'best_stg2.pth')
                     else:
+                        top1 = max(test_stats[k][0], top1)
                         dist_utils.save_on_master(self.state_dict(), self.output_dir / 'best_stg1.pth')

                 elif epoch >= self.train_dataloader.collate_fn.stop_epoch:
@@ -123,8 +137,7 @@
                     best_stat = {'epoch': -1, }
                     self.ema.decay -= 0.0001
                     self.load_resume_state(str(self.output_dir / 'best_stg1.pth'))
                     print(f'Refresh EMA at epoch {epoch} with decay {self.ema.decay}')
-
-            print(f'best_stat: {best_stat}')
+
             log_stats = {
                 **{f'train_{k}': v for k, v in train_stats.items()},
diff --git a/tools/benchmark/dataset.py b/tools/benchmark/dataset.py
index b2a4869..76fa649 100644
--- a/tools/benchmark/dataset.py
+++ b/tools/benchmark/dataset.py
@@ -13,6 +13,7 @@
 import torchvision.transforms as T
 import torchvision.transforms.functional as F

+Image.MAX_IMAGE_PIXELS = None

 class ToTensor(T.ToTensor):
     def __init__(self) -> None:
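A note on the optimizer block in the new `dfine_hgnetv2_n_custom.yml`: the `params` entries are regex lookaheads that route named parameters into per-group learning rates and weight decays. The sketch below is illustrative only — the parameter names are hypothetical, and it assumes that parameters matching no pattern fall back to the optimizer-level `lr`/`weight_decay` defaults.

```python
import re

# The three `params` patterns from dfine_hgnetv2_n_custom.yml: group 1 takes
# backbone weights (lr=0.0004), group 2 backbone norm params (lr=0.0004,
# weight_decay=0), group 3 encoder/decoder norm & bias params (weight_decay=0).
patterns = {
    'backbone weights': r'^(?=.*backbone)(?!.*norm|bn).*$',
    'backbone norm': r'^(?=.*backbone)(?=.*norm|bn).*$',
    'enc/dec norm+bias': r'^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$',
}

# Hypothetical parameter names, purely for illustration.
names = [
    'backbone.stem.conv.weight',         # -> backbone weights
    'backbone.stages.0.norm.weight',     # -> backbone norm
    'encoder.layers.0.norm1.bias',       # -> enc/dec norm+bias
    'decoder.layers.0.out_proj.weight',  # -> no match: optimizer defaults
]

for name in names:
    matched = [label for label, pat in patterns.items() if re.match(pat, name)]
    print(f'{name:40s} -> {matched or ["optimizer defaults (lr=0.0008, wd=0.0001)"]}')
```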
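Similarly, the two dataloader-related tweaks — `persistent_workers=True` in `src/core/_config.py` and `Image.MAX_IMAGE_PIXELS = None` in the dataset modules — are shown below in isolation, as a minimal sketch with a stand-in dataset rather than the repo's actual classes.

```python
import torch
from PIL import Image
from torch.utils.data import DataLoader, TensorDataset

# PIL refuses to decode images above a pixel limit as a decompression-bomb
# guard; setting the limit to None allows arbitrarily large dataset images.
# Only do this for image sources you trust.
Image.MAX_IMAGE_PIXELS = None

dataset = TensorDataset(torch.zeros(8, 3, 640, 640))  # stand-in dataset

# persistent_workers=True keeps worker processes alive across epochs instead
# of respawning them on every validation pass (requires num_workers > 0).
val_loader = DataLoader(
    dataset,
    batch_size=4,
    num_workers=2,
    shuffle=False,
    drop_last=False,
    persistent_workers=True,
)
```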
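Finally, the `det_solver.py` change fixes the best-checkpoint bookkeeping: `best_stg1.pth` tracks the best model before `collate_fn.stop_epoch` (while the stronger augmentation policy is still active) and `best_stg2.pth` the best after it. Distilled to its core — a sketch with simplified names, not the repo's exact code — the intended rule is:

```python
def update_best(epoch, score, top1, stop_epoch, save):
    """Stage-aware best-checkpoint rule (simplified sketch): keep one 'best'
    file per training stage and overwrite it only when the metric improves."""
    if score > top1:
        top1 = score
        save('best_stg2.pth' if epoch >= stop_epoch else 'best_stg1.pth')
    return top1
```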