From 03cc549ba8bfca228a804b75bb28b142e9703168 Mon Sep 17 00:00:00 2001
From: Adriano Santos
Date: Mon, 24 Aug 2020 16:41:10 -0400
Subject: [PATCH] Changing CustomDataParallel to DistributedDataParallel

---
 requirements.txt | 12 ++++++++++++
 train.py         | 11 ++++++++---
 utils/utils.py   | 25 -------------------------
 3 files changed, 20 insertions(+), 28 deletions(-)
 create mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000..ca387f146
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,12 @@
+setuptools>=41.0.0
+pycocotools
+scikit-build
+numpy
+opencv-python
+tqdm
+tensorboard
+tensorboardX
+pyyaml
+webcolors
+torch==1.4.0
+torchvision==0.5.0
\ No newline at end of file
diff --git a/train.py b/train.py
index 04133cb0c..961ea00d4 100644
--- a/train.py
+++ b/train.py
@@ -20,7 +20,8 @@
 from efficientdet.dataset import CocoDataset, Resizer, Normalizer, Augmenter, collater
 from efficientdet.loss import FocalLoss
 from utils.sync_batchnorm import patch_replication_callback
-from utils.utils import replace_w_sync_bn, CustomDataParallel, get_last_weights, init_weights, boolean_string
+from utils.utils import replace_w_sync_bn, get_last_weights, init_weights, boolean_string
+import torch.distributed as dist
 
 
 class Params:
@@ -181,9 +182,13 @@ def freeze_backbone(m):
     if params.num_gpus > 0:
         model = model.cuda()
         if params.num_gpus > 1:
-            model = CustomDataParallel(model, params.num_gpus)
+            # https://pytorch.org/tutorials/intermediate/dist_tuto.html
+            dist.init_process_group(backend='nccl', init_method='tcp://127.0.0.1:6666', world_size=1, rank=0)
+            model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=True)
             if use_sync_bn:
                 patch_replication_callback(model)
+
+    torch.backends.cudnn.benchmark = True
 
     if opt.optim == 'adamw':
         optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
@@ -315,7 +320,7 @@ def freeze_backbone(m):
 
 
 def save_checkpoint(model, name):
-    if isinstance(model, CustomDataParallel):
+    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
         torch.save(model.module.model.state_dict(), os.path.join(opt.saved_path, name))
     else:
         torch.save(model.model.state_dict(), os.path.join(opt.saved_path, name))
diff --git a/utils/utils.py b/utils/utils.py
index 0b69340a6..24f25f239 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -181,31 +181,6 @@ def replace_w_sync_bn(m):
     for var_name, children in m.named_children():
         replace_w_sync_bn(children)
 
-
-class CustomDataParallel(nn.DataParallel):
-    """
-    force splitting data to all gpus instead of sending all data to cuda:0 and then moving around.
-    """
-
-    def __init__(self, module, num_gpus):
-        super().__init__(module)
-        self.num_gpus = num_gpus
-
-    def scatter(self, inputs, kwargs, device_ids):
-        # More like scatter and data prep at the same time. The point is we prep the data in such a way
-        # that no scatter is necessary, and there's no need to shuffle stuff around different GPUs.
-        devices = ['cuda:' + str(x) for x in range(self.num_gpus)]
-        splits = inputs[0].shape[0] // self.num_gpus
-
-        if splits == 0:
-            raise Exception('Batchsize must be greater than num_gpus.')
-
-        return [(inputs[0][splits * device_idx: splits * (device_idx + 1)].to(f'cuda:{device_idx}', non_blocking=True),
-                 inputs[1][splits * device_idx: splits * (device_idx + 1)].to(f'cuda:{device_idx}', non_blocking=True))
-                for device_idx in range(len(devices))], \
-               [kwargs] * len(devices)
-
-
 def get_last_weights(weights_path):
     weights_path = glob(weights_path + f'/*.pth')
     weights_path = sorted(weights_path,
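Note on multi-process use (a minimal sketch, not part of the patch above): the new code calls dist.init_process_group with world_size=1 and rank=0, i.e. a single process drives all GPUs. DistributedDataParallel is more commonly run with one process per GPU; under that launch model (e.g. python -m torch.distributed.launch --use_env --nproc_per_node=N train.py), initialization would typically read the rank and world size from the environment, roughly as below. The helper name setup_distributed and the environment-variable defaults are illustrative assumptions, not code from this repository.

import os

import torch
import torch.distributed as dist


def setup_distributed():
    # Assumed launch model (hypothetical helper): one process per GPU, with
    # RANK, WORLD_SIZE and LOCAL_RANK set by the launcher
    # (torch.distributed.launch --use_env, which also sets MASTER_ADDR/MASTER_PORT).
    rank = int(os.environ.get('RANK', 0))
    world_size = int(os.environ.get('WORLD_SIZE', 1))
    local_rank = int(os.environ.get('LOCAL_RANK', 0))

    torch.cuda.set_device(local_rank)  # pin this process to its own GPU
    dist.init_process_group(backend='nccl',        # NCCL backend for GPU training
                            init_method='env://',  # read rendezvous info from env vars
                            world_size=world_size,
                            rank=rank)
    return local_rank

# Usage, mirroring the shape of the patched train.py:
# local_rank = setup_distributed()
# model = model.cuda(local_rank)
# model = torch.nn.parallel.DistributedDataParallel(
#     model, device_ids=[local_rank], find_unused_parameters=True)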