@@ -232,13 +232,11 @@ def hook_with_zero_step(
232232 )
233233 ddp_ref = weakref .ref (ddp )
234234
235- # NOTE: Gloo may hang with this overlapping approach, so we require
236- # NCCL/HCCL/XCCL backend for now; see https://github.com/pytorch/pytorch/issues/62300
235+ # NOTE: Gloo may hang with this overlapping approach; see https://github.com/pytorch/pytorch/issues/62300
237236 pg = dist .get_backend (ddp_ref ().process_group ) # type: ignore[union-attr]
238- if ( pg != dist .Backend .NCCL ) and ( pg != "hccl" ) and ( pg != "xccl" ) :
237+ if pg == dist .Backend .GLOO :
239238 raise RuntimeError (
240- "Overlapping DDP with ZeRO using this approach currently requires "
241- "NCCL/HCCL/XCCL backend to avoid hangs"
239+ "Gloo backend using Overlapping DDP with ZeRO may meet hangs"
242240 )
243241
244242 if shard_buckets :
@@ -394,13 +392,11 @@ def hook_with_zero_step_interleaved(
394392 )
395393 ddp_ref = weakref .ref (ddp )
396394
397- # NOTE: Gloo may hang with this overlapping approach, so we require
398- # NCCL/HCCL/XCCL backend for now; see https://github.com/pytorch/pytorch/issues/62300
395+ # NOTE: Gloo may hang with this overlapping approach; see https://github.com/pytorch/pytorch/issues/62300
399396 pg = dist .get_backend (ddp_ref ().process_group ) # type: ignore[union-attr]
400- if ( pg != dist .Backend .NCCL ) and ( pg != "hccl" ) and ( pg != "xccl" ) :
397+ if pg == dist .Backend .GLOO :
401398 raise RuntimeError (
402- "Overlapping DDP with ZeRO using this approach currently requires "
403- "NCCL/HCCL/XCCL backend to avoid hangs"
399+ "Gloo backend using Overlapping DDP with ZeRO may meet hangs"
404400 )
405401
406402 if shard_buckets :
0 commit comments