Skip to content

Commit 10f1c7d

Browse files
TroyGarden authored and facebook-github-bot committed
PMT (#3023)
Summary: Pull Request resolved: #3023 # context * `_test_sharding` is frequently used test function covering many TorchRec sharding test cases * the multiprocess env often introduces additional difficulties when debugging, espeically for kernel-size issues (the multiprocess env is not actually needed) * this change make it run on the main process when the `world_size==1` so that a simple `breakpoint()` can just work. Reviewed By: iamzainhuda Differential Revision: D74131796 fbshipit-source-id: ccc34ab589c0153cc0ce1187bba3df7dd63cbfc6
1 parent 151aa02 commit 10f1c7d

File tree

1 file changed

+57
-27
lines changed

1 file changed

+57
-27
lines changed

torchrec/distributed/test_utils/test_model_parallel.py

Lines changed: 57 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -163,33 +163,63 @@ def _test_sharding(
163163
lengths_dtype: torch.dtype = torch.int64,
164164
) -> None:
165165
self._build_tables_and_groups(data_type=data_type)
166-
self._run_multi_process_test(
167-
callable=sharding_single_rank_test,
168-
world_size=world_size,
169-
local_size=local_size,
170-
world_size_2D=world_size_2D,
171-
node_group_size=node_group_size,
172-
model_class=model_class,
173-
tables=self.tables if pooling == PoolingType.SUM else self.mean_tables,
174-
weighted_tables=self.weighted_tables if has_weighted_tables else None,
175-
embedding_groups=self.embedding_groups,
176-
sharders=sharders,
177-
backend=backend,
178-
optim=EmbOptimType.EXACT_SGD,
179-
constraints=constraints,
180-
qcomms_config=qcomms_config,
181-
variable_batch_size=variable_batch_size,
182-
apply_optimizer_in_backward_config=apply_optimizer_in_backward_config,
183-
variable_batch_per_feature=variable_batch_per_feature,
184-
global_constant_batch=global_constant_batch,
185-
use_inter_host_allreduce=use_inter_host_allreduce,
186-
allow_zero_batch_size=allow_zero_batch_size,
187-
custom_all_reduce=custom_all_reduce,
188-
use_offsets=use_offsets,
189-
indices_dtype=indices_dtype,
190-
offsets_dtype=offsets_dtype,
191-
lengths_dtype=lengths_dtype,
192-
)
166+
# directly run the test with single process
167+
if world_size == 1:
168+
sharding_single_rank_test(
169+
rank=0,
170+
world_size=world_size,
171+
local_size=local_size,
172+
world_size_2D=world_size_2D,
173+
node_group_size=node_group_size,
174+
model_class=model_class, # pyre-ignore[6]
175+
tables=self.tables if pooling == PoolingType.SUM else self.mean_tables,
176+
weighted_tables=self.weighted_tables if has_weighted_tables else None,
177+
embedding_groups=self.embedding_groups,
178+
sharders=sharders,
179+
backend=backend,
180+
optim=EmbOptimType.EXACT_SGD,
181+
constraints=constraints,
182+
qcomms_config=qcomms_config,
183+
variable_batch_size=variable_batch_size,
184+
apply_optimizer_in_backward_config=apply_optimizer_in_backward_config,
185+
variable_batch_per_feature=variable_batch_per_feature,
186+
global_constant_batch=global_constant_batch,
187+
use_inter_host_allreduce=use_inter_host_allreduce,
188+
allow_zero_batch_size=allow_zero_batch_size,
189+
custom_all_reduce=custom_all_reduce,
190+
use_offsets=use_offsets,
191+
indices_dtype=indices_dtype,
192+
offsets_dtype=offsets_dtype,
193+
lengths_dtype=lengths_dtype,
194+
)
195+
else:
196+
self._run_multi_process_test(
197+
callable=sharding_single_rank_test,
198+
world_size=world_size,
199+
local_size=local_size,
200+
world_size_2D=world_size_2D,
201+
node_group_size=node_group_size,
202+
model_class=model_class,
203+
tables=self.tables if pooling == PoolingType.SUM else self.mean_tables,
204+
weighted_tables=self.weighted_tables if has_weighted_tables else None,
205+
embedding_groups=self.embedding_groups,
206+
sharders=sharders,
207+
backend=backend,
208+
optim=EmbOptimType.EXACT_SGD,
209+
constraints=constraints,
210+
qcomms_config=qcomms_config,
211+
variable_batch_size=variable_batch_size,
212+
apply_optimizer_in_backward_config=apply_optimizer_in_backward_config,
213+
variable_batch_per_feature=variable_batch_per_feature,
214+
global_constant_batch=global_constant_batch,
215+
use_inter_host_allreduce=use_inter_host_allreduce,
216+
allow_zero_batch_size=allow_zero_batch_size,
217+
custom_all_reduce=custom_all_reduce,
218+
use_offsets=use_offsets,
219+
indices_dtype=indices_dtype,
220+
offsets_dtype=offsets_dtype,
221+
lengths_dtype=lengths_dtype,
222+
)
193223

194224
def _test_dynamic_sharding(
195225
self,

0 commit comments

Comments (0)