26 changes: 26 additions & 0 deletions test/xpu/distributed/test_c10d_xccl.py
@@ -417,6 +417,32 @@ def _test_broadcast_coalesced(self, process_group, device, root_rank):
        if self.rank != root_rank:
            self.assertEqual(tensors, target)

    def _test_pass_xccl_options(self, pg_opts):
        store = c10d.FileStore(self.file_name, self.world_size)
        # Test that init_process_group accepts options
        dist.init_process_group(
            "xccl",
            world_size=self.world_size,
            rank=self.rank,
            store=store,
            pg_options=pg_opts,
        )

        # Test with new_group
        pg = c10d.new_group([0, 1], pg_options=pg_opts)
        # Test that the process group works as expected
        t = torch.tensor([self.rank + 1] * 10).xpu(self.rank)
        pg.allreduce(t).wait()
        expected_tensor = torch.tensor([3] * 10).xpu(self.rank)
Copilot AI commented on Sep 16, 2025:


The magic number 3 in the expected tensor should be calculated explicitly. For a 2-GPU setup (world_size=2), the sum would be (0+1) + (1+1) = 3, but this calculation isn't obvious. Consider using sum(range(self.world_size)) + self.world_size or adding a comment explaining the calculation.

Suggested change:
-        expected_tensor = torch.tensor([3] * 10).xpu(self.rank)
+        # The expected value is the sum of (rank + 1) for all ranks in the group
+        expected_value = sum(rank + 1 for rank in range(self.world_size))
+        expected_tensor = torch.tensor([expected_value] * 10).xpu(self.rank)
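For what it's worth, the suggested formula reduces to simple arithmetic and can be sanity-checked in isolation (plain Python, no torch or process group required):

# Quick check of the suggested formula: with world_size = 2, the all-reduce
# sum of (rank + 1) over ranks is 1 + 2 = 3, matching the hard-coded value.
assert sum(rank + 1 for rank in range(2)) == 3
# For a hypothetical 4-rank group the expected value would be 1 + 2 + 3 + 4 = 10.
assert sum(rank + 1 for rank in range(4)) == 10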


        self.assertEqual(expected_tensor, t)

    @requires_xccl()
    @skip_if_lt_x_gpu(2)
    def test_pass_xccl_options_high_priority_stream(self):
        pg_opts = c10d.ProcessGroupXCCL.Options()
        pg_opts.is_high_priority_stream = True
        self._test_pass_xccl_options(pg_opts)

    @requires_xccl()
    @skip_if_lt_x_gpu(2)
    def test_broadcast_coalesced_xccl(self):
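For readers who want to exercise the new options outside the test harness, here is a minimal standalone sketch of the same pattern. It assumes a machine with at least two XPU devices, an XCCL-enabled PyTorch build, and that ProcessGroupXCCL is reachable via torch.distributed (the test reaches it via c10d); the torchrun launch line and script name are illustrative, not part of the PR.

import os

import torch
import torch.distributed as dist


def main():
    # torchrun (or an equivalent launcher) supplies RANK and WORLD_SIZE.
    rank = int(os.environ["RANK"])
    world_size = int(os.environ["WORLD_SIZE"])

    # Build the backend options the same way the new test does.
    opts = dist.ProcessGroupXCCL.Options()  # assumption: exposed here in XPU builds
    opts.is_high_priority_stream = True

    dist.init_process_group(
        "xccl",
        rank=rank,
        world_size=world_size,
        pg_options=opts,
    )

    # All-reduce of (rank + 1); every rank should end up with the same sum.
    t = torch.tensor([rank + 1] * 10).xpu(rank)
    dist.all_reduce(t)
    expected = sum(r + 1 for r in range(world_size))
    assert t.eq(expected).all(), f"rank {rank}: got {t.tolist()}, expected {expected}"

    dist.destroy_process_group()


if __name__ == "__main__":
    # Illustrative launch: torchrun --nproc-per-node=2 xccl_options_demo.py
    main()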