6 changes: 4 additions & 2 deletions tensorrt_llm/_torch/auto_deploy/config/default.yaml
@@ -73,8 +73,10 @@ transforms:
detect_sharding:
stage: sharding
simple_shard_only: false
use_sharding_from_factory: false
support_partial_config: false
# sharding_source: ['factory', 'custom', 'heuristic']
sharding_source: ['heuristic']
support_partial_config: true
# custom_sharding_config: 'tp_sharding.yaml'
Review comment (Member): looks like this is just a leftover from testing and should be reverted?
sharding_dims: ['tp', 'ep', 'bmm']
requires_shape_prop: true
# TODO: (hg) need to ensure run_shape_prop after sharding.
11 changes: 0 additions & 11 deletions tensorrt_llm/_torch/auto_deploy/llm_args.py
@@ -163,17 +163,6 @@ class AutoDeployConfig(DynamicYamlMixInForSettings, BaseSettings):
"If False, auto-detect and use column+row (all_reduce) sharding when possible.",
)

use_sharding_from_factory: bool = Field(
default=False,
description="If True, use sharding from the model factory. If False, use sharding from the "
"AutoDeployConfig.",
)

sharding_dims: List[str] = Field(
default=["tp", "ep", "dp"],
description="The sharding methods to apply by the heuristic sharding stage.",
)

compile_backend: Literal["torch-simple", "torch-compile", "torch-cudagraph", "torch-opt"] = (
Field(
default="torch-compile",
10 changes: 10 additions & 0 deletions tensorrt_llm/_torch/auto_deploy/transform/interface.py
@@ -155,6 +155,16 @@ def from_last_info(cls, info: "TransformInfo") -> "TransformInfo":
has_valid_shapes=info.has_valid_shapes,
)

# overload += operator to concatenate TransformInfo objects
def __iadd__(self, other: "TransformInfo") -> "TransformInfo":
# since TransformInfo is frozen, instead, we return a new TransformInfo object
return TransformInfo(
skipped=self.skipped & other.skipped,
num_matches=self.num_matches + other.num_matches,
is_clean=self.is_clean & other.is_clean,
has_valid_shapes=self.has_valid_shapes & other.has_valid_shapes,
)
Review comment (Member) on lines +159 to +166: Please use the existing __add__ operator instead. __iadd__ is by convention an in-place operator, i.e., it means that

config1 = TransformInfo()
config2 = TransformInfo() 
config3 = config1
config3 += config2
assert config3 is config1  # is operator checks for same object!

However, this assertion would fail since you actually create a new object
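
For illustration, a minimal standalone sketch of the suggestion above: expose the merge through __add__, so that a += b falls back to __add__ and rebinds the name to a new object, which matches the frozen-model semantics. The class below only mimics TransformInfo with the four fields visible in this diff; it is not the real implementation.

```python
from pydantic import BaseModel, ConfigDict


class TransformInfo(BaseModel):
    """Stand-in for the real TransformInfo; field set assumed from the diff above."""

    model_config = ConfigDict(frozen=True)

    skipped: bool = False
    num_matches: int = 0
    is_clean: bool = False
    has_valid_shapes: bool = False

    def __add__(self, other: "TransformInfo") -> "TransformInfo":
        # Returning a new object is the expected contract for __add__; since no
        # __iadd__ is defined, `a += b` falls back to `a = a + b` automatically.
        return TransformInfo(
            skipped=self.skipped & other.skipped,
            num_matches=self.num_matches + other.num_matches,
            is_clean=self.is_clean & other.is_clean,
            has_valid_shapes=self.has_valid_shapes & other.has_valid_shapes,
        )


info = TransformInfo(num_matches=2, is_clean=True)
info += TransformInfo(num_matches=3)  # rebinds `info`; the original object is untouched
assert info.num_matches == 5 and info.is_clean is False
```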


def __or__(self, other: "TransformInfo") -> "TransformInfo":
"""Merge two TransformInfo objects."""
return TransformInfo(
300 changes: 145 additions & 155 deletions tensorrt_llm/_torch/auto_deploy/transform/library/sharding.py

Large diffs are not rendered by default.

61 changes: 59 additions & 2 deletions tensorrt_llm/_torch/auto_deploy/utils/sharding_utils.py
@@ -1,14 +1,17 @@
"""Sharding config definitions for the inference optimizer."""

import json
import math
import operator
from abc import ABC, abstractmethod
from enum import IntEnum
from enum import Enum, IntEnum
from functools import partial
from pathlib import Path
from typing import Any, Callable, Dict, List, Literal, Optional, Sequence

import torch
import torch.nn as nn
import yaml
from pydantic import BaseModel, ConfigDict, Field, model_validator
from torch.fx import GraphModule, Node

@@ -834,16 +837,35 @@ def _resolve_ep_cls_from_node(node: Node) -> type[EPShardingInfo]:
return EPShardingInfo


class ShardingSource(Enum):
"""Enum for sharding source."""

HEURISTIC = "heuristic"
FACTORY = "factory"
CUSTOM = "custom"


class ShardingDim(Enum):
"""Enum for sharding dimension."""

TP = "tp"
EP = "ep"
BMM = "bmm"


class ShardingConfig(BaseModel):
"""Configuration for sharding the model."""

factory_source: ShardingConfigSource = Field(default=ShardingConfigSource.UNKNOWN)
rank: int = Field(default=0)
world_size: int = Field(default=1)
predefined_config: Optional[Dict[str, Any]] = None
custom_sharding_config: Optional[Dict[str, Any]] = None
simple_shard_only: bool = Field(default=False)
use_sharding_from_factory: bool = False
support_partial_config: bool = False
sharding_source: List[ShardingSource] = Field(
default_factory=lambda: [ShardingSource.HEURISTIC]
)
sharding_dims: List[str] = Field(default_factory=list)
tp_transforms: List[TPShardingInfo] = Field(default_factory=list)
bmm_transforms: List[BMMShardingInfo] = Field(default_factory=list)
@@ -859,6 +881,41 @@ def _validate_and_normalize(self):
self.validate_config()
return self

def read_custom_sharding_config(self, config_path: str) -> bool:
Review comment (Member): I would like to avoid adding a separate yaml object just for sharding. We already have a general-purpose config reader; otherwise it gets too complicated. There is no need to add a separate yaml reader.

Review comment (Member): see this comment as well for more details: https://github.com/NVIDIA/TensorRT-LLM/pull/8153/files#r2437473448
"""Read the custom sharding config from the given path.

Supports both JSON and YAML file formats. The format is auto-detected
based on the file extension (.json, .yaml, .yml).
"""
path = Path(config_path)

if not path.exists():
ad_logger.warning(f"Sharding config file not found: {config_path}")
return False

try:
with open(config_path, "r") as f:
if path.suffix.lower() in [".yaml", ".yml"]:
self.custom_sharding_config = yaml.safe_load(f)
elif path.suffix.lower() == ".json":
self.custom_sharding_config = json.load(f)
else:
ad_logger.warning(f"Unsupported sharding config file format: {path.suffix}")
except Exception as e:
ad_logger.warning(f"Failed to read sharding config file: {e}")
return False
return True

def append_TP(self, tp_transform: TPShardingInfo) -> bool:
"""Append a TP transform only if that node was
not sharded before. Do not overwrite existing transforms.
"""
for existing_transform in self.tp_transforms:
if existing_transform.target_node == tp_transform.target_node:
Review comment (Member): is this sufficient to avoid conflicting/duplicate configurations?

return False
self.tp_transforms.append(tp_transform)
return True

def validate_config(self) -> bool:
if self.factory_source != ShardingConfigSource.HUGGINGFACE:
ad_logger.warning(
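On the duplicate-configuration question above: a standalone sketch of the append-if-absent guard, extended to also flag a conflict (same target node, different parameters) instead of silently skipping it. TPShardingInfo here is a hypothetical stand-in dataclass, not the real class.

```python
from dataclasses import dataclass
from typing import List


@dataclass(frozen=True)
class TPShardingInfo:  # hypothetical stand-in with made-up fields
    target_node: str
    split_dim: int


def append_tp(transforms: List[TPShardingInfo], new: TPShardingInfo) -> bool:
    """Append only if the target node is not sharded yet; never overwrite."""
    for existing in transforms:
        if existing.target_node == new.target_node:
            if existing != new:
                # Same node but different parameters: a conflicting request,
                # not just a duplicate, so worth logging rather than ignoring.
                print(f"conflicting sharding for {new.target_node}; keeping the first one")
            return False
    transforms.append(new)
    return True


tp: List[TPShardingInfo] = []
assert append_tp(tp, TPShardingInfo("linear1", 0)) is True
assert append_tp(tp, TPShardingInfo("linear1", 1)) is False  # conflict detected, not appended
```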
@@ -274,4 +274,6 @@ def run_sharding_pattern_detection_test(
print("detected_set", detected_set)
print("expected_set", expected_set)

assert detected_set == expected_set, "Expected sharding pattern does not match detected pattern"
assert detected_set == expected_set, (
f"Expected sharding pattern does not match detected pattern: {detected_set} != {expected_set}"
)
@@ -66,8 +66,9 @@ def _run_job(
{
"detect_sharding": {
"stage": "sharding",
"use_sharding_from_factory": False,
"sharding_source": ["heuristic"],
"sharding_dims": ["bmm"],
"support_partial_config": False,
},
"sharding_transform_executor": {
"stage": "sharding",
@@ -128,7 +129,8 @@ def _run_pattern_detection_job(
{
"detect_sharding": {
"stage": "sharding",
"use_sharding_from_factory": False,
"sharding_source": ["heuristic"],
"support_partial_config": False,
},
},
)
@@ -50,8 +50,9 @@ def _get_expected_num_params(rank: int, world_size: int, num_p_og: int) -> int:
{
"detect_sharding": {
"stage": "sharding",
"use_sharding_from_factory": False,
"sharding_source": ["heuristic"],
"sharding_dims": ["ep"],
"support_partial_config": False,
},
"sharding_transform_executor": {
"stage": "sharding",
@@ -118,7 +119,8 @@ def _run_pattern_detection_job(num_experts: int, rank: int, world_size: int) ->
{
"detect_sharding": {
"stage": "sharding",
"use_sharding_from_factory": False,
"sharding_source": ["heuristic"],
"support_partial_config": False,
},
},
)
@@ -1,12 +1,15 @@
"""Tests for basic graph sharding."""

# add to the path directory 4 directories up
import os
from functools import partial
from typing import Type

import pytest
import torch
import torch.nn as nn
import torch.nn.functional as F
import yaml
from _dist_test_utils import get_device_counts
from _graph_test_helpers import run_sharding_pattern_detection_test, run_test_transformed_gm
from _model_test_utils import FakeFP8Linear
@@ -193,12 +196,22 @@ def verify_local_weight_sizes(gm) -> bool:
op_expected = getattr(torch.ops.auto_deploy, dist_op_expected)

gm = torch_export_to_gm(model, args=(x,), clone=True)
sharding_source = ["custom"] if from_config else ["heuristic"]

if sharding_source == ["custom"]:
# If the file does not exist, write predefined_config to tp_sharding.yaml file
if not os.path.exists("tp_sharding.yaml"):
with open("tp_sharding.yaml", "w") as f:
yaml.dump(predefined_config, f, sort_keys=False)
Review comment (Collaborator) on lines +204 to +205: Perhaps use Python's tempfile to avoid contaminating the current working directory.

Review comment (Collaborator, Author): The problem with tempfile is that this file has to be visible from a different thread directly from disk, so I cannot use a context like:

with tempfile.NamedTemporaryFile(mode='w+t', delete=True) as tmpfile:
    # Write to the file
    yaml.dump(predefined_config, tmpfile, sort_keys=False)
    ...

tempfile adds a unique id to either the temporary file or the temporary directory, but I need a fixed absolute path in the custom_sharding_config parameter to read it from.

Unless you know a good workaround?
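
One way to satisfy both constraints (nothing written into the current working directory, but a fixed absolute path that other processes can read from disk) could be tempfile.mkdtemp with a fixed file name inside the generated directory. A minimal sketch with placeholder config contents:

```python
import shutil
import tempfile
from pathlib import Path

import yaml

predefined_config = {"example": "placeholder"}  # stand-in for the real sharding config

# mkdtemp() creates a uniquely named directory, but the file name inside it is
# ours to choose, so the absolute path is known up front and lives on disk.
tmp_dir = Path(tempfile.mkdtemp(prefix="ad_sharding_"))
config_path = tmp_dir / "tp_sharding.yaml"
with open(config_path, "w") as f:
    yaml.dump(predefined_config, f, sort_keys=False)

# str(config_path) would be what gets passed as custom_sharding_config; in the
# real test the cleanup below would only run after the job has read the file.
shutil.rmtree(tmp_dir)
```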

Review comment (Member): See my other comment here: https://github.com/NVIDIA/TensorRT-LLM/pull/8153/files#r2437473448

If you do, you should be able to just provide the custom config as a dictionary without needing to create/read a tmp file.
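
A hypothetical sketch of what that could look like from the test's side, with the custom sharding entries passed inline as a dictionary so no file has to be written; whether custom_sharding_config accepts a dict is an assumption here, not something this diff shows:

```python
# Hypothetical: detect_sharding options with the custom config given inline
# as a dict instead of a path such as "tp_sharding.yaml".
detect_sharding_options = {
    "stage": "sharding",
    "sharding_source": ["custom"],
    "custom_sharding_config": {
        # per-layer sharding entries would go here
    },
    "support_partial_config": False,
    "sharding_dims": ["tp"],
}
```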

gm_transformed = InferenceOptimizer(
None,
{
"detect_sharding": {
"stage": "sharding",
"use_sharding_from_factory": from_config,
"sharding_source": sharding_source,
"custom_sharding_config": "tp_sharding.yaml",
"support_partial_config": False,
"sharding_dims": ["tp"],
},
"sharding_transform_executor": {
"stage": "sharding",
@@ -338,23 +351,33 @@ def _run_pattern_detection_job(
)
)

sharding_source = ["custom"] if from_config else ["heuristic"]

if sharding_source == ["custom"]:
# If the file does not exist, write predefined_config to tp_sharding.yaml file
if not os.path.exists("tp_sharding.yaml"):
with open("tp_sharding.yaml", "w") as f:
yaml.dump(predefined_config, f, sort_keys=False)

# get detected transformations
optimizer = InferenceOptimizer(
None,
{
"detect_sharding": {
"stage": "sharding",
"use_sharding_from_factory": from_config,
"sharding_source": sharding_source,
"custom_sharding_config": "tp_sharding.yaml",
"support_partial_config": False,
"sharding_dims": ["tp"],
},
},
)
optimizer.shared_config.local_rank = rank
optimizer.shared_config.world_size = world_size
optimizer.shared_config.sharding_config.predefined_config = predefined_config
_ = optimizer(None, gm)
detected_transformations = optimizer.shared_config.sharding_config.tp_transforms

print(f"detected_transformations: {detected_transformations}")
print(f"expected_transformations: {expected_transformations}")
# Run pattern detection test
run_sharding_pattern_detection_test(detected_transformations, expected_transformations)

@@ -409,7 +432,3 @@ def test_sharding_pattern_detection(
No need to run distributed job, can be run on single process.
"""
_run_pattern_detection_job(model_cls, bias, 0, world_size, from_config)


if __name__ == "__main__":
_run_pattern_detection_job(nn.Linear, False, 0, 8, False)