Replace Argparse with ConfigCommand #266

Merged · 36 commits · Feb 12, 2025

Changes from 20 commits

Commits
fd6fbc6
Ported over infer logic from parser
nv-braf Jan 29, 2025
f1c9fd0
Added unit testing for inferred input/endpoint logic
nv-braf Jan 29, 2025
8b1e38a
Completed porting of all inferred and checked fields from parser
nv-braf Jan 29, 2025
21e0fca
Adding missing goodput parsing
nv-braf Jan 29, 2025
6edd605
Initial changes to get profile working
nv-braf Jan 30, 2025
8a7c0c6
CLI unit tests passing
nv-braf Jan 31, 2025
442a515
Fixing CLI tests that I missed
nv-braf Jan 31, 2025
cc1930e
Telemetry tests passing
nv-braf Jan 31, 2025
bbf4fc8
Test artifacts passing
nv-braf Jan 31, 2025
0cb56ca
Telemetry data collector tests passing
nv-braf Jan 31, 2025
505adf0
Passing common unit tests
nv-braf Jan 31, 2025
b7493af
Fixing console exporter unit tests
nv-braf Jan 31, 2025
7fc8903
Fixing CSV exporter unit tests
nv-braf Jan 31, 2025
ebe6688
Partial fix for JSON exporter unit tests
nv-braf Jan 31, 2025
8684791
Adding library function to convert BaseConfig into a JSON readable di…
nv-braf Jan 31, 2025
6fd0e30
JSON Exporter unit tests passing
nv-braf Feb 1, 2025
4fc8338
Fixing Tokenizer unit tests
nv-braf Feb 1, 2025
32a2f6c
PA Config unit tests passing
nv-braf Feb 1, 2025
2e5d996
Fixing Results and RunConfig unit tests
nv-braf Feb 1, 2025
1ea7221
All unit tests passing
nv-braf Feb 1, 2025
0f5cc07
Fixing codeql issues
nv-braf Feb 3, 2025
f1bf717
Moving generation of artifact directory into PA Config generator class
nv-braf Feb 3, 2025
5774787
Fixing exporters to use PA config
nv-braf Feb 3, 2025
e7b702f
Progress on analyze. Need to add checkpoint support to base config cl…
nv-braf Feb 3, 2025
de70186
Fixed GAP config generator. All unit tests passing
nv-braf Feb 3, 2025
3750fa2
Getting analyze working with CLI
nv-braf Feb 4, 2025
a55ee3b
Analyze working with config file
nv-braf Feb 4, 2025
82ef958
fixing codeql
nv-braf Feb 4, 2025
e2d9c36
Fixed PA config to work with multiple sweep parameters
nv-braf Feb 4, 2025
2a2778e
Fixing message when config found in checkpoint
nv-braf Feb 4, 2025
aa01418
Refactoring path method
nv-braf Feb 5, 2025
59edd20
Removing commented out lines
nv-braf Feb 5, 2025
b16676a
Fixing issue around specifying url/server metrics url
nv-braf Feb 5, 2025
5103616
Changes based on Elias' PR
nv-braf Feb 11, 2025
9093f86
Fixing codeql issue
nv-braf Feb 12, 2025
a752b3e
Missing check for None
nv-braf Feb 12, 2025
326 changes: 126 additions & 200 deletions genai-perf/genai_perf/config/generate/perf_analyzer_config.py

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions genai-perf/genai_perf/config/input/base_config.py
@@ -13,7 +13,10 @@
# limitations under the License.

from copy import deepcopy
from enum import Enum
from pathlib import PosixPath

from genai_perf.config.input.config_defaults import Range
from genai_perf.config.input.config_field import ConfigField


@@ -44,6 +47,32 @@

        return self._fields[name]

    def to_json(self):
        config_dict = {}
        for key, value in self._values.items():
            if isinstance(value, BaseConfig):
                config_dict[key] = value.to_json()
            else:
                config_dict[key] = self._get_legal_json_value(value)

        return config_dict

    def _get_legal_json_value(self, value):
        if isinstance(value, Enum):
            return value.name.lower()
        elif isinstance(value, PosixPath):
            return str(value)
        elif hasattr(value, "__dict__"):
            # Relies on types such as Range defining __dict__ as a callable
            return value.__dict__()
        elif isinstance(value, dict):
            config_dict = {}
            for k, v in value.items():
                config_dict[k] = self._get_legal_json_value(v)

            return config_dict
        else:
            return value

    def __setattr__(self, name, value):
        # This prevents recursion failure in __init__
        if name == "_fields" or name == "_values" or name == "_children":
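For context on the serialization rules above, here is a standalone sketch of how _get_legal_json_value maps values. The ServiceKind enum and the callable() guard are additions for this illustration only; the PR's version pairs a bare hasattr(value, "__dict__") check with types like Range that define __dict__ as a method:

import json
from enum import Enum
from pathlib import PosixPath


class ServiceKind(Enum):  # hypothetical enum, used only for this example
    TRITON = "triton"


def to_legal_json_value(value):
    # Enums become lowercased member names, paths become strings,
    # objects exposing a callable __dict__ (like Range) become dicts,
    # and dicts are converted recursively; everything else passes through.
    if isinstance(value, Enum):
        return value.name.lower()
    elif isinstance(value, PosixPath):
        return str(value)
    elif hasattr(value, "__dict__") and callable(value.__dict__):
        return value.__dict__()
    elif isinstance(value, dict):
        return {k: to_legal_json_value(v) for k, v in value.items()}
    return value


print(json.dumps({
    "service_kind": to_legal_json_value(ServiceKind.TRITON),      # "triton"
    "artifact_dir": to_legal_json_value(PosixPath("artifacts")),  # "artifacts"
}))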
2 changes: 1 addition & 1 deletion genai-perf/genai_perf/config/input/config_analyze.py
@@ -29,7 +29,7 @@ class ConfigAnalyze(BaseConfig):
    Describes the configuration for the analyze subcommand
    """

    def __init__(self):
    def __init__(self) -> None:
        super().__init__()
        self.sweep_parameters: Any = ConfigField(
            default=AnalyzeDefaults.SWEEP_PARAMETER, choices=all_parameters
108 changes: 103 additions & 5 deletions genai-perf/genai_perf/config/input/config_command.py
@@ -13,8 +13,10 @@
# limitations under the License.

from enum import Enum, auto
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, TypeAlias, Union

import genai_perf.logging as logging
from genai_perf.config.input.base_config import BaseConfig
from genai_perf.config.input.config_analyze import ConfigAnalyze
from genai_perf.config.input.config_defaults import (
@@ -28,18 +30,20 @@
from genai_perf.config.input.config_output import ConfigOutput
from genai_perf.config.input.config_perf_analyzer import ConfigPerfAnalyzer
from genai_perf.config.input.config_tokenizer import ConfigTokenizer
from genai_perf.inputs.input_constants import ModelSelectionStrategy, OutputFormat
from genai_perf.inputs.input_constants import OutputFormat
from genai_perf.types import CheckpointObject, ModelName


class Subcommand(Enum):
    COMPARE = auto()
    PROFILE = auto()
    ANALYZE = auto()
    COMPARE = "compare"
    PROFILE = "profile"
    ANALYZE = "analyze"


ConfigRangeOrList: TypeAlias = Optional[Union[Range, List[int]]]

logger = logging.getLogger(__name__)
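A side note on the Subcommand change above: moving the members from auto() to string values means the enum can be constructed directly from user-facing text. A minimal sketch relying only on standard Enum by-value lookup (not code from this PR):

from genai_perf.config.input.config_command import Subcommand

# String-valued members can be looked up from CLI/config text directly.
assert Subcommand("profile") is Subcommand.PROFILE
assert Subcommand.PROFILE.value == "profile"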


class ConfigCommand(BaseConfig):
"""
Expand All @@ -66,7 +70,7 @@ def __init__(self, user_config: Optional[Dict[str, Any]] = None):
    # Top-Level Parsing Methods
    ###########################################################################
    def _parse_yaml(self, user_config: Optional[Dict[str, Any]] = None) -> None:
        if user_config is None:
        if not user_config:
            return

        for key, value in user_config.items():
@@ -89,6 +93,11 @@ def _parse_yaml(self, user_config: Optional[Dict[str, Any]] = None) -> None:
f"User Config: {key} is not a valid top-level parameter"
)

self._infer_settings()
self._check_for_illegal_combinations()
self._set_artifact_directory()
self._set_profile_export_file()

    def _parse_model_names(self, model_names: str) -> None:
        if type(model_names) is str:
            self.model_names = [model_names]
@@ -97,6 +106,95 @@ def _parse_model_names(self, model_names: str) -> None:
        else:
            raise ValueError("User Config: model_names must be a string or list")

    ###########################################################################
    # Infer Methods
    ###########################################################################
    def _infer_settings(self) -> None:
        self.endpoint.infer_settings(model_name=self.model_names[0])
        self.input.infer_settings()

    ###########################################################################
    # Illegal Combination Methods
    ###########################################################################
    def _check_for_illegal_combinations(self) -> None:
        self._check_output_tokens_and_service_kind()
        self._check_output_format_and_generate_plots()

        self.endpoint.check_for_illegal_combinations()

    def _check_output_tokens_and_service_kind(self) -> None:
        if self.endpoint.service_kind not in ["triton", "tensorrtllm_engine"]:
            if self.input.output_tokens.get_field("deterministic").is_set_by_user:
                raise ValueError(
                    "User Config: input.output_tokens.deterministic is only supported with Triton or TensorRT-LLM Engine service kinds"
                )

    def _check_output_format_and_generate_plots(self) -> None:
        if self.endpoint.output_format in [
            OutputFormat.IMAGE_RETRIEVAL,
            OutputFormat.NVCLIP,
            OutputFormat.OPENAI_EMBEDDINGS,
            OutputFormat.RANKINGS,
        ]:
            if self.output.generate_plots:
                raise ValueError(
                    f"User Config: generate_plots is not supported with the {self.endpoint.output_format} output format"
                )

    ###########################################################################
    # Set Path Methods
    ###########################################################################
    def _set_artifact_directory(self) -> None:
        if not self.output.get_field("artifact_directory").is_set_by_user:
            model_name = self.model_names[0]

            # Preprocess Huggingface model names that include '/' in their model name.
            if (model_name is not None) and ("/" in model_name):
                filtered_name = "_".join(model_name.split("/"))
                logger.info(
                    f"Model name '{model_name}' cannot be used to create artifact "
                    f"directory. Instead, '{filtered_name}' will be used."
                )
                name = [f"{filtered_name}"]
            else:
                name = [f"{model_name}"]

            if self.endpoint.service_kind == "openai":
                name += [f"{self.endpoint.service_kind}-{self.endpoint.type}"]
            elif self.endpoint.service_kind == "triton":
                name += [
                    f"{self.endpoint.service_kind}-{self.endpoint.backend.to_lowercase()}"
                ]
            elif self.endpoint.service_kind == "tensorrtllm_engine":
                name += [f"{self.endpoint.service_kind}"]
            else:
                raise ValueError(
                    f"Unknown service kind '{self.endpoint.service_kind}'."
                )

            if "concurrency" in self.perf_analyzer.stimulus:
                concurrency = self.perf_analyzer.stimulus["concurrency"]
                name += [f"concurrency{concurrency}"]
            elif "request_rate" in self.perf_analyzer.stimulus:
                request_rate = self.perf_analyzer.stimulus["request_rate"]
                name += [f"request_rate{request_rate}"]

            self.output.artifact_directory = self.output.artifact_directory / Path(
                "-".join(name)
            )

    def _set_profile_export_file(self) -> None:
        if self.output.get_field("profile_export_file").is_set_by_user:
            if Path(self.output.profile_export_file).parent != Path(""):
                raise ValueError(
                    "Please use artifact_directory option to define intermediary paths to "
                    "the profile_export_file."
                )

        self.output.profile_export_file = (
            self.output.artifact_directory / self.output.profile_export_file
        )

    ###########################################################################
    # Utility Methods
    ###########################################################################
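To make the artifact-directory naming concrete, a small illustration of what _set_artifact_directory produces (the model name and settings are assumed for this example, not taken from the PR):

# Hypothetical inputs: model "meta-llama/Llama-3-8b", service_kind "openai",
# endpoint type "chat", and the default stimulus {"concurrency": 1}.
name = ["meta-llama_Llama-3-8b"]  # '/' in the model name replaced by '_'
name += ["openai-chat"]           # service kind + endpoint type
name += ["concurrency1"]          # derived from perf_analyzer.stimulus
print("-".join(name))             # meta-llama_Llama-3-8b-openai-chat-concurrency1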
11 changes: 7 additions & 4 deletions genai-perf/genai_perf/config/input/config_defaults.py
@@ -27,6 +27,9 @@ class Range:
    min: int
    max: int

    def __dict__(self):
        # Defined as a callable so BaseConfig._get_legal_json_value can
        # serialize a Range via value.__dict__()
        return {"min": self.min, "max": self.max}


@dataclass(frozen=True)
class TopLevelDefaults:
@@ -59,17 +62,17 @@ class EndPointDefaults:
TYPE = ""
SERVICE_KIND = "triton"
STREAMING = False
SERVER_METRICS_URL = ""
URL = ""
SERVER_METRICS_URL = ["http://localhost:8002/metrics"]
URL = "http://localhost:8001"


@dataclass(frozen=True)
class PerfAnalyzerDefaults:
PATH = "./perf_analyzer"
PATH = "perf_analyzer"
VERBOSE = False
STIMULUS = {"concurrency": 1}
STABILITY_PERCENTAGE = 999
MEASUREMENT_INTERVAL = 10000
SKIP_ARGS = False


@dataclass(frozen=True)
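Putting ConfigCommand and these defaults together, a hedged sketch of building a config from a parsed YAML dict (the nested keys below are inferred from the config classes touched in this PR and may not match the final schema exactly):

from genai_perf.config.input.config_command import ConfigCommand

# Top-level keys are dispatched by _parse_yaml; unknown keys raise ValueError.
user_config = {
    "model_names": "gpt2",
    "endpoint": {"service_kind": "openai", "type": "chat"},
    "perf_analyzer": {"stimulus": {"concurrency": 32}},
}
config = ConfigCommand(user_config)  # parses, infers, validates, sets paths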