@@ -1,6 +1,6 @@
 #!/usr/bin/env python

-# Copyright (c) 2024 Oracle and/or its affiliates.
+# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/


@@ -433,23 +433,6 @@ def _construct_json_body(self, prompt: str, param:dict) -> dict:
     model: str = DEFAULT_MODEL_NAME
     """The name of the model."""

-    max_tokens: int = 256
-    """Denotes the number of tokens to predict per generation."""
-
-    temperature: float = 0.2
-    """A non-negative float that tunes the degree of randomness in generation."""
-
-    k: int = -1
-    """Number of most likely tokens to consider at each step."""
-
-    p: float = 0.75
-    """Total probability mass of tokens to consider at each step."""
-
-    best_of: int = 1
-    """Generates best_of completions server-side and returns the "best"
-    (the one with the highest log probability per token).
-    """
-
     stop: Optional[List[str]] = None
     """Stop words to use when generating. Model output is cut off
     at the first occurrence of any of these substrings."""
@@ -466,14 +449,9 @@ def _llm_type(self) -> str:
     def _default_params(self) -> Dict[str, Any]:
         """Get the default parameters."""
         return {
-            "best_of": self.best_of,
-            "max_tokens": self.max_tokens,
             "model": self.model,
             "stop": self.stop,
             "stream": self.streaming,
-            "temperature": self.temperature,
-            "top_k": self.k,
-            "top_p": self.p,
         }

     @property
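After this hunk, the base class's `_default_params` reports only the transport-level settings (`model`, `stop`, `stream`); the sampling parameters move down to the concrete classes, as the hunks below show. A minimal sketch of the resulting pattern, using simplified stand-in classes rather than the library's exact code (`"odsc-llm"` stands in for `DEFAULT_MODEL_NAME`):

    from typing import Any, Dict, List, Optional

    class BaseLLM:
        """Stand-in for OCIModelDeploymentLLM after this change."""
        model: str = "odsc-llm"  # placeholder for DEFAULT_MODEL_NAME
        stop: Optional[List[str]] = None
        streaming: bool = False

        @property
        def _default_params(self) -> Dict[str, Any]:
            # Only generic, framework-agnostic keys remain in the base payload.
            return {"model": self.model, "stop": self.stop, "stream": self.streaming}

    class TGILLM(BaseLLM):
        """Stand-in for OCIModelDeploymentTGI: owns its sampling knobs."""
        max_tokens: int = 256
        temperature: float = 0.2

        @property
        def _default_params(self) -> Dict[str, Any]:
            # Merge the base payload with the subclass-specific sampling parameters.
            return {
                **super()._default_params,
                "max_tokens": self.max_tokens,
                "temperature": self.temperature,
            }

This keeps framework-specific defaults (TGI vs. vLLM) out of the shared base while letting each subclass extend the request payload.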
@@ -788,6 +766,23 @@ class OCIModelDeploymentTGI(OCIModelDeploymentLLM):
 
     """

+    max_tokens: int = 256
+    """Denotes the number of tokens to predict per generation."""
+
+    temperature: float = 0.2
+    """A non-negative float that tunes the degree of randomness in generation."""
+
+    k: int = -1
+    """Number of most likely tokens to consider at each step."""
+
+    p: float = 0.75
+    """Total probability mass of tokens to consider at each step."""
+
+    best_of: int = 1
+    """Generates best_of completions server-side and returns the "best"
+    (the one with the highest log probability per token).
+    """
+
     api: Literal["/generate", "/v1/completions"] = "/v1/completions"
     """Api spec."""

@@ -922,6 +917,20 @@ class OCIModelDeploymentVLLM(OCIModelDeploymentLLM):
 
     """

+    max_tokens: int = 256
+    """Denotes the number of tokens to predict per generation."""
+
+    temperature: float = 0.2
+    """A non-negative float that tunes the degree of randomness in generation."""
+
+    p: float = 0.75
+    """Total probability mass of tokens to consider at each step."""
+
+    best_of: int = 1
+    """Generates best_of completions server-side and returns the "best"
+    (the one with the highest log probability per token).
+    """
+
     n: int = 1
     """Number of output sequences to return for the given prompt."""

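With the parameters relocated, callers set sampling options on the concrete class they instantiate. A hedged usage sketch: the import path assumes these classes are exposed from oracle-ads as `ads.llm`, and the endpoint URL is a placeholder, not a real deployment:

    from ads.llm import OCIModelDeploymentVLLM  # import path assumed

    llm = OCIModelDeploymentVLLM(
        endpoint="https://modeldeployment.<region>.oci.customer-oci.com/<ocid>/predict",
        model="odsc-llm",   # placeholder for DEFAULT_MODEL_NAME
        max_tokens=256,     # now defined on the vLLM subclass, per the diff above
        temperature=0.2,
        p=0.75,             # total probability mass of tokens to consider (top-p)
        best_of=1,
    )

Note that per the hunks above, `k` (top-k) is added only to the TGI subclass, while the vLLM subclass keeps its own generation fields such as `n`.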