@@ -1,6 +1,6 @@
 #!/usr/bin/env python

-# Copyright (c) 2024 Oracle and/or its affiliates.
+# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/


@@ -433,23 +433,6 @@ def _construct_json_body(self, prompt: str, param:dict) -> dict:
     model: str = DEFAULT_MODEL_NAME
     """The name of the model."""

-    max_tokens: int = 256
-    """Denotes the number of tokens to predict per generation."""
-
-    temperature: float = 0.2
-    """A non-negative float that tunes the degree of randomness in generation."""
-
-    k: int = -1
-    """Number of most likely tokens to consider at each step."""
-
-    p: float = 0.75
-    """Total probability mass of tokens to consider at each step."""
-
-    best_of: int = 1
-    """Generates best_of completions server-side and returns the "best"
-    (the one with the highest log probability per token).
-    """
-
     stop: Optional[List[str]] = None
     """Stop words to use when generating. Model output is cut off
     at the first occurrence of any of these substrings."""
@@ -466,14 +449,9 @@ def _llm_type(self) -> str:
     def _default_params(self) -> Dict[str, Any]:
         """Get the default parameters."""
         return {
-            "best_of": self.best_of,
-            "max_tokens": self.max_tokens,
             "model": self.model,
             "stop": self.stop,
             "stream": self.streaming,
-            "temperature": self.temperature,
-            "top_k": self.k,
-            "top_p": self.p,
         }

     @property
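After this hunk, the base class's `_default_params` reports only the transport-level settings (`model`, `stop`, `stream`); the sampling parameters move down to the concrete classes, as the hunks below show. A minimal sketch of the resulting pattern, using simplified stand-in classes rather than the library's exact code (`"odsc-llm"` stands in for `DEFAULT_MODEL_NAME`):

    from typing import Any, Dict, List, Optional

    class BaseLLM:
        """Stand-in for OCIModelDeploymentLLM after this change."""
        model: str = "odsc-llm"  # placeholder for DEFAULT_MODEL_NAME
        stop: Optional[List[str]] = None
        streaming: bool = False

        @property
        def _default_params(self) -> Dict[str, Any]:
            # Only generic, framework-agnostic keys remain in the base payload.
            return {"model": self.model, "stop": self.stop, "stream": self.streaming}

    class TGILLM(BaseLLM):
        """Stand-in for OCIModelDeploymentTGI: owns its sampling knobs."""
        max_tokens: int = 256
        temperature: float = 0.2

        @property
        def _default_params(self) -> Dict[str, Any]:
            # Merge the base payload with the subclass-specific sampling parameters.
            return {
                **super()._default_params,
                "max_tokens": self.max_tokens,
                "temperature": self.temperature,
            }

This keeps framework-specific defaults (TGI vs. vLLM) out of the shared base while letting each subclass extend the request payload.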
@@ -788,6 +766,23 @@ class OCIModelDeploymentTGI(OCIModelDeploymentLLM):
 
     """

+    max_tokens: int = 256
+    """Denotes the number of tokens to predict per generation."""
+
+    temperature: float = 0.2
+    """A non-negative float that tunes the degree of randomness in generation."""
+
+    k: int = -1
+    """Number of most likely tokens to consider at each step."""
+
+    p: float = 0.75
+    """Total probability mass of tokens to consider at each step."""
+
+    best_of: int = 1
+    """Generates best_of completions server-side and returns the "best"
+    (the one with the highest log probability per token).
+    """
+
     api: Literal["/generate", "/v1/completions"] = "/v1/completions"
     """Api spec."""

@@ -922,6 +917,20 @@ class OCIModelDeploymentVLLM(OCIModelDeploymentLLM):
 
     """

+    max_tokens: int = 256
+    """Denotes the number of tokens to predict per generation."""
+
+    temperature: float = 0.2
+    """A non-negative float that tunes the degree of randomness in generation."""
+
+    p: float = 0.75
+    """Total probability mass of tokens to consider at each step."""
+
+    best_of: int = 1
+    """Generates best_of completions server-side and returns the "best"
+    (the one with the highest log probability per token).
+    """
+
     n: int = 1
     """Number of output sequences to return for the given prompt."""

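With the parameters relocated, callers set sampling options on the concrete class they instantiate. A hedged usage sketch: the import path assumes these classes are exposed from oracle-ads as `ads.llm`, and the endpoint URL is a placeholder, not a real deployment:

    from ads.llm import OCIModelDeploymentVLLM  # import path assumed

    llm = OCIModelDeploymentVLLM(
        endpoint="https://modeldeployment.<region>.oci.customer-oci.com/<ocid>/predict",
        model="odsc-llm",   # placeholder for DEFAULT_MODEL_NAME
        max_tokens=256,     # now defined on the vLLM subclass, per the diff above
        temperature=0.2,
        p=0.75,             # total probability mass of tokens to consider (top-p)
        best_of=1,
    )

Note that per the hunks above, `k` (top-k) is added only to the TGI subclass, while the vLLM subclass keeps its own generation fields such as `n`.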