diff --git a/backend/src/evalassist/const.py b/backend/src/evalassist/const.py index 9e7b1735..153d6369 100644 --- a/backend/src/evalassist/const.py +++ b/backend/src/evalassist/const.py @@ -52,6 +52,8 @@ class ExtendedEvaluatorMetadata(EvaluatorMetadata): name: EvaluatorNameEnum | ExtendedEvaluatorNameEnum custom_model_name: Optional[str] = None custom_model_path: Optional[str] = None + custom_params: Optional[dict] = None + provider_specific_params: Optional[dict] = None providers: list[ModelProviderEnum | ExtendedModelProviderEnum] def __init__( @@ -60,10 +62,14 @@ def __init__( providers: ModelProviderEnum | ExtendedModelProviderEnum, custom_model_name: Optional[str] = None, custom_model_path: Optional[str] = None, + custom_params: Optional[dict] = None, + provider_specific_params: Optional[dict] = None, ): super().__init__(name, providers) self.custom_model_name = custom_model_name self.custom_model_path = custom_model_path + self.custom_params = custom_params + self.provider_specific_params = provider_specific_params EXTENDED_EVALUATORS_METADATA: list[ExtendedEvaluatorMetadata] = [ diff --git a/backend/src/evalassist/evaluators/unitxt.py b/backend/src/evalassist/evaluators/unitxt.py index 9dac02d2..fadb01e7 100644 --- a/backend/src/evalassist/evaluators/unitxt.py +++ b/backend/src/evalassist/evaluators/unitxt.py @@ -30,6 +30,7 @@ ExtendedModelProviderEnum, ) from ..utils import ( + dict_deep_merge, get_evaluator_metadata_wrapper, get_inference_engine, get_model_name_from_evaluator, @@ -72,18 +73,22 @@ def evaluate( ): model_name = get_model_name_from_evaluator( self.evaluator_metadata, - provider, ) + custom_params = { + "use_cache": UNITXT_CACHE_ENABLED, + "seed": 42, + "temperature": 0, + } + + custom_params = dict_deep_merge(custom_params, self.evaluator_metadata.custom_params) + inference_engine = get_inference_engine( credentials, provider, model_name, - custom_params={ - "use_cache": UNITXT_CACHE_ENABLED, - "seed": 42, - "temperature": 0, - }, + custom_params, + 
provider_specific_params=self.evaluator_metadata.provider_specific_params, ) context_variables_list = [instance.context_variables for instance in instances] @@ -353,7 +358,6 @@ def evaluate( self.evaluator_metadata = get_evaluator_metadata_wrapper(self.evaluator_name) model_name = get_model_name_from_evaluator( self.evaluator_metadata, - provider, ) inference_engine = get_inference_engine( credentials, provider, model_name, custom_params diff --git a/backend/src/evalassist/main.py b/backend/src/evalassist/main.py index 8aac837e..f8728ccb 100644 --- a/backend/src/evalassist/main.py +++ b/backend/src/evalassist/main.py @@ -446,7 +446,7 @@ def download_notebook(params: NotebookParams, background_tasks: BackgroundTasks) evaluator_metadata = get_evaluator_metadata_wrapper( evaluator_name, custom_model_name ) - model_name = get_model_name_from_evaluator(evaluator_metadata, params.provider) + model_name = get_model_name_from_evaluator(evaluator_metadata) params.model_name = model_name if params.evaluator_type == EvaluatorTypeEnum.DIRECT: nb = DirectEvaluationNotebook(params).generate_notebook() diff --git a/backend/src/evalassist/synthetic_example_generation/generate.py b/backend/src/evalassist/synthetic_example_generation/generate.py index 6b331692..9d066f0d 100644 --- a/backend/src/evalassist/synthetic_example_generation/generate.py +++ b/backend/src/evalassist/synthetic_example_generation/generate.py @@ -71,7 +71,6 @@ def __init__( ) model_name = get_model_name_from_evaluator( evaluator_metadata, - self.provider, ) self.inference_engine = get_inference_engine( self.llm_provider_credentials, @@ -334,7 +333,6 @@ def __init__( ) model_name = get_model_name_from_evaluator( evaluator_metadata, - self.provider, ) self.inference_engine = get_inference_engine( self.llm_provider_credentials, diff --git a/backend/src/evalassist/utils.py b/backend/src/evalassist/utils.py index a03e793e..f00d5149 100644 --- a/backend/src/evalassist/utils.py +++ b/backend/src/evalassist/utils.py @@ 
-56,11 +56,9 @@ def get_custom_models(): with open(CUSTOM_MODELS_PATH, "r", encoding="utf-8") as file: try: custom_models = json.load(file) - from . import root_pkg_logger - root_pkg_logger.debug( - "Loaded the following custom models", - json.dumps(custom_models, indent=2), + logger.debug( + f"Loaded the following custom models:\n{json.dumps(custom_models, indent=2)}", ) return custom_models except Exception: @@ -87,6 +85,14 @@ def get_enum_by_value(value: str, enum: Enum) -> Enum: return None +def value_exists_in_enum(value: str, enum: Enum) -> bool: + try: + enum(value) + return True + except ValueError: + return False + + def get_local_hf_inference_engine_params( model_name: str, ): @@ -127,9 +133,13 @@ def get_cross_inference_engine_params( provider = ModelProviderEnum.OPENAI if custom_params is not None: - inference_engine_params.update(custom_params) + inference_engine_params = dict_deep_merge( + inference_engine_params, custom_params + ) if provider_specific_params is not None: - provider_specific_args.update(provider_specific_params) + provider_specific_args = dict_deep_merge( + provider_specific_args, provider_specific_params + ) inference_engine_params["model"] = model_name inference_engine_params["provider"] = provider.value inference_engine_params["provider_specific_args"] = provider_specific_args @@ -150,7 +160,6 @@ def get_cross_inference_engine( custom_params=custom_params, provider_specific_params=provider_specific_params, ) - return CrossProviderInferenceEngine(**inference_engine_params) @@ -215,7 +224,6 @@ def get_inference_engine( def get_model_name_from_evaluator( evaluator_metadata: ExtendedEvaluatorMetadata, - provider: str, ) -> str: model_name = EXTENDED_EVALUATOR_TO_MODEL_ID.get(evaluator_metadata.name, None) return ( @@ -249,18 +257,28 @@ def get_evaluator_metadata_wrapper( ]: raise ValueError("The specified custom model was not found") - custom_model = [ - custom_model - for custom_model in custom_models - if custom_model["name"] == 
custom_model_name - ][0] + custom_model = next( + iter( + custom_model + for custom_model in custom_models + if custom_model["name"] == custom_model_name + ) + ) return ExtendedEvaluatorMetadata( name=evaluator_name, - custom_model_name=custom_model["name"], + custom_model_name=custom_model["name"] + if "name" in custom_model + else custom_model["path"], custom_model_path=custom_model["path"], + custom_params=custom_model["custom_params"] + if "custom_params" in custom_model + else {}, + provider_specific_params=custom_model["provider_specific_params"] + if "provider_specific_params" in custom_model + else {}, providers=[ ExtendedModelProviderEnum(p) - if p in ExtendedModelProviderEnum + if value_exists_in_enum(p, ExtendedModelProviderEnum) else ModelProviderEnum(p) for p in custom_model["providers"] ], @@ -429,3 +447,24 @@ def clean_object(results: dict | list): } else: return results + + +def dict_deep_merge(a: dict | None, b: dict | None) -> dict: + """ + Return a new dict that deep-merges b into a: + - Values from b overwrite those in a. + - Nested dicts are merged recursively. + - Other types simply get replaced. + """ + if a is None: + return dict(b) if b is not None else {} + if b is None: + return dict(a) + result = a.copy() + for key, b_val in b.items(): + a_val = result.get(key) + if isinstance(a_val, dict) and isinstance(b_val, dict): + result[key] = dict_deep_merge(a_val, b_val) + else: + result[key] = b_val + return result