Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions backend/src/evalassist/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ class ExtendedEvaluatorMetadata(EvaluatorMetadata):
name: EvaluatorNameEnum | ExtendedEvaluatorNameEnum
custom_model_name: Optional[str] = None
custom_model_path: Optional[str] = None
custom_params: Optional[dict] = None
provider_specific_params: Optional[dict] = None
providers: list[ModelProviderEnum | ExtendedModelProviderEnum]

def __init__(
Expand All @@ -60,10 +62,14 @@ def __init__(
providers: ModelProviderEnum | ExtendedModelProviderEnum,
custom_model_name: Optional[str] = None,
custom_model_path: Optional[str] = None,
custom_params: Optional[dict] = None,
provider_specific_params: Optional[dict] = None,
):
super().__init__(name, providers)
self.custom_model_name = custom_model_name
self.custom_model_path = custom_model_path
self.custom_params = custom_params
self.provider_specific_params = provider_specific_params


EXTENDED_EVALUATORS_METADATA: list[ExtendedEvaluatorMetadata] = [
Expand Down
18 changes: 11 additions & 7 deletions backend/src/evalassist/evaluators/unitxt.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
ExtendedModelProviderEnum,
)
from ..utils import (
dict_deep_merge,
get_evaluator_metadata_wrapper,
get_inference_engine,
get_model_name_from_evaluator,
Expand Down Expand Up @@ -72,18 +73,22 @@ def evaluate(
):
model_name = get_model_name_from_evaluator(
self.evaluator_metadata,
provider,
)

custom_params = {
"use_cache": UNITXT_CACHE_ENABLED,
"seed": 42,
"temperature": 0,
}

dict_deep_merge(custom_params, self.evaluator_metadata.custom_params)

inference_engine = get_inference_engine(
credentials,
provider,
model_name,
custom_params={
"use_cache": UNITXT_CACHE_ENABLED,
"seed": 42,
"temperature": 0,
},
custom_params,
provider_specific_params=self.evaluator_metadata.provider_specific_params,
)

context_variables_list = [instance.context_variables for instance in instances]
Expand Down Expand Up @@ -353,7 +358,6 @@ def evaluate(
self.evaluator_metadata = get_evaluator_metadata_wrapper(self.evaluator_name)
model_name = get_model_name_from_evaluator(
self.evaluator_metadata,
provider,
)
inference_engine = get_inference_engine(
credentials, provider, model_name, custom_params
Expand Down
2 changes: 1 addition & 1 deletion backend/src/evalassist/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ def download_notebook(params: NotebookParams, background_tasks: BackgroundTasks)
evaluator_metadata = get_evaluator_metadata_wrapper(
evaluator_name, custom_model_name
)
model_name = get_model_name_from_evaluator(evaluator_metadata, params.provider)
model_name = get_model_name_from_evaluator(evaluator_metadata)
params.model_name = model_name
if params.evaluator_type == EvaluatorTypeEnum.DIRECT:
nb = DirectEvaluationNotebook(params).generate_notebook()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ def __init__(
)
model_name = get_model_name_from_evaluator(
evaluator_metadata,
self.provider,
)
self.inference_engine = get_inference_engine(
self.llm_provider_credentials,
Expand Down Expand Up @@ -334,7 +333,6 @@ def __init__(
)
model_name = get_model_name_from_evaluator(
evaluator_metadata,
self.provider,
)
self.inference_engine = get_inference_engine(
self.llm_provider_credentials,
Expand Down
69 changes: 54 additions & 15 deletions backend/src/evalassist/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,9 @@ def get_custom_models():
with open(CUSTOM_MODELS_PATH, "r", encoding="utf-8") as file:
try:
custom_models = json.load(file)
from . import root_pkg_logger

root_pkg_logger.debug(
"Loaded the following custom models",
json.dumps(custom_models, indent=2),
logger.debug(
f"Loaded the following custom models:\n{json.dumps(custom_models, indent=2)}",
)
return custom_models
except Exception:
Expand All @@ -87,6 +85,14 @@ def get_enum_by_value(value: str, enum: Enum) -> Enum:
return None


def value_exists_in_enum(value: str, enum: Enum) -> bool:
    """Return True if ``value`` is a valid member value of the enum class ``enum``.

    Relies on the standard Enum call protocol: ``EnumClass(value)`` raises
    ``ValueError`` when no member carries that value.
    """
    try:
        enum(value)
    except ValueError:
        return False
    return True


def get_local_hf_inference_engine_params(
model_name: str,
):
Expand Down Expand Up @@ -127,9 +133,13 @@ def get_cross_inference_engine_params(
provider = ModelProviderEnum.OPENAI

if custom_params is not None:
inference_engine_params.update(custom_params)
inference_engine_params = dict_deep_merge(
inference_engine_params, custom_params
)
if provider_specific_params is not None:
provider_specific_args.update(provider_specific_params)
provider_specific_args = dict_deep_merge(
provider_specific_args, provider_specific_params
)
inference_engine_params["model"] = model_name
inference_engine_params["provider"] = provider.value
inference_engine_params["provider_specific_args"] = provider_specific_args
Expand All @@ -150,7 +160,6 @@ def get_cross_inference_engine(
custom_params=custom_params,
provider_specific_params=provider_specific_params,
)

return CrossProviderInferenceEngine(**inference_engine_params)


Expand Down Expand Up @@ -215,7 +224,6 @@ def get_inference_engine(

def get_model_name_from_evaluator(
evaluator_metadata: ExtendedEvaluatorMetadata,
provider: str,
) -> str:
model_name = EXTENDED_EVALUATOR_TO_MODEL_ID.get(evaluator_metadata.name, None)
return (
Expand Down Expand Up @@ -249,18 +257,28 @@ def get_evaluator_metadata_wrapper(
]:
raise ValueError("The specified custom model was not found")

custom_model = [
custom_model
for custom_model in custom_models
if custom_model["name"] == custom_model_name
][0]
custom_model = next(
iter(
custom_model
for custom_model in custom_models
if custom_model["name"] == custom_model_name
)
)
return ExtendedEvaluatorMetadata(
name=evaluator_name,
custom_model_name=custom_model["name"],
custom_model_name=custom_model["name"]
if "name" in custom_model
else custom_model["path"],
custom_model_path=custom_model["path"],
custom_params=custom_model["custom_params"]
if "custom_params" in custom_model
else {},
provider_specific_params=custom_model["provider_specific_params"]
if "provider_specific_params" in custom_model
else {},
providers=[
ExtendedModelProviderEnum(p)
if p in ExtendedModelProviderEnum
if value_exists_in_enum(p, ExtendedModelProviderEnum)
else ModelProviderEnum(p)
for p in custom_model["providers"]
],
Expand Down Expand Up @@ -429,3 +447,24 @@ def clean_object(results: dict | list):
}
else:
return results


def dict_deep_merge(a: dict | None, b: dict | None) -> dict:
"""
Return a new dict that deep-merges b into a:
- Values from b overwrite those in a.
- Nested dicts are merged recursively.
- Other types simply get replaced.
"""
if a is None:
return b
if b is None:
return a
result = a.copy()
for key, b_val in b.items():
a_val = result.get(key)
if isinstance(a_val, dict) and isinstance(b_val, dict):
result[key] = dict_deep_merge(a_val, b_val)
else:
result[key] = b_val
return result