Skip to content

Commit 2a31a90

Browse files
AI Quick Actions v1.0.3 - llama-cpp container support (#910)
2 parents 627ec12 + 66f03b1 commit 2a31a90

39 files changed

+1459
-255
lines changed

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ repos:
4545
rev: v8.18.4
4646
hooks:
4747
- id: gitleaks
48-
exclude: .github/workflows/reusable-actions/set-dummy-conf.yml
48+
exclude: .github/workflows/reusable-actions/set-dummy-conf.yml|./tests/operators/common/test_load_data.py
4949
# Oracle copyright checker
5050
- repo: https://github.com/oracle-samples/oci-data-science-ai-samples/
5151
rev: 1bc5270a443b791c62f634233c0f4966dfcc0dd6

ads/aqua/common/entities.py

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/usr/bin/env python
2+
# Copyright (c) 2024 Oracle and/or its affiliates.
3+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
4+
5+
6+
class ContainerSpec:
7+
"""
8+
Class to hold keys within the container spec.
9+
"""
10+
11+
CONTAINER_SPEC = "containerSpec"
12+
CLI_PARM = "cliParam"
13+
SERVER_PORT = "serverPort"
14+
HEALTH_CHECK_PORT = "healthCheckPort"
15+
ENV_VARS = "envVars"
16+
RESTRICTED_PARAMS = "restrictedParams"
17+
EVALUATION_CONFIGURATION = "evaluationConfiguration"

ads/aqua/common/enums.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#!/usr/bin/env python
2-
# -*- coding: utf-8 -*-
32
# Copyright (c) 2024 Oracle and/or its affiliates.
43
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
54

@@ -8,6 +7,7 @@
87
~~~~~~~~~~~~~~
98
This module contains the set of enums used in AQUA.
109
"""
10+
1111
from ads.common.extended_enum import ExtendedEnumMeta
1212

1313

@@ -38,21 +38,25 @@ class Tags(str, metaclass=ExtendedEnumMeta):
3838
READY_TO_IMPORT = "ready_to_import"
3939
BASE_MODEL_CUSTOM = "aqua_custom_base_model"
4040
AQUA_EVALUATION_MODEL_ID = "evaluation_model_id"
41+
MODEL_FORMAT = "model_format"
4142

4243

4344
class InferenceContainerType(str, metaclass=ExtendedEnumMeta):
4445
CONTAINER_TYPE_VLLM = "vllm"
4546
CONTAINER_TYPE_TGI = "tgi"
47+
CONTAINER_TYPE_LLAMA_CPP = "llama-cpp"
4648

4749

4850
class InferenceContainerTypeFamily(str, metaclass=ExtendedEnumMeta):
4951
AQUA_VLLM_CONTAINER_FAMILY = "odsc-vllm-serving"
5052
AQUA_TGI_CONTAINER_FAMILY = "odsc-tgi-serving"
53+
AQUA_LLAMA_CPP_CONTAINER_FAMILY = "odsc-llama-cpp-serving"
5154

5255

5356
class InferenceContainerParamType(str, metaclass=ExtendedEnumMeta):
5457
PARAM_TYPE_VLLM = "VLLM_PARAMS"
5558
PARAM_TYPE_TGI = "TGI_PARAMS"
59+
PARAM_TYPE_LLAMA_CPP = "LLAMA_CPP_PARAMS"
5660

5761

5862
class HuggingFaceTags(str, metaclass=ExtendedEnumMeta):

ads/aqua/common/utils.py

+32-2
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,17 @@
1010
import os
1111
import random
1212
import re
13+
from datetime import datetime, timedelta
1314
from functools import wraps
1415
from pathlib import Path
1516
from string import Template
1617
from typing import List, Union
1718

1819
import fsspec
1920
import oci
21+
from cachetools import TTLCache, cached
2022
from oci.data_science.models import JobRun, Model
23+
from oci.object_storage.models import ObjectSummary
2124

2225
from ads.aqua.common.enums import (
2326
InferenceContainerParamType,
@@ -45,7 +48,6 @@
4548
)
4649
from ads.aqua.data import AquaResourceIdentifier
4750
from ads.common.auth import default_signer
48-
from ads.common.decorator.threaded import threaded
4951
from ads.common.extended_enum import ExtendedEnumMeta
5052
from ads.common.object_storage_details import ObjectStorageDetails
5153
from ads.common.oci_resource import SEARCH_TYPE, OCIResource
@@ -213,7 +215,6 @@ def read_file(file_path: str, **kwargs) -> str:
213215
return UNKNOWN
214216

215217

216-
@threaded()
217218
def load_config(file_path: str, config_file_name: str, **kwargs) -> dict:
218219
artifact_path = f"{file_path.rstrip('/')}/{config_file_name}"
219220
signer = default_signer() if artifact_path.startswith("oci://") else {}
@@ -228,6 +229,32 @@ def load_config(file_path: str, config_file_name: str, **kwargs) -> dict:
228229
return config
229230

230231

232+
def list_os_files_with_extension(oss_path: str, extension: str) -> [str]:
233+
"""
234+
List files in the specified directory with the given extension.
235+
236+
Parameters:
237+
- oss_path: The path to the directory where files are located.
238+
- extension: The file extension to filter by (e.g., 'txt' for text files).
239+
240+
Returns:
241+
- A list of file paths matching the specified extension.
242+
"""
243+
244+
oss_client = ObjectStorageDetails.from_path(oss_path)
245+
246+
# Ensure the extension is prefixed with a dot if not already
247+
if not extension.startswith("."):
248+
extension = "." + extension
249+
files: List[ObjectSummary] = oss_client.list_objects().objects
250+
251+
return [
252+
file.name[len(oss_client.filepath) :].lstrip("/")
253+
for file in files
254+
if file.name.endswith(extension)
255+
]
256+
257+
231258
def is_valid_ocid(ocid: str) -> bool:
232259
"""Checks if the given ocid is valid.
233260
@@ -503,6 +530,7 @@ def container_config_path():
503530
return f"oci://{AQUA_SERVICE_MODELS_BUCKET}@{CONDA_BUCKET_NS}/service_models/config"
504531

505532

533+
@cached(cache=TTLCache(maxsize=1, ttl=timedelta(hours=5), timer=datetime.now))
506534
def get_container_config():
507535
config = load_config(
508536
file_path=container_config_path(),
@@ -881,6 +909,8 @@ def get_container_params_type(container_type_name: str) -> str:
881909
return InferenceContainerParamType.PARAM_TYPE_VLLM
882910
elif InferenceContainerType.CONTAINER_TYPE_TGI in container_type_name.lower():
883911
return InferenceContainerParamType.PARAM_TYPE_TGI
912+
elif InferenceContainerType.CONTAINER_TYPE_LLAMA_CPP in container_type_name.lower():
913+
return InferenceContainerParamType.PARAM_TYPE_LLAMA_CPP
884914
else:
885915
return UNKNOWN
886916

ads/aqua/config/config.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#!/usr/bin/env python
2-
# -*- coding: utf-8 -*-
32
# Copyright (c) 2024 Oracle and/or its affiliates.
43
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
54

@@ -14,5 +13,6 @@ def get_finetuning_config_defaults():
1413
"BM.GPU.A10.4": {"batch_size": 1, "replica": 1},
1514
"BM.GPU4.8": {"batch_size": 4, "replica": 1},
1615
"BM.GPU.A100-v2.8": {"batch_size": 6, "replica": 1},
16+
"BM.GPU.H100.8": {"batch_size": 6, "replica": 1},
1717
}
1818
}
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,37 @@
11
{
2+
"configuration": {
3+
"VM.Standard.A1.Flex": {
4+
"parameters": {},
5+
"shape_info": {
6+
"configs": [
7+
{
8+
"memory_in_gbs": 128,
9+
"ocpu": 20
10+
},
11+
{
12+
"memory_in_gbs": 256,
13+
"ocpu": 40
14+
},
15+
{
16+
"memory_in_gbs": 384,
17+
"ocpu": 60
18+
},
19+
{
20+
"memory_in_gbs": 512,
21+
"ocpu": 80
22+
}
23+
],
24+
"type": "CPU"
25+
}
26+
}
27+
},
228
"shape": [
329
"VM.GPU.A10.1",
430
"VM.GPU.A10.2",
531
"BM.GPU.A10.4",
632
"BM.GPU4.8",
7-
"BM.GPU.A100-v2.8"
33+
"BM.GPU.A100-v2.8",
34+
"BM.GPU.H100.8",
35+
"VM.Standard.A1.Flex"
836
]
937
}

ads/aqua/config/resource_limit_names.json

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"BM.GPU.A10.4": "ds-gpu-a10-count",
33
"BM.GPU.A100-v2.8": "ds-gpu-a100-v2-count",
4+
"BM.GPU.H100.8": "ds-gpu-h100-count",
45
"BM.GPU4.8": "ds-gpu4-count",
56
"VM.GPU.A10.1": "ds-gpu-a10-count",
67
"VM.GPU.A10.2": "ds-gpu-a10-count"

ads/aqua/constants.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
DEFAULT_FT_REPLICA = 1
2222
DEFAULT_FT_BATCH_SIZE = 1
2323
DEFAULT_FT_VALIDATION_SET_SIZE = 0.1
24-
2524
MAXIMUM_ALLOWED_DATASET_IN_BYTE = 52428800 # 1024 x 1024 x 50 = 50MB
2625
JOB_INFRASTRUCTURE_TYPE_DEFAULT_NETWORKING = "ME_STANDALONE"
2726
NB_SESSION_IDENTIFIER = "NB_SESSION_OCID"
@@ -34,6 +33,7 @@
3433
AQUA_MODEL_ARTIFACT_CONFIG = "config.json"
3534
AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME = "_name_or_path"
3635
AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE = "model_type"
36+
AQUA_MODEL_ARTIFACT_FILE = "model_file"
3737

3838
TRAINING_METRICS_FINAL = "training_metrics_final"
3939
VALIDATION_METRICS_FINAL = "validation_metrics_final"
@@ -74,3 +74,7 @@
7474
"--sharded",
7575
"--trust-remote-code",
7676
}
77+
LLAMA_CPP_INFERENCE_RESTRICTED_PARAMS = {
78+
"--port",
79+
"--host",
80+
}

ads/aqua/evaluation/entities.py

-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#!/usr/bin/env python
2-
# -*- coding: utf-8 -*-
32
# Copyright (c) 2024 Oracle and/or its affiliates.
43
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
54

ads/aqua/evaluation/evaluation.py

+47-14
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import re
88
import tempfile
99
from concurrent.futures import ThreadPoolExecutor, as_completed
10-
from dataclasses import asdict
10+
from dataclasses import asdict, fields
1111
from datetime import datetime, timedelta
1212
from pathlib import Path
1313
from threading import Lock
@@ -76,6 +76,7 @@
7676
ModelParams,
7777
)
7878
from ads.aqua.evaluation.errors import EVALUATION_JOB_EXIT_CODE_MESSAGE
79+
from ads.aqua.ui import AquaContainerConfig
7980
from ads.common.auth import default_signer
8081
from ads.common.object_storage_details import ObjectStorageDetails
8182
from ads.common.utils import get_console_link, get_files, get_log_links
@@ -90,7 +91,9 @@
9091
from ads.jobs.builders.runtimes.base import Runtime
9192
from ads.jobs.builders.runtimes.container_runtime import ContainerRuntime
9293
from ads.model.datascience_model import DataScienceModel
94+
from ads.model.deployment import ModelDeploymentContainerRuntime
9395
from ads.model.deployment.model_deployment import ModelDeployment
96+
from ads.model.generic_model import ModelDeploymentRuntimeType
9497
from ads.model.model_metadata import (
9598
MetadataTaxonomyKeys,
9699
ModelCustomMetadata,
@@ -157,24 +160,47 @@ def create(
157160
create_aqua_evaluation_details = CreateAquaEvaluationDetails(**kwargs)
158161
except Exception as ex:
159162
raise AquaValueError(
160-
"Invalid create evaluation parameters. Allowable parameters are: "
161-
f"{', '.join(list(asdict(CreateAquaEvaluationDetails).keys()))}."
163+
"Invalid create evaluation parameters. "
164+
"Allowable parameters are: "
165+
f"{', '.join([field.name for field in fields(CreateAquaEvaluationDetails)])}."
162166
) from ex
163167

164168
if not is_valid_ocid(create_aqua_evaluation_details.evaluation_source_id):
165169
raise AquaValueError(
166170
f"Invalid evaluation source {create_aqua_evaluation_details.evaluation_source_id}. "
167171
"Specify either a model or model deployment id."
168172
)
169-
170173
evaluation_source = None
174+
eval_inference_configuration = None
171175
if (
172176
DataScienceResource.MODEL_DEPLOYMENT
173177
in create_aqua_evaluation_details.evaluation_source_id
174178
):
175179
evaluation_source = ModelDeployment.from_id(
176180
create_aqua_evaluation_details.evaluation_source_id
177181
)
182+
try:
183+
if (
184+
evaluation_source.runtime.type
185+
== ModelDeploymentRuntimeType.CONTAINER
186+
):
187+
runtime = ModelDeploymentContainerRuntime.from_dict(
188+
evaluation_source.runtime.to_dict()
189+
)
190+
inference_config = AquaContainerConfig.from_container_index_json(
191+
enable_spec=True
192+
).inference
193+
for container in inference_config.values():
194+
if container.name == runtime.image.split(":")[0]:
195+
eval_inference_configuration = (
196+
container.spec.evaluation_configuration
197+
)
198+
except Exception:
199+
logger.debug(
200+
f"Could not load inference config details for the evaluation id: "
201+
f"{create_aqua_evaluation_details.evaluation_source_id}. Please check if the container"
202+
f" runtime has the correct SMC image information."
203+
)
178204
elif (
179205
DataScienceResource.MODEL
180206
in create_aqua_evaluation_details.evaluation_source_id
@@ -390,6 +416,9 @@ def create(
390416
report_path=create_aqua_evaluation_details.report_path,
391417
model_parameters=create_aqua_evaluation_details.model_parameters,
392418
metrics=create_aqua_evaluation_details.metrics,
419+
inference_configuration=eval_inference_configuration.to_filtered_dict()
420+
if eval_inference_configuration
421+
else {},
393422
)
394423
).create(**kwargs) ## TODO: decide what parameters will be needed
395424
logger.debug(
@@ -511,6 +540,7 @@ def _build_evaluation_runtime(
511540
report_path: str,
512541
model_parameters: dict,
513542
metrics: List = None,
543+
inference_configuration: dict = None,
514544
) -> Runtime:
515545
"""Builds evaluation runtime for Job."""
516546
# TODO the image name needs to be extracted from the mapping index.json file.
@@ -520,16 +550,19 @@ def _build_evaluation_runtime(
520550
.with_environment_variable(
521551
**{
522552
"AIP_SMC_EVALUATION_ARGUMENTS": json.dumps(
523-
asdict(
524-
self._build_launch_cmd(
525-
evaluation_id=evaluation_id,
526-
evaluation_source_id=evaluation_source_id,
527-
dataset_path=dataset_path,
528-
report_path=report_path,
529-
model_parameters=model_parameters,
530-
metrics=metrics,
531-
)
532-
)
553+
{
554+
**asdict(
555+
self._build_launch_cmd(
556+
evaluation_id=evaluation_id,
557+
evaluation_source_id=evaluation_source_id,
558+
dataset_path=dataset_path,
559+
report_path=report_path,
560+
model_parameters=model_parameters,
561+
metrics=metrics,
562+
),
563+
),
564+
**(inference_configuration or {}),
565+
},
533566
),
534567
"CONDA_BUCKET_NS": CONDA_BUCKET_NS,
535568
},

0 commit comments

Comments
 (0)