1
1
#!/usr/bin/env python
2
- # -*- coding: utf-8; -*-
3
2
4
- # Copyright (c) 2021, 2023 Oracle and/or its affiliates.
3
+ # Copyright (c) 2021, 2025 Oracle and/or its affiliates.
5
4
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
5
7
6
8
7
import collections
9
8
import copy
10
9
import datetime
11
- import oci
12
- import warnings
13
10
import time
14
- from typing import Dict , List , Union , Any
11
+ import warnings
12
+ from typing import Any , Dict , List , Union
15
13
14
+ import oci
16
15
import oci .loggingsearch
17
- from ads .common import auth as authutil
18
16
import pandas as pd
19
- from ads .model .serde .model_input import JsonModelInputSERDE
17
+ from oci .data_science .models import (
18
+ CreateModelDeploymentDetails ,
19
+ LogDetails ,
20
+ UpdateModelDeploymentDetails ,
21
+ )
22
+
23
+ from ads .common import auth as authutil
24
+ from ads .common import utils as ads_utils
20
25
from ads .common .oci_logging import (
21
26
LOG_INTERVAL ,
22
27
LOG_RECORDS_LIMIT ,
30
35
from ads .model .deployment .common .utils import send_request
31
36
from ads .model .deployment .model_deployment_infrastructure import (
32
37
DEFAULT_BANDWIDTH_MBPS ,
38
+ DEFAULT_MEMORY_IN_GBS ,
39
+ DEFAULT_OCPUS ,
33
40
DEFAULT_REPLICA ,
34
41
DEFAULT_SHAPE_NAME ,
35
- DEFAULT_OCPUS ,
36
- DEFAULT_MEMORY_IN_GBS ,
37
42
MODEL_DEPLOYMENT_INFRASTRUCTURE_TYPE ,
38
43
ModelDeploymentInfrastructure ,
39
44
)
45
50
ModelDeploymentRuntimeType ,
46
51
OCIModelDeploymentRuntimeType ,
47
52
)
53
+ from ads .model .serde .model_input import JsonModelInputSERDE
48
54
from ads .model .service .oci_datascience_model_deployment import (
49
55
OCIDataScienceModelDeployment ,
50
56
)
51
- from ads . common import utils as ads_utils
57
+
52
58
from .common import utils
53
59
from .common .utils import State
54
60
from .model_deployment_properties import ModelDeploymentProperties
55
- from oci .data_science .models import (
56
- LogDetails ,
57
- CreateModelDeploymentDetails ,
58
- UpdateModelDeploymentDetails ,
59
- )
60
61
61
62
DEFAULT_WAIT_TIME = 1200
62
63
DEFAULT_POLL_INTERVAL = 10
@@ -80,6 +81,11 @@ class ModelDeploymentLogType:
80
81
ACCESS = "access"
81
82
82
83
84
+ class ModelDeploymentType :
85
+ SINGLE_MODEL = "SINGLE_MODEL"
86
+ MODEL_GROUP = "MODEL_GROUP"
87
+
88
+
83
89
class LogNotConfiguredError (Exception ): # pragma: no cover
84
90
pass
85
91
@@ -964,7 +970,9 @@ def predict(
964
970
except oci .exceptions .ServiceError as ex :
965
971
# When bandwidth exceeds the allocated value, TooManyRequests error (429) will be raised by oci backend.
966
972
if ex .status == 429 :
967
- bandwidth_mbps = self .infrastructure .bandwidth_mbps or DEFAULT_BANDWIDTH_MBPS
973
+ bandwidth_mbps = (
974
+ self .infrastructure .bandwidth_mbps or DEFAULT_BANDWIDTH_MBPS
975
+ )
968
976
utils .get_logger ().warning (
969
977
f"Load balancer bandwidth exceeds the allocated { bandwidth_mbps } Mbps."
970
978
"To estimate the actual bandwidth, use formula: (payload size in KB) * (estimated requests per second) * 8 / 1024."
@@ -1644,36 +1652,36 @@ def _build_model_deployment_configuration_details(self) -> Dict:
1644
1652
}
1645
1653
1646
1654
if infrastructure .subnet_id :
1647
- instance_configuration [
1648
- infrastructure .CONST_SUBNET_ID
1649
- ] = infrastructure . subnet_id
1655
+ instance_configuration [infrastructure . CONST_SUBNET_ID ] = (
1656
+ infrastructure .subnet_id
1657
+ )
1650
1658
1651
1659
if infrastructure .private_endpoint_id :
1652
1660
if not hasattr (
1653
1661
oci .data_science .models .InstanceConfiguration , "private_endpoint_id"
1654
1662
):
1655
1663
# TODO: add oci version with private endpoint support.
1656
- raise EnvironmentError (
1664
+ raise OSError (
1657
1665
"Private endpoint is not supported in the current OCI SDK installed."
1658
1666
)
1659
1667
1660
- instance_configuration [
1661
- infrastructure .CONST_PRIVATE_ENDPOINT_ID
1662
- ] = infrastructure . private_endpoint_id
1668
+ instance_configuration [infrastructure . CONST_PRIVATE_ENDPOINT_ID ] = (
1669
+ infrastructure .private_endpoint_id
1670
+ )
1663
1671
1664
1672
scaling_policy = {
1665
1673
infrastructure .CONST_POLICY_TYPE : "FIXED_SIZE" ,
1666
1674
infrastructure .CONST_INSTANCE_COUNT : infrastructure .replica
1667
1675
or DEFAULT_REPLICA ,
1668
1676
}
1669
1677
1670
- if not runtime .model_uri :
1678
+ if not ( runtime .model_uri or runtime . model_group_id ) :
1671
1679
raise ValueError (
1672
- "Missing parameter model uri. Try reruning it after model uri is configured."
1680
+ "Missing parameter model uri and model group id . Try reruning it after model or model group is configured."
1673
1681
)
1674
1682
1675
1683
model_id = runtime .model_uri
1676
- if not model_id .startswith ("ocid" ):
1684
+ if model_id and not model_id .startswith ("ocid" ):
1677
1685
from ads .model .datascience_model import DataScienceModel
1678
1686
1679
1687
dsc_model = DataScienceModel (
@@ -1704,7 +1712,7 @@ def _build_model_deployment_configuration_details(self) -> Dict:
1704
1712
oci .data_science .models ,
1705
1713
"ModelDeploymentEnvironmentConfigurationDetails" ,
1706
1714
):
1707
- raise EnvironmentError (
1715
+ raise OSError (
1708
1716
"Environment variable hasn't been supported in the current OCI SDK installed."
1709
1717
)
1710
1718
@@ -1720,9 +1728,9 @@ def _build_model_deployment_configuration_details(self) -> Dict:
1720
1728
and runtime .inference_server .upper ()
1721
1729
== MODEL_DEPLOYMENT_INFERENCE_SERVER_TRITON
1722
1730
):
1723
- environment_variables [
1724
- "CONTAINER_TYPE"
1725
- ] = MODEL_DEPLOYMENT_INFERENCE_SERVER_TRITON
1731
+ environment_variables ["CONTAINER_TYPE" ] = (
1732
+ MODEL_DEPLOYMENT_INFERENCE_SERVER_TRITON
1733
+ )
1726
1734
runtime .set_spec (runtime .CONST_ENV , environment_variables )
1727
1735
environment_configuration_details = {
1728
1736
runtime .CONST_ENVIRONMENT_CONFIG_TYPE : runtime .environment_config_type ,
@@ -1734,27 +1742,45 @@ def _build_model_deployment_configuration_details(self) -> Dict:
1734
1742
oci .data_science .models ,
1735
1743
"OcirModelDeploymentEnvironmentConfigurationDetails" ,
1736
1744
):
1737
- raise EnvironmentError (
1745
+ raise OSError (
1738
1746
"Container runtime hasn't been supported in the current OCI SDK installed."
1739
1747
)
1740
1748
environment_configuration_details ["image" ] = runtime .image
1741
1749
environment_configuration_details ["imageDigest" ] = runtime .image_digest
1742
1750
environment_configuration_details ["cmd" ] = runtime .cmd
1743
1751
environment_configuration_details ["entrypoint" ] = runtime .entrypoint
1744
1752
environment_configuration_details ["serverPort" ] = runtime .server_port
1745
- environment_configuration_details [
1746
- "healthCheckPort"
1747
- ] = runtime . health_check_port
1753
+ environment_configuration_details ["healthCheckPort" ] = (
1754
+ runtime . health_check_port
1755
+ )
1748
1756
1749
1757
model_deployment_configuration_details = {
1750
- infrastructure .CONST_DEPLOYMENT_TYPE : " SINGLE_MODEL" ,
1758
+ infrastructure .CONST_DEPLOYMENT_TYPE : ModelDeploymentType . SINGLE_MODEL ,
1751
1759
infrastructure .CONST_MODEL_CONFIG_DETAILS : model_configuration_details ,
1752
1760
runtime .CONST_ENVIRONMENT_CONFIG_DETAILS : environment_configuration_details ,
1753
1761
}
1754
1762
1763
+ if runtime .model_group_id :
1764
+ model_deployment_configuration_details [
1765
+ infrastructure .CONST_DEPLOYMENT_TYPE
1766
+ ] = ModelDeploymentType .MODEL_GROUP
1767
+ model_deployment_configuration_details ["modelGroupConfigurationDetails" ] = {
1768
+ runtime .CONST_MODEL_GROUP_ID : runtime .model_group_id
1769
+ }
1770
+ model_deployment_configuration_details [
1771
+ "infrastructureConfigurationDetails"
1772
+ ] = {
1773
+ "infrastructureType" : "INSTANCE_POOL" ,
1774
+ infrastructure .CONST_BANDWIDTH_MBPS : infrastructure .bandwidth_mbps
1775
+ or DEFAULT_BANDWIDTH_MBPS ,
1776
+ infrastructure .CONST_INSTANCE_CONFIG : instance_configuration ,
1777
+ infrastructure .CONST_SCALING_POLICY : scaling_policy ,
1778
+ }
1779
+ model_configuration_details .pop (runtime .CONST_MODEL_ID )
1780
+
1755
1781
if runtime .deployment_mode == ModelDeploymentMode .STREAM :
1756
1782
if not hasattr (oci .data_science .models , "StreamConfigurationDetails" ):
1757
- raise EnvironmentError (
1783
+ raise OSError (
1758
1784
"Model deployment mode hasn't been supported in the current OCI SDK installed."
1759
1785
)
1760
1786
model_deployment_configuration_details [
@@ -1786,9 +1812,13 @@ def _build_category_log_details(self) -> Dict:
1786
1812
1787
1813
logs = {}
1788
1814
if (
1789
- self .infrastructure .access_log and
1790
- self .infrastructure .access_log .get (self .infrastructure .CONST_LOG_GROUP_ID , None )
1791
- and self .infrastructure .access_log .get (self .infrastructure .CONST_LOG_ID , None )
1815
+ self .infrastructure .access_log
1816
+ and self .infrastructure .access_log .get (
1817
+ self .infrastructure .CONST_LOG_GROUP_ID , None
1818
+ )
1819
+ and self .infrastructure .access_log .get (
1820
+ self .infrastructure .CONST_LOG_ID , None
1821
+ )
1792
1822
):
1793
1823
logs [self .infrastructure .CONST_ACCESS ] = {
1794
1824
self .infrastructure .CONST_LOG_GROUP_ID : self .infrastructure .access_log .get (
@@ -1799,9 +1829,13 @@ def _build_category_log_details(self) -> Dict:
1799
1829
),
1800
1830
}
1801
1831
if (
1802
- self .infrastructure .predict_log and
1803
- self .infrastructure .predict_log .get (self .infrastructure .CONST_LOG_GROUP_ID , None )
1804
- and self .infrastructure .predict_log .get (self .infrastructure .CONST_LOG_ID , None )
1832
+ self .infrastructure .predict_log
1833
+ and self .infrastructure .predict_log .get (
1834
+ self .infrastructure .CONST_LOG_GROUP_ID , None
1835
+ )
1836
+ and self .infrastructure .predict_log .get (
1837
+ self .infrastructure .CONST_LOG_ID , None
1838
+ )
1805
1839
):
1806
1840
logs [self .infrastructure .CONST_PREDICT ] = {
1807
1841
self .infrastructure .CONST_LOG_GROUP_ID : self .infrastructure .predict_log .get (
0 commit comments