Commit 4ec1484

AWS committed
Amazon SageMaker Service Update: Added inference components model data caching feature
1 parent 5d241d9 · commit 4ec1484

File tree

2 files changed: +41 additions, -2 deletions
Lines changed: 6 additions & 0 deletions

@@ -0,0 +1,6 @@
+{
+    "type": "feature",
+    "category": "Amazon SageMaker Service",
+    "contributor": "",
+    "description": "Added inference components model data caching feature"
+}

services/sagemaker/src/main/resources/codegen-resources/service-2.json

Lines changed: 35 additions & 2 deletions
@@ -12446,7 +12446,7 @@
         },
         "PlatformIdentifier":{
           "shape":"PlatformIdentifier",
-          "documentation":"<p>The platform identifier of the notebook instance runtime environment.</p>"
+          "documentation":"<p>The platform identifier of the notebook instance runtime environment. The default value is <code>notebook-al2-v2</code>.</p>"
         },
         "InstanceMetadataServiceConfiguration":{
           "shape":"InstanceMetadataServiceConfiguration",
@@ -21172,6 +21172,7 @@
       },
       "documentation":"<p>The configuration parameters that specify the IAM roles assumed by the execution role of SageMaker (assumable roles) and the cluster instances or job execution environments (execution roles or runtime roles) to manage and access resources required for running Amazon EMR clusters or Amazon EMR Serverless applications.</p>"
     },
+    "EnableCaching":{"type":"boolean"},
     "EnableCapture":{"type":"boolean"},
     "EnableInfraCheck":{
       "type":"boolean",
@@ -24706,6 +24707,30 @@
       "box":true,
       "min":0
     },
+    "InferenceComponentDataCacheConfig":{
+      "type":"structure",
+      "required":["EnableCaching"],
+      "members":{
+        "EnableCaching":{
+          "shape":"EnableCaching",
+          "documentation":"<p>Sets whether the endpoint that hosts the inference component caches the model artifacts and container image.</p> <p>With caching enabled, the endpoint caches this data in each instance that it provisions for the inference component. That way, the inference component deploys faster during the auto scaling process. If caching isn't enabled, the inference component takes longer to deploy because of the time it spends downloading the data.</p>",
+          "box":true
+        }
+      },
+      "documentation":"<p>Settings that affect how the inference component caches data.</p>"
+    },
+    "InferenceComponentDataCacheConfigSummary":{
+      "type":"structure",
+      "required":["EnableCaching"],
+      "members":{
+        "EnableCaching":{
+          "shape":"EnableCaching",
+          "documentation":"<p>Indicates whether the inference component caches model artifacts as part of the auto scaling process.</p>",
+          "box":true
+        }
+      },
+      "documentation":"<p>Settings that affect how the inference component caches data.</p>"
+    },
     "InferenceComponentDeploymentConfig":{
       "type":"structure",
       "required":["RollingUpdatePolicy"],
@@ -24811,6 +24836,10 @@
         "BaseInferenceComponentName":{
           "shape":"InferenceComponentName",
           "documentation":"<p>The name of an existing inference component that is to contain the inference component that you're creating with your request.</p> <p>Specify this parameter only if your request is meant to create an adapter inference component. An adapter inference component contains the path to an adapter model. The purpose of the adapter model is to tailor the inference output of a base foundation model, which is hosted by the base inference component. The adapter inference component uses the compute resources that you assigned to the base inference component.</p> <p>When you create an adapter inference component, use the <code>Container</code> parameter to specify the location of the adapter artifacts. In the parameter value, use the <code>ArtifactUrl</code> parameter of the <code>InferenceComponentContainerSpecification</code> data type.</p> <p>Before you can create an adapter inference component, you must have an existing inference component that contains the foundation model that you want to adapt.</p>"
+        },
+        "DataCacheConfig":{
+          "shape":"InferenceComponentDataCacheConfig",
+          "documentation":"<p>Settings that affect how the inference component caches data.</p>"
         }
       },
       "documentation":"<p>Details about the resources to deploy with this inference component, including the model, container, and compute resources.</p>"
@@ -24837,6 +24866,10 @@
         "BaseInferenceComponentName":{
           "shape":"InferenceComponentName",
           "documentation":"<p>The name of the base inference component that contains this inference component.</p>"
+        },
+        "DataCacheConfig":{
+          "shape":"InferenceComponentDataCacheConfigSummary",
+          "documentation":"<p>Settings that affect how the inference component caches data.</p>"
         }
       },
       "documentation":"<p>Details about the resources that are deployed with this inference component.</p>"
@@ -36136,7 +36169,7 @@
         },
         "S3DataDistributionType":{
           "shape":"ProcessingS3DataDistributionType",
-          "documentation":"<p>Whether to distribute the data from Amazon S3 to all processing instances with <code>FullyReplicated</code>, or whether the data from Amazon S3 is shared by Amazon S3 key, downloading one shard of data to each processing instance.</p>"
+          "documentation":"<p>Whether to distribute the data from Amazon S3 to all processing instances with <code>FullyReplicated</code>, or whether the data from Amazon S3 is sharded by Amazon S3 key, downloading one shard of data to each processing instance.</p>"
         },
         "S3CompressionType":{
           "shape":"ProcessingS3CompressionType",
