
Commit 5fd2aa8

Merge pull request #194 from mrgiba/main
Use Cross-region Inference; Retry request in case of Bedrock throttling
2 parents d0aa6bc + f0477ef commit 5fd2aa8

File tree

4 files changed: +56 −9 lines changed

samples/contract-compliance-analysis/back-end/README.md

Lines changed: 11 additions & 6 deletions

```diff
@@ -128,17 +128,22 @@ You can then go the Amazon Cognito page at the AWS Console, search for the User
 #### Enable access to Bedrock models
 
 Models are not enabled by default on Amazon Bedrock, so if this is the first time you are going to use Amazon Bedrock,
-it is recommended to first check if the access is already enabled.
+it is recommended to first check if the access is already enabled.
 
-Go to the AWS Console, then go to Amazon Bedrock
+The default model is Anthropic Claude 3 Haiku v1, being used in [cross-region inference](https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference.html) mode. Please ensure this model is enabled in the regions listed in the **US Anthropic Claude 3 Haiku** section from the [Supported Regions and models for inference profiles](https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html) page.
 
-Click Model access at the left side
+Steps:
+
+- Go to the AWS Console, then go to Amazon Bedrock
+
+- Click Model access at the left side
 
 ![Bedrock Model Access](images/bedrock-model-access.png)
 
-Click the **Enable specific models** button and enable the checkbox for Anthropic Claude models
+- Click the **Enable specific models** button and enable the checkbox for Anthropic Claude models
+
+- Click **Next** and **Submit** buttons
 
-Click **Next** and **Submit** buttons
 
 ## How to customize contract analysis according to your use case
 
@@ -172,7 +177,7 @@ The recommended sequence of steps:
 
 By default, the application uses Anthropic Claude 3 Haiku v1. Here are steps explaining how to update the model to use. For this example, we will use [Amazon Nova Pro v1](https://aws.amazon.com/blogs/aws/introducing-amazon-nova-frontier-intelligence-and-industry-leading-price-performance/):
 
-- Open the [app_properties.yaml](./app_properties.yaml) file and update the field ```claude_model_id``` to use the model you selected. In this case, we update the field to ```us.amazon.nova-pro-v1:0```. Replace it with the model id you want to use. The list of model ids available through Amazon Bedrock is available in the [documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html). Ensure the model you are selecting is enabled in the console (Amazon Bedrock -> Model access) and available in your region.
+- Open the [app_properties.yaml](./app_properties.yaml) file and update the field ```claude_model_id``` to use the model you selected. In this case, we update the field to ```us.amazon.nova-pro-v1:0```. Replace it with the model id you want to use. The list of model ids available through Amazon Bedrock is available in the [documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html). Ensure the model you are selecting is enabled in the console (Amazon Bedrock -> Model access) and available in your region. In case of using a predefined Inference Profile to use a model in a cross-region fashion, consult [documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html) of all regions that needs to have model access enabled.
 - Depending on the model selected, you might need to update some hardcoded values regarding the max number of new tokens generated. For instance, Amazon Nova Pro v1 supports 5000 output tokens, which doesn't require any modifications. However, some models might have a max output tokens of 3000, which requires some changes in the sample. Update the following lines if required:
   - In file [fn-preprocess-contract/index.py](./stack/sfn/preprocessing/fn-preprocess-contract/index.py), update line 96 to change the chunks size to a value smaller than the max tokens output for your model, as well as line 107 to match your model's max output tokens.
   - In file [scripts/utils/llm.py](./scripts/utils/llm.py), update the max tokens output line 28.
```

samples/contract-compliance-analysis/back-end/app_properties.yaml

Lines changed: 1 addition & 1 deletion

```diff
@@ -14,7 +14,7 @@ language: English
 
 # Claude Model ID (Global configuration). To switch to a smaller Language Model for cost savings).
 # Disabling the property will let each prompt execution to its default model id
-claude_model_id: anthropic.claude-3-haiku-20240307-v1:0
+claude_model_id: us.anthropic.claude-3-haiku-20240307-v1:0
 
 # Thresholds determine the maximum number of clauses with risk that a contract can have without requiring human review,
 # per risk level
```
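The only change in this file is the `us.` prefix, which turns the base model ID into a predefined cross-region inference profile ID. A minimal sketch of that relationship (the helper name and the set of geography prefixes are illustrative assumptions, not part of the sample):

```python
# Hypothetical helper (not part of the sample): derive a cross-region
# inference profile ID from a base Bedrock model ID. Geography prefixes
# such as "us", "eu", and "apac" denote predefined inference profiles.
def to_inference_profile_id(model_id: str, geo: str = "us") -> str:
    if model_id.split(".", 1)[0] in ("us", "eu", "apac"):
        return model_id  # already a profile ID, leave untouched
    return f"{geo}.{model_id}"

print(to_inference_profile_id("anthropic.claude-3-haiku-20240307-v1:0"))
# → us.anthropic.claude-3-haiku-20240307-v1:0
```

The resulting profile ID is passed to Bedrock anywhere a plain model ID would go, which is why only this one configuration value needed to change.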

samples/contract-compliance-analysis/back-end/stack/sfn/common-layer/llm.py

Lines changed: 42 additions & 2 deletions

```diff
@@ -15,6 +15,9 @@
 import logging
 import os
 
+from retrying import retry
+from botocore.config import Config
+from botocore.exceptions import ClientError
 from langchain_aws import ChatBedrock
 from langchain_core.messages import HumanMessage
 from langchain_core.prompts import ChatPromptTemplate
@@ -24,8 +27,45 @@
 logger = logging.getLogger()
 logger.setLevel(os.getenv("LOG_LEVEL", "INFO"))
 
-bedrock_client = boto3.client('bedrock-runtime')
+bedrock_client = boto3.client('bedrock-runtime', config=Config(
+    connect_timeout=180,
+    read_timeout=180,
+    retries={
+        "max_attempts": 50,
+        "mode": "adaptive",
+    },
+))
 
+class BedrockRetryableError(Exception):
+    """Custom exception for retryable Bedrock errors"""
+    pass
+
+@retry(
+    wait_fixed=10000,  # 10 seconds between retries
+    stop_max_attempt_number=None,  # Keep retrying indefinitely
+    retry_on_exception=lambda ex: isinstance(ex, BedrockRetryableError),
+)
+def invoke_chain_with_retry(chain):
+    """Invoke Bedrock with retry logic for throttling"""
+    try:
+        return chain.invoke({})
+    except ClientError as exc:
+        logger.warning(f"Bedrock ClientError: {exc}")
+
+        if exc.response["Error"]["Code"] == "ThrottlingException":
+            logger.warning("Bedrock throttling. Retrying...")
+            raise BedrockRetryableError(str(exc))
+        elif exc.response["Error"]["Code"] == "ModelTimeoutException":
+            logger.warning("Bedrock ModelTimeoutException. Retrying...")
+            raise BedrockRetryableError(str(exc))
+        else:
+            raise
+    except bedrock_client.exceptions.ThrottlingException as throttlingExc:
+        logger.warning("Bedrock ThrottlingException. Retrying...")
+        raise BedrockRetryableError(str(throttlingExc))
+    except bedrock_client.exceptions.ModelTimeoutException as timeoutExc:
+        logger.warning("Bedrock ModelTimeoutException. Retrying...")
+        raise BedrockRetryableError(str(timeoutExc))
 
 def invoke_llm(prompt, model_id, temperature=0.5, top_k=None, top_p=0.8, max_new_tokens=4096, verbose=False):
     model_id = (model_id or CLAUDE_MODEL_ID)
@@ -57,7 +97,7 @@ def invoke_llm(prompt, model_id, temperature=0.5, top_k=None, top_p=0.8, max_new
     ])
     chain = prompt | chat
 
-    response = chain.invoke({})
+    response = invoke_chain_with_retry(chain)
     content = response.content
 
     usage_data = None
```
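The `retrying` decorator in this diff retries `chain.invoke({})` every 10 seconds, indefinitely, whenever the wrapped call raises `BedrockRetryableError`; any other exception propagates immediately. A self-contained sketch of that same control flow using only the standard library (the function name and the flaky stand-in for `chain.invoke` are illustrative, not part of the sample):

```python
import time

class BedrockRetryableError(Exception):
    """Raised when a Bedrock call failed in a retryable way (throttling, model timeout)."""

def invoke_with_fixed_retry(fn, wait_seconds=10.0):
    """Equivalent control flow to the @retry decorator in the diff:
    retry forever on BedrockRetryableError with a fixed wait;
    any other exception propagates to the caller immediately."""
    while True:
        try:
            return fn()
        except BedrockRetryableError:
            time.sleep(wait_seconds)

# Stand-in for chain.invoke({}) that is throttled twice, then succeeds:
calls = {"n": 0}
def flaky_invoke():
    calls["n"] += 1
    if calls["n"] < 3:
        raise BedrockRetryableError("ThrottlingException")
    return "ok"

print(invoke_with_fixed_retry(flaky_invoke, wait_seconds=0.01))  # → ok
```

Note this client-side loop sits on top of botocore's own adaptive retries (`max_attempts: 50`), so the Lambda keeps waiting out sustained throttling rather than failing the Step Functions task.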
Lines changed: 2 additions & 0 deletions

```diff
@@ -0,0 +1,2 @@
+retrying==1.3.4
+botocore==1.38.9
```
