diff --git a/01-rag-concepts/02_managed_rag_custom_prompting_and_no_of_results.ipynb b/01-rag-concepts/02_managed_retreiveandgenerate_and_streamapi.ipynb similarity index 87% rename from 01-rag-concepts/02_managed_rag_custom_prompting_and_no_of_results.ipynb rename to 01-rag-concepts/02_managed_retreiveandgenerate_and_streamapi.ipynb index 1c4ec6d..47136df 100644 --- a/01-rag-concepts/02_managed_rag_custom_prompting_and_no_of_results.ipynb +++ b/01-rag-concepts/02_managed_retreiveandgenerate_and_streamapi.ipynb @@ -11,8 +11,10 @@ "This module contains:\n", "1. [Overview](#1-Overview)\n", "2. [Pre-requisites](#2-Pre-requisites)\n", - "3. [How to leverage maximum number of results](#3-how-to-leverage-the-maximum-number-of-results-feature)\n", - "4. [How to use custom prompting](#4-how-to-use-the-custom-prompting-feature)" + "3. [Understanding RetrieveAndGenerate API](#understanding-retrieveandgenerate-api)\n", + "4. [Streaming response using RetrieveAndGenerate API](#streaming-response-with-retrieveandgenerate-api)\n", + "5. [Adjust 'maximum number of results' retrieval parameter](#3-how-to-leverage-the-maximum-number-of-results-feature)\n", + "6. [How to use custom prompting](#4-how-to-use-the-custom-prompting-feature)" ] }, { @@ -107,6 +109,7 @@ "import json\n", "import boto3\n", "import pprint\n", + "import sys\n", "from botocore.exceptions import ClientError\n", "from botocore.client import Config\n", "\n", @@ -134,8 +137,8 @@ }, "outputs": [], "source": [ - "%store -r kb_id\n", - "# kb_id = \"<>\" # Replace with your knowledge base id here." + "# %store -r kb_id\n", + "kb_id = \"<>\" # Replace with your knowledge base id here."
] }, { @@ -159,7 +162,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ca915234", + "id": "bf0243f5", "metadata": {}, "outputs": [], "source": [ @@ -174,9 +177,17 @@ "$search_results$\n", "\n", "$output_format_instructions$\n", - "\"\"\"\n", - "\n", - "def retrieve_and_generate(query, kb_id, model_arn, max_results, prompt_template = default_prompt):\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ca915234", + "metadata": {}, + "outputs": [], + "source": [ + "def retrieve_and_generate(query, kb_id, model_arn, max_results=5, prompt_template = default_prompt):\n", " response = bedrock_agent_client.retrieve_and_generate(\n", " input={\n", " 'text': query\n", @@ -202,24 +213,10 @@ " return response\n" ] }, - { - "cell_type": "markdown", - "id": "a58b7808", - "metadata": {}, - "source": [ - "### How to leverage the maximum number of results feature\n", - "\n", - "In some use cases; the FM responses might be lacking enough context to provide relevant answers or relying that it couldn't find the requested info. 
Which could be fixed by modifying the maximum number of retrieved results.\n", - "\n", - "In the following example, we are going to run the following query with a few number of results (5):\n", - "\\\n", - "```Provide a list of risks for Octank financial in bulleted points.```\n" - ] - }, { "cell_type": "code", "execution_count": null, - "id": "e2918161", + "id": "ccd657e6", "metadata": {}, "outputs": [], "source": [ @@ -241,6 +238,104 @@ " pprint.pp(contexts)\n" ] }, + { + "cell_type": "markdown", + "id": "5f1d6784", + "metadata": {}, + "source": [ + "### Test RetrieveAndGenerate API" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbefffdd", + "metadata": {}, + "outputs": [], + "source": [ + "query = \"\"\"Provide a list of risks for Octank financial in numbered list without description.\"\"\"\n", + "\n", + "results = retrieve_and_generate(query = query, kb_id = kb_id, model_arn = model_arn)\n", + "\n", + "print_generation_results(results)" + ] + }, + { + "cell_type": "markdown", + "id": "f6d8439e", + "metadata": {}, + "source": [ + "### Streaming response with RetrieveAndGenerate API\n", + "\n", + "Using new [streaming API](https://docs.aws.amazon.com/bedrock/latest/APIReference/API_agent-runtime_RetrieveAndGenerateStream.html) customers can use `retrieve_and_generate_stream` API from Amazon Bedrock Knowledge Bases to receive the response as it is being generated by the Foundation Model (FM), rather than waiting for the complete response. This will help customers to reduce the time to first token in case of latency sensitive applications." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86a3a94a", + "metadata": {}, + "outputs": [], + "source": [ + "def retrieve_and_generate_stream(query, kb_id, model_arn, max_results=5, prompt_template = default_prompt):\n", + " response = bedrock_agent_client.retrieve_and_generate_stream(\n", + " input={\n", + " 'text': query\n", + " },\n", + " retrieveAndGenerateConfiguration={\n", + " 'type': 'KNOWLEDGE_BASE',\n", + " 'knowledgeBaseConfiguration': {\n", + " 'knowledgeBaseId': kb_id,\n", + " 'modelArn': model_arn, \n", + " 'retrievalConfiguration': {\n", + " 'vectorSearchConfiguration': {\n", + " 'numberOfResults': max_results # will fetch top N documents which closely match the query\n", + " }\n", + " },\n", + " 'generationConfiguration': {\n", + " 'promptTemplate': {\n", + " 'textPromptTemplate': prompt_template\n", + " }\n", + " }\n", + " }\n", + " }\n", + " )\n", + "\n", + " for event in response['stream']:\n", + " if 'output' in event:\n", + " chunk = event['output']\n", + " sys.stdout.write(chunk['text'])\n", + " sys.stdout.flush()\n", + "\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a55d95ce", + "metadata": {}, + "outputs": [], + "source": [ + "query = \"\"\"Provide a list of risks for Octank financial in numbered list without description.\"\"\"\n", + "\n", + "retrieve_and_generate_stream(query = query, kb_id = kb_id, model_arn = model_arn)" + ] + }, + { + "cell_type": "markdown", + "id": "a58b7808", + "metadata": {}, + "source": [ + "### Adjust 'maximum number of results' retrieval parameter\n", + "\n", + "In some use cases; the FM responses might be lacking enough context to provide relevant answers or relying that it couldn't find the requested info. 
Which could be fixed by modifying the maximum number of retrieved results.\n", + "\n", + "In the following example, we are going to run the following query with a few number of results (3):\n", + "\\\n", + "```Provide a list of risks for Octank financial in bulleted points.```\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -990,9 +1085,9 @@ ], "instance_type": "ml.t3.medium", "kernelspec": { - "display_name": "Python 3 (Data Science 3.0)", + "display_name": "Python 3", "language": "python", - "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/sagemaker-data-science-310-v1" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1004,7 +1099,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/01-rag-concepts/05_document_level_kb_ingestion.ipynb b/01-rag-concepts/05_document_level_kb_ingestion.ipynb new file mode 100644 index 0000000..1b10e47 --- /dev/null +++ b/01-rag-concepts/05_document_level_kb_ingestion.ipynb @@ -0,0 +1,1121 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Near real-time ingestion using Document level API (DLA) - End to end example \n", + "\n", + "With Document Level API (DLA), customers can now efficiently and cost-effectively ingest, update, or delete data directly from Amazon Bedrock Knowledge Bases using a single API call, without the need to perform a full sync with the data source periodically or after every change.\n", + "\n", + "To read more about DLA, see the [documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/kb-direct-ingestion-add.html)\n", + "\n", + "\n", + "#### Pre-requisites: \n", + "\n", + "- You have already created a Amazon Bedrock Knowledge base by running [01_create_ingest_documents_test_kb_multi_ds.ipynb](/knowledge-bases/01-rag-concepts/01_create_ingest_documents_test_kb_multi_ds.ipynb)\n", + "- 
Note down the KB id\n", + "\n", + "#### Test Knowledge base: \n", + "- Ingest document into Knowledge base using DLA.\n", + "- Start querying knowledge base for information\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "
\n", + "Note: Please make sure to enable `Anthropic Claude 3 Sonnet` and `Titan Text Embeddings V2` model access in the Amazon Bedrock console.\n", + "
-------------------------------------------------------------------------------------------------------------------------------------------------------
\n", + " \n", + "Please run the notebook cell by cell instead of using the \"Run All Cells\" option.\n", + "
\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 0 - Setup\n", + "Before running the rest of this notebook, you'll need to run the cells below to (ensure necessary libraries are installed and) connect to Bedrock.\n", + "\n", + "Please ignore any pip dependency error (if you see any while installing libraries)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --force-reinstall -q -r ../../requirements.txt --quiet\n", + "%pip install --upgrade boto3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# restart kernel\n", + "from IPython.core.display import HTML\n", + "HTML(\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import boto3\n", + "print(boto3.__version__)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import time\n", + "import boto3\n", + "import logging\n", + "import pprint\n", + "import json\n", + "import uuid\n", + "\n", + "# Set the path to import module\n", + "from pathlib import Path\n", + "current_path = Path().resolve()\n", + "current_path = current_path.parent\n", + "if str(current_path) not in sys.path:\n", + " sys.path.append(str(current_path))\n", + "# Print sys.path to verify\n", + "# print(sys.path)\n", + "\n", + "from utils.knowledge_base import BedrockKnowledgeBase\n", + "from utils.knowledge_base_operators import create_document_config, ingest_documents_dla\n" + ] + }, 
+ { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Clients\n", + "s3_client = boto3.client('s3')\n", + "sts_client = boto3.client('sts')\n", + "session = boto3.session.Session()\n", + "region = session.region_name\n", + "account_id = sts_client.get_caller_identity()[\"Account\"]\n", + "bedrock_agent_client = boto3.client('bedrock-agent')\n", + "bedrock_agent_runtime_client = boto3.client('bedrock-agent-runtime') \n", + "logging.basicConfig(format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s', level=logging.INFO)\n", + "logger = logging.getLogger(__name__)\n", + "region, account_id" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Ingest document directly into Knowledge base using Document Level API (INLINE)\n", + "\n", + "To ingest documents directly into a knowledge base, send an [IngestKnowledgeBaseDocuments](https://docs.aws.amazon.com/bedrock/latest/APIReference/API_agent_IngestKnowledgeBaseDocuments.html) request by specifying the knowledge base ID and data source \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# %store -r kb_id\n", + "\n", + "kb_id = \"\" \n", + "ds_id_list = bedrock_agent_client.list_data_sources( knowledgeBaseId=kb_id, maxResults=100)['dataSourceSummaries']\n", + "ds_id = ds_id_list[0]['dataSourceId']\n", + "\n", + "kb_id, ds_id" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "Currently You can use DLA only if your knowledge base is connected to one of the following data source types:\n", + "\n", + " - Amazon S3\n", + " - Custom " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Based on various configurations, there can be different types of ingest patterns as shown below. 
To read more about these patterns, refer to the API documentation [here](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-agent/client/ingest_knowledge_base_documents.html)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# You can choose between different Ingest patterns (based on Data source type i.e. s3 or Custom) using DLA\n", + "\n", + "print(\"Different DLA ingest pattern:\")\n", + "# For S3 Data source type\n", + "print(\"1. Data Source Type: S3 - Metadata: INLINE\")\n", + "print(\"2. Data Source Type: S3 - Metadata: S3_Location\")\n", + "\n", + "# For CUSTOM Data source type\n", + "print(\"3. Data Source Type: CUSTOM - Document source type: INLINE - conetent type: TEXT - Metadata: INLINE\")\n", + "print(\"4. Data Source Type: CUSTOM - Document source type: INLINE - conetent type: TEXT - Metadata: S3_Location\")\n", + "print(\"5. Data Source Type: CUSTOM - Document source type: INLINE - conetent type: BYTE - Metadata: INLINE\")\n", + "print(\"6. Data Source Type: CUSTOM - Document source type: INLINE - conetent type: BYTE - Metadata: S3_Location\")\n", + "print(\"7. Data Source Type: CUSTOM - Document source type: S3_LOCATION - Metadata: INLINE\")\n", + "print(\"8. Data Source Type: CUSTOM - Document source type: S3_LOCATION - Metadata: S3_Location\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use DLA, you have to define the knowledgeBaseId, dataSourceId, and the documents (a list of objects, each of which contains information about the documents to add).\n", + "\n", + "- We have created a custom function named `create_document_config`, which will define the list of documents based on the ingest pattern you chose. 
This function accepts the following arguments:\n", + "\n", + " - data_source_type: Either 'CUSTOM' or 'S3'.\n", + " - document_id: The ID for a custom document.\n", + " - s3_uri: The S3 URI for an S3 data source.\n", + " - inline_content: The inline content configuration for a custom data source.\n", + " - metadata: Metadata information that can be a list of inline attributes or an S3 location.\n", + "\n", + "For this notebook, we have implemented only four ingest patterns, i.e. patterns 1, 2, 3 and 4, but you can extend it to patterns 5, 6, 7 and 8.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "
\n", + "Note: While using DLA, the dataSourceType specified in the content for each document must match the type of the data source that you specify; otherwise, ingestion will throw an error.\n", + "
    - if your KB data source is S3, then choose S3 as data source type while using DLA API
\n", + "
    - if your KB data source is CUSTOM, then choose CUSTOM as data source type while using DLA API
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Provide below information based on your ingest pattern\n", + "#---------------------------------------------------------------------------------------\n", + "# FOR INGEST PATTERN CHOICE = 1, i.e. Data Source Type: S3 - Metadata: INLINE\n", + "# **************************************************************************************\n", + "# S3 uri of the data to be ingetsed\n", + "document_s3_uri = 's3://standard-kb-7104855/octank_financial_10K (1).pdf'\n", + "\n", + "# INLINE Metadata details\n", + "metdata_1 = {'key': 'company', 'value': { 'stringValue': 'octank', 'type': 'STRING'}}\n", + "metdata_2 = {'key': 'document', 'value': { 'stringValue': '10k', 'type': 'STRING'}}\n", + "metadata_list =[metdata_1, metdata_2]\n", + "\n", + "inline_metadata ={'inlineAttributes':metadata_list}\n", + "\n", + "# Create document configuration for this ingest pattern\n", + "s3_doc_inline_metadata = create_document_config(\n", + " data_source_type='S3',\n", + " s3_uri=document_s3_uri,\n", + " metadata= inline_metadata\n", + ")\n", + "\n", + "# #---------------------------------------------------------------------------------------\n", + "# # FOR INGEST PATTERN CHOICE = 2, i.e. 
Data Source Type: S3 - Metadata: S3_Location\n", + "# # **************************************************************************************\n", + "# document_s3_uri = '' \n", + "# metadata_s3_uri = '' \n", + "# metadata_s3_accountid = '' \n", + "\n", + "# # if your metada is stored at S3_location\n", + "# metadata_s3_uri = '' \n", + "# metadata_s3_accountid = '' \n", + "# s3_metadata = {'uri': metadata_s3_uri, 'bucketOwnerAccountId': metadata_s3_accountid }\n", + "\n", + "# s3_doc_s3_metadata = create_document_config(\n", + "# data_source_type='S3',\n", + "# s3_uri='s3://standard-kb-7104855/octank_financial_10K (1).pdf',\n", + "# metadata= s3_metadata\n", + "# )\n", + "\n", + "\n", + "## ---------------------------------------------------------------------------------------\n", + "## FOR INGEST PATTERN CHOICE = 3, i.e. Data Source Type: CUSTOM - Document source type: INLINE - conetent type: TEXT - Metadata: INLINE\n", + "## **************************************************************************************\n", + "\n", + "## Example : USE DLA to ingest a custom document with TEXT inline content and inline metadata\n", + "\n", + "# document_content = '''This is sample document content'''\n", + "# document_id = ''\n", + "\n", + "# # if your Metadata is INLINE\n", + "# metdata_1 = {'key': 'company', 'value': { 'stringValue': 'octank', 'type': 'STRING'}}\n", + "# metdata_2 = {'key': 'document', 'value': { 'stringValue': '10k', 'type': 'STRING'}}\n", + "# metadata_list =[metdata_1, metdata_2]\n", + "\n", + "# inline_metadata ={'inlineAttributes': metadata_list}\n", + "\n", + "# custom_inline_text_inline_metadata = create_document_config(\n", + "# data_source_type='CUSTOM',\n", + "# document_id=document_id,\n", + "# inline_content={\n", + "# 'type': 'TEXT',\n", + "# 'data': document_content\n", + "# },\n", + "# metadata= inline_metadata\n", + "# )\n", + "\n", + "##---------------------------------------------------------------------------------------\n", + "## FOR 
INGEST PATTERN CHOICE = 4, i.e. Data Source Type: CUSTOM - Document source type: INLINE - conetent type: TEXT - Metadata: S3_Location\n", + "## **************************************************************************************\n", + "\n", + "## Example : USE DLA to ingest a custom document with TEXT inline content and S3 metadata\n", + "\n", + "# document_content = '''This is sample document content'''\n", + "# document_id = ''\n", + "\n", + "# # if your metada is stored at S3_location\n", + "# metadata_s3_uri = '' \n", + "# metadata_s3_accountid = '' \n", + "# s3_metadata = {'uri': metadata_s3_uri, 'bucketOwnerAccountId': metadata_s3_accountid }\n", + "\n", + "# custom_inline_text_s3_metadata = create_document_config(\n", + "# data_source_type='CUSTOM',\n", + "# document_id=document_id,\n", + "# inline_content={\n", + "# 'type': 'TEXT',\n", + "# 'data': document_content\n", + "# },\n", + "# metadata=s3_metadata\n", + "#)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After the document list has been configured, you can call the `ingest_documents_dla` (another custom function) function to ingest the documents into Knowledge base which will call [ingest_knowledge_base_documents](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-agent/client/ingest_knowledge_base_documents.html) API.\n", + "\n", + "- This function accepts the following arguments:\n", + "\n", + " - knowledge_base_id: The ID of the knowledge base.\n", + " - data_source_id: The ID of the data source.\n", + " - documents: A list of document configurations to ingest." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Ingest the documents using DLA\n", + "response = ingest_documents_dla(\n", + " knowledge_base_id=kb_id,\n", + " data_source_id=ds_id,\n", + " documents=[ s3_doc_inline_metadata] # Based on the ingest pattern, this can be changed to [s3_doc_s3_metadata], [custom_inline_text_inline_metadata] or [custom_inline_text_s3_metadata]\n", + ")\n", + "\n", + "print(response)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check the status of the documents ingested via DLA" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## To fetch the status of documents\n", + "# response = bedrock_agent_client.list_knowledge_base_documents(\n", + "# dataSourceId=ds_id,\n", + "# knowledgeBaseId=kb_id,\n", + "# )\n", + "# print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.2 Test the Knowledge Base\n", + "Now the Knowlegde Base is available we can test it out using the [**retrieve**](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-agent-runtime/client/retrieve.html) and [**retrieve_and_generate**](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-agent-runtime/client/retrieve_and_generate.html) functions. \n", + "\n", + "#### Testing Knowledge Base with Retrieve and Generate API\n", + "\n", + "Let's first test the knowledge base using the retrieve and generate API. 
With this API, Bedrock takes care of retrieving the necessary references from the knowledge base and generating the final answer using a foundation model from Bedrock.\n", + "\n", + "query = `Provide a summary of consolidated statements of cash flows of Octank Financial for the fiscal years ended December 31, 2019.`\n", + "\n", + "The right response for this query as per ground truth QA pair is:\n", + "```\n", + "The cash flow statement for Octank Financial in the year ended December 31, 2019 reveals the following:\n", + "- Cash generated from operating activities amounted to $710 million, which can be attributed to a $700 million profit and non-cash charges such as depreciation and amortization.\n", + "- Cash outflow from investing activities totaled $240 million, with major expenditures being the acquisition of property, plant, and equipment ($200 million) and marketable securities ($60 million), partially offset by the sale of property, plant, and equipment ($40 million) and maturing marketable securities ($20 million).\n", + "- Financing activities resulted in a cash inflow of $350 million, stemming from the issuance of common stock ($200 million) and long-term debt ($300 million), while common stock repurchases ($50 million) and long-term debt payments ($100 million) reduced the cash inflow.\n", + "Overall, Octank Financial experienced a net cash enhancement of $120 million in 2019, bringing their total cash and cash equivalents to $210 million." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query = \"Provide a summary of consolidated statements of cash flows of Octank Financial for the fiscal years ended December 31, 2019?\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "foundation_model = \"anthropic.claude-3-sonnet-20240229-v1:0\"\n", + "\n", + "response = bedrock_agent_runtime_client.retrieve_and_generate(\n", + " input={\n", + " \"text\": query\n", + " },\n", + " retrieveAndGenerateConfiguration={\n", + " \"type\": \"KNOWLEDGE_BASE\",\n", + " \"knowledgeBaseConfiguration\": {\n", + " 'knowledgeBaseId': kb_id,\n", + " \"modelArn\": \"arn:aws:bedrock:{}::foundation-model/{}\".format(region, foundation_model),\n", + " \"retrievalConfiguration\": {\n", + " \"vectorSearchConfiguration\": {\n", + " \"numberOfResults\":5\n", + " } \n", + " }\n", + " }\n", + " }\n", + ")\n", + "\n", + "print(response['output']['text'],end='\\n'*2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see, with the retrieve and generate API we get the final response directly and we don't see the different sources used to generate this response. Let's now retrieve the source information from the knowledge base with the retrieve API.\n", + "\n", + "#### Testing Knowledge Base with Retrieve API\n", + "If you need an extra layer of control, you can retrieve the chuncks that best match your query using the retrieve API. In this setup, we can configure the desired number of results and control the final answer with your own application logic. The API then provides you with the matching content, its S3 location, the similarity score and the chunk metadata." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response_ret = bedrock_agent_runtime_client.retrieve(\n", + "    knowledgeBaseId=kb_id,\n", + "    # nextToken is intentionally omitted: it must come from a previous retrieve() response, never a literal placeholder\n", + "    retrievalConfiguration={\n", + "        \"vectorSearchConfiguration\": {\n", + "            \"numberOfResults\": 5,\n", + "        }\n", + "    },\n", + "    retrievalQuery={\n", + "        \"text\": \"How many new positions were opened across Amazon's fulfillment and delivery network?\"\n", + "    }\n", + ")\n", + "\n", + "def response_print(retrieve_resp):\n", + "    \"\"\"Print the text, location, score and metadata of every chunk in retrieve_resp['retrievalResults'].\"\"\"\n", + "    for num, chunk in enumerate(retrieve_resp['retrievalResults'], 1):\n", + "        print(f'Chunk {num}: ',chunk['content']['text'],end='\\n'*2)\n", + "        print(f'Chunk {num} Location: ',chunk['location'],end='\\n'*2)\n", + "        print(f'Chunk {num} Score: ',chunk['score'],end='\\n'*2)\n", + "        print(f'Chunk {num} Metadata: ',chunk['metadata'],end='\\n'*2)\n", + "\n", + "response_print(response_ret)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Clean up\n", + "Please make sure to uncomment and run the below section to delete all the resources."
+ ] + } + ], + "metadata": { + "availableInstances": [ + { + "_defaultOrder": 0, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.t3.medium", + "vcpuNum": 2 + }, + { + "_defaultOrder": 1, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.t3.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 2, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.t3.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 3, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.t3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 4, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 5, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 6, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 7, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 8, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 9, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5.12xlarge", + 
"vcpuNum": 48 + }, + { + "_defaultOrder": 10, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 11, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 12, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5d.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 13, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5d.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 14, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5d.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 15, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5d.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 16, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5d.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 17, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5d.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 18, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5d.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 19, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5d.24xlarge", + 
"vcpuNum": 96 + }, + { + "_defaultOrder": 20, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": true, + "memoryGiB": 0, + "name": "ml.geospatial.interactive", + "supportedImageNames": [ + "sagemaker-geospatial-v1-0" + ], + "vcpuNum": 0 + }, + { + "_defaultOrder": 21, + "_isFastLaunch": true, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.c5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 22, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.c5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 23, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.c5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 24, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.c5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 25, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 72, + "name": "ml.c5.9xlarge", + "vcpuNum": 36 + }, + { + "_defaultOrder": 26, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 96, + "name": "ml.c5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 27, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 144, + "name": "ml.c5.18xlarge", + "vcpuNum": 72 + }, + { + "_defaultOrder": 28, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.c5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 29, + "_isFastLaunch": true, + "category": "Accelerated computing", + "gpuNum": 1, + 
"hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g4dn.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 30, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g4dn.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 31, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g4dn.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 32, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g4dn.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 33, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g4dn.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 34, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g4dn.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 35, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 61, + "name": "ml.p3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 36, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 244, + "name": "ml.p3.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 37, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 488, + "name": "ml.p3.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 38, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.p3dn.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 39, + "_isFastLaunch": 
false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.r5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 40, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.r5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 41, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.r5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 42, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.r5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 43, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.r5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 44, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.r5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 45, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.r5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 46, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.r5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 47, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 48, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 49, + "_isFastLaunch": 
false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 50, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 51, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 52, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 53, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.g5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 54, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.g5.48xlarge", + "vcpuNum": 192 + }, + { + "_defaultOrder": 55, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4d.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 56, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4de.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 57, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.trn1.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 58, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.trn1.32xlarge", + 
"vcpuNum": 128 + }, + { + "_defaultOrder": 59, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.trn1n.32xlarge", + "vcpuNum": 128 + } + ], + "instance_type": "ml.t3.medium", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}