From c51b1b479825951d3db017b47b5898a9473146de Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Tue, 1 Oct 2024 15:09:58 -0400 Subject: [PATCH 1/9] Add AzureDBForPostgres connector --- .../third_party/postgres-memory.ipynb | 173 ++++++++++++++++-- .../memory/azure_db_for_postgres/__init__.py | 1 + .../azure_db_for_postgres_collection.py | 57 ++++++ .../azure_db_for_postgres_settings.py | 43 +++++ .../azure_db_for_postgres_store.py | 9 + .../memory/azure_db_for_postgres/constants.py | 3 + .../memory/azure_db_for_postgres/utils.py | 25 +++ .../memory/postgres/postgres_settings.py | 14 +- 8 files changed, 309 insertions(+), 16 deletions(-) create mode 100644 python/semantic_kernel/connectors/memory/azure_db_for_postgres/__init__.py create mode 100644 python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py create mode 100644 python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py create mode 100644 python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_store.py create mode 100644 python/semantic_kernel/connectors/memory/azure_db_for_postgres/constants.py create mode 100644 python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py diff --git a/python/samples/getting_started/third_party/postgres-memory.ipynb b/python/samples/getting_started/third_party/postgres-memory.ipynb index b0069a59a1c7..872273efb9a4 100644 --- a/python/samples/getting_started/third_party/postgres-memory.ipynb +++ b/python/samples/getting_started/third_party/postgres-memory.ipynb @@ -33,6 +33,9 @@ ")\n", "from semantic_kernel.connectors.ai.open_ai.services.azure_text_embedding import AzureTextEmbedding\n", "from semantic_kernel.connectors.ai.open_ai.services.open_ai_text_embedding import OpenAITextEmbedding\n", + "from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_collection import (\n", + " AzureDBForPostgresCollection,\n", + ")\n", "from semantic_kernel.connectors.memory.postgres.postgres_collection import PostgresCollection\n", "from semantic_kernel.data.const import DistanceFunction, IndexKind\n", "from semantic_kernel.data.vector_store_model_decorator import vectorstoremodel\n", @@ -55,10 +58,23 @@ "\n", "To do this, copy the `.env.example` file to `.env` and fill in the necessary information.\n", "\n", + "Note that if you are using VS Code to execute this notebook, ensure you don't have alternate values in the .env file at the root of the workspace, as that will take precedence over the .env file in the notebook.\n", + "\n", "### Postgres configuration\n", "\n", "You'll need to provide a connection string to a Postgres database. You can use a local Postgres instance, or a cloud-hosted one.\n", - "You can provide a connection string, or provide environment variables with the connection information. See the .env.example file for `POSTGRES_` settings.\n", + "You can provide a connection string, or provide environment variables with the connection information. See the .env.example file for `POSTGRES_CONNECTION_STRING` and `PG*` settings.\n", + "\n", + "#### Using Azure DB for Postgres\n", + "\n", + "You can use Azure DB for Postgres by following the steps below:\n", + "\n", + "1. Create an Azure DB for Postgres instance. You can set the database to only allow Entra authentication to avoid\n", + " storing the password in the `.env` file.\n", + "2. Set the `PG*` settings, except for the password if using Entra authentication. 
If using entra, ensure you\n", + " are logged in via the Azure CLI. You can get the configuration values from the Azure portal Settings -> Connect\n", + " page.\n", + "3. Set \"USE_AZURE_DB_FOR_POSTGRES\" to True in the cell below.\n", "\n", "#### Using Docker\n", "\n", @@ -130,7 +146,13 @@ "USE_AZURE_OPENAI = True\n", "\n", "# The name of the OpenAI model or Azure OpenAI deployment to use\n", - "EMBEDDING_MODEL = \"text-embedding-3-small\"" + "EMBEDDING_MODEL = \"text-embedding-3-small\"\n", + "\n", + "# -- Postgres settings --\n", + "\n", + "# Use Azure DB For Postgres. This enables Entra authentication against the database instead of\n", + "# setting a password in the environment.\n", + "USE_AZURE_DB_FOR_POSTGRES = True" ] }, { @@ -240,9 +262,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found 10 papers on 'generative ai'\n" + ] + } + ], "source": [ "arxiv_papers: list[ArxivPaper] = [\n", " ArxivPaper.from_arxiv_info(paper)\n", @@ -263,11 +293,36 @@ "cell_type": "code", "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "TypeError", + "evalue": "cannot pickle '_thread.lock' object", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[7], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m USE_AZURE_DB_FOR_POSTGRES:\n\u001b[0;32m----> 2\u001b[0m collection \u001b[38;5;241m=\u001b[39m \u001b[43mAzureDBForPostgresCollection\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mArxivPaper\u001b[49m\u001b[43m]\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43marxiv_papers\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata_model_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mArxivPaper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43menv_file_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menv_file_path\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 6\u001b[0m collection \u001b[38;5;241m=\u001b[39m PostgresCollection[\u001b[38;5;28mstr\u001b[39m, ArxivPaper](\n\u001b[1;32m 7\u001b[0m collection_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124marxiv_papers\u001b[39m\u001b[38;5;124m\"\u001b[39m, data_model_type\u001b[38;5;241m=\u001b[39mArxivPaper, env_file_path\u001b[38;5;241m=\u001b[39menv_file_path\n\u001b[1;32m 8\u001b[0m )\n", + "File \u001b[0;32m~/proj/sk/semantic-kernel/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py:47\u001b[0m, in \u001b[0;36mAzureDBForPostgresCollection.__init__\u001b[0;34m(self, collection_name, data_model_type, data_model_definition, connection_pool, db_schema, env_file_path, env_file_encoding, settings)\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[38;5;66;03m# If the connection pool or settings were not provided, create the settings from the environment.\u001b[39;00m\n\u001b[1;32m 45\u001b[0m \u001b[38;5;66;03m# 
Passing this to the super class will enforce using Azure DB settings.\u001b[39;00m\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m connection_pool \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m settings:\n\u001b[0;32m---> 47\u001b[0m settings \u001b[38;5;241m=\u001b[39m \u001b[43mAzureDBForPostgresSettings\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 48\u001b[0m \u001b[43m \u001b[49m\u001b[43menv_file_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menv_file_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43menv_file_encoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menv_file_encoding\u001b[49m\n\u001b[1;32m 49\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 51\u001b[0m collection_name\u001b[38;5;241m=\u001b[39mcollection_name,\n\u001b[1;32m 52\u001b[0m data_model_type\u001b[38;5;241m=\u001b[39mdata_model_type,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 56\u001b[0m settings\u001b[38;5;241m=\u001b[39msettings,\n\u001b[1;32m 57\u001b[0m )\n", + "File \u001b[0;32m~/proj/sk/semantic-kernel/python/semantic_kernel/kernel_pydantic.py:56\u001b[0m, in \u001b[0;36mKernelBaseSettings.create\u001b[0;34m(cls, **data)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124menv_file_encoding\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m data\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124menv_file_encoding\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 55\u001b[0m data \u001b[38;5;241m=\u001b[39m {k: v \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m data\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m v \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m}\n\u001b[0;32m---> 56\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/proj/sk/semantic-kernel/python/.venv/lib/python3.10/site-packages/pydantic_settings/main.py:144\u001b[0m, in \u001b[0;36mBaseSettings.__init__\u001b[0;34m(__pydantic_self__, _case_sensitive, _env_prefix, _env_file, _env_file_encoding, _env_ignore_empty, _env_nested_delimiter, _env_parse_none_str, _env_parse_enums, _cli_prog_name, _cli_parse_args, _cli_settings_source, _cli_parse_none_str, _cli_hide_none_type, _cli_avoid_json, _cli_enforce_required, _cli_use_class_docs_for_groups, _cli_exit_on_error, _cli_prefix, _secrets_dir, **values)\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 121\u001b[0m __pydantic_self__,\n\u001b[1;32m 122\u001b[0m _case_sensitive: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 142\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 143\u001b[0m 
\u001b[38;5;66;03m# Uses something other than `self` the first arg to allow \"self\" as a settable attribute\u001b[39;00m\n\u001b[0;32m--> 144\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 145\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m__pydantic_self__\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_settings_build_values\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 146\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 147\u001b[0m \u001b[43m \u001b[49m\u001b[43m_case_sensitive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_case_sensitive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 148\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_prefix\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_prefix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 149\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_file\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 150\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_file_encoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_file_encoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 151\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_ignore_empty\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_ignore_empty\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 152\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_nested_delimiter\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_nested_delimiter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 153\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_parse_none_str\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_parse_none_str\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 154\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_parse_enums\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_parse_enums\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 155\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_prog_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_prog_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 156\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_parse_args\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_parse_args\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_settings_source\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_settings_source\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_parse_none_str\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_parse_none_str\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_hide_none_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_hide_none_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_avoid_json\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_avoid_json\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_enforce_required\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_enforce_required\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_use_class_docs_for_groups\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_use_class_docs_for_groups\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m_cli_exit_on_error\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_exit_on_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_prefix\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_prefix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[43m \u001b[49m\u001b[43m_secrets_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_secrets_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 166\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + " \u001b[0;31m[... skipping hidden 1 frame]\u001b[0m\n", + "File \u001b[0;32m~/proj/sk/semantic-kernel/python/.venv/lib/python3.10/site-packages/pydantic/_internal/_model_construction.py:292\u001b[0m, in \u001b[0;36minit_private_attributes\u001b[0;34m(self, context)\u001b[0m\n\u001b[1;32m 290\u001b[0m pydantic_private \u001b[38;5;241m=\u001b[39m {}\n\u001b[1;32m 291\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m name, private_attr \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__private_attributes__\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m--> 292\u001b[0m default \u001b[38;5;241m=\u001b[39m \u001b[43mprivate_attr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_default\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m default \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m PydanticUndefined:\n\u001b[1;32m 294\u001b[0m pydantic_private[name] \u001b[38;5;241m=\u001b[39m default\n", + "File \u001b[0;32m~/proj/sk/semantic-kernel/python/.venv/lib/python3.10/site-packages/pydantic/fields.py:936\u001b[0m, in \u001b[0;36mModelPrivateAttr.get_default\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 926\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_default\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Any:\n\u001b[1;32m 927\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Retrieve the default value of the object.\u001b[39;00m\n\u001b[1;32m 928\u001b[0m \n\u001b[1;32m 929\u001b[0m \u001b[38;5;124;03m If `self.default_factory` is `None`, the method will return a deep copy of the `self.default` object.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 934\u001b[0m \u001b[38;5;124;03m The default value of the object.\u001b[39;00m\n\u001b[1;32m 935\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 936\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_utils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msmart_deepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdefault\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_factory \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_factory()\n", + "File \u001b[0;32m~/proj/sk/semantic-kernel/python/.venv/lib/python3.10/site-packages/pydantic/_internal/_utils.py:318\u001b[0m, in \u001b[0;36msmart_deepcopy\u001b[0;34m(obj)\u001b[0m\n\u001b[1;32m 314\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mRuntimeError\u001b[39;00m):\n\u001b[1;32m 315\u001b[0m 
\u001b[38;5;66;03m# do we really dare to catch ALL errors? Seems a bit risky\u001b[39;00m\n\u001b[1;32m 316\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[0;32m--> 318\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.local/share/uv/python/cpython-3.10.15-linux-x86_64-gnu/lib/python3.10/copy.py:161\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 159\u001b[0m reductor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(x, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__reduce_ex__\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m reductor \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 161\u001b[0m rv \u001b[38;5;241m=\u001b[39m \u001b[43mreductor\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m4\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 163\u001b[0m reductor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(x, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__reduce__\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n", + "\u001b[0;31mTypeError\u001b[0m: cannot pickle '_thread.lock' object" + ] + } + ], "source": [ - "collection = PostgresCollection[str, ArxivPaper](\n", - " collection_name=\"arxiv_papers\", data_model_type=ArxivPaper, env_file_path=env_file_path\n", - ")" + "if USE_AZURE_DB_FOR_POSTGRES:\n", + " collection = AzureDBForPostgresCollection[str, ArxivPaper](\n", + " collection_name=\"arxiv_papers\", data_model_type=ArxivPaper, env_file_path=env_file_path\n", + " )\n", + "else:\n", + " collection = PostgresCollection[str, ArxivPaper](\n", + " collection_name=\"arxiv_papers\", data_model_type=ArxivPaper, env_file_path=env_file_path\n", + " )" ] }, { @@ -279,7 +334,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -305,7 +360,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -321,9 +376,17 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SYNC TOKEN: 
eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6Ik1jN2wzSXo5M2c3dXdnTmVFbW13X1dZR1BrbyIsImtpZCI6Ik1jN2wzSXo5M2c3dXdnTmVFbW13X1dZR1BrbyJ9.eyJhdWQiOiJodHRwczovL29zc3JkYm1zLWFhZC5kYXRhYmFzZS53aW5kb3dzLm5ldCIsImlzcyI6Imh0dHBzOi8vc3RzLndpbmRvd3MubmV0LzcyZjk4OGJmLTg2ZjEtNDFhZi05MWFiLTJkN2NkMDExZGI0Ny8iLCJpYXQiOjE3Mjc3MTE3OTQsIm5iZiI6MTcyNzcxMTc5NCwiZXhwIjoxNzI3NzE2NzU1LCJfY2xhaW1fbmFtZXMiOnsiZ3JvdXBzIjoic3JjMSJ9LCJfY2xhaW1fc291cmNlcyI6eyJzcmMxIjp7ImVuZHBvaW50IjoiaHR0cHM6Ly9ncmFwaC53aW5kb3dzLm5ldC83MmY5ODhiZi04NmYxLTQxYWYtOTFhYi0yZDdjZDAxMWRiNDcvdXNlcnMvNGEyMzRmMDUtYjkzMC00NDBlLTkyNjMtZjVkYjFlZDhmNDRhL2dldE1lbWJlck9iamVjdHMifX0sImFjciI6IjEiLCJhaW8iOiJBVlFBcS84WUFBQUE2aWlkY3lVT1lGS0gycktQRHRTU3Qzc1l1dzY2cXBUWHRsWFJuVUpFZVNVblNHRm4yQ2R1MGFKQTVjTUFEMUVNR2hyNzd5dzZCd2o3WUthVndYajRwTFNkdDJydW1TSHhGQWc2L3NmQnVhMD0iLCJhbXIiOlsicHdkIiwicnNhIiwibWZhIl0sImFwcGlkIjoiMDRiMDc3OTUtOGRkYi00NjFhLWJiZWUtMDJmOWUxYmY3YjQ2IiwiYXBwaWRhY3IiOiIwIiwiZGV2aWNlaWQiOiJhNDdhZGE2Yi0zOGRhLTQ3MDItOWZhNy0wMWI5ZWI2OWEwMWUiLCJmYW1pbHlfbmFtZSI6IkVtYW51ZWxlIiwiZ2l2ZW5fbmFtZSI6IlJvYiIsImlkdHlwIjoidXNlciIsImlwYWRkciI6IjcxLjE3NS4xMzcuMTI1IiwibmFtZSI6IlJvYiBFbWFudWVsZSIsIm9pZCI6IjRhMjM0ZjA1LWI5MzAtNDQwZS05MjYzLWY1ZGIxZWQ4ZjQ0YSIsIm9ucHJlbV9zaWQiOiJTLTEtNS0yMS0xMjQ1MjUwOTUtNzA4MjU5NjM3LTE1NDMxMTkwMjEtMjAxMDY2NiIsInB1aWQiOiIxMDAzMjAwMEYxRDk4NUZCIiwicmgiOiIxLkFSb0F2NGo1Y3ZHR3IwR1JxeTE4MEJIYlIxRFlQQkxmMmIxQWxOWEo4SHRfb2dNYUFHSWFBQS4iLCJzY3AiOiJ1c2VyX2ltcGVyc29uYXRpb24iLCJzdWIiOiJoS0lKSkRHeGtldXh4R1hnYjlMWlJ3RlZ6cDAyek5taTQwb20wWWJ3YWEwIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidW5pcXVlX25hbWUiOiJyb2JlbWFudWVsZUBtaWNyb3NvZnQuY29tIiwidXBuIjoicm9iZW1hbnVlbGVAbWljcm9zb2Z0LmNvbSIsInV0aSI6Ik5CblFnNy1YZVVpNTI5QmxDMFlEQUEiLCJ2ZXIiOiIxLjAiLCJ4bXNfaWRyZWwiOiIyIDEifQ.vEevMMADNaEW0J2yDTWpmHzZx2oupvHOJ5sDXSBCaK8qUdKEevyzzKFL9qeV0D5hYPwxR4EE37oJJImliTPoiCstHUApgu_jfkQzXdqWRNxLquHiAreFzcOCWXWlJLA9Vlv5V9PbnIWScoa8Alv3OBH4uPDTyIGDuFZ7DYfjiYNWydJL-P2zwWVh3Ks6ODfVLzCq64dDx6gsIZW684Ou_aRwKwEb86cB6zcR00peQ4uzyULMjCJm7IbyJTPUI9ijkU6ezNgbUQJUqSkJgcmOilZsp_p3OMwHXdXoCCYaOiI3EVh1JPkSo9VP3015W5xW_NvSujPeteqlFkxgiVDr5Q\n" + ] + } + ], "source": [ "async with collection:\n", " await collection.create_collection_if_not_exists()\n", @@ -339,9 +402,91 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SYNC TOKEN: 
eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6Ik1jN2wzSXo5M2c3dXdnTmVFbW13X1dZR1BrbyIsImtpZCI6Ik1jN2wzSXo5M2c3dXdnTmVFbW13X1dZR1BrbyJ9.eyJhdWQiOiJodHRwczovL29zc3JkYm1zLWFhZC5kYXRhYmFzZS53aW5kb3dzLm5ldCIsImlzcyI6Imh0dHBzOi8vc3RzLndpbmRvd3MubmV0LzcyZjk4OGJmLTg2ZjEtNDFhZi05MWFiLTJkN2NkMDExZGI0Ny8iLCJpYXQiOjE3Mjc3MTE3OTQsIm5iZiI6MTcyNzcxMTc5NCwiZXhwIjoxNzI3NzE2NzU1LCJfY2xhaW1fbmFtZXMiOnsiZ3JvdXBzIjoic3JjMSJ9LCJfY2xhaW1fc291cmNlcyI6eyJzcmMxIjp7ImVuZHBvaW50IjoiaHR0cHM6Ly9ncmFwaC53aW5kb3dzLm5ldC83MmY5ODhiZi04NmYxLTQxYWYtOTFhYi0yZDdjZDAxMWRiNDcvdXNlcnMvNGEyMzRmMDUtYjkzMC00NDBlLTkyNjMtZjVkYjFlZDhmNDRhL2dldE1lbWJlck9iamVjdHMifX0sImFjciI6IjEiLCJhaW8iOiJBVlFBcS84WUFBQUE2aWlkY3lVT1lGS0gycktQRHRTU3Qzc1l1dzY2cXBUWHRsWFJuVUpFZVNVblNHRm4yQ2R1MGFKQTVjTUFEMUVNR2hyNzd5dzZCd2o3WUthVndYajRwTFNkdDJydW1TSHhGQWc2L3NmQnVhMD0iLCJhbXIiOlsicHdkIiwicnNhIiwibWZhIl0sImFwcGlkIjoiMDRiMDc3OTUtOGRkYi00NjFhLWJiZWUtMDJmOWUxYmY3YjQ2IiwiYXBwaWRhY3IiOiIwIiwiZGV2aWNlaWQiOiJhNDdhZGE2Yi0zOGRhLTQ3MDItOWZhNy0wMWI5ZWI2OWEwMWUiLCJmYW1pbHlfbmFtZSI6IkVtYW51ZWxlIiwiZ2l2ZW5fbmFtZSI6IlJvYiIsImlkdHlwIjoidXNlciIsImlwYWRkciI6IjcxLjE3NS4xMzcuMTI1IiwibmFtZSI6IlJvYiBFbWFudWVsZSIsIm9pZCI6IjRhMjM0ZjA1LWI5MzAtNDQwZS05MjYzLWY1ZGIxZWQ4ZjQ0YSIsIm9ucHJlbV9zaWQiOiJTLTEtNS0yMS0xMjQ1MjUwOTUtNzA4MjU5NjM3LTE1NDMxMTkwMjEtMjAxMDY2NiIsInB1aWQiOiIxMDAzMjAwMEYxRDk4NUZCIiwicmgiOiIxLkFSb0F2NGo1Y3ZHR3IwR1JxeTE4MEJIYlIxRFlQQkxmMmIxQWxOWEo4SHRfb2dNYUFHSWFBQS4iLCJzY3AiOiJ1c2VyX2ltcGVyc29uYXRpb24iLCJzdWIiOiJoS0lKSkRHeGtldXh4R1hnYjlMWlJ3RlZ6cDAyek5taTQwb20wWWJ3YWEwIiwidGlkIjoiNzJmOTg4YmYtODZmMS00MWFmLTkxYWItMmQ3Y2QwMTFkYjQ3IiwidW5pcXVlX25hbWUiOiJyb2JlbWFudWVsZUBtaWNyb3NvZnQuY29tIiwidXBuIjoicm9iZW1hbnVlbGVAbWljcm9zb2Z0LmNvbSIsInV0aSI6Ik5CblFnNy1YZVVpNTI5QmxDMFlEQUEiLCJ2ZXIiOiIxLjAiLCJ4bXNfaWRyZWwiOiIyIDEifQ.vEevMMADNaEW0J2yDTWpmHzZx2oupvHOJ5sDXSBCaK8qUdKEevyzzKFL9qeV0D5hYPwxR4EE37oJJImliTPoiCstHUApgu_jfkQzXdqWRNxLquHiAreFzcOCWXWlJLA9Vlv5V9PbnIWScoa8Alv3OBH4uPDTyIGDuFZ7DYfjiYNWydJL-P2zwWVh3Ks6ODfVLzCq64dDx6gsIZW684Ou_aRwKwEb86cB6zcR00peQ4uzyULMjCJm7IbyJTPUI9ijkU6ezNgbUQJUqSkJgcmOilZsp_p3OMwHXdXoCCYaOiI3EVh1JPkSo9VP3015W5xW_NvSujPeteqlFkxgiVDr5Q\n", + "# Data Analysis in the Era of Generative AI\n", + "\n", + "Abstract: This paper explores the potential of AI-powered tools to reshape data\n", + "analysis, focusing on design considerations and challenges. We explore how the\n", + "emergence of large language and multimodal models offers new opportunities to\n", + "enhance various stages of data analysis workflow by translating high-level user\n", + "intentions into executable code, charts, and insights. We then examine human-\n", + "centered design principles that facilitate intuitive interactions, build user\n", + "trust, and streamline the AI-assisted analysis workflow across multiple apps.\n", + "Finally, we discuss the research challenges that impede the development of these\n", + "AI-based systems such as enhancing model capabilities, evaluating and\n", + "benchmarking, and understanding end-user needs.\n", + "Published: 2024-09-27 06:31:03\n", + "Link: http://arxiv.org/abs/2409.18475v1\n", + "PDF Link: http://arxiv.org/abs/2409.18475v1\n", + "Authors: Jeevana Priya Inala, Chenglong Wang, Steven Drucker, Gonzalo Ramos, Victor Dibia, Nathalie Riche, Dave Brown, Dan Marshall, Jianfeng Gao\n", + "Embedding: [ 0.0324665 0.03064382 0.04381268 ... 
-0.00220003 -0.01004753\n", + " 0.0266567 ]\n", + "\n", + "\n", + "# Speech to Reality: On-Demand Production using Natural Language, 3D Generative AI, and Discrete Robotic Assembly\n", + "\n", + "Abstract: We present a system that transforms speech into physical objects by combining\n", + "3D generative Artificial Intelligence with robotic assembly. The system\n", + "leverages natural language input to make design and manufacturing more\n", + "accessible, enabling individuals without expertise in 3D modeling or robotic\n", + "programming to create physical objects. We propose utilizing discrete robotic\n", + "assembly of lattice-based voxel components to address the challenges of using\n", + "generative AI outputs in physical production, such as design variability,\n", + "fabrication speed, structural integrity, and material waste. The system\n", + "interprets speech to generate 3D objects, discretizes them into voxel\n", + "components, computes an optimized assembly sequence, and generates a robotic\n", + "toolpath. The results are demonstrated through the assembly of various objects,\n", + "ranging from chairs to shelves, which are prompted via speech and realized\n", + "within 5 minutes using a 6-axis robotic arm.\n", + "Published: 2024-09-27 02:12:56\n", + "Link: http://arxiv.org/abs/2409.18390v1\n", + "PDF Link: http://arxiv.org/abs/2409.18390v1\n", + "Authors: Alexander Htet Kyaw, Se Hwan Jeon, Miana Smith, Neil Gershenfeld\n", + "Embedding: [ 0.00662689 0.03477224 0.01948195 ... 0.02736436 -0.00416199\n", + " 0.01341196]\n", + "\n", + "\n", + "# Deep Generative Model for Mechanical System Configuration Design\n", + "\n", + "Abstract: Generative AI has made remarkable progress in addressing various design\n", + "challenges. One prominent area where generative AI could bring significant value\n", + "is in engineering design. In particular, selecting an optimal set of components\n", + "and their interfaces to create a mechanical system that meets design\n", + "requirements is one of the most challenging and time-consuming tasks for\n", + "engineers. This configuration design task is inherently challenging due to its\n", + "categorical nature, multiple design requirements a solution must satisfy, and\n", + "the reliance on physics simulations for evaluating potential solutions. These\n", + "characteristics entail solving a combinatorial optimization problem with\n", + "multiple constraints involving black-box functions. To address this challenge,\n", + "we propose a deep generative model to predict the optimal combination of\n", + "components and interfaces for a given design problem. To demonstrate our\n", + "approach, we solve a gear train synthesis problem by first creating a synthetic\n", + "dataset using a grammar, a parts catalogue, and a physics simulator. We then\n", + "train a Transformer using this dataset, named GearFormer, which can not only\n", + "generate quality solutions on its own, but also augment search methods such as\n", + "an evolutionary algorithm and Monte Carlo tree search. 
We show that GearFormer\n", + "outperforms such search methods on their own in terms of satisfying the\n", + "specified design requirements with orders of magnitude faster generation time.\n", + "Additionally, we showcase the benefit of hybrid methods that leverage both\n", + "GearFormer and search methods, which further improve the quality of the\n", + "solutions.\n", + "Published: 2024-09-09 19:15:45\n", + "Link: http://arxiv.org/abs/2409.06016v2\n", + "PDF Link: http://arxiv.org/abs/2409.06016v2\n", + "Authors: Yasaman Etesam, Hyunmin Cheong, Mohammadmehdi Ataei, Pradeep Kumar Jayaraman\n", + "Embedding: [0.02961224 0.01000344 0.03739412 ... 0.00147997 0.02606801 0.044123 ]\n", + "\n", + "\n" + ] + } + ], "source": [ "async with collection:\n", " results = await collection.get_batch(keys[:3])\n", diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/__init__.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/__init__.py new file mode 100644 index 000000000000..2a50eae89411 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Microsoft. All rights reserved. diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py new file mode 100644 index 000000000000..f0a408e58e94 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py @@ -0,0 +1,57 @@ +# Copyright (c) Microsoft. All rights reserved. +from typing import TypeVar + +from psycopg_pool import AsyncConnectionPool + +from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_settings import ( + AzureDBForPostgresSettings, +) +from semantic_kernel.connectors.memory.postgres.constants import DEFAULT_SCHEMA +from semantic_kernel.connectors.memory.postgres.postgres_collection import PostgresCollection +from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition + +TKey = TypeVar("TKey", str, int) +TModel = TypeVar("TModel") + + +class AzureDBForPostgresCollection(PostgresCollection[TKey, TModel]): + """AzureDBForPostgresCollection class.""" + + def __init__( + self, + collection_name: str, + data_model_type: type[TModel], + data_model_definition: VectorStoreRecordDefinition | None = None, + connection_pool: AsyncConnectionPool | None = None, + db_schema: str = DEFAULT_SCHEMA, + env_file_path: str | None = None, + env_file_encoding: str | None = None, + settings: AzureDBForPostgresSettings | None = None, + ): + """Initialize the collection. + + Args: + collection_name: The name of the collection, which corresponds to the table name. + data_model_type (type[TModel]): The type of the data model. + data_model_definition: The data model definition. + connection_pool: The connection pool. + db_schema: The database schema. + env_file_path (str): Use the environment settings file as a fallback to environment variables. + env_file_encoding (str): The encoding of the environment settings file. + settings: The settings for the Azure DB for Postgres connection. If not provided, the settings will be + created from the environment. + """ + # If the connection pool or settings were not provided, create the settings from the environment. + # Passing this to the super class will enforce using Azure DB settings. 
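+        # Note: unlike PostgresSettings, AzureDBForPostgresSettings does not require a password.
+        # When no password is configured, its get_connection_args falls back to an Entra (Azure AD)
+        # token acquired via DefaultAzureCredential, so password-less environments still work here.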
+ if not connection_pool and not settings: + settings = AzureDBForPostgresSettings.create( + env_file_path=env_file_path, env_file_encoding=env_file_encoding + ) + super().__init__( + collection_name=collection_name, + data_model_type=data_model_type, + data_model_definition=data_model_definition, + connection_pool=connection_pool, + db_schema=db_schema, + settings=settings, + ) diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py new file mode 100644 index 000000000000..669dd0f3f0c4 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py @@ -0,0 +1,43 @@ +# Copyright (c) Microsoft. All rights reserved. +import sys +from typing import Any + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + +from azure.core.credentials import TokenCredential +from azure.core.credentials_async import AsyncTokenCredential +from azure.identity import DefaultAzureCredential +from psycopg.conninfo import conninfo_to_dict + +from semantic_kernel.connectors.memory.azure_db_for_postgres.utils import get_entra_token, get_entra_token_aysnc +from semantic_kernel.connectors.memory.postgres.postgres_settings import PostgresSettings + + +class AzureDBForPostgresSettings(PostgresSettings): + """Azure DB for Postgres model settings. + + This is the same as PostgresSettings, but does not a require a password. + If a password is not supplied, then Entra will use the Azure AD token. + You can also supply an Azure credential directly. + """ + + credential: AsyncTokenCredential | TokenCredential | None = None + + @override + def get_connection_args(self, **kwargs) -> dict[str, Any]: + """Get connection arguments.""" + password: Any = self.password.get_secret_value() if self.password else None + if not password and self.connection_string: + password = conninfo_to_dict(self.connection_string.get_secret_value()).get("password") + + if not password: + self.credential = self.credential or DefaultAzureCredential() + if isinstance(self.credential, AsyncTokenCredential): + password = get_entra_token_aysnc(self.credential) + else: + password = get_entra_token(self.credential) + + return super().get_connection_args(password=password) diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_store.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_store.py new file mode 100644 index 000000000000..eba3138b4cfe --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_store.py @@ -0,0 +1,9 @@ +# Copyright (c) Microsoft. All rights reserved. + +from semantic_kernel.connectors.memory.postgres.postgres_store import PostgresStore + + +class AzureDBForPostgresStore(PostgresStore): + """AzureDBForPostgresStore class.""" + + pass diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/constants.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/constants.py new file mode 100644 index 000000000000..612b938173e1 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/constants.py @@ -0,0 +1,3 @@ +# Copyright (c) Microsoft. All rights reserved. 
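+# Entra (Azure AD) token scope requested when authenticating to Azure Database for PostgreSQL;
+# used by get_entra_token / get_entra_token_aysnc in utils.py.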
+ +AZURE_DB_FOR_POSTGRES_SCOPE = "https://ossrdbms-aad.database.windows.net/.default" diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py new file mode 100644 index 000000000000..f7dee848eea1 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py @@ -0,0 +1,25 @@ +# Copyright (c) Microsoft. All rights reserved. +import logging + +from azure.core.credentials import TokenCredential +from azure.core.credentials_async import AsyncTokenCredential + +from semantic_kernel.connectors.memory.azure_db_for_postgres.constants import AZURE_DB_FOR_POSTGRES_SCOPE + +logger = logging.getLogger(__name__) + + +async def get_entra_token_aysnc(credential: AsyncTokenCredential) -> str: + """Get the password from Entra using the provided credential.""" + logger.info("Acquiring Entra token for postgres password") + + async with credential: + cred = await credential.get_token(AZURE_DB_FOR_POSTGRES_SCOPE) + return cred.token + + +def get_entra_token(credential: TokenCredential) -> str: + """Get the password from Entra using the provided credential.""" + logger.info("Acquiring Entra token for postgres password") + + return credential.get_token(AZURE_DB_FOR_POSTGRES_SCOPE).token diff --git a/python/semantic_kernel/connectors/memory/postgres/postgres_settings.py b/python/semantic_kernel/connectors/memory/postgres/postgres_settings.py index 02a5d24616ec..0673586d7acd 100644 --- a/python/semantic_kernel/connectors/memory/postgres/postgres_settings.py +++ b/python/semantic_kernel/connectors/memory/postgres/postgres_settings.py @@ -74,8 +74,16 @@ class PostgresSettings(KernelBaseSettings): default_dimensionality: int = 100 max_rows_per_transaction: int = 1000 - def get_connection_args(self) -> dict[str, Any]: - """Get connection arguments.""" + def get_connection_args(self, **kwargs) -> dict[str, Any]: + """Get connection arguments. + + Args: + kwargs: dict[str, Any] - Additional arguments + Use this to override any connection arguments. + + Returns: + dict[str, Any]: Connection arguments that can be passed to psycopg.connect + """ result = conninfo_to_dict(self.connection_string.get_secret_value()) if self.connection_string else {} if self.host: @@ -89,6 +97,8 @@ def get_connection_args(self) -> dict[str, Any]: if self.password: result["password"] = self.password.get_secret_value() + result = {**result, **kwargs} + # Ensure required values if "host" not in result: raise MemoryConnectorInitializationError("host is required. 
Please set PGHOST or connection_string.") From 5ab384e49f516504f39807d918008830853f841e Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Thu, 2 Jan 2025 13:44:15 -0500 Subject: [PATCH 2/9] Add dynamic Entra credentials to AsyncConnectionPool --- python/samples/concepts/memory/new_memory.py | 10 +- .../third_party/postgres-memory.ipynb | 187 ++++++------------ .../azure_db_for_postgres_collection.py | 6 +- .../azure_db_for_postgres_settings.py | 83 ++++++-- .../azure_db_for_postgres/entra_connection.py | 81 ++++++++ .../memory/azure_db_for_postgres/utils.py | 17 +- 6 files changed, 232 insertions(+), 152 deletions(-) create mode 100644 python/semantic_kernel/connectors/memory/azure_db_for_postgres/entra_connection.py diff --git a/python/samples/concepts/memory/new_memory.py b/python/samples/concepts/memory/new_memory.py index 11f8d3b20b51..239ca0a75250 100644 --- a/python/samples/concepts/memory/new_memory.py +++ b/python/samples/concepts/memory/new_memory.py @@ -17,8 +17,11 @@ ) from semantic_kernel.connectors.memory.azure_ai_search import AzureAISearchCollection from semantic_kernel.connectors.memory.azure_cosmos_db import AzureCosmosDBNoSQLCollection +from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_collection import ( + AzureDBForPostgresCollection, +) from semantic_kernel.connectors.memory.in_memory import InMemoryVectorCollection -from semantic_kernel.connectors.memory.postgres import PostgresCollection +from semantic_kernel.connectors.memory.postgres.postgres_collection import PostgresCollection from semantic_kernel.connectors.memory.qdrant import QdrantCollection from semantic_kernel.connectors.memory.redis import RedisHashsetCollection, RedisJsonCollection from semantic_kernel.connectors.memory.weaviate import WeaviateCollection @@ -110,6 +113,7 @@ class DataModelList: # A list of VectorStoreRecordCollection that can be used. 
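+# Each entry is a zero-argument factory (lambda), so a collection is only constructed when its entry is called.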
# Available collections are: # - ai_search: Azure AI Search +# - azure_db_for_postgres: Azure DB for Postgres # - postgres: PostgreSQL # - redis_json: Redis JSON # - redis_hashset: Redis Hashset @@ -132,6 +136,10 @@ class DataModelList: "ai_search": lambda: AzureAISearchCollection[DataModel]( data_model_type=DataModel, ), + "azure_db_for_postgres": lambda: AzureDBForPostgresCollection[str, DataModel]( + data_model_type=DataModel, + collection_name=collection_name, + ), "postgres": lambda: PostgresCollection[str, DataModel]( data_model_type=DataModel, collection_name=collection_name, diff --git a/python/samples/getting_started/third_party/postgres-memory.ipynb b/python/samples/getting_started/third_party/postgres-memory.ipynb index 872273efb9a4..1aeca721b2d7 100644 --- a/python/samples/getting_started/third_party/postgres-memory.ipynb +++ b/python/samples/getting_started/third_party/postgres-memory.ipynb @@ -262,17 +262,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 10 papers on 'generative ai'\n" - ] - } - ], + "outputs": [], "source": [ "arxiv_papers: list[ArxivPaper] = [\n", " ArxivPaper.from_arxiv_info(paper)\n", @@ -293,27 +285,7 @@ "cell_type": "code", "execution_count": 7, "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "cannot pickle '_thread.lock' object", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[7], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m USE_AZURE_DB_FOR_POSTGRES:\n\u001b[0;32m----> 2\u001b[0m collection \u001b[38;5;241m=\u001b[39m \u001b[43mAzureDBForPostgresCollection\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mArxivPaper\u001b[49m\u001b[43m]\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43marxiv_papers\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata_model_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mArxivPaper\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43menv_file_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menv_file_path\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 6\u001b[0m collection \u001b[38;5;241m=\u001b[39m PostgresCollection[\u001b[38;5;28mstr\u001b[39m, ArxivPaper](\n\u001b[1;32m 7\u001b[0m collection_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124marxiv_papers\u001b[39m\u001b[38;5;124m\"\u001b[39m, data_model_type\u001b[38;5;241m=\u001b[39mArxivPaper, env_file_path\u001b[38;5;241m=\u001b[39menv_file_path\n\u001b[1;32m 8\u001b[0m )\n", - "File \u001b[0;32m~/proj/sk/semantic-kernel/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py:47\u001b[0m, in \u001b[0;36mAzureDBForPostgresCollection.__init__\u001b[0;34m(self, collection_name, data_model_type, data_model_definition, connection_pool, db_schema, env_file_path, env_file_encoding, settings)\u001b[0m\n\u001b[1;32m 44\u001b[0m 
\u001b[38;5;66;03m# If the connection pool or settings were not provided, create the settings from the environment.\u001b[39;00m\n\u001b[1;32m 45\u001b[0m \u001b[38;5;66;03m# Passing this to the super class will enforce using Azure DB settings.\u001b[39;00m\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m connection_pool \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m settings:\n\u001b[0;32m---> 47\u001b[0m settings \u001b[38;5;241m=\u001b[39m \u001b[43mAzureDBForPostgresSettings\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 48\u001b[0m \u001b[43m \u001b[49m\u001b[43menv_file_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menv_file_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43menv_file_encoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menv_file_encoding\u001b[49m\n\u001b[1;32m 49\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 51\u001b[0m collection_name\u001b[38;5;241m=\u001b[39mcollection_name,\n\u001b[1;32m 52\u001b[0m data_model_type\u001b[38;5;241m=\u001b[39mdata_model_type,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 56\u001b[0m settings\u001b[38;5;241m=\u001b[39msettings,\n\u001b[1;32m 57\u001b[0m )\n", - "File \u001b[0;32m~/proj/sk/semantic-kernel/python/semantic_kernel/kernel_pydantic.py:56\u001b[0m, in \u001b[0;36mKernelBaseSettings.create\u001b[0;34m(cls, **data)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124menv_file_encoding\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m data\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124menv_file_encoding\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 55\u001b[0m data \u001b[38;5;241m=\u001b[39m {k: v \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m data\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m v \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m}\n\u001b[0;32m---> 56\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/proj/sk/semantic-kernel/python/.venv/lib/python3.10/site-packages/pydantic_settings/main.py:144\u001b[0m, in \u001b[0;36mBaseSettings.__init__\u001b[0;34m(__pydantic_self__, _case_sensitive, _env_prefix, _env_file, _env_file_encoding, _env_ignore_empty, _env_nested_delimiter, _env_parse_none_str, _env_parse_enums, _cli_prog_name, _cli_parse_args, _cli_settings_source, _cli_parse_none_str, _cli_hide_none_type, _cli_avoid_json, _cli_enforce_required, _cli_use_class_docs_for_groups, _cli_exit_on_error, _cli_prefix, _secrets_dir, **values)\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 121\u001b[0m __pydantic_self__,\n\u001b[1;32m 122\u001b[0m _case_sensitive: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m 
(...)\u001b[0m\n\u001b[1;32m 142\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 143\u001b[0m \u001b[38;5;66;03m# Uses something other than `self` the first arg to allow \"self\" as a settable attribute\u001b[39;00m\n\u001b[0;32m--> 144\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 145\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m__pydantic_self__\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_settings_build_values\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 146\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 147\u001b[0m \u001b[43m \u001b[49m\u001b[43m_case_sensitive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_case_sensitive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 148\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_prefix\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_prefix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 149\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_file\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 150\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_file_encoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_file_encoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 151\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_ignore_empty\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_ignore_empty\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 152\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_nested_delimiter\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_nested_delimiter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 153\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_parse_none_str\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_parse_none_str\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 154\u001b[0m \u001b[43m \u001b[49m\u001b[43m_env_parse_enums\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_env_parse_enums\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 155\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_prog_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_prog_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 156\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_parse_args\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_parse_args\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_settings_source\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_settings_source\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_parse_none_str\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_parse_none_str\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_hide_none_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_hide_none_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_avoid_json\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_avoid_json\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_enforce_required\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_enforce_required\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m_cli_use_class_docs_for_groups\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_use_class_docs_for_groups\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_exit_on_error\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_exit_on_error\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m \u001b[49m\u001b[43m_cli_prefix\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_cli_prefix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[43m \u001b[49m\u001b[43m_secrets_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_secrets_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 166\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - " \u001b[0;31m[... skipping hidden 1 frame]\u001b[0m\n", - "File \u001b[0;32m~/proj/sk/semantic-kernel/python/.venv/lib/python3.10/site-packages/pydantic/_internal/_model_construction.py:292\u001b[0m, in \u001b[0;36minit_private_attributes\u001b[0;34m(self, context)\u001b[0m\n\u001b[1;32m 290\u001b[0m pydantic_private \u001b[38;5;241m=\u001b[39m {}\n\u001b[1;32m 291\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m name, private_attr \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__private_attributes__\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m--> 292\u001b[0m default \u001b[38;5;241m=\u001b[39m \u001b[43mprivate_attr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_default\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m default \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m PydanticUndefined:\n\u001b[1;32m 294\u001b[0m pydantic_private[name] \u001b[38;5;241m=\u001b[39m default\n", - "File \u001b[0;32m~/proj/sk/semantic-kernel/python/.venv/lib/python3.10/site-packages/pydantic/fields.py:936\u001b[0m, in \u001b[0;36mModelPrivateAttr.get_default\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 926\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_default\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Any:\n\u001b[1;32m 927\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Retrieve the default value of the object.\u001b[39;00m\n\u001b[1;32m 928\u001b[0m \n\u001b[1;32m 929\u001b[0m \u001b[38;5;124;03m If `self.default_factory` is `None`, the method will return a deep copy of the `self.default` object.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 934\u001b[0m \u001b[38;5;124;03m The default value of the object.\u001b[39;00m\n\u001b[1;32m 935\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 936\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_utils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msmart_deepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdefault\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_factory \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_factory()\n", - "File \u001b[0;32m~/proj/sk/semantic-kernel/python/.venv/lib/python3.10/site-packages/pydantic/_internal/_utils.py:318\u001b[0m, in \u001b[0;36msmart_deepcopy\u001b[0;34m(obj)\u001b[0m\n\u001b[1;32m 
314\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mRuntimeError\u001b[39;00m):\n\u001b[1;32m 315\u001b[0m \u001b[38;5;66;03m# do we really dare to catch ALL errors? Seems a bit risky\u001b[39;00m\n\u001b[1;32m 316\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[0;32m--> 318\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdeepcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.local/share/uv/python/cpython-3.10.15-linux-x86_64-gnu/lib/python3.10/copy.py:161\u001b[0m, in \u001b[0;36mdeepcopy\u001b[0;34m(x, memo, _nil)\u001b[0m\n\u001b[1;32m 159\u001b[0m reductor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(x, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__reduce_ex__\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m reductor \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 161\u001b[0m rv \u001b[38;5;241m=\u001b[39m \u001b[43mreductor\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m4\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 163\u001b[0m reductor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(x, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__reduce__\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n", - "\u001b[0;31mTypeError\u001b[0m: cannot pickle '_thread.lock' object" - ] - } - ], + "outputs": [], "source": [ "if USE_AZURE_DB_FOR_POSTGRES:\n", " collection = AzureDBForPostgresCollection[str, ArxivPaper](\n", @@ -334,7 +306,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -360,7 +332,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -376,17 +348,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "SYNC TOKEN: 
<Entra access token redacted from committed output>\n"
-     ]
-    }
-   ],
+    "outputs": [],
    "source": [
     "async with collection:\n",
     "    await collection.create_collection_if_not_exists()\n",
     "    keys = await collection.upsert_batch(arxiv_papers)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "Note that we use the collection as a context manager. This opens and closes the connection pool that is created by the collection. If we want to maintain a persistent connection pool, which is more typical for a long-running application, we can create the connection pool outside of the context manager and pass it in. This is also useful if we want to use the same connection pool for multiple collections; a standalone sketch of that pattern follows this cell.\n",
+    "\n",
+    "The settings objects PostgresSettings and AzureDBForPostgresSettings make it easy to create connection pools. We use this technique in the next cell.\n",
+    "\n",
    "Here we retrieve the first few models from the database and print out their information."
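As a minimal sketch of the shared-pool pattern mentioned in the note above (illustrative only: it assumes the ArxivPaper model, env_file_path, and the AzureDBForPostgresCollection import from earlier cells in this notebook, and the second collection name is hypothetical):

from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_settings import (
    AzureDBForPostgresSettings,
)

settings = AzureDBForPostgresSettings(env_file_path=env_file_path)
pool = await settings.create_connection_pool()  # the returned pool is already open

# Both collections reuse the same pool instead of opening one each.
papers = AzureDBForPostgresCollection[str, ArxivPaper](
    collection_name="arxiv_papers", data_model_type=ArxivPaper, connection_pool=pool, settings=settings
)
archive = AzureDBForPostgresCollection[str, ArxivPaper](
    collection_name="arxiv_papers_archive", data_model_type=ArxivPaper, connection_pool=pool, settings=settings
)

# ... upsert into / query either collection here ...

await pool.close()  # close once, when the application is done with both collections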
]
   },
   {
    "cell_type": "code",
-    "execution_count": 12,
+    "execution_count": 11,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_settings import (\n",
+     "    AzureDBForPostgresSettings,\n",
+     ")\n",
+     "from semantic_kernel.connectors.memory.postgres.postgres_settings import PostgresSettings\n",
+     "\n",
+     "if USE_AZURE_DB_FOR_POSTGRES:\n",
+     "    settings = AzureDBForPostgresSettings(env_file_path=env_file_path)\n",
+     "    connection_pool = await settings.create_connection_pool()\n",
+     "    collection = AzureDBForPostgresCollection[str, ArxivPaper](\n",
+     "        collection_name=\"arxiv_papers\",\n",
+     "        data_model_type=ArxivPaper,\n",
+     "        connection_pool=connection_pool,\n",
+     "        settings=settings,\n",
+     "    )\n",
+     "else:\n",
+     "    settings = PostgresSettings(env_file_path=env_file_path)\n",
+     "    connection_pool = await settings.create_connection_pool()\n",
+     "    collection = PostgresCollection[str, ArxivPaper](\n",
+     "        collection_name=\"arxiv_papers\", data_model_type=ArxivPaper, connection_pool=connection_pool\n",
+     "    )\n",
+     "\n",
+     "# Open the connection pool\n",
+     "await connection_pool.open()"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
    "metadata": {},
-    "outputs": [
-     {
-      "name": "stdout",
-      "output_type": "stream",
-      "text": [
-       "SYNC TOKEN: <Entra access token redacted from committed output>\n",
       "# Data Analysis in the Era of Generative AI\n",
       "\n",
"Abstract: This paper explores the potential of AI-powered tools to reshape data\n", - "analysis, focusing on design considerations and challenges. We explore how the\n", - "emergence of large language and multimodal models offers new opportunities to\n", - "enhance various stages of data analysis workflow by translating high-level user\n", - "intentions into executable code, charts, and insights. We then examine human-\n", - "centered design principles that facilitate intuitive interactions, build user\n", - "trust, and streamline the AI-assisted analysis workflow across multiple apps.\n", - "Finally, we discuss the research challenges that impede the development of these\n", - "AI-based systems such as enhancing model capabilities, evaluating and\n", - "benchmarking, and understanding end-user needs.\n", - "Published: 2024-09-27 06:31:03\n", - "Link: http://arxiv.org/abs/2409.18475v1\n", - "PDF Link: http://arxiv.org/abs/2409.18475v1\n", - "Authors: Jeevana Priya Inala, Chenglong Wang, Steven Drucker, Gonzalo Ramos, Victor Dibia, Nathalie Riche, Dave Brown, Dan Marshall, Jianfeng Gao\n", - "Embedding: [ 0.0324665 0.03064382 0.04381268 ... -0.00220003 -0.01004753\n", - " 0.0266567 ]\n", - "\n", - "\n", - "# Speech to Reality: On-Demand Production using Natural Language, 3D Generative AI, and Discrete Robotic Assembly\n", - "\n", - "Abstract: We present a system that transforms speech into physical objects by combining\n", - "3D generative Artificial Intelligence with robotic assembly. The system\n", - "leverages natural language input to make design and manufacturing more\n", - "accessible, enabling individuals without expertise in 3D modeling or robotic\n", - "programming to create physical objects. We propose utilizing discrete robotic\n", - "assembly of lattice-based voxel components to address the challenges of using\n", - "generative AI outputs in physical production, such as design variability,\n", - "fabrication speed, structural integrity, and material waste. The system\n", - "interprets speech to generate 3D objects, discretizes them into voxel\n", - "components, computes an optimized assembly sequence, and generates a robotic\n", - "toolpath. The results are demonstrated through the assembly of various objects,\n", - "ranging from chairs to shelves, which are prompted via speech and realized\n", - "within 5 minutes using a 6-axis robotic arm.\n", - "Published: 2024-09-27 02:12:56\n", - "Link: http://arxiv.org/abs/2409.18390v1\n", - "PDF Link: http://arxiv.org/abs/2409.18390v1\n", - "Authors: Alexander Htet Kyaw, Se Hwan Jeon, Miana Smith, Neil Gershenfeld\n", - "Embedding: [ 0.00662689 0.03477224 0.01948195 ... 0.02736436 -0.00416199\n", - " 0.01341196]\n", - "\n", - "\n", - "# Deep Generative Model for Mechanical System Configuration Design\n", - "\n", - "Abstract: Generative AI has made remarkable progress in addressing various design\n", - "challenges. One prominent area where generative AI could bring significant value\n", - "is in engineering design. In particular, selecting an optimal set of components\n", - "and their interfaces to create a mechanical system that meets design\n", - "requirements is one of the most challenging and time-consuming tasks for\n", - "engineers. This configuration design task is inherently challenging due to its\n", - "categorical nature, multiple design requirements a solution must satisfy, and\n", - "the reliance on physics simulations for evaluating potential solutions. 
These\n", - "characteristics entail solving a combinatorial optimization problem with\n", - "multiple constraints involving black-box functions. To address this challenge,\n", - "we propose a deep generative model to predict the optimal combination of\n", - "components and interfaces for a given design problem. To demonstrate our\n", - "approach, we solve a gear train synthesis problem by first creating a synthetic\n", - "dataset using a grammar, a parts catalogue, and a physics simulator. We then\n", - "train a Transformer using this dataset, named GearFormer, which can not only\n", - "generate quality solutions on its own, but also augment search methods such as\n", - "an evolutionary algorithm and Monte Carlo tree search. We show that GearFormer\n", - "outperforms such search methods on their own in terms of satisfying the\n", - "specified design requirements with orders of magnitude faster generation time.\n", - "Additionally, we showcase the benefit of hybrid methods that leverage both\n", - "GearFormer and search methods, which further improve the quality of the\n", - "solutions.\n", - "Published: 2024-09-09 19:15:45\n", - "Link: http://arxiv.org/abs/2409.06016v2\n", - "PDF Link: http://arxiv.org/abs/2409.06016v2\n", - "Authors: Yasaman Etesam, Hyunmin Cheong, Mohammadmehdi Ataei, Pradeep Kumar Jayaraman\n", - "Embedding: [0.02961224 0.01000344 0.03739412 ... 0.00147997 0.02606801 0.044123 ]\n", - "\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "async with collection:\n", " results = await collection.get_batch(keys[:3])\n", @@ -505,6 +422,22 @@ " print()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we need to close the connection pool explicitly." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "await connection_pool.close()" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py index f0a408e58e94..9427a390831a 100644 --- a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py @@ -32,12 +32,12 @@ def __init__( Args: collection_name: The name of the collection, which corresponds to the table name. - data_model_type (type[TModel]): The type of the data model. + data_model_type: The type of the data model. data_model_definition: The data model definition. connection_pool: The connection pool. db_schema: The database schema. - env_file_path (str): Use the environment settings file as a fallback to environment variables. - env_file_encoding (str): The encoding of the environment settings file. + env_file_path: Use the environment settings file as a fallback to environment variables. + env_file_encoding: The encoding of the environment settings file. settings: The settings for the Azure DB for Postgres connection. If not provided, the settings will be created from the environment. 
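As a usage sketch for the constructor documented above (not part of the PR): the collection can be handed a settings object that carries an explicit Entra credential, so no password needs to be present in the environment. MyDataModel is a placeholder for any vectorstoremodel-decorated data model defined elsewhere.

from azure.identity.aio import DefaultAzureCredential

from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_collection import (
    AzureDBForPostgresCollection,
)
from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_settings import (
    AzureDBForPostgresSettings,
)


async def build_collection() -> AzureDBForPostgresCollection:
    # MyDataModel: placeholder for a @vectorstoremodel-decorated class defined elsewhere.
    settings = AzureDBForPostgresSettings(credential=DefaultAzureCredential())
    return AzureDBForPostgresCollection[str, MyDataModel](
        collection_name="my_collection",
        data_model_type=MyDataModel,
        settings=settings,
    )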
""" diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py index 669dd0f3f0c4..ea5e61e646b6 100644 --- a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py @@ -2,17 +2,21 @@ import sys from typing import Any +from psycopg.conninfo import conninfo_to_dict +from psycopg_pool import AsyncConnectionPool + +from semantic_kernel.connectors.memory.azure_db_for_postgres.entra_connection import AsyncEntraConnection +from semantic_kernel.exceptions.memory_connector_exceptions import MemoryConnectorInitializationError + if sys.version_info >= (3, 12): - from typing import override # pragma: no cover + pass # pragma: no cover else: - from typing_extensions import override # pragma: no cover + pass # pragma: no cover from azure.core.credentials import TokenCredential from azure.core.credentials_async import AsyncTokenCredential -from azure.identity import DefaultAzureCredential -from psycopg.conninfo import conninfo_to_dict -from semantic_kernel.connectors.memory.azure_db_for_postgres.utils import get_entra_token, get_entra_token_aysnc +from semantic_kernel import __version__ from semantic_kernel.connectors.memory.postgres.postgres_settings import PostgresSettings @@ -26,18 +30,59 @@ class AzureDBForPostgresSettings(PostgresSettings): credential: AsyncTokenCredential | TokenCredential | None = None - @override def get_connection_args(self, **kwargs) -> dict[str, Any]: - """Get connection arguments.""" - password: Any = self.password.get_secret_value() if self.password else None - if not password and self.connection_string: - password = conninfo_to_dict(self.connection_string.get_secret_value()).get("password") - - if not password: - self.credential = self.credential or DefaultAzureCredential() - if isinstance(self.credential, AsyncTokenCredential): - password = get_entra_token_aysnc(self.credential) - else: - password = get_entra_token(self.credential) - - return super().get_connection_args(password=password) + """Get connection arguments. + + Args: + kwargs: dict[str, Any] - Additional arguments + Use this to override any connection arguments. + + Returns: + dict[str, Any]: Connection arguments that can be passed to psycopg.connect + """ + result = conninfo_to_dict(self.connection_string.get_secret_value()) if self.connection_string else {} + + if self.host: + result["host"] = self.host + if self.port: + result["port"] = self.port + if self.dbname: + result["dbname"] = self.dbname + if self.user: + result["user"] = self.user + if self.password: + result["password"] = self.password.get_secret_value() + + result = {**result, **kwargs} + + # Ensure required values + if "host" not in result: + raise MemoryConnectorInitializationError("host is required. Please set PGHOST or connection_string.") + if "dbname" not in result: + raise MemoryConnectorInitializationError( + "database is required. Please set PGDATABASE or connection_string." + ) + + return result + + async def create_connection_pool(self) -> AsyncConnectionPool: + """Creates a connection pool based off of settings. + + Uses AsyncEntraConnection as the connection class, which + can set the user and password based on a Entra token. 
+ """ + pool: AsyncConnectionPool = AsyncConnectionPool( + min_size=self.min_pool, + max_size=self.max_pool, + open=False, + kwargs={ + **self.get_connection_args(), + **{ + "credential": self.credential, + "application_name": f"semantic_kernel (python) v{__version__}", + }, + }, + connection_class=AsyncEntraConnection, + ) + await pool.open() + return pool diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/entra_connection.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/entra_connection.py new file mode 100644 index 000000000000..2a2d044e3802 --- /dev/null +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/entra_connection.py @@ -0,0 +1,81 @@ +# Copyright (c) Microsoft. All rights reserved. +import base64 +import json +import logging +from functools import lru_cache + +from azure.core.credentials import TokenCredential +from azure.core.credentials_async import AsyncTokenCredential +from azure.identity import DefaultAzureCredential +from psycopg import AsyncConnection + +from semantic_kernel.connectors.memory.azure_db_for_postgres.constants import AZURE_DB_FOR_POSTGRES_SCOPE + +logger = logging.getLogger(__name__) + + +async def get_entra_token_aysnc(credential: AsyncTokenCredential) -> str: + """Get the password from Entra using the provided credential.""" + logger.info("Acquiring Entra token for postgres password") + + async with credential: + cred = await credential.get_token(AZURE_DB_FOR_POSTGRES_SCOPE) + return cred.token + + +def get_entra_token(credential: TokenCredential | None) -> str: + """Get the password from Entra using the provided credential.""" + logger.info("Acquiring Entra token for postgres password") + credential = credential or get_default_azure_credentials() + + return credential.get_token(AZURE_DB_FOR_POSTGRES_SCOPE).token + + +@lru_cache(maxsize=1) +def get_default_azure_credentials() -> DefaultAzureCredential: + """Get the default Azure credentials. + + This method caches the credentials to avoid creating new instances. + """ + return DefaultAzureCredential() + + +def decode_jwt(token): + """Decode the JWT payload to extract claims.""" + payload = token.split(".")[1] + padding = "=" * (4 - len(payload) % 4) + decoded_payload = base64.urlsafe_b64decode(payload + padding) + return json.loads(decoded_payload) + + +async def get_entra_conninfo(credential: TokenCredential | AsyncTokenCredential | None) -> dict[str, str]: + """Fetches a token returns the username and token.""" + # Fetch a new token and extract the username + if isinstance(credential, AsyncTokenCredential): + token = await get_entra_token_aysnc(credential) + else: + token = get_entra_token(credential) + claims = decode_jwt(token) + username = claims.get("upn") or claims.get("preferred_username") or claims.get("unique_name") + if not username: + raise ValueError("Could not extract username from token. 
Have you logged in?") + + return {"user": username, "password": token} + + +class AsyncEntraConnection(AsyncConnection): + """Asynchronous connection class for using Entra auth with Azure DB for PostgreSQL.""" + + @classmethod + async def connect(cls, *args, **kwargs): + """Establish an asynchronous connection using Entra auth with Azure DB for PostgreSQL.""" + credential = kwargs.pop("credential", None) + if credential and not isinstance(credential, (TokenCredential, AsyncTokenCredential)): + raise ValueError("credential must be a TokenCredential or AsyncTokenCredential") + if credential or not kwargs.get("user") or not kwargs.get("password"): + entra_conninfo = await get_entra_conninfo(credential) + kwargs["password"] = entra_conninfo["password"] + if not kwargs.get("user"): + # If user isn't already set, use the username from the token + kwargs["user"] = entra_conninfo["user"] + return await super().connect(*args, **kwargs | entra_conninfo) diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py index f7dee848eea1..ea693f908d3c 100644 --- a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py @@ -1,15 +1,17 @@ # Copyright (c) Microsoft. All rights reserved. import logging +from functools import lru_cache from azure.core.credentials import TokenCredential from azure.core.credentials_async import AsyncTokenCredential +from azure.identity import DefaultAzureCredential from semantic_kernel.connectors.memory.azure_db_for_postgres.constants import AZURE_DB_FOR_POSTGRES_SCOPE logger = logging.getLogger(__name__) -async def get_entra_token_aysnc(credential: AsyncTokenCredential) -> str: +async def get_entra_token_async(credential: AsyncTokenCredential) -> str: """Get the password from Entra using the provided credential.""" logger.info("Acquiring Entra token for postgres password") @@ -18,8 +20,19 @@ async def get_entra_token_aysnc(credential: AsyncTokenCredential) -> str: return cred.token -def get_entra_token(credential: TokenCredential) -> str: +def get_entra_token(credential: TokenCredential | None) -> str: """Get the password from Entra using the provided credential.""" logger.info("Acquiring Entra token for postgres password") + credential = credential or get_default_azure_credentials() + print("HERE") return credential.get_token(AZURE_DB_FOR_POSTGRES_SCOPE).token + + +@lru_cache(maxsize=1) +def get_default_azure_credentials() -> DefaultAzureCredential: + """Get the default Azure credentials. + + This method caches the credentials to avoid creating new instances. 
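For readers unfamiliar with Entra authentication against Azure Database for PostgreSQL, the flow implemented by AsyncEntraConnection above boils down to: acquire a token for the ossrdbms-aad scope, send the token string as the password, and take the user name from the token claims. A standalone sketch follows; the server, database, and user values are placeholders.

import asyncio

import psycopg
from azure.identity.aio import DefaultAzureCredential

OSSRDBMS_SCOPE = "https://ossrdbms-aad.database.windows.net/.default"  # same value as AZURE_DB_FOR_POSTGRES_SCOPE


async def main() -> None:
    async with DefaultAzureCredential() as credential:
        token = await credential.get_token(OSSRDBMS_SCOPE)

    # The Entra token is used as the Postgres password; the user comes from its claims.
    async with await psycopg.AsyncConnection.connect(
        host="<server>.postgres.database.azure.com",  # placeholder
        dbname="<database>",  # placeholder
        user="<entra-user@contoso.com>",  # placeholder; upn / preferred_username claim
        password=token.token,
        sslmode="require",
    ) as conn:
        cur = await conn.execute("SELECT current_user")
        print(await cur.fetchone())


asyncio.run(main())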
+ """ + return DefaultAzureCredential() From bce5a1cae30678ed0fa8d9b0d24c2d443cd48701 Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Thu, 2 Jan 2025 13:39:21 -0500 Subject: [PATCH 3/9] Add application name to postgres connector --- .../connectors/memory/postgres/postgres_settings.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/python/semantic_kernel/connectors/memory/postgres/postgres_settings.py b/python/semantic_kernel/connectors/memory/postgres/postgres_settings.py index 0673586d7acd..04b1156e29b2 100644 --- a/python/semantic_kernel/connectors/memory/postgres/postgres_settings.py +++ b/python/semantic_kernel/connectors/memory/postgres/postgres_settings.py @@ -6,6 +6,7 @@ from psycopg_pool import AsyncConnectionPool from pydantic import Field, SecretStr +from semantic_kernel import __version__ from semantic_kernel.connectors.memory.postgres.constants import ( PGDATABASE_ENV_VAR, PGHOST_ENV_VAR, @@ -122,7 +123,12 @@ async def create_connection_pool(self) -> AsyncConnectionPool: min_size=self.min_pool, max_size=self.max_pool, open=False, - kwargs=self.get_connection_args(), + kwargs={ + **self.get_connection_args(), + **{ + "application_name": f"semantic_kernel (python) v{__version__}", + }, + }, ) await pool.open() except Exception as e: From e66e4de710f7a3186833126c11e402e7a76a2244 Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Thu, 2 Jan 2025 13:39:48 -0500 Subject: [PATCH 4/9] Delete unused file; functionality moved to entra_connection.py --- .../memory/azure_db_for_postgres/utils.py | 38 ------------------- 1 file changed, 38 deletions(-) delete mode 100644 python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py deleted file mode 100644 index ea693f908d3c..000000000000 --- a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/utils.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. -import logging -from functools import lru_cache - -from azure.core.credentials import TokenCredential -from azure.core.credentials_async import AsyncTokenCredential -from azure.identity import DefaultAzureCredential - -from semantic_kernel.connectors.memory.azure_db_for_postgres.constants import AZURE_DB_FOR_POSTGRES_SCOPE - -logger = logging.getLogger(__name__) - - -async def get_entra_token_async(credential: AsyncTokenCredential) -> str: - """Get the password from Entra using the provided credential.""" - logger.info("Acquiring Entra token for postgres password") - - async with credential: - cred = await credential.get_token(AZURE_DB_FOR_POSTGRES_SCOPE) - return cred.token - - -def get_entra_token(credential: TokenCredential | None) -> str: - """Get the password from Entra using the provided credential.""" - logger.info("Acquiring Entra token for postgres password") - credential = credential or get_default_azure_credentials() - print("HERE") - - return credential.get_token(AZURE_DB_FOR_POSTGRES_SCOPE).token - - -@lru_cache(maxsize=1) -def get_default_azure_credentials() -> DefaultAzureCredential: - """Get the default Azure credentials. - - This method caches the credentials to avoid creating new instances. 
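One way to confirm that the application_name introduced in the patch above reaches the server is to ask the backend what name the pooled connection reports. This is an illustrative check, not part of the PR; it assumes PG* settings in the environment and an async context such as a notebook cell.

from semantic_kernel.connectors.memory.postgres.postgres_settings import PostgresSettings

settings = PostgresSettings()
pool = await settings.create_connection_pool()
async with pool.connection() as conn:
    cur = await conn.execute(
        "SELECT application_name FROM pg_stat_activity WHERE pid = pg_backend_pid()"
    )
    print((await cur.fetchone())[0])  # e.g. "semantic_kernel (python) v<version>"
await pool.close()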
- """ - return DefaultAzureCredential() From 504c78e4e1dde94764de1aaeb51a1a257cdf5ff3 Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Thu, 2 Jan 2025 13:40:26 -0500 Subject: [PATCH 5/9] Add __all__ to __init__.py --- .../memory/azure_db_for_postgres/__init__.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/__init__.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/__init__.py index 2a50eae89411..8927ef5e4df1 100644 --- a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/__init__.py +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/__init__.py @@ -1 +1,11 @@ # Copyright (c) Microsoft. All rights reserved. + +from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_collection import ( + AzureDBForPostgresCollection, +) +from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_settings import ( + AzureDBForPostgresSettings, +) +from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_store import AzureDBForPostgresStore + +__all__ = ["AzureDBForPostgresCollection", "AzureDBForPostgresSettings", "AzureDBForPostgresStore"] From 2a745f1f48afee9ab9653d47fd5956e6f8f6600a Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Thu, 2 Jan 2025 13:55:12 -0500 Subject: [PATCH 6/9] Fix typo --- .../memory/azure_db_for_postgres/entra_connection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/entra_connection.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/entra_connection.py index 2a2d044e3802..549df654fa05 100644 --- a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/entra_connection.py +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/entra_connection.py @@ -14,7 +14,7 @@ logger = logging.getLogger(__name__) -async def get_entra_token_aysnc(credential: AsyncTokenCredential) -> str: +async def get_entra_token_async(credential: AsyncTokenCredential) -> str: """Get the password from Entra using the provided credential.""" logger.info("Acquiring Entra token for postgres password") @@ -52,7 +52,7 @@ async def get_entra_conninfo(credential: TokenCredential | AsyncTokenCredential """Fetches a token returns the username and token.""" # Fetch a new token and extract the username if isinstance(credential, AsyncTokenCredential): - token = await get_entra_token_aysnc(credential) + token = await get_entra_token_async(credential) else: token = get_entra_token(credential) claims = decode_jwt(token) From f910fa85fe54fa98cf11016d325e919e2818b5c6 Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Fri, 3 Jan 2025 17:42:12 -0500 Subject: [PATCH 7/9] Fix import --- .../azure_db_for_postgres/azure_db_for_postgres_collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py index 9427a390831a..d6fbc656775e 100644 --- a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_collection.py @@ -8,7 +8,7 @@ ) from semantic_kernel.connectors.memory.postgres.constants import DEFAULT_SCHEMA from 
semantic_kernel.connectors.memory.postgres.postgres_collection import PostgresCollection -from semantic_kernel.data.vector_store_model_definition import VectorStoreRecordDefinition +from semantic_kernel.data.record_definition.vector_store_model_definition import VectorStoreRecordDefinition TKey = TypeVar("TKey", str, int) TModel = TypeVar("TModel") From 1a8ad95805b1c4150597f327ae0cb31b8669d26a Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Fri, 3 Jan 2025 17:43:48 -0500 Subject: [PATCH 8/9] Allow AZURE_DB_FOR_POSTGRES_CONNECTION_STRING --- python/.env.example | 1 + .../azure_db_for_postgres_settings.py | 13 ++++++++++++- .../memory/azure_db_for_postgres/constants.py | 8 ++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/python/.env.example b/python/.env.example index 8e46ec2bb6de..ef532e6dd80d 100644 --- a/python/.env.example +++ b/python/.env.example @@ -17,6 +17,7 @@ MONGODB_ATLAS_CONNECTION_STRING="" PINECONE_API_KEY="" PINECONE_ENVIRONMENT="" POSTGRES_CONNECTION_STRING="" +AZURE_DB_FOR_POSTGRES_CONNECTION_STRING="" WEAVIATE_URL="" WEAVIATE_API_KEY="" GOOGLE_SEARCH_ENGINE_ID="" diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py index ea5e61e646b6..6fcc1c8d987d 100644 --- a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/azure_db_for_postgres_settings.py @@ -4,7 +4,11 @@ from psycopg.conninfo import conninfo_to_dict from psycopg_pool import AsyncConnectionPool +from pydantic import Field, SecretStr +from semantic_kernel.connectors.memory.azure_db_for_postgres.constants import ( + AZURE_DB_FOR_POSTGRES_CONNECTION_STRING_ENV_VAR, +) from semantic_kernel.connectors.memory.azure_db_for_postgres.entra_connection import AsyncEntraConnection from semantic_kernel.exceptions.memory_connector_exceptions import MemoryConnectorInitializationError @@ -30,6 +34,12 @@ class AzureDBForPostgresSettings(PostgresSettings): credential: AsyncTokenCredential | TokenCredential | None = None + azure_db_connection_string: SecretStr | None = Field(None, alias=AZURE_DB_FOR_POSTGRES_CONNECTION_STRING_ENV_VAR) + """A azure db specific connection string. Can be supplied instead of POSTGRES_CONNECTION_STRING + + This is useful if settings for both an Azure DB and a regular Postgres database are needed. + """ + def get_connection_args(self, **kwargs) -> dict[str, Any]: """Get connection arguments. @@ -40,7 +50,8 @@ def get_connection_args(self, **kwargs) -> dict[str, Any]: Returns: dict[str, Any]: Connection arguments that can be passed to psycopg.connect """ - result = conninfo_to_dict(self.connection_string.get_secret_value()) if self.connection_string else {} + conn_string_setting = self.azure_db_connection_string or self.connection_string + result = conninfo_to_dict(conn_string_setting.get_secret_value()) if conn_string_setting else {} if self.host: result["host"] = self.host diff --git a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/constants.py b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/constants.py index 612b938173e1..55cdea88a3de 100644 --- a/python/semantic_kernel/connectors/memory/azure_db_for_postgres/constants.py +++ b/python/semantic_kernel/connectors/memory/azure_db_for_postgres/constants.py @@ -1,3 +1,11 @@ # Copyright (c) Microsoft. All rights reserved. 
AZURE_DB_FOR_POSTGRES_SCOPE = "https://ossrdbms-aad.database.windows.net/.default" + +AZURE_DB_FOR_POSTGRES_CONNECTION_STRING_ENV_VAR = "AZURE_DB_FOR_POSTGRES_CONNECTION_STRING" +"""Azure DB for Postgres specific environment variable for the connection string. + +This is useful if settings for both an Azure DB and a regular Postgres database are needed. +If not set, the regular POSTGRES_CONNECTION_STRING environment variable or other standard +Postgres environment variables will be used. +""" From e379c80f9b0aa6f925faf57df7b92bdcb96c1372 Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Fri, 3 Jan 2025 17:44:11 -0500 Subject: [PATCH 9/9] Add Azure DB for Postgres to PG integration tests --- .../postgres/test_postgres_int.py | 40 ++++++++++++++----- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/python/tests/integration/memory/vector_stores/postgres/test_postgres_int.py b/python/tests/integration/memory/vector_stores/postgres/test_postgres_int.py index fb280e047a39..49078b92e6b2 100644 --- a/python/tests/integration/memory/vector_stores/postgres/test_postgres_int.py +++ b/python/tests/integration/memory/vector_stores/postgres/test_postgres_int.py @@ -10,6 +10,10 @@ import pytest_asyncio from pydantic import BaseModel +from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_settings import ( + AzureDBForPostgresSettings, +) +from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_store import AzureDBForPostgresStore from semantic_kernel.connectors.memory.postgres import PostgresSettings, PostgresStore from semantic_kernel.data import ( DistanceFunction, @@ -40,8 +44,8 @@ connection_params_present = False pytestmark = pytest.mark.skipif( - not (psycopg_pool_installed or connection_params_present), - reason="psycopg_pool is not installed" if not psycopg_pool_installed else "No connection parameters provided", + not psycopg_pool_installed, + reason="psycopg_pool is not installed", ) @@ -85,15 +89,33 @@ def DataModelPandas(record) -> tuple: return definition, df -@pytest_asyncio.fixture -async def vector_store() -> AsyncGenerator[PostgresStore, None]: +@pytest_asyncio.fixture( + # Parametrize over all Postgres stores. + params=["PostgresStore", "AzureDBForPostgresStore"] +) +async def vector_store(request) -> AsyncGenerator[PostgresStore, None]: + store_type = request.param + if store_type == "PostgresStore": + settings = PostgresSettings.create() + elif store_type == "AzureDBForPostgresStore": + settings = AzureDBForPostgresSettings.create() + + try: + connection_params_present = any(settings.get_connection_args().values()) + except MemoryConnectorInitializationError: + connection_params_present = False + + if not connection_params_present: + pytest.skip(f"No connection parameters provided for {store_type}") + try: - async with await pg_settings.create_connection_pool() as pool: - yield PostgresStore(connection_pool=pool) + async with await settings.create_connection_pool() as pool: + if store_type == "PostgresStore": + yield PostgresStore(connection_pool=pool) + elif store_type == "AzureDBForPostgresStore": + yield AzureDBForPostgresStore(connection_pool=pool) except MemoryConnectorConnectionException: - pytest.skip("Postgres connection not available") - yield None - return + pytest.skip(f"{store_type} connection not available") @asynccontextmanager
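Finally, a sketch of the connection-string precedence added above: when AZURE_DB_FOR_POSTGRES_CONNECTION_STRING is present it is preferred, and POSTGRES_CONNECTION_STRING remains the fallback. The connection-string values below are placeholders.

import os

from semantic_kernel.connectors.memory.azure_db_for_postgres.azure_db_for_postgres_settings import (
    AzureDBForPostgresSettings,
)

os.environ["POSTGRES_CONNECTION_STRING"] = "host=localhost dbname=local_db user=local_user"
os.environ["AZURE_DB_FOR_POSTGRES_CONNECTION_STRING"] = (
    "host=<server>.postgres.database.azure.com dbname=azure_db user=entra-user@contoso.com"
)

settings = AzureDBForPostgresSettings()
args = settings.get_connection_args()
print(args["host"])  # the Azure host: azure_db_connection_string takes precedence

Unsetting the Azure-specific variable makes get_connection_args fall back to the standard POSTGRES_CONNECTION_STRING and PG* values, which is what the parametrized integration-test fixture above relies on when it skips a store whose connection parameters are absent.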