crate · amotl · Oct 30, 2023 · Oct 29, 2023 · Oct 29, 2023 · Oct 29, 2023
diff --git a/.github/workflows/test-langchain.yml b/.github/workflows/test-langchain.yml
@@ -0,0 +1,62 @@
+name: LangChain
+on:
+
+  pull_request:
+    branches: ~
+    paths:
+    - '.github/workflows/test-langchain.yml'
+    - 'framework/langchain/**'
+    - 'testing/ngr.py'
+  push:
+    branches: [ main ]
+    paths:
+    - '.github/workflows/test-langchain.yml'
+    - 'framework/langchain/**'
+    - 'testing/ngr.py'
+
+  # Allow job to be triggered manually.
+  workflow_dispatch:
+
+# Cancel in-progress jobs when pushing to the same branch.
+concurrency:
+  cancel-in-progress: true
+  group: ${{ github.workflow }}-${{ github.ref }}
+
+jobs:
+  test:
+    name: "CrateDB: ${{ matrix.cratedb-version }}
+     Python: ${{ matrix.python-version }}
+     on ${{ matrix.os }}"
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ "ubuntu-latest" ]
+        cratedb-version: [ "nightly" ]
+        python-version: [ "3.11" ]
+
+    services:
+      cratedb:
+        image: crate/crate:${{ matrix.cratedb-version }}  # Tag follows the job matrix.
+        ports:
+          - 4200:4200
+          - 5432:5432
+
+    steps:
+
+      - name: Acquire sources
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+          architecture: x64
+          cache: 'pip'
+          cache-dependency-path: |
+            framework/langchain/requirements.txt
+            framework/langchain/requirements-dev.txt
+
+      - name: Validate framework/langchain
+        run: |
+          python testing/ngr.py --accept-no-venv framework/langchain
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,5 @@
 .idea
 .venv*
+__pycache__
+.coverage
+coverage.xml
diff --git a/README.rst b/README.rst
@@ -50,6 +50,7 @@ Examples::
 
 More examples::
 
+    python testing/ngr.py framework/langchain
     python testing/ngr.py testing/testcontainers/java
 
 It is recommended to invoke ``ngr`` from within a Python virtualenv.

diff --git a/framework/langchain/.gitignore b/framework/langchain/.gitignore
@@ -0,0 +1 @@
+*.sql
diff --git a/framework/langchain/conftest.py b/framework/langchain/conftest.py
@@ -0,0 +1,52 @@
+import typing as t
+
+import pytest
+
+
+def monkeypatch_pytest_notebook_treat_cell_exit_as_notebook_skip():
+    """
+    Patch `pytest-notebook`, in fact `nbclient.client.NotebookClient`,
+    to propagate cell-level `pytest.exit()` invocations as signals
+    to mark the whole notebook as skipped.
+
+    In order not to be too intrusive, the feature only skips notebooks
+    when being explicitly instructed, by adding `[skip-notebook]` at the
+    end of the `reason` string. Example:
+
+        import pytest
+        if "ACME_API_KEY" not in os.environ:
+            pytest.exit("ACME_API_KEY not given [skip-notebook]")
+
+    https://github.com/chrisjsewell/pytest-notebook/issues/43
+    """
+    from nbclient.client import NotebookClient
+    from nbclient.exceptions import CellExecutionError
+    from nbformat import NotebookNode
+
+    async_execute_cell_dist = NotebookClient.async_execute_cell
+
+    async def async_execute_cell(
+            self,
+            cell: NotebookNode,
+            cell_index: int,
+            execution_count: t.Optional[int] = None,
+            store_history: bool = True,
+    ) -> NotebookNode:
+        try:
+            return await async_execute_cell_dist(
+                self,
+                cell,
+                cell_index,
+                execution_count=execution_count,
+                store_history=store_history,
+            )
+        except CellExecutionError as ex:
+            if ex.ename == "Exit" and ex.evalue.endswith("[skip-notebook]"):
+                raise pytest.skip(ex.evalue)
+            else:
+                raise
+
+    NotebookClient.async_execute_cell = async_execute_cell
+
+
+monkeypatch_pytest_notebook_treat_cell_exit_as_notebook_skip()
diff --git a/framework/langchain/conversational_memory.ipynb b/framework/langchain/conversational_memory.ipynb
@@ -58,12 +58,16 @@
    "source": [
     "from langchain.memory.chat_message_histories import CrateDBChatMessageHistory\n",
     "\n",
+    "# Connect to a self-managed CrateDB instance.\n",
     "CONNECTION_STRING = \"crate://crate@localhost/?schema=notebook\"\n",
     "\n",
     "chat_message_history = CrateDBChatMessageHistory(\n",
     "\tsession_id=\"test_session\",\n",
     "\tconnection_string=CONNECTION_STRING\n",
-    ")"
+    ")\n",
+    "\n",
+    "# Make sure to start with a blank canvas.\n",
+    "chat_message_history.clear()"
    ],
    "metadata": {
     "collapsed": false
@@ -101,9 +105,11 @@
    "outputs": [
     {
      "data": {
-      "text/plain": "[HumanMessage(content='Hello', additional_kwargs={}, example=False),\n AIMessage(content='Hi', additional_kwargs={}, example=False)]"
+      "text/plain": [
+       "[HumanMessage(content='Hello'), AIMessage(content='Hi')]"
+      ]
      },
-     "execution_count": 4,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -147,7 +153,6 @@
     "from datetime import datetime\n",
     "from typing import Any\n",
     "\n",
-    "from langchain.memory.chat_message_histories.cratedb import generate_autoincrement_identifier\n",
     "from langchain.memory.chat_message_histories.sql import BaseMessageConverter\n",
     "from langchain.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage\n",
     "\n",
@@ -161,7 +166,7 @@
     "class CustomMessage(Base):\n",
     "\t__tablename__ = \"custom_message_store\"\n",
     "\n",
-    "\tid = sa.Column(sa.BigInteger, primary_key=True, default=generate_autoincrement_identifier)\n",
+    "\tid = sa.Column(sa.BigInteger, primary_key=True, server_default=sa.func.now())\n",
     "\tsession_id = sa.Column(sa.Text)\n",
     "\ttype = sa.Column(sa.Text)\n",
     "\tcontent = sa.Column(sa.Text)\n",
@@ -215,6 +220,9 @@
     "\t\t)\n",
     "\t)\n",
     "\n",
+    "\t# Make sure to start with a blank canvas.\n",
+    "\tchat_message_history.clear()\n",
+    "\n",
     "\tchat_message_history.add_user_message(\"Hello\")\n",
     "\tchat_message_history.add_ai_message(\"Hi\")"
    ],
@@ -233,9 +241,11 @@
    "outputs": [
     {
      "data": {
-      "text/plain": "[HumanMessage(content='Hello', additional_kwargs={}, example=False),\n AIMessage(content='Hi', additional_kwargs={}, example=False)]"
+      "text/plain": [
+       "[HumanMessage(content='Hello'), AIMessage(content='Hi')]"
+      ]
      },
-     "execution_count": 6,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -268,21 +278,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 7,
    "outputs": [],
    "source": [
     "import json\n",
     "import typing as t\n",
     "\n",
-    "from langchain.memory.chat_message_histories.cratedb import generate_autoincrement_identifier, CrateDBMessageConverter\n",
+    "from langchain.memory.chat_message_histories.cratedb import CrateDBMessageConverter\n",
     "from langchain.schema import _message_to_dict\n",
     "\n",
     "\n",
     "Base = declarative_base()\n",
     "\n",
     "class MessageWithDifferentSessionIdColumn(Base):\n",
     "\t__tablename__ = \"message_store_different_session_id\"\n",
-    "\tid = sa.Column(sa.BigInteger, primary_key=True, default=generate_autoincrement_identifier)\n",
+    "\tid = sa.Column(sa.BigInteger, primary_key=True, server_default=sa.func.now())\n",
     "\tcustom_session_id = sa.Column(sa.Text)\n",
     "\tmessage = sa.Column(sa.Text)\n",
     "\n",
@@ -307,6 +317,9 @@
     "\t\tsession_id_field_name=\"custom_session_id\",\n",
     "\t)\n",
     "\n",
+    "\t# Make sure to start with a blank canvas.\n",
+    "\tchat_message_history.clear()\n",
+    "\n",
     "\tchat_message_history.add_user_message(\"Hello\")\n",
     "\tchat_message_history.add_ai_message(\"Hi\")"
    ],
@@ -316,11 +329,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 8,
    "outputs": [
     {
      "data": {
-      "text/plain": "[HumanMessage(content='Hello', additional_kwargs={}, example=False),\n AIMessage(content='Hi', additional_kwargs={}, example=False)]"
+      "text/plain": [
+       "[HumanMessage(content='Hello'), AIMessage(content='Hi')]"
+      ]
      },
      "execution_count": 9,
      "metadata": {},

diff --git a/framework/langchain/conversational_memory.py b/framework/langchain/conversational_memory.py
@@ -19,11 +19,17 @@
 from langchain.memory.chat_message_histories import CrateDBChatMessageHistory
 
 
+CONNECTION_STRING = os.environ.get(
+    "CRATEDB_CONNECTION_STRING",
+    "crate://crate@localhost/?schema=doc"
+)
+
+
 def main():
 
     chat_message_history = CrateDBChatMessageHistory(
         session_id="test_session",
-        connection_string=os.environ.get("CRATEDB_CONNECTION_STRING")
+        connection_string=CONNECTION_STRING,
     )
     chat_message_history.add_user_message("Hello")
     chat_message_history.add_ai_message("Hi")

diff --git a/framework/langchain/document_loader.ipynb b/framework/langchain/document_loader.ipynb
@@ -58,8 +58,8 @@
      "output_type": "stream",
      "text": [
       "\u001B[32mCONNECT OK\r\n",
-      "\u001B[0m\u001B[32mPSQL OK, 1 row affected (0.001 sec)\r\n",
-      "\u001B[0m\u001B[32mDELETE OK, 30 rows affected (0.010 sec)\r\n",
+      "\u001B[0m\u001B[32mPROVISIONING OK, 0 rows affected (0.001 sec)\r\n",
+      "\u001B[0m\u001B[32mCREATE OK, 1 row affected (0.010 sec)\r\n",
       "\u001B[0m\u001B[32mINSERT OK, 30 rows affected (0.011 sec)\r\n",
       "\u001B[0m\u001B[0m\u001B[32mCONNECT OK\r\n",
       "\u001B[0m\u001B[32mREFRESH OK, 1 row affected (0.026 sec)\r\n",
@@ -95,6 +95,7 @@
     "from langchain.document_loaders import CrateDBLoader\n",
     "from pprint import pprint\n",
     "\n",
+    "# Connect to a self-managed CrateDB instance.\n",
     "CONNECTION_STRING = \"crate://crate@localhost/?schema=notebook\"\n",
     "\n",
     "loader = CrateDBLoader(\n",
@@ -115,11 +116,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[Document(page_content='Team: Angels\\nPayroll (millions): 154.49\\nWins: 89', metadata={}),\n",
-      " Document(page_content='Team: Astros\\nPayroll (millions): 60.65\\nWins: 55', metadata={}),\n",
-      " Document(page_content='Team: Athletics\\nPayroll (millions): 55.37\\nWins: 94', metadata={}),\n",
-      " Document(page_content='Team: Blue Jays\\nPayroll (millions): 75.48\\nWins: 73', metadata={}),\n",
-      " Document(page_content='Team: Braves\\nPayroll (millions): 83.31\\nWins: 94', metadata={})]\n"
+      "[Document(page_content='Team: Angels\\nPayroll (millions): 154.49\\nWins: 89'),\n",
+      " Document(page_content='Team: Astros\\nPayroll (millions): 60.65\\nWins: 55'),\n",
+      " Document(page_content='Team: Athletics\\nPayroll (millions): 55.37\\nWins: 94'),\n",
+      " Document(page_content='Team: Blue Jays\\nPayroll (millions): 75.48\\nWins: 73'),\n",
+      " Document(page_content='Team: Braves\\nPayroll (millions): 83.31\\nWins: 94')]\n"
      ]
     }
    ],

diff --git a/framework/langchain/document_loader.py b/framework/langchain/document_loader.py
@@ -28,10 +28,16 @@
 from pprint import pprint
 
 
+CONNECTION_STRING = os.environ.get(
+    "CRATEDB_CONNECTION_STRING",
+    "crate://crate@localhost/?schema=doc"
+)
+
+
 def main():
     loader = CrateDBLoader(
         query="SELECT * FROM mlb_teams_2012 LIMIT 3;",
-        url=os.environ.get("CRATEDB_CONNECTION_STRING"),
+        url=CONNECTION_STRING,
         include_rownum_into_metadata=True,
     )
     docs = loader.load()

diff --git a/framework/langchain/pyproject.toml b/framework/langchain/pyproject.toml
@@ -0,0 +1,45 @@
+[tool.pytest.ini_options]
+minversion = "2.0"
+addopts = """
+  -rfEX -p pytester --strict-markers --verbosity=3 --capture=no
+  """
+# --cov=. --cov-report=term-missing --cov-report=xml
+env = [
+    "CRATEDB_CONNECTION_STRING=crate://crate@localhost/?schema=testdrive",
+    "PYDEVD_DISABLE_FILE_VALIDATION=1",
+]
+
+#log_level = "DEBUG"
+#log_cli_level = "DEBUG"
+
+testpaths = [
+    "*.py",
+]
+xfail_strict = true
+markers = [
+]
+
+# pytest-notebook settings
+nb_test_files = true
+nb_coverage = true
+nb_diff_replace = [
+    # Normalize timings in `crash` output, e.g. `(0.010 sec)`, to a constant.
+    '"/cells/*/outputs/*/text" "\(\d+\.\d+ sec\)" "(0.000 sec)"',
+]
+# `vector_search.py` does not include any output(s).
+nb_diff_ignore = [
+    "/metadata/language_info",
+    "/cells/*/execution_count",
+    "/cells/*/outputs/*/execution_count",
+]
+
+[tool.coverage.run]
+branch = false
+
+[tool.coverage.report]
+fail_under = 0
+show_missing = true
+omit = [
+    "conftest.py",
+    "test*.py",
+]