Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions .github/workflows/test-langchain.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
name: LangChain
on:

pull_request:
branches: ~
paths:
- '.github/workflows/test-langchain.yml'
- 'framework/langchain/**'
- 'testing/ngr.py'
push:
branches: [ main ]
paths:
- '.github/workflows/test-langchain.yml'
- 'framework/langchain/**'
- 'testing/ngr.py'

# Allow job to be triggered manually.
workflow_dispatch:

# Cancel in-progress jobs when pushing to the same branch.
concurrency:
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.ref }}

jobs:
test:
name: "CrateDB: ${{ matrix.cratedb-version }}
Python: ${{ matrix.python-version }}
on ${{ matrix.os }}"
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ "ubuntu-latest" ]
cratedb-version: [ "nightly" ]
python-version: [ "3.11" ]

services:
cratedb:
image: crate/crate:nightly
ports:
- 4200:4200
- 5432:5432

steps:

- name: Acquire sources
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
architecture: x64
cache: 'pip'
cache-dependency-path: |
framework/langchain/requirements.txt
framework/langchain/requirements-dev.txt

- name: Validate framework/langchain
run: |
python testing/ngr.py --accept-no-venv framework/langchain
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
.idea
.venv*
__pycache__
.coverage
coverage.xml
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ Examples::

More examples::

python testing/ngr.py framework/langchain
python testing/ngr.py testing/testcontainers/java

It is recommended to invoke ``ngr`` from within a Python virtualenv.
Expand Down
1 change: 1 addition & 0 deletions framework/langchain/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.sql
52 changes: 52 additions & 0 deletions framework/langchain/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import typing as t

import pytest


def monkeypatch_pytest_notebook_treat_cell_exit_as_notebook_skip():
"""
Patch `pytest-notebook`, in fact `nbclient.client.NotebookClient`,
to propagate cell-level `pytest.exit()` invocations as signals
to mark the whole notebook as skipped.
In order not to be too intrusive, the feature only skips notebooks
when being explicitly instructed, by adding `[skip-notebook]` at the
end of the `reason` string. Example:
import pytest
if "ACME_API_KEY" not in os.environ:
pytest.exit("ACME_API_KEY not given [skip-notebook]")
https://github.com/chrisjsewell/pytest-notebook/issues/43
"""
Comment on lines +6 to +21
Copy link
Member Author

@amotl amotl Oct 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a utility temporarily added here. Based on the outcome from chrisjsewell/pytest-notebook#43, it may be added to the upstream library, or to a different spot.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code has been added to another library, to not clutter the canonical cratedb-examples repository with such utility code.

-- https://github.com/pyveci/pueblo/blob/v0.0.1/pueblo/testing/notebook.py

from nbclient.client import NotebookClient
from nbclient.exceptions import CellExecutionError
from nbformat import NotebookNode

async_execute_cell_dist = NotebookClient.async_execute_cell

async def async_execute_cell(
self,
cell: NotebookNode,
cell_index: int,
execution_count: t.Optional[int] = None,
store_history: bool = True,
) -> NotebookNode:
try:
return await async_execute_cell_dist(
self,
cell,
cell_index,
execution_count=execution_count,
store_history=store_history,
)
except CellExecutionError as ex:
if ex.ename == "Exit" and ex.evalue.endswith("[skip-notebook]"):
raise pytest.skip(ex.evalue)
else:
raise

NotebookClient.async_execute_cell = async_execute_cell


monkeypatch_pytest_notebook_treat_cell_exit_as_notebook_skip()
39 changes: 27 additions & 12 deletions framework/langchain/conversational_memory.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,16 @@
"source": [
"from langchain.memory.chat_message_histories import CrateDBChatMessageHistory\n",
"\n",
"# Connect to a self-managed CrateDB instance.\n",
"CONNECTION_STRING = \"crate://crate@localhost/?schema=notebook\"\n",
"\n",
"chat_message_history = CrateDBChatMessageHistory(\n",
"\tsession_id=\"test_session\",\n",
"\tconnection_string=CONNECTION_STRING\n",
")"
")\n",
"\n",
"# Make sure to start with a blank canvas.\n",
"chat_message_history.clear()"
],
"metadata": {
"collapsed": false
Expand Down Expand Up @@ -101,9 +105,11 @@
"outputs": [
{
"data": {
"text/plain": "[HumanMessage(content='Hello', additional_kwargs={}, example=False),\n AIMessage(content='Hi', additional_kwargs={}, example=False)]"
"text/plain": [
"[HumanMessage(content='Hello'), AIMessage(content='Hi')]"
]
},
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -147,7 +153,6 @@
"from datetime import datetime\n",
"from typing import Any\n",
"\n",
"from langchain.memory.chat_message_histories.cratedb import generate_autoincrement_identifier\n",
"from langchain.memory.chat_message_histories.sql import BaseMessageConverter\n",
"from langchain.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage\n",
"\n",
Expand All @@ -161,7 +166,7 @@
"class CustomMessage(Base):\n",
"\t__tablename__ = \"custom_message_store\"\n",
"\n",
"\tid = sa.Column(sa.BigInteger, primary_key=True, default=generate_autoincrement_identifier)\n",
"\tid = sa.Column(sa.BigInteger, primary_key=True, server_default=sa.func.now())\n",
Comment on lines -164 to +169
Copy link
Member Author

@amotl amotl Oct 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is so good to have this tested now. 🚀

Even if the Notebook worked once upon a time, it did not in its current form, because the previous implementation changed.

It is difficult to catch such errors without running rigid software testing, minor things like this easily slip through, effectively rendering the notebook unusable for all people trying to exercise it.

"\tsession_id = sa.Column(sa.Text)\n",
"\ttype = sa.Column(sa.Text)\n",
"\tcontent = sa.Column(sa.Text)\n",
Expand Down Expand Up @@ -215,6 +220,9 @@
"\t\t)\n",
"\t)\n",
"\n",
"\t# Make sure to start with a blank canvas.\n",
"\tchat_message_history.clear()\n",
Comment on lines +223 to +224
Copy link
Member Author

@amotl amotl Oct 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Idempotency flaws when testing. Fixed now.

"\n",
"\tchat_message_history.add_user_message(\"Hello\")\n",
"\tchat_message_history.add_ai_message(\"Hi\")"
],
Expand All @@ -233,9 +241,11 @@
"outputs": [
{
"data": {
"text/plain": "[HumanMessage(content='Hello', additional_kwargs={}, example=False),\n AIMessage(content='Hi', additional_kwargs={}, example=False)]"
"text/plain": [
"[HumanMessage(content='Hello'), AIMessage(content='Hi')]"
]
},
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -268,21 +278,21 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"outputs": [],
"source": [
"import json\n",
"import typing as t\n",
"\n",
"from langchain.memory.chat_message_histories.cratedb import generate_autoincrement_identifier, CrateDBMessageConverter\n",
"from langchain.memory.chat_message_histories.cratedb import CrateDBMessageConverter\n",
"from langchain.schema import _message_to_dict\n",
"\n",
"\n",
"Base = declarative_base()\n",
"\n",
"class MessageWithDifferentSessionIdColumn(Base):\n",
"\t__tablename__ = \"message_store_different_session_id\"\n",
"\tid = sa.Column(sa.BigInteger, primary_key=True, default=generate_autoincrement_identifier)\n",
"\tid = sa.Column(sa.BigInteger, primary_key=True, server_default=sa.func.now())\n",
"\tcustom_session_id = sa.Column(sa.Text)\n",
"\tmessage = sa.Column(sa.Text)\n",
"\n",
Expand All @@ -307,6 +317,9 @@
"\t\tsession_id_field_name=\"custom_session_id\",\n",
"\t)\n",
"\n",
"\t# Make sure to start with a blank canvas.\n",
"\tchat_message_history.clear()\n",
"\n",
"\tchat_message_history.add_user_message(\"Hello\")\n",
"\tchat_message_history.add_ai_message(\"Hi\")"
],
Expand All @@ -316,11 +329,13 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 8,
"outputs": [
{
"data": {
"text/plain": "[HumanMessage(content='Hello', additional_kwargs={}, example=False),\n AIMessage(content='Hi', additional_kwargs={}, example=False)]"
"text/plain": [
"[HumanMessage(content='Hello'), AIMessage(content='Hi')]"
]
},
"execution_count": 9,
"metadata": {},
Expand Down
8 changes: 7 additions & 1 deletion framework/langchain/conversational_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,17 @@
from langchain.memory.chat_message_histories import CrateDBChatMessageHistory


CONNECTION_STRING = os.environ.get(
"CRATEDB_CONNECTION_STRING",
"crate://crate@localhost/?schema=doc"
)


def main():

chat_message_history = CrateDBChatMessageHistory(
session_id="test_session",
connection_string=os.environ.get("CRATEDB_CONNECTION_STRING")
connection_string=CONNECTION_STRING,
)
chat_message_history.add_user_message("Hello")
chat_message_history.add_ai_message("Hi")
Expand Down
15 changes: 8 additions & 7 deletions framework/langchain/document_loader.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@
"output_type": "stream",
"text": [
"\u001B[32mCONNECT OK\r\n",
"\u001B[0m\u001B[32mPSQL OK, 1 row affected (0.001 sec)\r\n",
"\u001B[0m\u001B[32mDELETE OK, 30 rows affected (0.010 sec)\r\n",
"\u001B[0m\u001B[32mPROVISIONING OK, 0 rows affected (0.001 sec)\r\n",
"\u001B[0m\u001B[32mCREATE OK, 1 row affected (0.010 sec)\r\n",
"\u001B[0m\u001B[32mINSERT OK, 30 rows affected (0.011 sec)\r\n",
"\u001B[0m\u001B[0m\u001B[32mCONNECT OK\r\n",
"\u001B[0m\u001B[32mREFRESH OK, 1 row affected (0.026 sec)\r\n",
Expand Down Expand Up @@ -95,6 +95,7 @@
"from langchain.document_loaders import CrateDBLoader\n",
"from pprint import pprint\n",
"\n",
"# Connect to a self-managed CrateDB instance.\n",
"CONNECTION_STRING = \"crate://crate@localhost/?schema=notebook\"\n",
"\n",
"loader = CrateDBLoader(\n",
Expand All @@ -115,11 +116,11 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[Document(page_content='Team: Angels\\nPayroll (millions): 154.49\\nWins: 89', metadata={}),\n",
" Document(page_content='Team: Astros\\nPayroll (millions): 60.65\\nWins: 55', metadata={}),\n",
" Document(page_content='Team: Athletics\\nPayroll (millions): 55.37\\nWins: 94', metadata={}),\n",
" Document(page_content='Team: Blue Jays\\nPayroll (millions): 75.48\\nWins: 73', metadata={}),\n",
" Document(page_content='Team: Braves\\nPayroll (millions): 83.31\\nWins: 94', metadata={})]\n"
"[Document(page_content='Team: Angels\\nPayroll (millions): 154.49\\nWins: 89'),\n",
" Document(page_content='Team: Astros\\nPayroll (millions): 60.65\\nWins: 55'),\n",
" Document(page_content='Team: Athletics\\nPayroll (millions): 55.37\\nWins: 94'),\n",
" Document(page_content='Team: Blue Jays\\nPayroll (millions): 75.48\\nWins: 73'),\n",
" Document(page_content='Team: Braves\\nPayroll (millions): 83.31\\nWins: 94')]\n"
]
}
],
Expand Down
8 changes: 7 additions & 1 deletion framework/langchain/document_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,16 @@
from pprint import pprint


CONNECTION_STRING = os.environ.get(
"CRATEDB_CONNECTION_STRING",
"crate://crate@localhost/?schema=doc"
)


def main():
loader = CrateDBLoader(
query="SELECT * FROM mlb_teams_2012 LIMIT 3;",
url=os.environ.get("CRATEDB_CONNECTION_STRING"),
url=CONNECTION_STRING,
include_rownum_into_metadata=True,
)
docs = loader.load()
Expand Down
45 changes: 45 additions & 0 deletions framework/langchain/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
[tool.pytest.ini_options]
minversion = "2.0"
addopts = """
-rfEX -p pytester --strict-markers --verbosity=3 --capture=no
"""
# --cov=. --cov-report=term-missing --cov-report=xml
env = [
"CRATEDB_CONNECTION_STRING=crate://crate@localhost/?schema=testdrive",
"PYDEVD_DISABLE_FILE_VALIDATION=1",
]

#log_level = "DEBUG"
#log_cli_level = "DEBUG"

testpaths = [
"*.py",
]
xfail_strict = true
markers = [
]

# pytest-notebook settings
nb_test_files = true
nb_coverage = true
nb_diff_replace = [
# Compensate output of `crash`.
'"/cells/*/outputs/*/text" "\(\d.\d+ sec\)" "(0.000 sec)"',
]
# `vector_search.py` does not include any output(s).
nb_diff_ignore = [
"/metadata/language_info",
"/cells/*/execution_count",
"/cells/*/outputs/*/execution_count",
]

[tool.coverage.run]
branch = false

[tool.coverage.report]
fail_under = 0
show_missing = true
omit = [
"conftest.py",
"test*.py",
]
Loading