Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
c0e260f
Fix pytest option parsing
amotl Sep 15, 2023
249580f
pgvector: Slight refactoring to make code a bit more reusable
amotl Sep 15, 2023
b752717
CrateDB vector: Add vector store support
amotl Sep 15, 2023
6cab9b5
CrateDB vector: Add documentation
amotl Sep 15, 2023
6444a46
Add SQLAlchemy document loader
amotl Sep 16, 2023
f494d64
CrateDB loader: Add document loader support
amotl Sep 16, 2023
5894310
Generalize `SQLChatMessageHistory` to make code a bit more reusable
amotl Sep 17, 2023
08f87b6
CrateDB memory: Add conversational memory support
amotl Sep 17, 2023
901fdcc
CrateDB vector: Fix usage when only reading, and not storing
amotl Oct 27, 2023
33d81e3
CrateDB vector: Unable to invoke `add_embeddings` without embeddings
amotl Oct 27, 2023
dfc9243
CrateDB vector: Improve SQLAlchemy model factory
amotl Nov 20, 2023
0e7f16b
CrateDB vector: Fix cascading deletes
amotl Nov 20, 2023
e5c947c
CrateDB vector: Add CrateDBVectorSearchMultiCollection
amotl Nov 21, 2023
2208963
CrateDB vector: Improve SQLAlchemy data model query utility functions
amotl Nov 21, 2023
d8429f7
CrateDB vector: Improve testing when initialized without dimensionality
amotl Nov 21, 2023
02cab14
pgvector: Use SA's `bulk_save_objects` method for inserting embeddings
amotl Nov 21, 2023
bcd304b
CrateDB vector: Test non-deterministic values by using pytest.approx
amotl Nov 22, 2023
dd64cd4
CrateDB vector: Fix initialization of vector dimensionality
amotl Nov 27, 2023
07ba7af
CrateDB vector: Refactor SQLAlchemy data model to provide two strategies
amotl Nov 22, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
232 changes: 232 additions & 0 deletions docs/docs/integrations/document_loaders/cratedb.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# CrateDB\n",
"\n",
"This notebook demonstrates how to load documents from a [CrateDB] database,\n",
"using the [SQLAlchemy] document loader.\n",
"\n",
"It loads the result of a database query with one document per row.\n",
"\n",
"[CrateDB]: https://github.com/crate/crate\n",
"[SQLAlchemy]: https://www.sqlalchemy.org/"
]
},
{
"cell_type": "markdown",
"source": [
"## Prerequisites"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#!pip install crash 'langchain[cratedb]'"
]
},
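{
"cell_type": "markdown",
"source": [
"Populate the database with the example table `mlb_teams_2012`, using the `crash` command-line shell.\n",
"The `REFRESH TABLE` statement makes the inserted rows visible to the queries that follow."
],
"metadata": {
"collapsed": false
}
},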
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001B[32mCONNECT OK\r\n",
"\u001B[0m\u001B[32mPSQL OK, 1 row affected (0.001 sec)\r\n",
"\u001B[0m\u001B[32mDELETE OK, 30 rows affected (0.008 sec)\r\n",
"\u001B[0m\u001B[32mINSERT OK, 30 rows affected (0.011 sec)\r\n",
"\u001B[0m\u001B[0m\u001B[32mCONNECT OK\r\n",
"\u001B[0m\u001B[32mREFRESH OK, 1 row affected (0.001 sec)\r\n",
"\u001B[0m\u001B[0m"
]
}
],
"source": [
"!crash < ./example_data/mlb_teams_2012.sql\n",
"!crash --command \"REFRESH TABLE mlb_teams_2012;\""
]
},
{
"cell_type": "markdown",
"source": [
"## Usage"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import CrateDBLoader\n",
"from pprint import pprint\n",
"\n",
"CONNECTION_STRING = \"crate://crate@localhost/\"\n",
"\n",
"loader = CrateDBLoader(\n",
" 'SELECT * FROM mlb_teams_2012 ORDER BY \"Team\" LIMIT 5;',\n",
" url=CONNECTION_STRING,\n",
")\n",
"documents = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Document(page_content='Team: Angels\\nPayroll (millions): 154.49\\nWins: 89', metadata={}),\n",
" Document(page_content='Team: Astros\\nPayroll (millions): 60.65\\nWins: 55', metadata={}),\n",
" Document(page_content='Team: Athletics\\nPayroll (millions): 55.37\\nWins: 94', metadata={}),\n",
" Document(page_content='Team: Blue Jays\\nPayroll (millions): 75.48\\nWins: 73', metadata={}),\n",
" Document(page_content='Team: Braves\\nPayroll (millions): 83.31\\nWins: 94', metadata={})]\n"
]
}
],
"source": [
"pprint(documents)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Specifying Which Columns are Content vs Metadata"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"loader = CrateDBLoader(\n",
" 'SELECT * FROM mlb_teams_2012 ORDER BY \"Team\" LIMIT 5;',\n",
" url=CONNECTION_STRING,\n",
" page_content_columns=[\"Team\"],\n",
" metadata_columns=[\"Payroll (millions)\"],\n",
")\n",
"documents = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Document(page_content='Team: Angels', metadata={'Payroll (millions)': 154.49}),\n",
" Document(page_content='Team: Astros', metadata={'Payroll (millions)': 60.65}),\n",
" Document(page_content='Team: Athletics', metadata={'Payroll (millions)': 55.37}),\n",
" Document(page_content='Team: Blue Jays', metadata={'Payroll (millions)': 75.48}),\n",
" Document(page_content='Team: Braves', metadata={'Payroll (millions)': 83.31})]\n"
]
}
],
"source": [
"pprint(documents)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Adding Source to Metadata"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"loader = CrateDBLoader(\n",
" 'SELECT * FROM mlb_teams_2012 ORDER BY \"Team\" LIMIT 5;',\n",
" url=CONNECTION_STRING,\n",
" source_columns=[\"Team\"],\n",
")\n",
"documents = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Document(page_content='Team: Angels\\nPayroll (millions): 154.49\\nWins: 89', metadata={'source': 'Angels'}),\n",
" Document(page_content='Team: Astros\\nPayroll (millions): 60.65\\nWins: 55', metadata={'source': 'Astros'}),\n",
" Document(page_content='Team: Athletics\\nPayroll (millions): 55.37\\nWins: 94', metadata={'source': 'Athletics'}),\n",
" Document(page_content='Team: Blue Jays\\nPayroll (millions): 75.48\\nWins: 73', metadata={'source': 'Blue Jays'}),\n",
" Document(page_content='Team: Braves\\nPayroll (millions): 83.31\\nWins: 94', metadata={'source': 'Braves'})]\n"
]
}
],
"source": [
"pprint(documents)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
-- Provisioning table "mlb_teams_2012".
--
-- Recreates the table from scratch and loads 30 rows, one per team, holding
-- the team name, its payroll (in millions) and its number of wins.
--
-- Usage, by piping the script into a database shell:
--
-- crash < mlb_teams_2012.sql
-- psql postgresql://postgres@localhost < mlb_teams_2012.sql

-- Remove any previous copy first, so that re-running the script neither fails
-- on an existing table nor accumulates duplicate rows.
DROP TABLE IF EXISTS mlb_teams_2012;
-- The column names are quoted identifiers (mixed case, a space, parentheses),
-- so queries against this table have to quote them as well, e.g. "Team".
CREATE TABLE mlb_teams_2012 ("Team" VARCHAR, "Payroll (millions)" FLOAT, "Wins" BIGINT);
-- All rows are loaded with a single multi-row INSERT. They are listed by
-- descending number of wins.
INSERT INTO mlb_teams_2012
("Team", "Payroll (millions)", "Wins")
VALUES
('Nationals', 81.34, 98),
('Reds', 82.20, 97),
('Yankees', 197.96, 95),
('Giants', 117.62, 94),
('Braves', 83.31, 94),
('Athletics', 55.37, 94),
('Rangers', 120.51, 93),
('Orioles', 81.43, 93),
('Rays', 64.17, 90),
('Angels', 154.49, 89),
('Tigers', 132.30, 88),
('Cardinals', 110.30, 88),
('Dodgers', 95.14, 86),
('White Sox', 96.92, 85),
('Brewers', 97.65, 83),
('Phillies', 174.54, 81),
('Diamondbacks', 74.28, 81),
('Pirates', 63.43, 79),
('Padres', 55.24, 76),
('Mariners', 81.97, 75),
('Mets', 93.35, 74),
('Blue Jays', 75.48, 73),
('Royals', 60.91, 72),
('Marlins', 118.07, 69),
('Red Sox', 173.18, 69),
('Indians', 78.43, 68),
('Twins', 94.08, 66),
('Rockies', 78.06, 64),
('Cubs', 88.19, 61),
('Astros', 60.65, 55)
;
Loading