Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docs/snippets/basic_usage.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ export const PyBasicImports = "import json\n\nimport lancedb\nimport pandas as p

export const PyBasicOpenTable = "table = db.open_table(\"camelot\")\n";

export const PyBasicVectorSearch = "query_vector = [0.03, 0.85, 0.61, 0.90]\ntable.search(query_vector).limit(5).to_polars()\n";
export const PyBasicVectorSearch = "query_vector = [0.03, 0.85, 0.61, 0.90]\nresult = table.search(query_vector).limit(5).to_polars()\nprint(result)\n";

export const PyBasicVectorSearchQ1 = "# Who are the characters similar to \"wizard\"?\nquery_vector_1 = [0.03, 0.85, 0.61, 0.90]\nr1 = (\n table.search(query_vector_1)\n .limit(5)\n .select([\"name\", \"role\", \"description\"])\n .to_polars()\n)\nprint(r1)\n";

Expand Down Expand Up @@ -52,7 +52,7 @@ export const TsBasicImports = "import * as lancedb from \"@lancedb/lancedb\";\ni

export const TsBasicOpenTable = "table = await db.openTable(\"camelot\");\n";

export const TsBasicVectorSearch = "const queryVector = [0.03, 0.85, 0.61, 0.9];\nawait table.search(queryVector).limit(5).toArray();\n";
export const TsBasicVectorSearch = "const queryVector = [0.03, 0.85, 0.61, 0.9];\nconst result = await table.search(queryVector).limit(5).toArray();\nconsole.log(result);\n";

export const TsBasicVectorSearchQ1 = "// Who are the characters similar to \"wizard\"?\nconst queryVector1 = [0.03, 0.85, 0.61, 0.9];\nconst r1 = await table\n .search(queryVector1)\n .limit(5)\n .select([\"name\", \"role\", \"description\"])\n .toArray();\nconsole.log(r1);\n";

Expand Down Expand Up @@ -88,7 +88,7 @@ export const RsBasicImports = "use arrow_array::types::Float32Type;\nuse arrow_a

export const RsBasicOpenTable = "table = db.open_table(\"camelot\").execute().await.unwrap();\n";

export const RsBasicVectorSearch = "let query_vector = [0.03, 0.85, 0.61, 0.90];\nlet _ = table\n .query()\n .nearest_to(&query_vector)\n .unwrap()\n .limit(5)\n .execute()\n .await\n .unwrap()\n .try_collect::<Vec<_>>()\n .await\n .unwrap();\n";
export const RsBasicVectorSearch = "let query_vector = [0.03, 0.85, 0.61, 0.90];\nlet result = table\n .query()\n .nearest_to(&query_vector)\n .unwrap()\n .limit(5)\n .execute()\n .await\n .unwrap()\n .try_collect::<Vec<_>>()\n .await\n .unwrap();\nprintln!(\"{result:?}\");\n";

export const RsBasicVectorSearchQ1 = "// Who are the characters similar to \"wizard\"?\nlet query_vector_1 = [0.03, 0.85, 0.61, 0.90];\nlet r1 = table\n .query()\n .nearest_to(&query_vector_1)\n .unwrap()\n .limit(5)\n .select(Select::Columns(vec![\n \"name\".to_string(),\n \"role\".to_string(),\n \"description\".to_string(),\n ]))\n .execute()\n .await\n .unwrap()\n .try_collect::<Vec<_>>()\n .await\n .unwrap();\nprintln!(\"{r1:?}\");\n";

Expand Down
16 changes: 8 additions & 8 deletions docs/snippets/quickstart.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ export const PyQuickstartCreateTableNoOverwrite = "table = db.create_table(\"adv

export const PyQuickstartOpenTable = "table = db.open_table(\"adventurers\")\n";

export const PyQuickstartOutputPandas = "# Ensure you run `pip install pandas` beforehand\nresult = table.search(query_vector).limit(2).to_pandas()\n";
export const PyQuickstartOutputPandas = "# Ensure you run `pip install pandas` beforehand\nresult = table.search(query_vector).limit(2).to_pandas()\nprint(result)\n";

export const PyQuickstartVectorSearch1 = "# Let's search for vectors similar to \"warrior\"\nquery_vector = [0.8, 0.3, 0.8]\n\n# Ensure you run `pip install polars` beforehand\nresult = table.search(query_vector).limit(2).to_polars()\n";
export const PyQuickstartVectorSearch1 = "# Let's search for vectors similar to \"warrior\"\nquery_vector = [0.8, 0.3, 0.8]\n\n# Ensure you run `pip install polars` beforehand\nresult = table.search(query_vector).limit(2).to_polars()\nprint(result)\n";

export const PyQuickstartVectorSearch2 = "# Let's search for vectors similar to \"wizard\"\nquery_vector = [0.7, 0.3, 0.5]\n\nresults = table.search(query_vector).limit(2).to_polars()\nprint(results)\n";

Expand All @@ -22,13 +22,13 @@ export const TsQuickstartCreateTableNoOverwrite = "table = await db.createTable(

export const TsQuickstartOpenTable = "table = await db.openTable(\"adventurers\");\n";

export const TsQuickstartOutputArray = "result = await table.search(queryVector).limit(2).toArray();\n";
export const TsQuickstartOutputArray = "result = await table.search(queryVector).limit(2).toArray();\nconsole.table(result);\n";

export const TsQuickstartOutputPandas = "result = await table.search(queryVector).limit(2).toArray();\n";

export const TsQuickstartVectorSearch1 = "// Let's search for vectors similar to \"warrior\"\nlet queryVector = [0.8, 0.3, 0.8];\n\nlet result = await table.search(queryVector).limit(2).toArray();\n";
export const TsQuickstartVectorSearch1 = "// Let's search for vectors similar to \"warrior\"\nlet queryVector = [0.8, 0.3, 0.8];\n\nlet result = await table.search(queryVector).limit(2).toArray();\nconsole.table(result);\n";

export const TsQuickstartVectorSearch2 = "// Let's search for vectors similar to \"wizard\"\nqueryVector = [0.7, 0.3, 0.5];\n\nconst results = await table.search(queryVector).limit(2).toArray();\nconsole.log(results);\n";
export const TsQuickstartVectorSearch2 = "// Let's search for vectors similar to \"wizard\"\nqueryVector = [0.7, 0.3, 0.5];\n\nconst results = await table.search(queryVector).limit(2).toArray();\nconsole.table(results);\n";

export const RsQuickstartAddData = "let more_data = vec![\n Adventurer {\n id: \"7\".to_string(),\n text: \"mage\".to_string(),\n vector: [0.6, 0.3, 0.4],\n },\n Adventurer {\n id: \"8\".to_string(),\n text: \"bard\".to_string(),\n vector: [0.3, 0.8, 0.4],\n },\n];\n\n// Add data to table\ntable\n .add(adventurers_to_reader(schema.clone(), &more_data))\n .execute()\n .await\n .unwrap();\n";

Expand All @@ -40,9 +40,9 @@ export const RsQuickstartDefineStruct = "// Define a struct representing the dat

export const RsQuickstartOpenTable = "let table: Table = db.open_table(\"adventurers\").execute().await.unwrap();\n";

export const RsQuickstartOutputArray = "let result: DataFrame = table\n .query()\n .nearest_to(&query_vector)\n .unwrap()\n .limit(2)\n .select(Select::Columns(vec![\"text\".to_string()]))\n .execute()\n .await\n .unwrap()\n .into_polars()\n .await\n .unwrap();\nlet text_col = result.column(\"text\").unwrap().str().unwrap();\nlet top_two = vec![\n text_col.get(0).unwrap().to_string(),\n text_col.get(1).unwrap().to_string(),\n];\n";
export const RsQuickstartOutputArray = "let result: DataFrame = table\n .query()\n .nearest_to(&query_vector)\n .unwrap()\n .limit(2)\n .select(Select::Columns(vec![\"text\".to_string()]))\n .execute()\n .await\n .unwrap()\n .into_polars()\n .await\n .unwrap();\nprintln!(\"{result:?}\");\nlet text_col = result.column(\"text\").unwrap().str().unwrap();\nlet top_two = vec![\n text_col.get(0).unwrap().to_string(),\n text_col.get(1).unwrap().to_string(),\n];\n";

export const RsQuickstartVectorSearch1 = "// Let's search for vectors similar to \"warrior\"\nlet query_vector = [0.8, 0.3, 0.8];\n\nlet result: DataFrame = table\n .query()\n .nearest_to(&query_vector)\n .unwrap()\n .limit(2)\n .select(Select::Columns(vec![\"text\".to_string()]))\n .execute()\n .await\n .unwrap()\n .into_polars()\n .await\n .unwrap();\n";
export const RsQuickstartVectorSearch1 = "// Let's search for vectors similar to \"warrior\"\nlet query_vector = [0.8, 0.3, 0.8];\n\nlet result: DataFrame = table\n .query()\n .nearest_to(&query_vector)\n .unwrap()\n .limit(2)\n .select(Select::Columns(vec![\"text\".to_string()]))\n .execute()\n .await\n .unwrap()\n .into_polars()\n .await\n .unwrap();\nprintln!(\"{result:?}\");\n";

export const RsQuickstartVectorSearch2 = "// Let's search for vectors similar to \"wizard\"\nlet query_vector = [0.7, 0.3, 0.5];\n\nlet result: DataFrame = table\n .query()\n .nearest_to(&query_vector)\n .unwrap()\n .limit(2)\n .select(Select::Columns(vec![\"text\".to_string()]))\n .execute()\n .await\n .unwrap()\n .into_polars()\n .await\n .unwrap();\nlet text_col = result.column(\"text\").unwrap().str().unwrap();\nlet top_two = vec![\n text_col.get(0).unwrap().to_string(),\n text_col.get(1).unwrap().to_string(),\n];\n";
export const RsQuickstartVectorSearch2 = "// Let's search for vectors similar to \"wizard\"\nlet query_vector = [0.7, 0.3, 0.5];\n\nlet result: DataFrame = table\n .query()\n .nearest_to(&query_vector)\n .unwrap()\n .limit(2)\n .select(Select::Columns(vec![\"text\".to_string()]))\n .execute()\n .await\n .unwrap()\n .into_polars()\n .await\n .unwrap();\nprintln!(\"{result:?}\");\nlet text_col = result.column(\"text\").unwrap().str().unwrap();\nlet top_two = vec![\n text_col.get(0).unwrap().to_string(),\n text_col.get(1).unwrap().to_string(),\n];\n";

20 changes: 10 additions & 10 deletions docs/tables/create.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -155,21 +155,18 @@ document: struct<content: string not null, source: string not null> not null

#### Validators

Note that neither Pydantic nor PyArrow automatically validates that input data
is of the correct timezone, but this is easy to add as a custom field validator:
Because `LanceModel` inherits from Pydantic's `BaseModel`, you can combine it with Pydantic's
[field validators](https://docs.pydantic.dev/latest/concepts/validators). The example
below shows how to add a validator to ensure that only valid timezone-aware datetime objects are used
for a `created_at` field.

<CodeGroup>
<CodeBlock filename="Python" language="Python" icon="python">
{TablesTzValidator}
</CodeBlock>
</CodeGroup>

When you run this code it should print "A ValidationError was raised."

#### Pydantic custom types

LanceDB does NOT yet support converting pydantic custom types. If this is something you need,
please file a feature request on the [LanceDB Github repo](https://github.com/lancedb/lancedb/issues/new).
When you run this code, it should raise a `ValidationError`.

### Using Iterators / Writing Large Datasets

Expand Down Expand Up @@ -198,7 +195,9 @@ If you forget the name of your table, you can always get a listing of all table
</CodeGroup>

## Creating empty table
You can create an empty table for scenarios where you want to add data to the table later. An example would be when you want to collect data from a stream/external file and then add it to a table in batches.
You can create an empty table for scenarios where you want to add data to the table later.
An example would be when you want to collect data from a stream/external file and then add it to a table in
batches.

An empty table can be initialized via a PyArrow schema.

Expand All @@ -218,7 +217,8 @@ that has been extended to support LanceDB specific types like `Vector`.
</CodeBlock>
</CodeGroup>

Once the empty table has been created, you can add data to it, as explained in the next section on [working with data](/tables/update).
Once the empty table has been created, you can append to it or modify its contents,
as explained in the [updating and modifying tables](/tables/update) section.

## Drop a table

Expand Down
97 changes: 50 additions & 47 deletions docs/tables/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,9 @@ initial testing).
</CodeBlock>
</CodeGroup >

<Note>
<Warning>
If you want to avoid overwriting an existing table, omit the overwrite mode.
</Note>
</Warning>

### From Pandas DataFrames
<Badge color="green">Python Only</Badge>
Expand Down Expand Up @@ -287,14 +287,13 @@ do so by defining an Arrow schema explicitly.
Once the empty table is defined, LanceDB is ready to accept new data via
the `add` method, as shown in the next section.

<Tip>
**Display table schema**

<Card title="Display table schema" icon="book">
LanceDB tables are type-aware, leveraging Apache Arrow under the hood.
In Python, you can display a given table's schema using the `schema` property.
For example running `print(table.schema)` would show something like the following:
You can display a given table's schema using the `schema` property or
method. For example, in Python, running `print(table.schema)` would show
something like the following:

```
```txt expandable=true
id: int64
name: string
role: string
Expand All @@ -307,7 +306,7 @@ stats: struct<courage: int64, magic: int64, strength: int64, wisdom: int64>
child 2, strength: int64
child 3, wisdom: int64
```
</Tip>
</Card>

## Append data to a table

Expand Down Expand Up @@ -378,8 +377,8 @@ the desired columns).
| Queen Guinevere | Queen of Camelot | Arthur's queen, admired for he… |
| Sir Galahad | Knight of the Round Table | The purest and most virtuous k… |

We have Merlin, The Lady of the Lake, and Morgan le Fay in the top results, which
makes sense.
We have Merlin, The Lady of the Lake, and Morgan le Fay in the top results, who
all have magical abilities.

Next, let's try to answer a more complex question that involves filtering on a
nested struct field. Filtering is done using the `where` method, where you can
Expand Down Expand Up @@ -408,15 +407,15 @@ pass in SQL-like expressions.
| Morgan le Fay | Sorceress | A powerful enchantress, Arthur… |

Only three characters have magical abilities greater than 3. Merlin is
clearly the most magical of them all.
clearly the most magical of them all!

## Filtered search

You can also run traditional analytics-style search queries that do not
involve vectors. For example, let's find the strongest characters in
the dataset. In the query below, we leave the `search` method empty to indicate
that we don't want to use any vector for similarity search (in TypeScript, use `query()` instead),
and use the `where` method to filter on the `strength` field.
that we don't want to use any vector for similarity search (in TypeScript/Rust,
use `query()` instead), and use the `where` method to filter on the `strength` field.

> Q3: _Who are the strongest characters?_

Expand Down Expand Up @@ -471,30 +470,10 @@ print(duckdb_tbl)
| Sir Percival | Knight of the Round Table | A loyal and innocent knight whose bravery and … |
| Mordred | Traitor Knight | Arthur's treacherous son or nephew who ultimat… |

## Delete data

You can delete rows from a LanceDB table using the `delete` method with
a filtering expression.

Say we want to throw away Mordred, the traitor knight, from our table.

<CodeGroup >
<CodeBlock filename="Python" language="Python">
{PyBasicDeleteRows}
</CodeBlock>

<CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
{TsBasicDeleteRows}
</CodeBlock>

<CodeBlock filename="Rust" language="Rust" icon="rust">
{RsBasicDeleteRows}
</CodeBlock>
</CodeGroup>

This will delete the row(s) where the `role` column matches "Traitor Knight".
You can verify that the row has been deleted by running a search query again,
and confirming that Mordred no longer appears in the results.
<Info>
Under the hood, Lance tables are Arrow-based, leveraging Arrow's type system. This is why
it's trivial to query a Lance table using DuckDB, which also natively supports Arrow tables.
</Info>

## Add column

Expand All @@ -516,9 +495,9 @@ of each character's strength, courage, magic, and wisdom stats.
</CodeBlock>
</CodeGroup>

The example above shows how a `cast` expression can be used to ensure the
average total stats is available as a float column, that is then converted to
an Arrow float type under the hood for the Lance table.
The example above sums up the individual stats and divides by 4 to compute the average.
The resulting average total stats is cast to an Arrow float type under the hood for the
Lance table.

We can display the results of this column in descending order of power.

Expand All @@ -539,11 +518,11 @@ We can display the results of this column in descending order of power.
</CodeGroup>

Note that LanceDB's `where` only filters rows, but doesn't sort them by applying an `ORDER BY`
clause that you may be used to when working with SQL databases. In TypeScript, you can sort the
returned array in application code.
clause that you may be used to when working with SQL databases.

<Badge color="green">Python Only</Badge>
You can also sort the results after converting them to a Polars DataFrame, as shown in the example above.
You can also sort the results after converting them to a Polars DataFrame.
In TypeScript/Rust, you can sort the
returned array in application code.

```python Python icon="python"
# Sort Polars DataFrame by power in descending order
Expand All @@ -558,9 +537,33 @@ print(r1.sort("power", descending=True).limit(5))
| Sir Lancelot | Knight of the Round Table | Arthur's most skilled knight, … | 3.5 |
| Sir Gawain | Knight of the Round Table | A noble and honorable knight k… | 3.5 |

Merlin and Sir Galahad top the list of powerful characters when considering all their
abilities! Sir Lancelot and the Lady of the Lake follow closely behind.
Merlin and Sir Galahad are the most powerful characters when considering the average of
all their abilities! Sir Lancelot and the Lady of the Lake follow closely behind.

## Delete data

You can delete rows from a LanceDB table using the `delete` method with
a filtering expression.

Say we want to remove Mordred, the traitor knight, from our table.

<CodeGroup >
<CodeBlock filename="Python" language="Python">
{PyBasicDeleteRows}
</CodeBlock>

<CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
{TsBasicDeleteRows}
</CodeBlock>

<CodeBlock filename="Rust" language="Rust" icon="rust">
{RsBasicDeleteRows}
</CodeBlock>
</CodeGroup>

This will delete the row(s) where the `role` value matches "Traitor Knight".
You can verify that the row has been deleted by running a search query again,
and confirming that Mordred no longer appears in the results.

## Drop column

Expand Down
Loading