diff --git a/docs/docs/core/data_types.mdx b/docs/docs/core/data_types.mdx
index d3209b88..daa30f07 100644
--- a/docs/docs/core/data_types.mdx
+++ b/docs/docs/core/data_types.mdx
@@ -46,6 +46,7 @@ This is the list of all primitive types supported by CocoIndex:
| *Bytes* | `bytes` | | |
| *Str* | `str` | | |
| *Bool* | `bool` | | |
+| *Enum* | `str`, `cocoindex.typing.Enum()` | | |
| *Int64* | `cocoindex.Int64`, `int`, `numpy.int64` | | |
| *Float32* | `cocoindex.Float32`, `numpy.float32` | *Float64* | |
| *Float64* | `cocoindex.Float64`, `float`, `numpy.float64` | | |
@@ -84,6 +85,9 @@ Notes:
In Python, it's represented by `cocoindex.Json`.
It's useful to hold data without fixed schema known at flow definition time.
+#### Enum Type
+
+*Enum* represents a string-like enumerated type. In Python, declare it with `cocoindex.typing.Enum(variants=[...])`; at runtime the value is a plain `str`.
#### Vector Types
diff --git a/docs/docs/examples/examples/docs_to_knowledge_graph.md b/docs/docs/examples/examples/docs_to_knowledge_graph.md
index 0c644f41..ad3a9918 100644
--- a/docs/docs/examples/examples/docs_to_knowledge_graph.md
+++ b/docs/docs/examples/examples/docs_to_knowledge_graph.md
@@ -373,4 +373,4 @@ You can open it at [http://localhost:7474](http://localhost:7474), and run the f
MATCH p=()-->() RETURN p
```
-
\ No newline at end of file
+
diff --git a/docs/docs/sources/index.md b/docs/docs/sources/index.md
index 09cbe166..0857b146 100644
--- a/docs/docs/sources/index.md
+++ b/docs/docs/sources/index.md
@@ -17,6 +17,6 @@ In CocoIndex, a source is the data origin you import from (e.g., files, database
| [Postgres](/docs/sources/postgres) | Relational database (Postgres) |
Related:
-- [Life cycle of a indexing flow](/docs/core/basics#life-cycle-of-an-indexing-flow)
-- [Live Update Tutorial](/docs/tutorials/live_updates)
+- [Life cycle of an indexing flow](/docs/core/basics#life-cycle-of-an-indexing-flow)
+- [Live Update Tutorial](/docs/tutorials/live_updates)
for change capture mechanisms.
diff --git a/docs/docs/targets/index.md b/docs/docs/targets/index.md
index c90d7654..f90a5c32 100644
--- a/docs/docs/targets/index.md
+++ b/docs/docs/targets/index.md
@@ -334,6 +334,3 @@ You can find end-to-end examples fitting into any of supported property graphs i
*
*
-
-
-
diff --git a/docs/docs/targets/kuzu.md b/docs/docs/targets/kuzu.md
index 441e9e78..dc741063 100644
--- a/docs/docs/targets/kuzu.md
+++ b/docs/docs/targets/kuzu.md
@@ -13,7 +13,7 @@ Exports data to a [Kuzu](https://kuzu.com/) graph database.
## Get Started
-Read [Property Graph Targets](./index.md#property-graph-targets) for more information to get started on how it works in CocoIndex.
+Read [Property Graph Targets](./index.md#property-graph-targets) to learn how property graph targets work in CocoIndex before getting started.
## Spec
@@ -59,4 +59,4 @@ You can then access the explorer at [http://localhost:8124](http://localhost:812
href="https://github.com/cocoindex-io/cocoindex/tree/main/examples/docs_to_knowledge_graph"
text="Docs to Knowledge Graph"
margin="16px 0 24px 0"
-/>
\ No newline at end of file
+/>
diff --git a/docs/docs/targets/neo4j.md b/docs/docs/targets/neo4j.md
index ab9e0d16..5e4fdb22 100644
--- a/docs/docs/targets/neo4j.md
+++ b/docs/docs/targets/neo4j.md
@@ -11,7 +11,7 @@ import { ExampleButton } from '../../src/components/GitHubButton';
## Get Started
-Read [Property Graph Targets](./index.md#property-graph-targets) for more information to get started on how it works in CocoIndex.
+Read [Property Graph Targets](./index.md#property-graph-targets) to learn how property graph targets work in CocoIndex before getting started.
## Spec
@@ -59,4 +59,4 @@ If you are building multiple CocoIndex flows from different projects to neo4j, w
This way, you can clean up the data for each flow independently.
-In case you need to clean up the data in the same database, you can do it manually by running `cocoindex drop ` from the project you want to clean up.
\ No newline at end of file
+In case you need to clean up the data in the same database, you can do it manually by running `cocoindex drop ` from the project you want to clean up.
diff --git a/examples/product_recommendation/README.md b/examples/product_recommendation/README.md
index f3ce29b0..314464cf 100644
--- a/examples/product_recommendation/README.md
+++ b/examples/product_recommendation/README.md
@@ -8,7 +8,7 @@ Please drop [CocoIndex on Github](https://github.com/cocoindex-io/cocoindex) a s
## Prerequisite
-* [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres)
+* [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres)
* Install [Neo4j](https://cocoindex.io/docs/targets/neo4j)
* [Configure your OpenAI API key](https://cocoindex.io/docs/ai/llm#openai).
diff --git a/python/cocoindex/typing.py b/python/cocoindex/typing.py
index c4b0ef60..00244167 100644
--- a/python/cocoindex/typing.py
+++ b/python/cocoindex/typing.py
@@ -13,6 +13,8 @@
Literal,
NamedTuple,
Protocol,
+ Optional,
+ Sequence,
TypeVar,
overload,
Self,
@@ -64,6 +66,19 @@ def __init__(self, key: str, value: Any):
LocalDateTime = Annotated[datetime.datetime, TypeKind("LocalDateTime")]
OffsetDateTime = Annotated[datetime.datetime, TypeKind("OffsetDateTime")]
+
+def Enum(*, variants: Optional[Sequence[str]] = None) -> Any:
+ """
+ String-like enumerated type. Use `variants` to hint allowed values.
+ Example:
+ color: Enum(variants=["red", "green", "blue"])
+ At runtime this is a plain `str`; `variants` are emitted as schema attrs.
+ """
+ if variants is not None:
+ return Annotated[str, TypeKind("Enum"), TypeAttr("variants", list(variants))]
+ return Annotated[str, TypeKind("Enum")]
+
+
if TYPE_CHECKING:
T_co = TypeVar("T_co", covariant=True)
Dim_co = TypeVar("Dim_co", bound=int | None, covariant=True, default=None)
@@ -587,6 +602,7 @@ class BasicValueType:
"OffsetDateTime",
"TimeDelta",
"Json",
+ "Enum",
"Vector",
"Union",
]
diff --git a/src/base/json_schema.rs b/src/base/json_schema.rs
index c7a9756c..3c14ea7e 100644
--- a/src/base/json_schema.rs
+++ b/src/base/json_schema.rs
@@ -1,6 +1,6 @@
use crate::prelude::*;
-
use crate::utils::immutable::RefList;
+use indexmap::IndexMap;
use schemars::schema::{
ArrayValidation, InstanceType, ObjectValidation, Schema, SchemaObject, SingleOrVec,
SubschemaValidation,
@@ -74,6 +74,9 @@ impl JsonSchemaBuilder {
schema::BasicValueType::Str => {
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
}
+ schema::BasicValueType::Enum => {
+ schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
+ }
schema::BasicValueType::Bytes => {
schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
}
@@ -245,15 +248,34 @@ impl JsonSchemaBuilder {
field_path.prepend(&f.name),
);
if self.options.fields_always_required && f.value_type.nullable {
- if let Some(instance_type) = &mut field_schema.instance_type {
- let mut types = match instance_type {
- SingleOrVec::Single(t) => vec![**t],
- SingleOrVec::Vec(t) => std::mem::take(t),
+ if field_schema.enum_values.is_some() {
+ // Keep the enum as-is and support null via oneOf
+ let non_null = Schema::Object(field_schema);
+ let null_branch = Schema::Object(SchemaObject {
+ instance_type: Some(SingleOrVec::Single(Box::new(
+ InstanceType::Null,
+ ))),
+ ..Default::default()
+ });
+ field_schema = SchemaObject {
+ subschemas: Some(Box::new(SubschemaValidation {
+ one_of: Some(vec![non_null, null_branch]),
+ ..Default::default()
+ })),
+ ..Default::default()
};
- types.push(InstanceType::Null);
- *instance_type = SingleOrVec::Vec(types);
+ } else {
+ if let Some(instance_type) = &mut field_schema.instance_type {
+ let mut types = match instance_type {
+ SingleOrVec::Single(t) => vec![**t],
+ SingleOrVec::Vec(t) => std::mem::take(t),
+ };
+ types.push(InstanceType::Null);
+ *instance_type = SingleOrVec::Vec(types);
+ }
}
}
+
(f.name.to_string(), field_schema.into())
})
.collect(),
@@ -298,9 +320,26 @@ impl JsonSchemaBuilder {
enriched_value_type: &schema::EnrichedValueType,
field_path: RefList<'_, &'_ spec::FieldName>,
) -> SchemaObject {
- self.for_value_type(schema_base, &enriched_value_type.typ, field_path)
- }
+        let mut out = self.for_value_type(schema_base, &enriched_value_type.typ, field_path);
+
+        // For Enum types, surface string entries of the "variants" attr as JSON-schema `enum`.
+        if let schema::ValueType::Basic(schema::BasicValueType::Enum) = &enriched_value_type.typ {
+            let variants = enriched_value_type.attrs.get("variants").and_then(|v| v.as_array());
+            if let Some(arr) = variants {
+                let enum_values: Vec<serde_json::Value> = arr
+                    .iter()
+                    .filter_map(|v| {
+                        v.as_str().map(|s| serde_json::Value::String(s.to_string()))
+                    })
+                    .collect();
+                if !enum_values.is_empty() {
+                    out.enum_values = Some(enum_values);
+                }
+            }
+        }
+        out
+    }
fn build_extra_instructions(&self) -> Result