diff --git a/examples/docs_to_knowledge_graph/README.md b/examples/docs_to_knowledge_graph/README.md index 3a374ceb..fd296f9e 100644 --- a/examples/docs_to_knowledge_graph/README.md +++ b/examples/docs_to_knowledge_graph/README.md @@ -16,7 +16,7 @@ Please drop [Cocoindex on Github](https://github.com/cocoindex-io/cocoindex) a s * [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one. * Install [Neo4j](https://cocoindex.io/docs/ops/targets#neo4j-dev-instance) or [Kuzu](https://cocoindex.io/docs/ops/targets#kuzu-dev-instance) if you don't have one. * The example uses Neo4j by default for now. If you want to use Kuzu, find out the "SELECT ONE GRAPH DATABASE TO USE" section and switch the active branch. -* [Configure your OpenAI API key](https://cocoindex.io/docs/ai/llm#openai). +* Install and configure an LLM API. This example uses Ollama, which runs the LLM locally; set it up by following [this guide](https://cocoindex.io/docs/ai/llm#ollama). Alternatively, follow the comments in the source code to switch to OpenAI, and [configure your OpenAI API key](https://cocoindex.io/docs/ai/llm#openai) before running the example. ## Documentation You can read the official CocoIndex Documentation for Property Graph Targets [here](https://cocoindex.io/docs/ops/targets#property-graph-targets). 
diff --git a/examples/docs_to_knowledge_graph/main.py b/examples/docs_to_knowledge_graph/main.py index 7150809e..438f08b0 100644 --- a/examples/docs_to_knowledge_graph/main.py +++ b/examples/docs_to_knowledge_graph/main.py @@ -82,9 +82,14 @@ def docs_to_kg_flow( cocoindex.functions.ExtractByLlm( llm_spec=cocoindex.LlmSpec( # Supported LLM: https://cocoindex.io/docs/ai/llm - api_type=cocoindex.LlmApiType.OPENAI, - model="gpt-4o", + api_type=cocoindex.LlmApiType.OLLAMA, + model="llama3.2", ), + # Alternative: Use OpenAI API model instead of Ollama + # llm_spec=cocoindex.LlmSpec( + # api_type=cocoindex.LlmApiType.OPENAI, + # model="gpt-4o", + # ), output_type=DocumentSummary, instruction="Please summarize the content of the document.", ) @@ -100,9 +105,14 @@ def docs_to_kg_flow( cocoindex.functions.ExtractByLlm( llm_spec=cocoindex.LlmSpec( # Supported LLM: https://cocoindex.io/docs/ai/llm - api_type=cocoindex.LlmApiType.OPENAI, - model="gpt-4o", + api_type=cocoindex.LlmApiType.OLLAMA, + model="llama3.2", ), + # Alternative: Use OpenAI API model instead of Ollama + # llm_spec=cocoindex.LlmSpec( + # api_type=cocoindex.LlmApiType.OPENAI, + # model="gpt-4o", + # ), output_type=list[Relationship], instruction=( "Please extract relationships from CocoIndex documents. 
" diff --git a/src/base/spec.rs b/src/base/spec.rs index 671e96dc..54bea7a1 100644 --- a/src/base/spec.rs +++ b/src/base/spec.rs @@ -384,7 +384,7 @@ impl fmt::Display for VectorSimilarityMetric { } } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(tag = "kind")] pub enum VectorIndexMethod { Hnsw { diff --git a/src/ops/targets/neo4j.rs b/src/ops/targets/neo4j.rs index 49517dd9..1035c6bf 100644 --- a/src/ops/targets/neo4j.rs +++ b/src/ops/targets/neo4j.rs @@ -568,9 +568,6 @@ impl SetupState { .map(|f| (f.name.as_str(), &f.value_type.typ)) .collect::>(); for index_def in index_options.vector_indexes.iter() { - if index_def.method.is_some() { - api_bail!("Vector index method is not configurable for Neo4j yet"); - } sub_components.push(ComponentState { object_label: schema.elem_type.clone(), index_def: IndexDef::from_vector_index_def( @@ -644,6 +641,7 @@ enum IndexDef { field_name: String, metric: spec::VectorSimilarityMetric, vector_size: usize, + method: Option, }, } @@ -652,6 +650,10 @@ impl IndexDef { index_def: &spec::VectorIndexDef, field_typ: &schema::ValueType, ) -> Result { + let method = index_def.method.clone(); + if let Some(spec::VectorIndexMethod::IvfFlat { .. 
}) = method { + api_bail!("IVFFlat vector index method is not supported for Neo4j"); + } Ok(Self::VectorIndex { field_name: index_def.field_name.clone(), vector_size: (match field_typ { @@ -664,6 +666,7 @@ impl IndexDef { api_error!("Vector index field must be a vector with fixed dimension") })?, metric: index_def.metric, + method, }) } } @@ -723,9 +726,14 @@ impl components::SetupOperator for SetupComponentOperator { field_name, metric, vector_size, + method, } => { + let method_str = method + .as_ref() + .map(|m| format!(", method: {}", m)) + .unwrap_or_default(); format!( - "{key_desc} ON {label} (field_name: {field_name}, vector_size: {vector_size}, metric: {metric})", + "{key_desc} ON {label} (field_name: {field_name}, vector_size: {vector_size}, metric: {metric}{method_str})", ) } } @@ -752,17 +760,32 @@ impl components::SetupOperator for SetupComponentOperator { field_name, metric, vector_size, + method, } => { + let mut parts = vec![]; + + parts.push(format!("`vector.dimensions`: {}", vector_size)); + parts.push(format!("`vector.similarity_function`: '{}'", metric)); + + if let Some(spec::VectorIndexMethod::Hnsw { m, ef_construction }) = method { + if let Some(m_val) = m { + parts.push(format!("`vector.hnsw.m`: {}", m_val)); + } + if let Some(ef_val) = ef_construction { + parts.push(format!("`vector.hnsw.ef_construction`: {}", ef_val)); + } + } + formatdoc! {" CREATE VECTOR INDEX {name} IF NOT EXISTS FOR {matcher} ON {qualifier}.{field_name} OPTIONS {{ indexConfig: {{ - `vector.dimensions`: {vector_size}, - `vector.similarity_function`: '{metric}' + {config} }} }}", name = key.name, + config = parts.join(", ") } } });