Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,4 @@ docs-site/.vitepress/.temp

# Claude Code local settings
.claude/
docs/tasks
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
## [Unreleased]

### Added
- **Mixedbread model support**: Added first-class support for Mixedbread embedding and reranking models
- Embedding model: `mxbai-xsmall` (`mixedbread-ai/mxbai-embed-xsmall-v1`) - 384 dimensions, 4K context window
- Reranker: `mxbai` (`mixedbread-ai/mxbai-rerank-xsmall-v1`) - Neural cross-encoder reranker
- Fully local inference using ONNX Runtime with quantized models
- Provider abstraction for clean model selection and routing
- Model registry integration with `mxbai-xsmall` alias
- CLI support: `--model mxbai-xsmall` and `--rerank-model mxbai`
- MCP server support for Mixedbread models in semantic/hybrid search tools
- **VitePress documentation site**: Comprehensive documentation with improved navigation, search, and structure in `docs-site/` directory
- **Documentation features**: Guide pages, feature documentation, CLI reference, embedding model guide, architecture docs, and contributing guides
- **Local search**: Built-in search functionality in documentation site
Expand Down
84 changes: 81 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,9 @@ pdf-extract = "0.9"
uuid = { version = "1.8", features = ["v4", "serde"] }
base64 = "0.22"
sha2 = "0.10"
hf-hub = { version = "0.4.3", default-features = false, features = ["ureq"] }
tokenizers = "0.20.1"
ort = { version = "2.0.0-rc.10", default-features = false, features = ["download-binaries"] }
once_cell = "1.19"
ndarray = { version = "0.16", default-features = false, features = ["std"] }
num_cpus = "1.16"
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,9 @@ Choose the right embedding model for your needs:
# Default: BGE-Small (fast, precise chunking)
ck --index .

# Mixedbread xsmall: Optimized for local semantic search (4K context, 384 dims)
ck --index --model mxbai-xsmall .

# Enhanced: Nomic V1.5 (8K context, optimal for large functions)
ck --index --model nomic-v1.5 .

Expand All @@ -253,6 +256,7 @@ ck --index --model jina-code .

**Model Comparison:**
- **`bge-small`** (default): 400-token chunks, fast indexing, good for most code
- **`mxbai-xsmall`**: 4K context window, 384 dimensions, optimized for local inference (Mixedbread)
- **`nomic-v1.5`**: 1024-token chunks with 8K model capacity, better for large functions
- **`jina-code`**: 1024-token chunks with 8K model capacity, specialized for code understanding

Expand All @@ -264,6 +268,7 @@ ck --status .

# Clean up and rebuild / switch models
ck --clean .
ck --switch-model mxbai-xsmall .
ck --switch-model nomic-v1.5 .
ck --switch-model nomic-v1.5 --force . # Force rebuild

Expand Down
68 changes: 17 additions & 51 deletions ck-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ struct Cli {
#[arg(
long = "model",
value_name = "MODEL",
help = "Embedding model to use for indexing (bge-small, nomic-v1.5, jina-code) [default: bge-small]. Only used with --index."
help = "Embedding model to use for indexing (bge-small, nomic-v1.5, jina-code, mxbai-xsmall) [default: bge-small]. Only used with --index."
)]
model: Option<String>,

Expand All @@ -342,7 +342,7 @@ struct Cli {
#[arg(
long = "rerank-model",
value_name = "MODEL",
help = "Reranking model to use (jina, bge) [default: jina]"
help = "Reranking model to use (jina, bge, mxbai) [default: jina]"
)]
rerank_model: Option<String>,

Expand Down Expand Up @@ -451,46 +451,6 @@ fn build_exclude_patterns(cli: &Cli) -> Vec<String> {
ck_core::build_exclude_patterns(&cli.exclude, !cli.no_default_excludes)
}

/// Resolve an optional model name to a `(alias, config)` pair.
///
/// Accepts either a registry alias (e.g. `bge-small`) or a canonical model
/// name (e.g. `BAAI/bge-small-en-v1.5`); when `requested` is `None`, falls
/// back to the registry's configured default model.
///
/// # Errors
/// Fails when the requested name matches neither an alias nor a canonical
/// name, or when no default model is configured.
fn resolve_model_selection(
    registry: &ck_models::ModelRegistry,
    requested: Option<&str>,
) -> Result<(String, ck_models::ModelConfig)> {
    // No explicit request: fall back to the registry default.
    let Some(name) = requested else {
        let alias = registry.default_model.clone();
        let config = registry
            .get_default_model()
            .ok_or_else(|| anyhow::anyhow!("No default model configured"))?
            .clone();
        return Ok((alias, config));
    };

    // First try the name as a registry alias.
    if let Some(config) = registry.get_model(name) {
        return Ok((name.to_string(), config.clone()));
    }

    // Otherwise accept a canonical model name and map it back to its alias.
    if let Some((alias, config)) = registry
        .models
        .iter()
        .find(|(_, config)| config.name == name)
    {
        return Ok((alias.clone(), config.clone()));
    }

    anyhow::bail!(
        "Unknown model '{}'. Available models: {}",
        name,
        registry
            .models
            .keys()
            .cloned()
            .collect::<Vec<_>>()
            .join(", ")
    )
}

async fn run_index_workflow(
status: &StatusReporter,
path: &Path,
Expand Down Expand Up @@ -983,7 +943,9 @@ async fn run_cli_mode(cli: Cli) -> Result<()> {
.unwrap_or_else(|| PathBuf::from("."));

let registry = ck_models::ModelRegistry::default();
let (model_alias, model_config) = resolve_model_selection(&registry, Some(model_name))?;
let (model_alias, model_config) = registry
.resolve(Some(model_name))
.map_err(|e| anyhow::anyhow!(e.to_string()))?;

if !cli.force {
let manifest_path = path.join(".ck").join("manifest.json");
Expand All @@ -992,7 +954,7 @@ async fn run_cli_mode(cli: Cli) -> Result<()> {
&& let Ok(manifest) = serde_json::from_slice::<ck_index::IndexManifest>(&data)
&& let Some(existing_model) = manifest.embedding_model.clone()
&& let Ok((existing_alias, existing_config)) =
resolve_model_selection(&registry, Some(existing_model.as_str()))
registry.resolve(Some(existing_model.as_str()))
&& existing_config.name == model_config.name
{
status.section_header("Switching Embedding Model");
Expand Down Expand Up @@ -1042,7 +1004,9 @@ async fn run_cli_mode(cli: Cli) -> Result<()> {
.unwrap_or_else(|| PathBuf::from("."));

let registry = ck_models::ModelRegistry::default();
let (model_alias, model_config) = resolve_model_selection(&registry, cli.model.as_deref())?;
let (model_alias, model_config) = registry
.resolve(cli.model.as_deref())
.map_err(|e| anyhow::anyhow!(e.to_string()))?;

run_index_workflow(
&status,
Expand Down Expand Up @@ -1596,22 +1560,24 @@ async fn run_search(
let resolved_model =
ck_engine::resolve_model_for_path(&options.path, options.embedding_model.as_deref())?;

if resolved_model.alias == resolved_model.canonical_name {
if resolved_model.alias == resolved_model.canonical_name() {
eprintln!(
"🤖 Model: {} ({} dims)",
resolved_model.canonical_name, resolved_model.dimensions
resolved_model.canonical_name(),
resolved_model.dimensions()
);
} else {
eprintln!(
"🤖 Model: {} (alias '{}', {} dims)",
resolved_model.canonical_name, resolved_model.alias, resolved_model.dimensions
resolved_model.canonical_name(),
resolved_model.alias,
resolved_model.dimensions()
);
}

let max_tokens =
ck_chunk::TokenEstimator::get_model_limit(resolved_model.canonical_name.as_str());
let max_tokens = ck_chunk::TokenEstimator::get_model_limit(resolved_model.canonical_name());
let (chunk_tokens, overlap_tokens) =
ck_chunk::get_model_chunk_config(Some(resolved_model.canonical_name.as_str()));
ck_chunk::get_model_chunk_config(Some(resolved_model.canonical_name()));

eprintln!("📏 FastEmbed Config: {} token limit", max_tokens);
eprintln!(
Expand Down
Loading