Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions openviking/models/embedder/litellm_embedders.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,19 @@ def __init__(
)
self._dimension = dimension

def _truncate_vector(self, vector: List[float]) -> List[float]:
    """Clip an embedding vector to the configured target dimension.

    Truncation happens client-side by keeping the leading components of
    the vector. No re-normalization is applied afterwards.
    NOTE(review): confirm downstream similarity scoring tolerates
    un-normalized truncated vectors.

    Args:
        vector: Embedding vector as returned by the API.

    Returns:
        The first ``self.dimension`` components when ``self.dimension`` is a
        positive value smaller than ``len(vector)``; otherwise the input
        vector itself, unchanged.
    """
    target = self.dimension
    # An unset (None) or non-positive dimension means "no truncation".
    # Slicing with 0 would yield an empty vector, and a negative value would
    # silently drop trailing components (vector[:-k]), so both are ignored.
    if target is None or target <= 0:
        return vector
    # Vectors already at or below the target size are passed through as-is.
    if len(vector) <= target:
        return vector
    return vector[:target]

def _build_kwargs(self, is_query: bool = False) -> Dict[str, Any]:
"""Build kwargs dict for litellm.embedding() call."""
kwargs: Dict[str, Any] = {"model": self.model_name}
Expand All @@ -95,8 +108,9 @@ def _build_kwargs(self, is_query: bool = False) -> Dict[str, Any]:
kwargs["api_base"] = self.api_base
if self.extra_headers:
kwargs["extra_headers"] = self.extra_headers
if self.dimension:
kwargs["dimensions"] = self.dimension
# Don't pass dimensions parameter to API - some models don't support it
# (e.g., Qwen3-Embedding-4B doesn't support matryoshka representation)
# Instead, we'll truncate the result vector if needed

# Non-symmetric embedding support
active_param = None
Expand Down Expand Up @@ -171,6 +185,8 @@ def _call() -> EmbedResult:
response = litellm.embedding(**kwargs)
self._update_telemetry_token_usage(response)
vector = response.data[0]["embedding"]
# Truncate vector if needed
vector = self._truncate_vector(vector)
return EmbedResult(dense_vector=vector)

try:
Expand Down Expand Up @@ -221,7 +237,11 @@ def _call() -> List[EmbedResult]:
kwargs["input"] = texts
response = litellm.embedding(**kwargs)
self._update_telemetry_token_usage(response)
return [EmbedResult(dense_vector=item["embedding"]) for item in response.data]
# Truncate vectors if needed
return [
EmbedResult(dense_vector=self._truncate_vector(item["embedding"]))
for item in response.data
]

try:
return self._run_with_retry(
Expand Down
Loading