diff --git a/openviking/models/embedder/litellm_embedders.py b/openviking/models/embedder/litellm_embedders.py index 4f7419619..ddee76493 100644 --- a/openviking/models/embedder/litellm_embedders.py +++ b/openviking/models/embedder/litellm_embedders.py @@ -85,6 +85,19 @@ def __init__( ) self._dimension = dimension + def _truncate_vector(self, vector: List[float]) -> List[float]: + """Truncate vector to target dimension if needed. + + Args: + vector: Input vector from API + + Returns: + Truncated vector if dimension is set and smaller than input, otherwise original vector + """ + if self.dimension is not None and len(vector) > self.dimension: + return vector[: self.dimension] + return vector + def _build_kwargs(self, is_query: bool = False) -> Dict[str, Any]: """Build kwargs dict for litellm.embedding() call.""" kwargs: Dict[str, Any] = {"model": self.model_name} @@ -95,8 +108,9 @@ def _build_kwargs(self, is_query: bool = False) -> Dict[str, Any]: kwargs["api_base"] = self.api_base if self.extra_headers: kwargs["extra_headers"] = self.extra_headers - if self.dimension: - kwargs["dimensions"] = self.dimension + # Don't pass dimensions parameter to API - some models don't support it + # (e.g., Qwen3-Embedding-4B doesn't support matryoshka representation) + # Instead, we'll truncate the result vector if needed # Non-symmetric embedding support active_param = None @@ -171,6 +185,8 @@ def _call() -> EmbedResult: response = litellm.embedding(**kwargs) self._update_telemetry_token_usage(response) vector = response.data[0]["embedding"] + # Truncate vector if needed + vector = self._truncate_vector(vector) return EmbedResult(dense_vector=vector) try: @@ -221,7 +237,11 @@ def _call() -> List[EmbedResult]: kwargs["input"] = texts response = litellm.embedding(**kwargs) self._update_telemetry_token_usage(response) - return [EmbedResult(dense_vector=item["embedding"]) for item in response.data] + # Truncate vectors if needed + return [ + EmbedResult(dense_vector=self._truncate_vector(item["embedding"])) + for item in response.data + ] try: return self._run_with_retry(