@staticmethod
def _translate_legacy_request(
    spec: Dict | ServerlessSpec | PodSpec | ByocSpec,
    dimension: int | None = None,
    metric: (Metric | str) | None = None,
    vector_type: (VectorType | str) | None = VectorType.DENSE,
) -> tuple[dict[str, Any], dict[str, Any]]:
    """Translate legacy spec-based request to deployment + schema format.

    This method converts legacy index creation parameters (spec, dimension, metric,
    vector_type) to the new API format using deployment and schema structures.

    :param spec: The legacy spec (ServerlessSpec, PodSpec, ByocSpec, or dict). A dict
        must contain exactly one of the keys ``"serverless"``, ``"pod"`` or ``"byoc"``
        (checked in that order).
    :param dimension: The vector dimension. Required when ``vector_type`` is dense;
        not read for sparse vectors.
    :param metric: The distance metric (cosine, euclidean, dotproduct). When omitted,
        dense vectors default to cosine and sparse vectors to dotproduct.
    :param vector_type: The vector type (dense or sparse). ``None`` yields a schema
        with no vector fields.
    :returns: A tuple of (deployment_dict, schema_dict) for the new API format.
    :raises ValueError: If a dict spec has none of the recognised keys, if
        ``dimension`` is missing for a dense index, or if ``vector_type`` is neither
        dense nor sparse.
    :raises TypeError: If ``spec`` is not a dict, ServerlessSpec, PodSpec or ByocSpec.

    **Translation Mappings:**

    * `ServerlessSpec(cloud, region)` → `deployment` with `deployment_type="serverless"`
    * `PodSpec(environment, ...)` → `deployment` with `deployment_type="pod"`
    * `ByocSpec(environment)` → `deployment` with `deployment_type="byoc"`
    * `dimension` + `metric` + `vector_type="dense"` → `schema.fields._values` (dense_vector)
    * `vector_type="sparse"` → `schema.fields._sparse_values` (sparse_vector)

    Example::

        deployment, schema = PineconeDBControlRequestFactory._translate_legacy_request(
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
            dimension=1536,
            metric="cosine",
            vector_type="dense"
        )
        # Returns:
        # (
        #     {"deployment_type": "serverless", "cloud": "aws", "region": "us-east-1"},
        #     {"fields": {"_values": {"type": "dense_vector", "dimension": 1536, "metric": "cosine"}}}
        # )
    """

    def _pod_deployment(environment, pod_type, replicas, shards, pods) -> dict[str, Any]:
        # Single place where pod defaults are applied, so dict specs and PodSpec
        # objects cannot drift apart. Explicit ``is None`` checks keep 0 intact.
        return PodDeployment(
            environment=environment,
            pod_type="p1.x1" if pod_type is None else pod_type,
            replicas=1 if replicas is None else replicas,
            shards=1 if shards is None else shards,
            pods=pods,
        ).to_dict()

    # Normalise enum inputs to their string values before any comparison below.
    if metric is not None:
        metric = convert_enum_to_string(metric)
    if vector_type is not None:
        vector_type = convert_enum_to_string(vector_type)

    # --- spec -> deployment -------------------------------------------------
    deployment_dict: dict[str, Any]
    if isinstance(spec, dict):
        if "serverless" in spec:
            serverless = spec["serverless"]
            # Convert enum values to strings for consistency with __parse_index_spec
            deployment_dict = ServerlessDeployment(
                cloud=convert_enum_to_string(serverless.get("cloud", "")),
                region=convert_enum_to_string(serverless.get("region", "")),
            ).to_dict()
        elif "pod" in spec:
            pod = spec["pod"]
            raw_pod_type = pod.get("pod_type")
            # A key that is present but None is treated like a missing key, which
            # is what the PodSpec branch below does for None attributes.
            deployment_dict = _pod_deployment(
                environment=convert_enum_to_string(pod.get("environment", "")),
                pod_type=convert_enum_to_string(
                    "p1.x1" if raw_pod_type is None else raw_pod_type
                ),
                replicas=pod.get("replicas"),
                shards=pod.get("shards"),
                pods=pod.get("pods"),
            )
        elif "byoc" in spec:
            byoc = spec["byoc"]
            deployment_dict = ByocDeployment(
                environment=convert_enum_to_string(byoc.get("environment", ""))
            ).to_dict()
        else:
            raise ValueError("spec must contain either 'serverless', 'pod', or 'byoc' key")
    elif isinstance(spec, ServerlessSpec):
        deployment_dict = ServerlessDeployment(cloud=spec.cloud, region=spec.region).to_dict()
    elif isinstance(spec, PodSpec):
        deployment_dict = _pod_deployment(
            environment=spec.environment,
            pod_type=spec.pod_type,
            replicas=spec.replicas,
            shards=spec.shards,
            pods=spec.pods,
        )
    elif isinstance(spec, ByocSpec):
        deployment_dict = ByocDeployment(environment=spec.environment).to_dict()
    else:
        raise TypeError("spec must be of type dict, ServerlessSpec, PodSpec, or ByocSpec")

    # --- dimension / metric / vector_type -> schema -------------------------
    schema_dict: dict[str, Any] = {"fields": {}}
    if vector_type == VectorType.SPARSE.value:
        # Sparse vector: use _sparse_values field
        if metric is None:
            metric = "dotproduct"  # Default for sparse vectors
        schema_dict["fields"]["_sparse_values"] = SparseVectorField(metric=metric).to_dict()
    elif vector_type == VectorType.DENSE.value:
        # Dense vector: use _values field
        if dimension is None:
            raise ValueError("dimension is required for dense vector indexes")
        if metric is None:
            metric = Metric.COSINE.value  # Default for dense vectors
        schema_dict["fields"]["_values"] = DenseVectorField(
            dimension=dimension, metric=metric
        ).to_dict()
    elif vector_type is not None:
        # Invalid vector_type value - raise error instead of silently returning empty schema
        raise ValueError(
            f"Invalid vector_type: '{vector_type}'. Must be '{VectorType.DENSE.value}' or '{VectorType.SPARSE.value}'"
        )
    # If vector_type is None, return empty schema fields (no vector index)

    return deployment_dict, schema_dict
class TestTranslateLegacyRequest:
    """Unit tests covering ``PineconeDBControlRequestFactory._translate_legacy_request``."""

    @staticmethod
    def _translate(spec, **legacy_kwargs):
        """Invoke the translator and hand back its ``(deployment, schema)`` pair."""
        return PineconeDBControlRequestFactory._translate_legacy_request(
            spec=spec, **legacy_kwargs
        )

    def test_translate_serverless_spec_to_deployment_and_schema_dense(self):
        """A ServerlessSpec plus dense settings maps to serverless deployment and _values."""
        legacy = ServerlessSpec(cloud="aws", region="us-east-1")

        deployment, schema = self._translate(
            legacy, dimension=1536, metric="cosine", vector_type="dense"
        )

        expected_deployment = {
            "deployment_type": "serverless",
            "cloud": "aws",
            "region": "us-east-1",
        }
        expected_field = {"type": "dense_vector", "dimension": 1536, "metric": "cosine"}
        assert deployment == expected_deployment
        assert schema == {"fields": {"_values": expected_field}}

    def test_translate_serverless_spec_to_deployment_and_schema_sparse(self):
        """A ServerlessSpec plus sparse settings maps to _sparse_values."""
        legacy = ServerlessSpec(cloud="gcp", region="us-central1")

        deployment, schema = self._translate(legacy, metric="dotproduct", vector_type="sparse")

        expected_deployment = {
            "deployment_type": "serverless",
            "cloud": "gcp",
            "region": "us-central1",
        }
        expected_field = {"type": "sparse_vector", "metric": "dotproduct"}
        assert deployment == expected_deployment
        assert schema == {"fields": {"_sparse_values": expected_field}}

    def test_translate_pod_spec_to_deployment_and_schema(self):
        """A fully specified PodSpec is carried over field by field."""
        legacy = PodSpec(
            environment="us-west1-gcp", pod_type="p1.x1", replicas=2, shards=1, pods=2
        )

        deployment, schema = self._translate(
            legacy, dimension=1024, metric="euclidean", vector_type="dense"
        )

        expected_deployment = {
            "deployment_type": "pod",
            "environment": "us-west1-gcp",
            "pod_type": "p1.x1",
            "replicas": 2,
            "shards": 1,
            "pods": 2,
        }
        expected_field = {"type": "dense_vector", "dimension": 1024, "metric": "euclidean"}
        assert deployment == expected_deployment
        assert schema == {"fields": {"_values": expected_field}}

    def test_translate_pod_spec_with_defaults(self):
        """A PodSpec with only an environment picks up the default pod settings."""
        legacy = PodSpec(environment="us-east-1-aws")

        deployment, schema = self._translate(
            legacy, dimension=768, metric="cosine", vector_type="dense"
        )

        expected_deployment = {
            "deployment_type": "pod",
            "environment": "us-east-1-aws",
            "pod_type": "p1.x1",  # default
            "replicas": 1,  # default
            "shards": 1,  # default
        }
        expected_field = {"type": "dense_vector", "dimension": 768, "metric": "cosine"}
        assert deployment == expected_deployment
        # An unset pod count must be left out entirely.
        assert "pods" not in deployment
        assert schema == {"fields": {"_values": expected_field}}

    def test_translate_byoc_spec_to_deployment_and_schema(self):
        """A ByocSpec maps to a byoc deployment."""
        legacy = ByocSpec(environment="aws-us-east-1-b921")

        deployment, schema = self._translate(
            legacy, dimension=512, metric="dotproduct", vector_type="dense"
        )

        expected_field = {"type": "dense_vector", "dimension": 512, "metric": "dotproduct"}
        assert deployment == {"deployment_type": "byoc", "environment": "aws-us-east-1-b921"}
        assert schema == {"fields": {"_values": expected_field}}

    def test_translate_serverless_spec_dict_to_deployment_and_schema(self):
        """A dict-form serverless spec behaves like the object form."""
        legacy = {"serverless": {"cloud": "aws", "region": "us-east-1"}}

        deployment, schema = self._translate(
            legacy, dimension=1536, metric="cosine", vector_type="dense"
        )

        expected_deployment = {
            "deployment_type": "serverless",
            "cloud": "aws",
            "region": "us-east-1",
        }
        expected_field = {"type": "dense_vector", "dimension": 1536, "metric": "cosine"}
        assert deployment == expected_deployment
        assert schema == {"fields": {"_values": expected_field}}

    def test_translate_pod_spec_dict_to_deployment_and_schema(self):
        """A dict-form pod spec behaves like the object form."""
        pod_settings = {
            "environment": "us-west1-gcp",
            "pod_type": "p1.x2",
            "replicas": 3,
            "shards": 2,
        }

        deployment, schema = self._translate(
            {"pod": pod_settings}, dimension=2048, metric="cosine", vector_type="dense"
        )

        expected_deployment = {
            "deployment_type": "pod",
            "environment": "us-west1-gcp",
            "pod_type": "p1.x2",
            "replicas": 3,
            "shards": 2,
        }
        expected_field = {"type": "dense_vector", "dimension": 2048, "metric": "cosine"}
        assert deployment == expected_deployment
        assert schema == {"fields": {"_values": expected_field}}

    def test_translate_byoc_spec_dict_to_deployment_and_schema(self):
        """A dict-form byoc spec behaves like the object form."""
        legacy = {"byoc": {"environment": "gcp-us-central1-b123"}}

        deployment, schema = self._translate(
            legacy, dimension=256, metric="euclidean", vector_type="dense"
        )

        expected_field = {"type": "dense_vector", "dimension": 256, "metric": "euclidean"}
        assert deployment == {"deployment_type": "byoc", "environment": "gcp-us-central1-b123"}
        assert schema == {"fields": {"_values": expected_field}}

    def test_translate_sparse_vector_default_metric(self):
        """Sparse indexes fall back to the dotproduct metric."""
        legacy = ServerlessSpec(cloud="aws", region="us-east-1")

        _, schema = self._translate(legacy, vector_type="sparse")

        expected_field = {"type": "sparse_vector", "metric": "dotproduct"}  # default metric
        assert schema == {"fields": {"_sparse_values": expected_field}}

    def test_translate_dense_vector_default_metric(self):
        """Dense indexes fall back to the cosine metric."""
        legacy = ServerlessSpec(cloud="aws", region="us-east-1")

        _, schema = self._translate(legacy, dimension=1536, vector_type="dense")

        expected_field = {
            "type": "dense_vector",
            "dimension": 1536,
            "metric": "cosine",  # default metric
        }
        assert schema == {"fields": {"_values": expected_field}}

    def test_translate_dense_vector_with_enum_metric(self):
        """Enum members for metric and vector_type are accepted."""
        legacy = ServerlessSpec(cloud="aws", region="us-east-1")

        _, schema = self._translate(
            legacy, dimension=1536, metric=Metric.EUCLIDEAN, vector_type=VectorType.DENSE
        )

        expected_field = {"type": "dense_vector", "dimension": 1536, "metric": "euclidean"}
        assert schema == {"fields": {"_values": expected_field}}

    def test_translate_dense_vector_requires_dimension(self):
        """Omitting dimension for a dense index is rejected."""
        legacy = ServerlessSpec(cloud="aws", region="us-east-1")

        with pytest.raises(ValueError, match="dimension is required"):
            self._translate(legacy, vector_type="dense")

    def test_translate_invalid_spec_type(self):
        """A spec of an unsupported type raises TypeError."""
        with pytest.raises(TypeError, match="spec must be of type"):
            self._translate("invalid", dimension=1536, vector_type="dense")

    def test_translate_invalid_spec_dict(self):
        """A dict spec without a recognised key raises ValueError."""
        with pytest.raises(ValueError, match="spec must contain"):
            self._translate({"invalid": {}}, dimension=1536, vector_type="dense")

    def test_translate_dict_spec_with_enum_values(self):
        """Enum values nested in a dict spec come out as plain strings."""
        legacy = {"serverless": {"cloud": CloudProvider.AWS, "region": AwsRegion.US_EAST_1}}

        deployment, _ = self._translate(
            legacy, dimension=1536, metric="cosine", vector_type="dense"
        )

        assert deployment["cloud"] == "aws"
        assert deployment["region"] == "us-east-1"

    def test_translate_pod_spec_with_zero_replicas(self):
        """Zero replicas/shards on a PodSpec are kept rather than replaced by 1."""
        legacy = PodSpec(environment="us-east-1-aws", replicas=0, shards=0)

        deployment, _ = self._translate(
            legacy, dimension=1536, metric="cosine", vector_type="dense"
        )

        assert deployment["replicas"] == 0
        assert deployment["shards"] == 0

    def test_translate_dict_spec_with_zero_replicas(self):
        """Zero replicas/shards in a dict pod spec are kept rather than replaced by 1."""
        legacy = {"pod": {"environment": "us-east-1-aws", "replicas": 0, "shards": 0}}

        deployment, _ = self._translate(
            legacy, dimension=1536, metric="cosine", vector_type="dense"
        )

        assert deployment["replicas"] == 0
        assert deployment["shards"] == 0

    def test_translate_invalid_vector_type_raises_error(self):
        """An unknown vector_type raises instead of yielding an empty schema."""
        legacy = ServerlessSpec(cloud="aws", region="us-east-1")

        with pytest.raises(ValueError, match="Invalid vector_type"):
            self._translate(legacy, dimension=1536, vector_type="invalid_type")

    def test_translate_invalid_vector_type_typo(self):
        """A misspelled vector_type is rejected as well."""
        legacy = ServerlessSpec(cloud="aws", region="us-east-1")
        misspelled = "desnse"  # deliberate typo

        with pytest.raises(ValueError, match="Invalid vector_type"):
            self._translate(legacy, dimension=1536, vector_type=misspelled)