diff --git a/pinecone/db_data/interfaces.py b/pinecone/db_data/interfaces.py index 091a21659..f8f8bda73 100644 --- a/pinecone/db_data/interfaces.py +++ b/pinecone/db_data/interfaces.py @@ -244,11 +244,38 @@ def upsert_from_dataframe( ): """Upserts a dataframe into the index. - Args: - df: A pandas dataframe with the following columns: id, values, sparse_values, and metadata. - namespace: The namespace to upsert into. - batch_size: The number of rows to upsert in a single batch. - show_progress: Whether to show a progress bar. + :param df: A pandas dataframe with the following columns: id, values, sparse_values, and metadata. + :type df: pandas.DataFrame + :param namespace: The namespace to upsert into. + :type namespace: str, optional + :param batch_size: The number of rows to upsert in a single batch. + :type batch_size: int, optional + :param show_progress: Whether to show a progress bar. + :type show_progress: bool, optional + + .. code-block:: python + + import pandas as pd + from pinecone import Pinecone + + pc = Pinecone() + idx = pc.Index(host="your-index-host") + + # Create a dataframe with vector data + df = pd.DataFrame({ + 'id': ['id1', 'id2', 'id3'], + 'values': [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]], + 'metadata': [{'key': 'value1'}, {'key': 'value2'}, {'key': 'value3'}] + }) + + # Upsert the dataframe + idx.upsert_from_dataframe( + df=df, + namespace="my-namespace", + batch_size=100, + show_progress=True + ) + """ pass @@ -276,7 +303,7 @@ def upsert_records(self, namespace: str, records: list[dict]) -> UpsertResponse: Pinecone, CloudProvider, AwsRegion, - EmbedModel + EmbedModel, IndexEmbed ) @@ -382,7 +409,7 @@ def search( Pinecone, CloudProvider, AwsRegion, - EmbedModel + EmbedModel, IndexEmbed ) diff --git a/pinecone/legacy_pinecone_interface.py b/pinecone/legacy_pinecone_interface.py deleted file mode 100644 index 42ce852a7..000000000 --- a/pinecone/legacy_pinecone_interface.py +++ /dev/null @@ -1,940 +0,0 @@ -from __future__ import 
annotations - -from abc import ABC, abstractmethod - -from typing import Dict, TYPE_CHECKING, Any - -if TYPE_CHECKING: - from pinecone.db_control.models import ( - ServerlessSpec, - PodSpec, - ByocSpec, - IndexList, - CollectionList, - IndexModel, - IndexEmbed, - BackupModel, - BackupList, - RestoreJobModel, - RestoreJobList, - ) - from pinecone.db_control.enums import ( - Metric, - VectorType, - DeletionProtection, - PodType, - CloudProvider, - AwsRegion, - GcpRegion, - AzureRegion, - ) - from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict, ConfigureIndexEmbed - from pinecone.db_control.models.serverless_spec import ( - ReadCapacityDict, - MetadataSchemaFieldConfig, - ) - from pinecone.core.openapi.db_control.model.read_capacity import ReadCapacity - from pinecone.core.openapi.db_control.model.read_capacity_on_demand_spec import ( - ReadCapacityOnDemandSpec, - ) - from pinecone.core.openapi.db_control.model.read_capacity_dedicated_spec import ( - ReadCapacityDedicatedSpec, - ) - from pinecone.core.openapi.db_control.model.backup_model_schema import BackupModelSchema - - -class LegacyPineconeDBControlInterface(ABC): - @abstractmethod - def __init__( - self, - api_key: str | None = None, - host: str | None = None, - proxy_url: str | None = None, - proxy_headers: dict[str, str] | None = None, - ssl_ca_certs: str | None = None, - ssl_verify: bool | None = None, - additional_headers: dict[str, str] | None = {}, - pool_threads: int | None = 1, - **kwargs, - ): - pass - - @abstractmethod - def create_index( - self, - name: str, - spec: Dict | "ServerlessSpec" | "PodSpec" | "ByocSpec", - dimension: int | None, - metric: ("Metric" | str) | None = "Metric.COSINE", - timeout: int | None = None, - deletion_protection: ("DeletionProtection" | str) | None = "DeletionProtection.DISABLED", - vector_type: ("VectorType" | str) | None = "VectorType.DENSE", - tags: dict[str, str] | None = None, - ) -> "IndexModel": - """Creates a Pinecone index. 
- - :param name: The name of the index to create. Must be unique within your project and - cannot be changed once created. Allowed characters are lowercase letters, numbers, - and hyphens and the name may not begin or end with hyphens. Maximum length is 45 characters. - :type name: str - :param metric: Type of similarity metric used in the vector index when querying, one of ``{"cosine", "dotproduct", "euclidean"}``. - :type metric: str, optional - :param spec: A dictionary containing configurations describing how the index should be deployed. For serverless indexes, - specify region and cloud. Optionally, you can specify ``read_capacity`` to configure dedicated read capacity mode - (OnDemand or Dedicated) and ``schema`` to configure which metadata fields are filterable. For pod indexes, specify - replicas, shards, pods, pod_type, metadata_config, and source_collection. - Alternatively, use the ``ServerlessSpec``, ``PodSpec``, or ``ByocSpec`` objects to specify these configurations. - :type spec: Dict - :param dimension: If you are creating an index with ``vector_type="dense"`` (which is the default), you need to specify ``dimension`` to indicate the size of your vectors. - This should match the dimension of the embeddings you will be inserting. For example, if you are using - OpenAI's CLIP model, you should use ``dimension=1536``. Dimension is a required field when - creating an index with ``vector_type="dense"`` and should not be passed when ``vector_type="sparse"``. - :type dimension: int - :type timeout: int, optional - :param timeout: Specify the number of seconds to wait until index gets ready. If None, wait indefinitely; if >=0, time out after this many seconds; - if -1, return immediately and do not wait. - :param deletion_protection: If enabled, the index cannot be deleted. If disabled, the index can be deleted. - :type deletion_protection: Optional[Literal["enabled", "disabled"]] - :param vector_type: The type of vectors to be stored in the index. 
One of ``{"dense", "sparse"}``. - :type vector_type: str, optional - :param tags: Tags are key-value pairs you can attach to indexes to better understand, organize, and identify your resources. Some example use cases include tagging indexes with the name of the model that generated the embeddings, the date the index was created, or the purpose of the index. - :type tags: Optional[dict[str, str]] - :return: A ``IndexModel`` instance containing a description of the index that was created. - - Examples: - - .. code-block:: python - :caption: Creating a serverless index - - import os - from pinecone import ( - Pinecone, - ServerlessSpec, - CloudProvider, - AwsRegion, - Metric, - DeletionProtection, - VectorType - ) - - pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY")) - - pc.create_index( - name="my_index", - dimension=1536, - metric=Metric.COSINE, - spec=ServerlessSpec( - cloud=CloudProvider.AWS, - region=AwsRegion.US_WEST_2, - read_capacity={ - "mode": "Dedicated", - "dedicated": { - "node_type": "t1", - "scaling": "Manual", - "manual": {"shards": 2, "replicas": 2}, - }, - }, - schema={ - "genre": {"filterable": True}, - "year": {"filterable": True}, - "rating": {"filterable": True}, - }, - ), - deletion_protection=DeletionProtection.DISABLED, - vector_type=VectorType.DENSE, - tags={ - "model": "clip", - "app": "image-search", - "env": "production" - } - ) - - .. 
code-block:: python - :caption: Creating a pod index - - import os - from pinecone import ( - Pinecone, - PodSpec, - PodIndexEnvironment, - PodType, - Metric, - DeletionProtection, - VectorType - ) - - pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY")) - - pc.create_index( - name="my_index", - dimension=1536, - metric=Metric.COSINE, - spec=PodSpec( - environment=PodIndexEnvironment.US_EAST4_GCP, - pod_type=PodType.P1_X1 - ), - deletion_protection=DeletionProtection.DISABLED, - tags={ - "model": "clip", - "app": "image-search", - "env": "testing" - } - ) - """ - pass - - @abstractmethod - def create_index_from_backup( - self, - *, - name: str, - backup_id: str, - deletion_protection: ("DeletionProtection" | str) | None = "disabled", - tags: dict[str, str] | None = None, - timeout: int | None = None, - ) -> "IndexModel": - """ - Create an index from a backup. - - Call ``list_backups`` to get a list of backups for your project. - - :param name: The name of the index to create. - :type name: str - :param backup_id: The ID of the backup to restore. - :type backup_id: str - :param deletion_protection: If enabled, the index cannot be deleted. If disabled, the index can be deleted. This setting can be changed with ``configure_index``. - :type deletion_protection: Optional[Literal["enabled", "disabled"]] - :param tags: Tags are key-value pairs you can attach to indexes to better understand, organize, and identify your resources. Some example use cases include tagging indexes with the name of the model that generated the embeddings, the date the index was created, or the purpose of the index. - :type tags: Optional[dict[str, str]] - :param timeout: Specify the number of seconds to wait until index is ready to receive data. If None, wait indefinitely; if >=0, time out after this many seconds; - if -1, return immediately and do not wait. - :return: A description of the index that was created. 
- :rtype: IndexModel - """ - pass - - @abstractmethod - def create_index_for_model( - self, - *, - name: str, - cloud: "CloudProvider" | str, - region: "AwsRegion" | "GcpRegion" | "AzureRegion" | str, - embed: "IndexEmbed" | "CreateIndexForModelEmbedTypedDict", - tags: dict[str, str] | None = None, - deletion_protection: ("DeletionProtection" | str) | None = "DeletionProtection.DISABLED", - read_capacity: ( - "ReadCapacityDict" - | "ReadCapacity" - | "ReadCapacityOnDemandSpec" - | "ReadCapacityDedicatedSpec" - ) - | None = None, - schema: ( - dict[ - str, "MetadataSchemaFieldConfig" - ] # Direct field mapping: {field_name: {filterable: bool}} - | dict[ - str, dict[str, Any] - ] # Dict with "fields" wrapper: {"fields": {field_name: {...}}, ...} - | "BackupModelSchema" # OpenAPI model instance - ) - | None = None, - timeout: int | None = None, - ) -> "IndexModel": - """ - :param name: The name of the index to create. Must be unique within your project and - cannot be changed once created. Allowed characters are lowercase letters, numbers, - and hyphens and the name may not begin or end with hyphens. Maximum length is 45 characters. - :type name: str - :param cloud: The cloud provider to use for the index. One of ``{"aws", "gcp", "azure"}``. - :type cloud: str - :param region: The region to use for the index. Enum objects ``AwsRegion``, ``GcpRegion``, and ``AzureRegion`` are also available to help you quickly set these parameters, but may not be up to date as new regions become available. - :type region: str - :param embed: The embedding configuration for the index. This param accepts a dictionary or an instance of the ``IndexEmbed`` object. - :type embed: Union[Dict, IndexEmbed] - :param tags: Tags are key-value pairs you can attach to indexes to better understand, organize, and identify your resources. Some example use cases include tagging indexes with the name of the model that generated the embeddings, the date the index was created, or the purpose of the index. 
- :type tags: Optional[dict[str, str]] - :param deletion_protection: If enabled, the index cannot be deleted. If disabled, the index can be deleted. This setting can be changed with ``configure_index``. - :type deletion_protection: Optional[Literal["enabled", "disabled"]] - :param read_capacity: Optional read capacity configuration. You can specify ``read_capacity`` to configure dedicated read capacity mode - (OnDemand or Dedicated). See ``ServerlessSpec`` documentation for details on read capacity configuration. - :type read_capacity: Optional[Union[ReadCapacityDict, ReadCapacity, ReadCapacityOnDemandSpec, ReadCapacityDedicatedSpec]] - :param schema: Optional metadata schema configuration. You can specify ``schema`` to configure which metadata fields are filterable. - The schema can be provided as a dictionary mapping field names to their configurations (e.g., ``{"genre": {"filterable": True}}``) - or as a dictionary with a ``fields`` key (e.g., ``{"fields": {"genre": {"filterable": True}}}``). - :type schema: Optional[Union[dict[str, MetadataSchemaFieldConfig], dict[str, dict[str, Any]], BackupModelSchema]] - :type timeout: Optional[int] - :param timeout: Specify the number of seconds to wait until index is ready to receive data. If None, wait indefinitely; if >=0, time out after this many seconds; - if -1, return immediately and do not wait. - :return: A description of the index that was created. - :rtype: IndexModel - - This method is used to create a Serverless index that is configured for use with Pinecone's integrated inference models. - - The resulting index can be described, listed, configured, and deleted like any other Pinecone index with the ``describe_index``, ``list_indexes``, ``configure_index``, and ``delete_index`` methods. - - After the model is created, you can upsert records into the index with the ``upsert_records`` method, and search your records with the ``search`` method. - - .. 
code-block:: python - - from pinecone import ( - Pinecone, - IndexEmbed, - CloudProvider, - AwsRegion, - EmbedModel, - Metric, - ) - - pc = Pinecone() - - if not pc.has_index("book-search"): - desc = pc.create_index_for_model( - name="book-search", - cloud=CloudProvider.AWS, - region=AwsRegion.US_EAST_1, - embed=IndexEmbed( - model=EmbedModel.Multilingual_E5_Large, - metric=Metric.COSINE, - field_map={ - "text": "description", - }, - ) - ) - - .. code-block:: python - :caption: Creating an index for model with schema and dedicated read capacity - - from pinecone import ( - Pinecone, - IndexEmbed, - CloudProvider, - AwsRegion, - EmbedModel, - Metric, - ) - - pc = Pinecone() - - if not pc.has_index("book-search"): - desc = pc.create_index_for_model( - name="book-search", - cloud=CloudProvider.AWS, - region=AwsRegion.US_EAST_1, - embed=IndexEmbed( - model=EmbedModel.Multilingual_E5_Large, - metric=Metric.COSINE, - field_map={ - "text": "description", - }, - ), - read_capacity={ - "mode": "Dedicated", - "dedicated": { - "node_type": "t1", - "scaling": "Manual", - "manual": {"shards": 2, "replicas": 2}, - }, - }, - schema={ - "genre": {"filterable": True}, - "year": {"filterable": True}, - "rating": {"filterable": True}, - }, - ) - - .. seealso:: - - Official docs on `available cloud regions `_ - - `Model Gallery `_ to learn about available models - - """ - pass - - @abstractmethod - def delete_index(self, name: str, timeout: int | None = None): - """ - :param name: the name of the index. - :type name: str - :param timeout: Number of seconds to poll status checking whether the index has been deleted. If None, - wait indefinitely; if >=0, time out after this many seconds; - if -1, return immediately and do not wait. - :type timeout: int, optional - - Deletes a Pinecone index. - - Deleting an index is an irreversible operation. All data in the index will be lost. 
- When you use this command, a request is sent to the Pinecone control plane to delete - the index, but the termination is not synchronous because resources take a few moments to - be released. - - By default the ``delete_index`` method will block until polling of the ``describe_index`` method - shows that the delete operation has completed. If you prefer to return immediately and not - wait for the index to be deleted, you can pass ``timeout=-1`` to the method. - - After the delete request is submitted, polling ``describe_index`` will show that the index - transitions into a ``Terminating`` state before eventually resulting in a 404 after it has been removed. - - This operation can fail if the index is configured with ``deletion_protection="enabled"``. - In this case, you will need to call ``configure_index`` to disable deletion protection before - you can delete the index. - - .. code-block:: python - - from pinecone import Pinecone - - pc = Pinecone() - - index_name = "my_index" - desc = pc.describe_index(name=index_name) - - if desc.deletion_protection == "enabled": - # If for some reason deletion protection is enabled, you will need to disable it first - # before you can delete the index. But use caution as this operation is not reversible - # and if somebody enabled deletion protection, they probably had a good reason. - pc.configure_index(name=index_name, deletion_protection="disabled") - - pc.delete_index(name=index_name) - - """ - pass - - @abstractmethod - def list_indexes(self) -> "IndexList": - """ - :return: Returns an ``IndexList`` object, which is iterable and contains a - list of ``IndexModel`` objects. The ``IndexList`` also has a convenience method ``names()`` - which returns a list of index names for situations where you just want to iterate over - all index names. - - Lists all indexes in your project. - - The results include a description of all indexes in your project, including the - index name, dimension, metric, status, and spec. 
- - If you simply want to check whether an index exists, see the ``has_index()`` convenience method. - - You can use the ``list_indexes()`` method to iterate over descriptions of every index in your project. - - .. code-block:: python - - from pinecone import Pinecone - - pc = Pinecone() - - for index in pc.list_indexes(): - print(index.name) - print(index.dimension) - print(index.metric) - print(index.status) - print(index.host) - print(index.spec) - - """ - pass - - @abstractmethod - def describe_index(self, name: str) -> "IndexModel": - """ - :param name: the name of the index to describe. - :return: Returns an ``IndexModel`` object - which gives access to properties such as the - index name, dimension, metric, host url, status, - and spec. - - Describes a Pinecone index. - - **Getting your index host url** - - In a real production situation, you probably want to - store the host url in an environment variable so you - don't have to call describe_index and re-fetch it - every time you want to use the index. But this example - shows how to get the value from the API using describe_index. - - .. code-block:: python - - from pinecone import Pinecone, Index - - pc = Pinecone() - - index_name="my_index" - description = pc.describe_index(name=index_name) - print(description) - # { - # "name": "my_index", - # "metric": "cosine", - # "host": "my_index-dojoi3u.svc.aped-4627-b74a.pinecone.io", - # "spec": { - # "serverless": { - # "cloud": "aws", - # "region": "us-east-1" - # } - # }, - # "status": { - # "ready": true, - # "state": "Ready" - # }, - # "vector_type": "dense", - # "dimension": 1024, - # "deletion_protection": "enabled", - # "tags": { - # "environment": "production" - # } - # } - - print(f"Your index is hosted at {description.host}") - - index = pc.Index(host=description.host) - index.upsert(vectors=[...]) - - """ - pass - - @abstractmethod - def has_index(self, name: str) -> bool: - """ - :param name: The name of the index to check for existence. 
- :return: Returns ``True`` if the index exists, ``False`` otherwise. - - Checks if a Pinecone index exists. - - .. code-block:: python - - from pinecone import Pinecone, ServerlessSpec - - pc = Pinecone() - - index_name = "my_index" - if not pc.has_index(index_name): - print("Index does not exist, creating...") - pc.create_index( - name=index_name, - dimension=768, - metric="cosine", - spec=ServerlessSpec(cloud="aws", region="us-west-2") - ) - """ - pass - - @abstractmethod - def configure_index( - self, - name: str, - replicas: int | None = None, - pod_type: ("PodType" | str) | None = None, - deletion_protection: ("DeletionProtection" | str) | None = None, - tags: dict[str, str] | None = None, - embed: ("ConfigureIndexEmbed" | Dict) | None = None, - read_capacity: ( - "ReadCapacityDict" - | "ReadCapacity" - | "ReadCapacityOnDemandSpec" - | "ReadCapacityDedicatedSpec" - ) - | None = None, - ): - """ - :param name: the name of the Index - :type name: str, required - :param replicas: the desired number of replicas, lowest value is 0. - :type replicas: int, optional - :param pod_type: the new ``pod_type`` for the index. To learn more about the - available pod types, please see `Understanding Indexes `_. - Note that pod type is only available for pod-based indexes. - :type pod_type: str or PodType, optional - :param deletion_protection: If set to ``'enabled'``, the index cannot be deleted. If ``'disabled'``, the index can be deleted. - :type deletion_protection: str or DeletionProtection, optional - :param tags: A dictionary of tags to apply to the index. Tags are key-value pairs that can be used to organize and manage indexes. To remove a tag, set the value to "". Tags passed to configure_index will be merged with existing tags and any with the value empty string will be removed. - :type tags: dict[str, str], optional - :param embed: configures the integrated inference embedding settings for the index. 
You can convert an existing index to an integrated index by specifying the embedding model and field_map. - The index vector type and dimension must match the model vector type and dimension, and the index similarity metric must be supported by the model. - You can later change the embedding configuration to update the field_map, read_parameters, or write_parameters. Once set, the model cannot be changed. - :type embed: Optional[Union[ConfigureIndexEmbed, Dict]], optional - :param read_capacity: Optional read capacity configuration for serverless indexes. You can specify ``read_capacity`` to configure dedicated read capacity mode - (OnDemand or Dedicated). See ``ServerlessSpec`` documentation for details on read capacity configuration. - Note that read capacity configuration is only available for serverless indexes. - :type read_capacity: Optional[Union[ReadCapacityDict, ReadCapacity, ReadCapacityOnDemandSpec, ReadCapacityDedicatedSpec]] - - This method is used to modify an index's configuration. It can be used to: - - * Configure read capacity for serverless indexes using ``read_capacity`` - * Scale a pod-based index horizontally using ``replicas`` - * Scale a pod-based index vertically using ``pod_type`` - * Enable or disable deletion protection using ``deletion_protection`` - * Add, change, or remove tags using ``tags`` - - **Configuring read capacity for serverless indexes** - - To configure read capacity for serverless indexes, pass the ``read_capacity`` parameter to the ``configure_index`` method. - You can configure either OnDemand or Dedicated read capacity mode. - - .. 
code-block:: python - - from pinecone import Pinecone - - pc = Pinecone() - - # Configure to OnDemand read capacity (default) - pc.configure_index( - name="my_index", - read_capacity={"mode": "OnDemand"} - ) - - # Configure to Dedicated read capacity with manual scaling - pc.configure_index( - name="my_index", - read_capacity={ - "mode": "Dedicated", - "dedicated": { - "node_type": "t1", - "scaling": "Manual", - "manual": {"shards": 1, "replicas": 1} - } - } - ) - - # Verify the configuration was applied - desc = pc.describe_index("my_index") - assert desc.spec.serverless.read_capacity.mode == "Dedicated" - - **Scaling pod-based indexes** - - To scale your pod-based index, you pass a ``replicas`` and/or ``pod_type`` param to the ``configure_index`` method. ``pod_type`` may be a string or a value from the ``PodType`` enum. - - .. code-block:: python - - from pinecone import Pinecone, PodType - - pc = Pinecone() - pc.configure_index( - name="my_index", - replicas=2, - pod_type=PodType.P1_X2 - ) - - After providing these new configurations, you must call ``describe_index`` to see the status of the index as the changes are applied. - - **Enabling or disabling deletion protection** - - To enable or disable deletion protection, pass the ``deletion_protection`` parameter to the ``configure_index`` method. When deletion protection - is enabled, the index cannot be deleted with the ``delete_index`` method. - - .. code-block:: python - - from pinecone import Pinecone, DeletionProtection - - pc = Pinecone() - - # Enable deletion protection - pc.configure_index( - name="my_index", - deletion_protection=DeletionProtection.ENABLED - ) - - # Call describe_index to see the change was applied. 
- assert pc.describe_index("my_index").deletion_protection == "enabled" - - # Disable deletion protection - pc.configure_index( - name="my_index", - deletion_protection=DeletionProtection.DISABLED - ) - - **Adding, changing, or removing tags** - - To add, change, or remove tags, pass the ``tags`` parameter to the ``configure_index`` method. When tags are passed using ``configure_index``, - they are merged with any existing tags already on the index. To remove a tag, set the value of the key to an empty string. - - .. code-block:: python - - from pinecone import Pinecone - - pc = Pinecone() - - # Add a tag - pc.configure_index(name="my_index", tags={"environment": "staging"}) - - # Change a tag - pc.configure_index(name="my_index", tags={"environment": "production"}) - - # Remove a tag - pc.configure_index(name="my_index", tags={"environment": ""}) - - # Call describe_index to view the tags are changed - print(pc.describe_index("my_index").tags) - - """ - pass - - @abstractmethod - def create_collection(self, name: str, source: str) -> None: - """Create a collection from a pod-based index - - :param name: Name of the collection - :type name: str, required - :param source: Name of the source index - :type source: str, required - """ - pass - - @abstractmethod - def list_collections(self) -> "CollectionList": - """List all collections - - .. code-block:: python - - from pinecone import Pinecone - - pc = Pinecone() - - for collection in pc.list_collections(): - print(collection.name) - print(collection.source) - - # You can also iterate specifically over the collection - # names with the .names() helper. - collection_name="my_collection" - for collection_name in pc.list_collections().names(): - print(collection_name) - - """ - pass - - @abstractmethod - def delete_collection(self, name: str) -> None: - """ - :param str name: The name of the collection to delete. - - Deletes a collection. - - Deleting a collection is an irreversible operation. 
All data - in the collection will be lost. - - This method tells Pinecone you would like to delete a collection, - but it takes a few moments to complete the operation. Use the - ``describe_collection()`` method to confirm that the collection - has been deleted. - - .. code-block:: python - - from pinecone import Pinecone - - pc = Pinecone() - - pc.delete_collection(name="my_collection") - - """ - pass - - @abstractmethod - def describe_collection(self, name: str): - """Describes a collection. - - :param str name: The name of the collection - - :return: Description of the collection - - .. code-block:: python - - from pinecone import Pinecone - - pc = Pinecone() - - description = pc.describe_collection("my_collection") - print(description.name) - print(description.source) - print(description.status) - print(description.size) - - """ - pass - - @abstractmethod - def create_backup( - self, *, index_name: str, backup_name: str, description: str = "" - ) -> "BackupModel": - """Create a backup of an index. - - Args: - index_name (str): The name of the index to backup. - backup_name (str): The name to give the backup. - description (str, optional): Optional description of the backup. - """ - pass - - @abstractmethod - def list_backups( - self, - *, - index_name: str | None = None, - limit: int | None = 10, - pagination_token: str | None = None, - ) -> "BackupList": - """List backups. - - If ``index_name`` is provided, the backups will be filtered by index. If no ``index_name`` is provided, all backups in the project will be returned. - - Args: - index_name (str, optional): The name of the index to list backups for. - limit (int, optional): The maximum number of backups to return. - pagination_token (str, optional): The pagination token to use for pagination. - """ - pass - - @abstractmethod - def describe_backup(self, *, backup_id: str) -> "BackupModel": - """Describe a backup. - - Args: - backup_id (str): The ID of the backup to describe. 
- """ - pass - - @abstractmethod - def delete_backup(self, *, backup_id: str) -> None: - """Delete a backup. - - Args: - backup_id (str): The ID of the backup to delete. - """ - pass - - @abstractmethod - def list_restore_jobs( - self, *, limit: int | None = 10, pagination_token: str | None = None - ) -> "RestoreJobList": - """List restore jobs. - - Args: - limit (int): The maximum number of restore jobs to return. - pagination_token (str): The pagination token to use for pagination. - """ - pass - - @abstractmethod - def describe_restore_job(self, *, job_id: str) -> "RestoreJobModel": - """Describe a restore job. - - Args: - job_id (str): The ID of the restore job to describe. - """ - pass - - @abstractmethod - def Index(self, name: str = "", host: str = "", **kwargs): - """ - :param name: The name of the index to target. If you specify the name of the index, the client will - fetch the host url from the Pinecone control plane. - :type name: str, optional - :param host: The host url of the index to target. If you specify the host url, the client will use - the host url directly without making any additional calls to the control plane. - :type host: str, optional - :param pool_threads: The number of threads to use when making parallel requests by calling index methods with optional kwarg async_req=True, or using methods that make use of thread-based parallelism automatically such as query_namespaces(). - :type pool_threads: int, optional - :param connection_pool_maxsize: The maximum number of connections to keep in the connection pool. - :type connection_pool_maxsize: int, optional - :return: An instance of the ``Index`` class. - - Target an index for data operations. - - **Target an index by host url** - - In production situations, you want to uspert or query your data as quickly - as possible. If you know in advance the host url of your index, you can - eliminate a round trip to the Pinecone control plane by specifying the - host of the index. 
If instead you pass the name of the index, the client - will need to make an additional call to api.pinecone.io to get the host url - before any data operations can take place. - - .. code-block:: python - - import os - from pinecone import Pinecone - - api_key = os.environ.get("PINECONE_API_KEY") - index_host = os.environ.get("PINECONE_INDEX_HOST") - - pc = Pinecone(api_key=api_key) - index = pc.Index(host=index_host) - - # Now you're ready to perform data operations - index.query(vector=[...], top_k=10) - - To find your host url, you can use the describe_index method to call api.pinecone.io. - The host url is returned in the response. Or, alternatively, the - host is displayed in the Pinecone web console. - - .. code-block:: python - - import os - from pinecone import Pinecone - - pc = Pinecone( - api_key=os.environ.get("PINECONE_API_KEY") - ) - - host = pc.describe_index('index-name').host - - **Target an index by name (not recommended for production)** - - For more casual usage, such as when you are playing and exploring with Pinecone - in a notebook setting, you can also target an index by name. If you use this - approach, the client may need to perform an extra call to the Pinecone control - plane to get the host url on your behalf to get the index host. - - The client will cache the index host for future use whenever it is seen, so you - will only incur the overhead of only one call. But this approach is not - recommended for production usage because it introduces an unnecessary runtime - dependency on api.pinecone.io. - - .. 
code-block:: python - - import os - from pinecone import Pinecone, ServerlessSpec - - api_key = os.environ.get("PINECONE_API_KEY") - - pc = Pinecone(api_key=api_key) - pc.create_index( - name='my_index', - dimension=1536, - metric='cosine', - spec=ServerlessSpec(cloud='aws', region='us-west-2') - ) - index = pc.Index('my_index') - - # Now you're ready to perform data operations - index.query(vector=[...], top_k=10) - - """ - pass - - def IndexAsyncio(self, host: str, **kwargs): - """Build an asyncio-compatible Index object. - - :param host: The host url of the index to target. You can find this url in the Pinecone - web console or by calling describe_index method of ``Pinecone`` or ``PineconeAsyncio``. - :type host: str, required - - :return: An instance of the ``IndexAsyncio`` class. - - .. code-block:: python - - import asyncio - import os - from pinecone import Pinecone - - async def main(): - pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY")) - async with pc.IndexAsyncio(host=os.environ.get("PINECONE_INDEX_HOST")) as index: - await index.query(vector=[...], top_k=10) - - asyncio.run(main()) - - See more docs for ``PineconeAsyncio`` `here <./asyncio.html#db-data-plane>`_. - - """ - pass diff --git a/pinecone/pinecone.py b/pinecone/pinecone.py index 523d852ca..779ded98a 100644 --- a/pinecone/pinecone.py +++ b/pinecone/pinecone.py @@ -7,8 +7,6 @@ from pinecone.config import PineconeConfig, ConfigBuilder -from .legacy_pinecone_interface import LegacyPineconeDBControlInterface - from pinecone.utils import normalize_host, PluginAware, docslinks, require_kwargs from .langchain_import_warnings import _build_langchain_attribute_error_message @@ -60,7 +58,7 @@ ) -class Pinecone(PluginAware, LegacyPineconeDBControlInterface): +class Pinecone(PluginAware): """ A client for interacting with Pinecone APIs. """ @@ -169,7 +167,7 @@ def __init__( .. 
code-block:: python from pinecone import Pinecone - import urllib3 import make_headers + from urllib3.util import make_headers pc = Pinecone( api_key='YOUR_API_KEY', @@ -179,7 +177,6 @@ def __init__( pc.list_indexes() - **Using proxies with self-signed certificates** By default the Pinecone Python client will perform SSL certificate verification @@ -190,7 +187,7 @@ def __init__( .. code-block:: python from pinecone import Pinecone - import urllib3 import make_headers + from urllib3.util import make_headers pc = Pinecone( api_key='YOUR_API_KEY', @@ -201,7 +198,6 @@ def __init__( pc.list_indexes() - **Disabling SSL verification** If you would like to disable SSL verification, you can pass the ``ssl_verify`` @@ -210,7 +206,7 @@ def __init__( .. code-block:: python from pinecone import Pinecone - import urllib3 import make_headers + from urllib3.util import make_headers pc = Pinecone( api_key='YOUR_API_KEY', @@ -263,6 +259,35 @@ def __init__( def inference(self) -> "Inference": """ Inference is a namespace where an instance of the `pinecone.inference.Inference` class is lazily created and cached. + + This property provides access to Pinecone's inference functionality, including embedding and reranking operations. + + .. 
code-block:: python + + from pinecone import Pinecone + + pc = Pinecone(api_key="your-api-key") + + # Generate embeddings for text + embeddings = pc.inference.embed( + model="multilingual-e5-large", + inputs=["Disease prevention", "Immune system health"] + ) + + # Rerank documents based on query relevance + reranked = pc.inference.rerank( + model="bge-reranker-v2-m3", + query="Disease prevention", + documents=[ + "Rich in vitamin C and other antioxidants, apples contribute to immune health and may reduce the risk of chronic diseases.", + "The high fiber content in apples can also help regulate blood sugar levels, making them beneficial for diabetes management.", + "Apples are a popular fruit known for their sweetness and crisp texture.", + "Regular exercise and a balanced diet are key components of maintaining good health and preventing illness.", + ], + top_n=2, + rank_fields=["text"] + ) + """ if self._inference is None: from pinecone.inference import Inference @@ -278,6 +303,20 @@ def inference(self) -> "Inference": def db(self) -> "DBControl": """ DBControl is a namespace where an instance of the `pinecone.db_control.DBControl` class is lazily created and cached. + + This property provides access to database control operations such as managing indexes, collections, and backups. + + .. code-block:: python + + from pinecone import Pinecone + + pc = Pinecone(api_key="your-api-key") + + # Access database control operations + indexes = pc.db.index.list() + collections = pc.db.collection.list() + backups = pc.db.backup.list() + """ if self._db_control is None: from pinecone.db_control import DBControl @@ -347,6 +386,116 @@ def create_index( vector_type: ("VectorType" | str) | None = "dense", tags: dict[str, str] | None = None, ) -> "IndexModel": + """Creates a Pinecone index. + + :param name: The name of the index to create. Must be unique within your project and + cannot be changed once created. 
Allowed characters are lowercase letters, numbers,
+        and hyphens and the name may not begin or end with hyphens. Maximum length is 45 characters.
+    :type name: str
+    :param metric: Type of similarity metric used in the vector index when querying, one of ``{"cosine", "dotproduct", "euclidean"}``.
+    :type metric: str, optional
+    :param spec: A dictionary containing configurations describing how the index should be deployed. For serverless indexes,
+        specify region and cloud. Optionally, you can specify ``read_capacity`` to configure dedicated read capacity mode
+        (OnDemand or Dedicated) and ``schema`` to configure which metadata fields are filterable. For pod indexes, specify
+        replicas, shards, pods, pod_type, metadata_config, and source_collection.
+        Alternatively, use the ``ServerlessSpec``, ``PodSpec``, or ``ByocSpec`` objects to specify these configurations.
+    :type spec: Dict
+    :param dimension: If you are creating an index with ``vector_type="dense"`` (which is the default), you need to specify ``dimension`` to indicate the size of your vectors.
+        This should match the dimension of the embeddings you will be inserting. For example, if you are using
+        OpenAI's ``text-embedding-ada-002`` model, you should use ``dimension=1536``. Dimension is a required field when
+        creating an index with ``vector_type="dense"`` and should not be passed when ``vector_type="sparse"``.
+    :type dimension: int
+    :type timeout: int, optional
+    :param timeout: Specify the number of seconds to wait until index gets ready. If None, wait indefinitely; if >=0, time out after this many seconds;
+        if -1, return immediately and do not wait.
+    :param deletion_protection: If enabled, the index cannot be deleted. If disabled, the index can be deleted.
+    :type deletion_protection: Optional[Literal["enabled", "disabled"]]
+    :param vector_type: The type of vectors to be stored in the index. One of ``{"dense", "sparse"}``.
+ :type vector_type: str, optional + :param tags: Tags are key-value pairs you can attach to indexes to better understand, organize, and identify your resources. Some example use cases include tagging indexes with the name of the model that generated the embeddings, the date the index was created, or the purpose of the index. + :type tags: Optional[dict[str, str]] + :return: A ``IndexModel`` instance containing a description of the index that was created. + + Examples: + + .. code-block:: python + :caption: Creating a serverless index + + import os + from pinecone import ( + Pinecone, + ServerlessSpec, + CloudProvider, + AwsRegion, + Metric, + DeletionProtection, + VectorType + ) + + pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY")) + + pc.create_index( + name="my_index", + dimension=512, + metric=Metric.COSINE, + spec=ServerlessSpec( + cloud=CloudProvider.AWS, + region=AwsRegion.US_WEST_2, + read_capacity={ + "mode": "Dedicated", + "dedicated": { + "node_type": "t1", + "scaling": "Manual", + "manual": {"shards": 2, "replicas": 2}, + }, + }, + schema={ + "genre": {"filterable": True}, + "year": {"filterable": True}, + "rating": {"filterable": True}, + }, + ), + deletion_protection=DeletionProtection.DISABLED, + vector_type=VectorType.DENSE, + tags={ + "app": "movie-recommendations", + "env": "production" + } + ) + + .. 
code-block:: python + :caption: Creating a pod index + + import os + from pinecone import ( + Pinecone, + PodSpec, + PodIndexEnvironment, + PodType, + Metric, + DeletionProtection, + VectorType + ) + + pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY")) + + pc.create_index( + name="my_index", + dimension=1536, + metric=Metric.COSINE, + spec=PodSpec( + environment=PodIndexEnvironment.US_EAST4_GCP, + pod_type=PodType.P1_X1 + ), + deletion_protection=DeletionProtection.DISABLED, + tags={ + "model": "clip", + "app": "image-search", + "env": "testing" + } + ) + + """ return self.db.index.create( name=name, spec=spec, @@ -385,6 +534,114 @@ def create_index_for_model( | None = None, timeout: int | None = None, ) -> "IndexModel": + """Create a Serverless index configured for use with Pinecone's integrated inference models. + + :param name: The name of the index to create. Must be unique within your project and + cannot be changed once created. Allowed characters are lowercase letters, numbers, + and hyphens and the name may not begin or end with hyphens. Maximum length is 45 characters. + :type name: str + :param cloud: The cloud provider to use for the index. One of ``{"aws", "gcp", "azure"}``. + :type cloud: str + :param region: The region to use for the index. Enum objects ``AwsRegion``, ``GcpRegion``, and ``AzureRegion`` are also available to help you quickly set these parameters, but may not be up to date as new regions become available. + :type region: str + :param embed: The embedding configuration for the index. This param accepts a dictionary or an instance of the ``IndexEmbed`` object. + :type embed: Union[Dict, IndexEmbed] + :param tags: Tags are key-value pairs you can attach to indexes to better understand, organize, and identify your resources. Some example use cases include tagging indexes with the name of the model that generated the embeddings, the date the index was created, or the purpose of the index. 
+ :type tags: Optional[dict[str, str]] + :param deletion_protection: If enabled, the index cannot be deleted. If disabled, the index can be deleted. This setting can be changed with ``configure_index``. + :type deletion_protection: Optional[Literal["enabled", "disabled"]] + :param read_capacity: Optional read capacity configuration. You can specify ``read_capacity`` to configure dedicated read capacity mode + (OnDemand or Dedicated). See ``ServerlessSpec`` documentation for details on read capacity configuration. + :type read_capacity: Optional[Union[ReadCapacityDict, ReadCapacity, ReadCapacityOnDemandSpec, ReadCapacityDedicatedSpec]] + :param schema: Optional metadata schema configuration. You can specify ``schema`` to configure which metadata fields are filterable. + The schema can be provided as a dictionary mapping field names to their configurations (e.g., ``{"genre": {"filterable": True}}``) + or as a dictionary with a ``fields`` key (e.g., ``{"fields": {"genre": {"filterable": True}}}``). + :type schema: Optional[Union[dict[str, MetadataSchemaFieldConfig], dict[str, dict[str, Any]], BackupModelSchema]] + :type timeout: Optional[int] + :param timeout: Specify the number of seconds to wait until index is ready to receive data. If None, wait indefinitely; if >=0, time out after this many seconds; + if -1, return immediately and do not wait. + :return: A description of the index that was created. + :rtype: IndexModel + + The resulting index can be described, listed, configured, and deleted like any other Pinecone index with the ``describe_index``, ``list_indexes``, ``configure_index``, and ``delete_index`` methods. + + After the model is created, you can upsert records into the index with the ``upsert_records`` method, and search your records with the ``search`` method. + + .. 
code-block:: python + + from pinecone import ( + Pinecone, + IndexEmbed, + CloudProvider, + AwsRegion, + EmbedModel, + Metric, + ) + + pc = Pinecone() + + if not pc.has_index("book-search"): + desc = pc.create_index_for_model( + name="book-search", + cloud=CloudProvider.AWS, + region=AwsRegion.US_EAST_1, + embed=IndexEmbed( + model=EmbedModel.Multilingual_E5_Large, + metric=Metric.COSINE, + field_map={ + "text": "description", + }, + ) + ) + + .. code-block:: python + :caption: Creating an index for model with schema and dedicated read capacity + + from pinecone import ( + Pinecone, + IndexEmbed, + CloudProvider, + AwsRegion, + EmbedModel, + Metric, + ) + + pc = Pinecone() + + if not pc.has_index("book-search"): + desc = pc.create_index_for_model( + name="book-search", + cloud=CloudProvider.AWS, + region=AwsRegion.US_EAST_1, + embed=IndexEmbed( + model=EmbedModel.Multilingual_E5_Large, + metric=Metric.COSINE, + field_map={ + "text": "description", + }, + ), + read_capacity={ + "mode": "Dedicated", + "dedicated": { + "node_type": "t1", + "scaling": "Manual", + "manual": {"shards": 2, "replicas": 2}, + }, + }, + schema={ + "genre": {"filterable": True}, + "year": {"filterable": True}, + "rating": {"filterable": True}, + }, + ) + + .. seealso:: + + Official docs on `available cloud regions `_ + + `Model Gallery `_ to learn about available models + + """ return self.db.index.create_for_model( name=name, cloud=cloud, @@ -407,6 +664,42 @@ def create_index_from_backup( tags: dict[str, str] | None = None, timeout: int | None = None, ) -> "IndexModel": + """Create an index from a backup. + + Call ``list_backups`` to get a list of backups for your project. + + :param name: The name of the index to create. + :type name: str + :param backup_id: The ID of the backup to restore. + :type backup_id: str + :param deletion_protection: If enabled, the index cannot be deleted. If disabled, the index can be deleted. This setting can be changed with ``configure_index``. 
+ :type deletion_protection: Optional[Literal["enabled", "disabled"]] + :param tags: Tags are key-value pairs you can attach to indexes to better understand, organize, and identify your resources. Some example use cases include tagging indexes with the name of the model that generated the embeddings, the date the index was created, or the purpose of the index. + :type tags: Optional[dict[str, str]] + :param timeout: Specify the number of seconds to wait until index is ready to receive data. If None, wait indefinitely; if >=0, time out after this many seconds; + if -1, return immediately and do not wait. + :return: A description of the index that was created. + :rtype: IndexModel + + .. code-block:: python + + from pinecone import Pinecone + + pc = Pinecone() + + # List available backups + backups = pc.list_backups() + if backups: + backup_id = backups[0].id + + # Create index from backup + index = pc.create_index_from_backup( + name="restored-index", + backup_id=backup_id, + deletion_protection="disabled" + ) + + """ return self.db.index.create_from_backup( name=name, backup_id=backup_id, @@ -416,15 +709,162 @@ def create_index_from_backup( ) def delete_index(self, name: str, timeout: int | None = None) -> None: + """Deletes a Pinecone index. + + :param name: the name of the index. + :type name: str + :param timeout: Number of seconds to poll status checking whether the index has been deleted. If None, + wait indefinitely; if >=0, time out after this many seconds; + if -1, return immediately and do not wait. + :type timeout: int, optional + + Deleting an index is an irreversible operation. All data in the index will be lost. + When you use this command, a request is sent to the Pinecone control plane to delete + the index, but the termination is not synchronous because resources take a few moments to + be released. + + By default the ``delete_index`` method will block until polling of the ``describe_index`` method + shows that the delete operation has completed. 
If you prefer to return immediately and not + wait for the index to be deleted, you can pass ``timeout=-1`` to the method. + + After the delete request is submitted, polling ``describe_index`` will show that the index + transitions into a ``Terminating`` state before eventually resulting in a 404 after it has been removed. + + This operation can fail if the index is configured with ``deletion_protection="enabled"``. + In this case, you will need to call ``configure_index`` to disable deletion protection before + you can delete the index. + + .. code-block:: python + + from pinecone import Pinecone + + pc = Pinecone() + + index_name = "my_index" + desc = pc.describe_index(name=index_name) + + if desc.deletion_protection == "enabled": + # If for some reason deletion protection is enabled, you will need to disable it first + # before you can delete the index. But use caution as this operation is not reversible + # and if somebody enabled deletion protection, they probably had a good reason. + pc.configure_index(name=index_name, deletion_protection="disabled") + + pc.delete_index(name=index_name) + + """ return self.db.index.delete(name=name, timeout=timeout) def list_indexes(self) -> "IndexList": + """Lists all indexes in your project. + + :return: Returns an ``IndexList`` object, which is iterable and contains a + list of ``IndexModel`` objects. The ``IndexList`` also has a convenience method ``names()`` + which returns a list of index names for situations where you just want to iterate over + all index names. + + The results include a description of all indexes in your project, including the + index name, dimension, metric, status, and spec. + + If you simply want to check whether an index exists, see the ``has_index()`` convenience method. + + You can use the ``list_indexes()`` method to iterate over descriptions of every index in your project. + + .. 
code-block:: python + + from pinecone import Pinecone + + pc = Pinecone() + + for index in pc.list_indexes(): + print(index.name) + print(index.dimension) + print(index.metric) + print(index.status) + print(index.host) + print(index.spec) + + """ return self.db.index.list() def describe_index(self, name: str) -> "IndexModel": + """Describes a Pinecone index. + + :param name: the name of the index to describe. + :return: Returns an ``IndexModel`` object + which gives access to properties such as the + index name, dimension, metric, host url, status, + and spec. + + **Getting your index host url** + + In a real production situation, you probably want to + store the host url in an environment variable so you + don't have to call describe_index and re-fetch it + every time you want to use the index. But this example + shows how to get the value from the API using describe_index. + + .. code-block:: python + + from pinecone import Pinecone, Index + + pc = Pinecone() + + index_name="my_index" + description = pc.describe_index(name=index_name) + print(description) + # { + # "name": "my_index", + # "metric": "cosine", + # "host": "my_index-dojoi3u.svc.aped-4627-b74a.pinecone.io", + # "spec": { + # "serverless": { + # "cloud": "aws", + # "region": "us-east-1" + # } + # }, + # "status": { + # "ready": true, + # "state": "Ready" + # }, + # "vector_type": "dense", + # "dimension": 1024, + # "deletion_protection": "enabled", + # "tags": { + # "environment": "production" + # } + # } + + print(f"Your index is hosted at {description.host}") + + index = pc.Index(host=description.host) + index.upsert(vectors=[...]) + + """ return self.db.index.describe(name=name) def has_index(self, name: str) -> bool: + """Checks if a Pinecone index exists. + + :param name: The name of the index to check for existence. + :return: Returns ``True`` if the index exists, ``False`` otherwise. + + .. 
code-block:: python
+
+            from pinecone import Pinecone, ServerlessSpec
+
+            pc = Pinecone()
+
+            index_name = "my_index"
+            if not pc.has_index(index_name):
+                print("Index does not exist, creating...")
+                pc.create_index(
+                    name=index_name,
+                    dimension=768,
+                    metric="cosine",
+                    spec=ServerlessSpec(cloud="aws", region="us-west-2")
+                )
+
+        """
         return self.db.index.has(name=name)

     def configure_index(
@@ -443,6 +883,138 @@
         )
         | None = None,
     ) -> None:
+        """Modify an index's configuration.
+
+        :param name: the name of the Index
+        :type name: str, required
+        :param replicas: the desired number of replicas, lowest value is 0.
+        :type replicas: int, optional
+        :param pod_type: the new ``pod_type`` for the index. To learn more about the
+            available pod types, please see `Understanding Indexes <https://docs.pinecone.io/guides/indexes/understanding-indexes>`_.
+            Note that pod type is only available for pod-based indexes.
+        :type pod_type: str or PodType, optional
+        :param deletion_protection: If set to ``'enabled'``, the index cannot be deleted. If ``'disabled'``, the index can be deleted.
+        :type deletion_protection: str or DeletionProtection, optional
+        :param tags: A dictionary of tags to apply to the index. Tags are key-value pairs that can be used to organize and manage indexes. To remove a tag, set the value to "". Tags passed to configure_index will be merged with existing tags and any with the value empty string will be removed.
+        :type tags: dict[str, str], optional
+        :param embed: configures the integrated inference embedding settings for the index. You can convert an existing index to an integrated index by specifying the embedding model and field_map.
+            The index vector type and dimension must match the model vector type and dimension, and the index similarity metric must be supported by the model.
+            You can later change the embedding configuration to update the field_map, read_parameters, or write_parameters. Once set, the model cannot be changed.
+ :type embed: Optional[Union[ConfigureIndexEmbed, Dict]], optional + :param read_capacity: Optional read capacity configuration for serverless indexes. You can specify ``read_capacity`` to configure dedicated read capacity mode + (OnDemand or Dedicated). See ``ServerlessSpec`` documentation for details on read capacity configuration. + Note that read capacity configuration is only available for serverless indexes. + :type read_capacity: Optional[Union[ReadCapacityDict, ReadCapacity, ReadCapacityOnDemandSpec, ReadCapacityDedicatedSpec]] + + This method is used to modify an index's configuration. It can be used to: + + * Configure read capacity for serverless indexes using ``read_capacity`` + * Scale a pod-based index horizontally using ``replicas`` + * Scale a pod-based index vertically using ``pod_type`` + * Enable or disable deletion protection using ``deletion_protection`` + * Add, change, or remove tags using ``tags`` + + **Configuring read capacity for serverless indexes** + + To configure read capacity for serverless indexes, pass the ``read_capacity`` parameter to the ``configure_index`` method. + You can configure either OnDemand or Dedicated read capacity mode. + + .. code-block:: python + + from pinecone import Pinecone + + pc = Pinecone() + + # Configure to OnDemand read capacity (default) + pc.configure_index( + name="my_index", + read_capacity={"mode": "OnDemand"} + ) + + # Configure to Dedicated read capacity with manual scaling + pc.configure_index( + name="my_index", + read_capacity={ + "mode": "Dedicated", + "dedicated": { + "node_type": "t1", + "scaling": "Manual", + "manual": {"shards": 1, "replicas": 1} + } + } + ) + + # Verify the configuration was applied + desc = pc.describe_index("my_index") + assert desc.spec.serverless.read_capacity.mode == "Dedicated" + + **Scaling pod-based indexes** + + To scale your pod-based index, you pass a ``replicas`` and/or ``pod_type`` param to the ``configure_index`` method. 
``pod_type`` may be a string or a value from the ``PodType`` enum. + + .. code-block:: python + + from pinecone import Pinecone, PodType + + pc = Pinecone() + pc.configure_index( + name="my_index", + replicas=2, + pod_type=PodType.P1_X2 + ) + + After providing these new configurations, you must call ``describe_index`` to see the status of the index as the changes are applied. + + **Enabling or disabling deletion protection** + + To enable or disable deletion protection, pass the ``deletion_protection`` parameter to the ``configure_index`` method. When deletion protection + is enabled, the index cannot be deleted with the ``delete_index`` method. + + .. code-block:: python + + from pinecone import Pinecone, DeletionProtection + + pc = Pinecone() + + # Enable deletion protection + pc.configure_index( + name="my_index", + deletion_protection=DeletionProtection.ENABLED + ) + + # Call describe_index to see the change was applied. + assert pc.describe_index("my_index").deletion_protection == "enabled" + + # Disable deletion protection + pc.configure_index( + name="my_index", + deletion_protection=DeletionProtection.DISABLED + ) + + **Adding, changing, or removing tags** + + To add, change, or remove tags, pass the ``tags`` parameter to the ``configure_index`` method. When tags are passed using ``configure_index``, + they are merged with any existing tags already on the index. To remove a tag, set the value of the key to an empty string. + + .. 
code-block:: python + + from pinecone import Pinecone + + pc = Pinecone() + + # Add a tag + pc.configure_index(name="my_index", tags={"environment": "staging"}) + + # Change a tag + pc.configure_index(name="my_index", tags={"environment": "production"}) + + # Remove a tag + pc.configure_index(name="my_index", tags={"environment": ""}) + + # Call describe_index to view the tags are changed + print(pc.describe_index("my_index").tags) + + """ return self.db.index.configure( name=name, replicas=replicas, @@ -454,15 +1026,91 @@ def configure_index( ) def create_collection(self, name: str, source: str) -> None: + """Create a collection from a pod-based index. + + :param name: Name of the collection + :type name: str, required + :param source: Name of the source index + :type source: str, required + + .. code-block:: python + + from pinecone import Pinecone + + pc = Pinecone() + + # Create a collection from an existing pod-based index + pc.create_collection(name="my_collection", source="my_index") + + """ return self.db.collection.create(name=name, source=source) def list_collections(self) -> "CollectionList": + """List all collections. + + .. code-block:: python + + from pinecone import Pinecone + + pc = Pinecone() + + for collection in pc.list_collections(): + print(collection.name) + print(collection.source) + + # You can also iterate specifically over the collection + # names with the .names() helper. + collection_name="my_collection" + for collection_name in pc.list_collections().names(): + print(collection_name) + + """ return self.db.collection.list() def delete_collection(self, name: str) -> None: + """Deletes a collection. + + :param str name: The name of the collection to delete. + + Deleting a collection is an irreversible operation. All data + in the collection will be lost. + + This method tells Pinecone you would like to delete a collection, + but it takes a few moments to complete the operation. 
Use the + ``describe_collection()`` method to confirm that the collection + has been deleted. + + .. code-block:: python + + from pinecone import Pinecone + + pc = Pinecone() + + pc.delete_collection(name="my_collection") + + """ return self.db.collection.delete(name=name) def describe_collection(self, name: str) -> dict[str, Any]: + """Describes a collection. + + :param str name: The name of the collection + + :return: Description of the collection + + .. code-block:: python + + from pinecone import Pinecone + + pc = Pinecone() + + description = pc.describe_collection("my_collection") + print(description.name) + print(description.source) + print(description.status) + print(description.size) + + """ from typing import cast result = self.db.collection.describe(name=name) @@ -472,6 +1120,31 @@ def describe_collection(self, name: str) -> dict[str, Any]: def create_backup( self, *, index_name: str, backup_name: str, description: str = "" ) -> "BackupModel": + """Create a backup of an index. + + :param index_name: The name of the index to backup. + :type index_name: str + :param backup_name: The name to give the backup. + :type backup_name: str + :param description: Optional description of the backup. + :type description: str, optional + + .. code-block:: python + + from pinecone import Pinecone + + pc = Pinecone() + + # Create a backup of an index + backup = pc.create_backup( + index_name="my_index", + backup_name="my_backup", + description="Daily backup" + ) + + print(f"Backup created with ID: {backup.id}") + + """ return self.db.backup.create( index_name=index_name, backup_name=backup_name, description=description ) @@ -484,26 +1157,120 @@ def list_backups( limit: int | None = 10, pagination_token: str | None = None, ) -> "BackupList": + """List backups. + + If ``index_name`` is provided, the backups will be filtered by index. If no ``index_name`` is provided, all backups in the project will be returned. 
+ + :param index_name: The name of the index to list backups for. + :type index_name: str, optional + :param limit: The maximum number of backups to return. + :type limit: int, optional + :param pagination_token: The pagination token to use for pagination. + :type pagination_token: str, optional + + .. code-block:: python + + from pinecone import Pinecone + + pc = Pinecone() + + # List all backups + all_backups = pc.list_backups(limit=20) + + # List backups for a specific index + index_backups = pc.list_backups(index_name="my_index", limit=10) + + for backup in index_backups: + print(f"Backup: {backup.name}, Status: {backup.status}") + + """ return self.db.backup.list( index_name=index_name, limit=limit, pagination_token=pagination_token ) @require_kwargs def describe_backup(self, *, backup_id: str) -> "BackupModel": + """Describe a backup. + + :param backup_id: The ID of the backup to describe. + :type backup_id: str + + .. code-block:: python + + from pinecone import Pinecone + + pc = Pinecone() + + backup = pc.describe_backup(backup_id="backup-123") + print(f"Backup: {backup.name}") + print(f"Status: {backup.status}") + print(f"Index: {backup.index_name}") + + """ return self.db.backup.describe(backup_id=backup_id) @require_kwargs def delete_backup(self, *, backup_id: str) -> None: + """Delete a backup. + + :param backup_id: The ID of the backup to delete. + :type backup_id: str + + .. code-block:: python + + from pinecone import Pinecone + + pc = Pinecone() + + pc.delete_backup(backup_id="backup-123") + + """ return self.db.backup.delete(backup_id=backup_id) @require_kwargs def list_restore_jobs( self, *, limit: int | None = 10, pagination_token: str | None = None ) -> "RestoreJobList": + """List restore jobs. + + :param limit: The maximum number of restore jobs to return. + :type limit: int + :param pagination_token: The pagination token to use for pagination. + :type pagination_token: str + + .. 
code-block:: python + + from pinecone import Pinecone + + pc = Pinecone() + + restore_jobs = pc.list_restore_jobs(limit=20) + + for job in restore_jobs: + print(f"Job ID: {job.id}, Status: {job.status}") + + """ return self.db.restore_job.list(limit=limit, pagination_token=pagination_token) @require_kwargs def describe_restore_job(self, *, job_id: str) -> "RestoreJobModel": + """Describe a restore job. + + :param job_id: The ID of the restore job to describe. + :type job_id: str + + .. code-block:: python + + from pinecone import Pinecone + + pc = Pinecone() + + job = pc.describe_restore_job(job_id="job-123") + print(f"Job ID: {job.id}") + print(f"Status: {job.status}") + print(f"Source backup: {job.backup_id}") + + """ return self.db.restore_job.describe(job_id=job_id) @staticmethod @@ -517,6 +1284,90 @@ def from_documents(*args: Any, **kwargs: Any) -> NoReturn: raise AttributeError(_build_langchain_attribute_error_message("from_documents")) def Index(self, name: str = "", host: str = "", **kwargs) -> "Index": + """Target an index for data operations. + + :param name: The name of the index to target. If you specify the name of the index, the client will + fetch the host url from the Pinecone control plane. + :type name: str, optional + :param host: The host url of the index to target. If you specify the host url, the client will use + the host url directly without making any additional calls to the control plane. + :type host: str, optional + :param pool_threads: The number of threads to use when making parallel requests by calling index methods with optional kwarg async_req=True, or using methods that make use of thread-based parallelism automatically such as query_namespaces(). + :type pool_threads: int, optional + :param connection_pool_maxsize: The maximum number of connections to keep in the connection pool. + :type connection_pool_maxsize: int, optional + :return: An instance of the ``Index`` class. 
+
+        **Target an index by host url**
+
+        In production situations, you want to upsert or query your data as quickly
+        as possible. If you know in advance the host url of your index, you can
+        eliminate a round trip to the Pinecone control plane by specifying the
+        host of the index. If instead you pass the name of the index, the client
+        will need to make an additional call to api.pinecone.io to get the host url
+        before any data operations can take place.
+
+        .. code-block:: python
+
+            import os
+            from pinecone import Pinecone
+
+            api_key = os.environ.get("PINECONE_API_KEY")
+            index_host = os.environ.get("PINECONE_INDEX_HOST")
+
+            pc = Pinecone(api_key=api_key)
+            index = pc.Index(host=index_host)
+
+            # Now you're ready to perform data operations
+            index.query(vector=[...], top_k=10)
+
+        To find your host url, you can use the describe_index method to call api.pinecone.io.
+        The host url is returned in the response. Or, alternatively, the
+        host is displayed in the Pinecone web console.
+
+        .. code-block:: python
+
+            import os
+            from pinecone import Pinecone
+
+            pc = Pinecone(
+                api_key=os.environ.get("PINECONE_API_KEY")
+            )
+
+            host = pc.describe_index('index-name').host
+
+        **Target an index by name (not recommended for production)**
+
+        For more casual usage, such as when you are playing and exploring with Pinecone
+        in a notebook setting, you can also target an index by name. If you use this
+        approach, the client may need to perform an extra call to the Pinecone control
+        plane on your behalf to get the index host.
+
+        The client will cache the index host for future use whenever it is seen, so you
+        will only incur the overhead of one call. But this approach is not
+        recommended for production usage because it introduces an unnecessary runtime
+        dependency on api.pinecone.io.
+
+        ..
code-block:: python + + import os + from pinecone import Pinecone, ServerlessSpec + + api_key = os.environ.get("PINECONE_API_KEY") + + pc = Pinecone(api_key=api_key) + pc.create_index( + name='my_index', + dimension=1536, + metric='cosine', + spec=ServerlessSpec(cloud='aws', region='us-west-2') + ) + index = pc.Index('my_index') + + # Now you're ready to perform data operations + index.query(vector=[...], top_k=10) + + """ from pinecone.db_data import _Index if name == "" and host == "": @@ -545,6 +1396,30 @@ def Index(self, name: str = "", host: str = "", **kwargs) -> "Index": ) def IndexAsyncio(self, host: str, **kwargs) -> "IndexAsyncio": + """Build an asyncio-compatible Index object. + + :param host: The host url of the index to target. You can find this url in the Pinecone + web console or by calling describe_index method of ``Pinecone`` or ``PineconeAsyncio``. + :type host: str, required + + :return: An instance of the ``IndexAsyncio`` class. + + .. code-block:: python + + import asyncio + import os + from pinecone import Pinecone + + async def main(): + pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY")) + async with pc.IndexAsyncio(host=os.environ.get("PINECONE_INDEX_HOST")) as index: + await index.query(vector=[...], top_k=10) + + asyncio.run(main()) + + See more docs for ``PineconeAsyncio`` `here <./asyncio.html#db-data-plane>`_. + + """ from pinecone.db_data import _IndexAsyncio api_key = self._config.api_key