Skip to content

Commit fef0ce6

Browse files
committed
basic upsert
upsert working? temp routing name fetch list type name inline request body for upsert upsert request body _id magic . Simple HTTP impl update route name updates
1 parent f66b42b commit fef0ce6

3 files changed

Lines changed: 248 additions & 0 deletions

File tree

pinecone/pinecone.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
if TYPE_CHECKING:
1717
from pinecone.config import Config, OpenApiConfiguration
1818
from pinecone.db_data import _Index as Index, _IndexAsyncio as IndexAsyncio
19+
from pinecone.repository.data import _Repository as Repository
1920
from pinecone.db_control.index_host_store import IndexHostStore
2021
from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi
2122
from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict, ConfigureIndexEmbed
@@ -518,6 +519,35 @@ def IndexAsyncio(self, host: str, **kwargs) -> "IndexAsyncio":
518519
**kwargs,
519520
)
520521

522+
def Repository(self, name: str = "", host: str = "", **kwargs) -> "Repository":
    """Create a data-plane client for a repository.

    Args:
        name: Repository name. Resolving a host from the name is not
            implemented yet, so passing only ``name`` raises ``ValueError``.
        host: Host URL of the repository. It is passed through
            ``check_realistic_host`` and ``normalize_host`` before use.
        **kwargs: Extra options forwarded to the client constructor.
            ``pool_threads`` is extracted here; when it is missing or falsy
            this object's ``_pool_threads`` is used instead.

    Returns:
        A ``_Repository`` instance configured with this client's API key,
        OpenAPI configuration and source tag.

    Raises:
        ValueError: If both ``name`` and ``host`` are empty strings, or if
            ``host`` is an empty string (name-based lookup is unsupported).
    """
    # Imported lazily, inside the method, as in the original code.
    from pinecone.repository.data import _Repository

    if not (name != "" or host != ""):
        raise ValueError("Either name or host must be specified")

    pool_threads = kwargs.pop("pool_threads", None) or self._pool_threads
    api_key = self._config.api_key
    openapi_config = self._openapi_config

    if host == "":
        # TODO: resolve the host URL from the repository name (e.g. through a
        # describe call) once that lookup is available.
        raise ValueError("host lookup not yet supported, specify host parameter")

    # An explicit host always wins: validate it, then normalize it.
    check_realistic_host(host)

    return _Repository(
        host=normalize_host(host),
        api_key=api_key,
        pool_threads=pool_threads,
        openapi_config=openapi_config,
        source_tag=self.config.source_tag,
        **kwargs,
    )
550+
521551

522552
def check_realistic_host(host: str) -> None:
523553
""":meta private:
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from .repository import Repository
2+
3+
4+
_Repository = Repository # alias for backwards compatibility
5+
6+
7+
__all__ = ["_Repository"]
8+
9+
10+
def __getattr__(name):
    """Module-level attribute hook, called when normal lookup of ``name`` fails.

    Returns the module global called ``name`` when one exists; otherwise
    raises ``AttributeError`` with the standard "module has no attribute"
    message.

    The lookup must use ``globals()``: inside this function ``locals()`` only
    contains the ``name`` parameter itself, so checking ``locals()`` made
    ``module.name`` return the string ``"name"`` and never consulted the
    module namespace at all.
    """
    module_namespace = globals()
    if name in module_namespace:
        return module_namespace[name]
    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
import logging
2+
import json
3+
from typing import Optional, Dict, Any, Tuple
4+
from urllib.parse import urljoin
5+
6+
import requests
7+
from requests.adapters import HTTPAdapter
8+
from urllib3.util.retry import Retry
9+
from multiprocessing import cpu_count
10+
from pinecone.core.openapi.repository_data import API_VERSION
11+
12+
logger = logging.getLogger(__name__)
13+
14+
15+
def _ensure_https_host(host: str) -> str:
16+
"""
17+
Normalizes the host value to include scheme and no trailing slash.
18+
Accepts: "kb.example.com", "https://kb.example.com/", "http://..."
19+
Returns: "https://kb.example.com"
20+
"""
21+
host = (host or "").strip()
22+
if not host:
23+
raise ValueError("host must be provided (e.g., 'kb.your-company.com').")
24+
if not host.startswith(("http://", "https://")):
25+
host = "https://" + host
26+
# strip single trailing slash
27+
if host.endswith("/"):
28+
host = host[:-1]
29+
return host
30+
31+
32+
class HTTPError(Exception):
    """Error raised for a failed HTTP call.

    The exception message is ``"<status_code>: <message>"``. The status code
    is kept on ``status_code`` and the server payload on ``payload``; a
    missing or empty payload is stored as an empty dict.
    """

    def __init__(self, status_code: int, message: str, payload: Optional[dict] = None):
        summary = "{}: {}".format(status_code, message)
        super().__init__(summary)
        self.status_code = status_code
        # Falsy payloads (None, {}) collapse to a fresh empty dict.
        self.payload = payload if payload else {}
39+
40+
41+
class Repository:
    """
    A client for interacting with the Pinecone Knowledge Base Data Plane (Documents).
    Uses `requests` directly, with retries and sane defaults.

    Methods return plain `dict` responses parsed from JSON. Non-2xx responses
    raise `HTTPError`.
    """

    def __init__(
        self,
        api_key: str,
        host: str,
        pool_threads: Optional[int] = None,
        additional_headers: Optional[Dict[str, str]] = None,
        openapi_config=None,  # kept for backward compat; unused
        echo: bool = False,
        **kwargs,
    ):
        """
        Build the HTTP session and default headers.

        Args:
            api_key: Value sent in the ``Api-Key`` header on every request.
            host: Host of the service; normalized by ``_ensure_https_host``.
            pool_threads: Connection pool size. Defaults to ``5 * cpu_count()``.
            additional_headers: Extra headers merged over the defaults.
            openapi_config: Accepted for compatibility; not used.
            echo: When true, each request is printed to stdout before sending.
            **kwargs: Recognized keys are ``connection_pool_maxsize``,
                ``timeout`` (a ``(connect, read)`` tuple) and ``retries``
                (passed to the adapter as ``max_retries``). Other keys are
                ignored.
        """
        self._api_key = api_key
        self._base_url = _ensure_https_host(host)
        self._echo = echo

        # Connection pool sizing
        self._pool_threads = 5 * cpu_count() if pool_threads is None else pool_threads
        pool_maxsize = kwargs.get("connection_pool_maxsize", self._pool_threads)

        # Timeouts (connect, read). Allow overrides via kwargs
        # e.g., timeout=(3.05, 30)
        self._timeout: Tuple[float, float] = kwargs.get("timeout", (5.0, 60.0))

        # Retries: conservative defaults; override via kwargs["retries"]
        retries = kwargs.get(
            "retries",
            Retry(
                total=5,
                backoff_factor=0.5,
                status_forcelist=(429, 500, 502, 503, 504),
                allowed_methods=frozenset(["GET", "POST", "DELETE"]),
                raise_on_status=False,
            ),
        )

        self._session = requests.Session()
        adapter = HTTPAdapter(
            pool_connections=self._pool_threads, pool_maxsize=pool_maxsize, max_retries=retries
        )
        self._session.mount("https://", adapter)
        self._session.mount("http://", adapter)

        self._default_headers = {
            "Api-Key": self._api_key,
            "Accept": "application/json",
            "x-pinecone-api-version": API_VERSION,
            # Content-Type set per request when needed
        }
        if additional_headers:
            self._default_headers.update(additional_headers)

    # -----------------------
    # Internal request helper
    # -----------------------
    @staticmethod
    def _mask_headers(headers: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``headers`` that is safe to print.

        The value of any ``Api-Key`` header (case-insensitive) is replaced by
        its first five characters followed by ``"..."``; values that are not
        strings or have five characters or fewer become just ``"..."``.
        """
        masked: Dict[str, Any] = {}
        for key, value in headers.items():
            if key.lower() == "api-key":
                value = (value[:5] + "...") if isinstance(value, str) and len(value) > 5 else "..."
            masked[key] = value
        return masked

    def _request(
        self,
        method: str,
        path: str,
        *,
        json_body: Optional[dict] = None,
        headers: Optional[dict] = None,
        params: Optional[dict] = None,
        echo: Optional[bool] = None,
    ) -> dict:
        """
        Send one HTTP request and return the decoded JSON payload.

        Args:
            method: HTTP verb.
            path: Path joined onto the base URL (a leading slash is ignored).
            json_body: Optional JSON body; sets ``Content-Type`` when given
                unless the caller already supplied one.
            headers: Per-request headers merged over the defaults.
            params: Optional query parameters.
            echo: Overrides the instance-level echo flag when not ``None``.

        Returns:
            The parsed JSON payload, or ``{}`` when the response has no body
            or the body is not valid JSON.

        Raises:
            HTTPError: If the status code is outside the 2xx range.
        """
        url = urljoin(self._base_url + "/", path.lstrip("/"))
        hdrs = dict(self._default_headers)
        if headers:
            hdrs.update(headers)
        if json_body is not None:
            hdrs.setdefault("Content-Type", "application/json")

        logger.debug("HTTP %s %s params=%s json=%s", method, url, params, json_body)

        # decide whether to echo this call
        do_echo = self._echo if echo is None else echo
        if do_echo:
            print("----- HTTP Request -----")
            print(f"{method} {url}")
            if params:
                print("Params:", params)
            # Only the masked copy is ever printed, so the raw API key never
            # reaches stdout.
            print("Headers:", self._mask_headers(hdrs))
            if json_body is not None:
                print("Body:", json.dumps(json_body, indent=2))
            print("------------------------")

        resp = self._session.request(
            method=method,
            url=url,
            headers=hdrs,
            params=params,
            json=json_body,
            timeout=self._timeout,
        )

        # Try to parse JSON payload (even on errors) for better messages.
        # ValueError is the common base of the JSON decode errors raised by
        # the stdlib json module, simplejson and requests.
        payload: Optional[dict]
        try:
            payload = resp.json() if resp.content else None
        except ValueError:
            payload = None

        if not (200 <= resp.status_code < 300):
            msg = payload.get("message") if isinstance(payload, dict) else resp.text
            raise HTTPError(resp.status_code, msg or "HTTP request failed", payload)

        if payload is None:
            return {}
        return payload

    # -------------
    # API methods
    # -------------
    def upsert(self, namespace: str, document: Dict[str, Any], **kwargs) -> dict:
        """
        POST /knowledge-bases/{namespace}/documents/upsert
        Returns UpsertDocumentResponse as dict.

        Extra keyword arguments (``headers``, ``params``, ``echo``) are passed
        to the request helper.

        Raises:
            TypeError: If ``document`` is not a dict.
        """
        if not isinstance(document, dict):
            raise TypeError("document must be a dict (JSON-serializable).")

        path = f"/knowledge-bases/{namespace}/documents/upsert"
        return self._request("POST", path, json_body=document, **kwargs)

    def fetch(self, namespace: str, document_id: str, **kwargs) -> dict:
        """
        GET /knowledge-bases/{namespace}/documents/{document_id}
        Returns GetDocumentResponse as dict.

        Extra keyword arguments (``headers``, ``params``, ``echo``) are passed
        to the request helper.
        """
        path = f"/knowledge-bases/{namespace}/documents/{document_id}"
        return self._request("GET", path, **kwargs)

    def list(self, namespace: str, **kwargs) -> dict:
        """
        GET /knowledge-bases/{namespace}/documents
        Returns ListDocumentsResponse as dict.

        ``params`` may be supplied as a keyword argument; remaining keyword
        arguments (``headers``, ``echo``) are passed to the request helper.
        """
        path = f"/knowledge-bases/{namespace}/documents"
        # Spec does not define query params, but keep hook if server adds (e.g., pagination).
        # pop (not get) so ``params`` is not passed twice to _request.
        params = kwargs.pop("params", None)
        return self._request("GET", path, params=params, **kwargs)

    def delete(self, namespace: str, document_id: str, **kwargs) -> dict:
        """
        DELETE /knowledge-bases/{namespace}/documents/{document_id}
        Returns DeleteDocumentResponse as dict.

        Extra keyword arguments (``headers``, ``params``, ``echo``) are passed
        to the request helper.
        """
        path = f"/knowledge-bases/{namespace}/documents/{document_id}"
        return self._request("DELETE", path, **kwargs)

0 commit comments

Comments
 (0)