From 88d57bcf0d1d6da3ab42e131c48be053829f19e1 Mon Sep 17 00:00:00 2001 From: Neelpatel1604 Date: Tue, 12 May 2026 16:01:07 -0400 Subject: [PATCH] feat: add list_files endpoint to documents resource Implemented the `list_files` method in both synchronous and asynchronous document resources to retrieve raw file objects in storage for a specified namespace. Updated documentation and added corresponding tests to ensure functionality and error handling for non-existent namespaces. --- README.md | 1 + examples/10_list_files.py | 95 +++++++++++++++++++++++++ llms.txt | 6 +- moorcheh_sdk/resources/documents.py | 103 ++++++++++++++++++++++++++++ moorcheh_sdk/types/__init__.py | 4 ++ moorcheh_sdk/types/document.py | 17 +++++ tests/resources/test_documents.py | 44 ++++++++++++ tests/test_async_client.py | 49 +++++++++++++ 8 files changed, 318 insertions(+), 1 deletion(-) create mode 100644 examples/10_list_files.py diff --git a/README.md b/README.md index 02ff745..926fd50 100644 --- a/README.md +++ b/README.md @@ -111,6 +111,7 @@ The `MoorchehClient` and `AsyncMoorchehClient` classes provide the same method s | `documents.upload` | namespace_name, documents | Upload text documents to a text namespace. | | `documents.get` | namespace_name, ids | Retrieve documents by ID. | | `documents.upload_file` | namespace_name, file_path | Upload a file for server-side ingestion. | +| `documents.list_files` | namespace_name | List raw files in storage for a namespace. | | `documents.delete` | namespace_name, ids | Delete documents by ID. | | `documents.delete_files` | namespace_name, file_names | Delete uploaded files by filename. | | `vectors.upload` | namespace_name, vectors=[{id, vector}] | Upload vectors to a vector namespace. | diff --git a/examples/10_list_files.py b/examples/10_list_files.py new file mode 100644 index 0000000..db3c269 --- /dev/null +++ b/examples/10_list_files.py @@ -0,0 +1,95 @@ +# examples/10_list_files.py + +import json +import logging +import sys + +from moorcheh_sdk import ( + APIError, + AuthenticationError, + InvalidInputError, + MoorchehClient, + MoorchehError, + NamespaceNotFound, +) + +# --- Configure Logging --- +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", +) +logger = logging.getLogger(__name__) +# ------------------------- + + +def main(): + """ + Example: list raw file objects in document storage for a namespace (GET + list-files). This is storage listing (e.g. after upload_file), not indexed + documents by ID — use documents.get or fetch_text_data for pipeline data. + """ + logger.info("--- Moorcheh SDK: List Files Example ---") + + try: + client = MoorchehClient() + logger.info("Client initialized successfully.") + except AuthenticationError as e: + logger.error(f"Authentication Error: {e}") + logger.error( + "Please ensure the MOORCHEH_API_KEY environment variable is set correctly." + ) + sys.exit(1) + except MoorchehError as e: + logger.error(f"Error initializing client: {e}", exc_info=True) + sys.exit(1) + + target_namespace = "test-documents" # Change this to your namespace name + + logger.info(f"Target namespace: {target_namespace}") + + try: + with client: + logger.info(f"Listing files in namespace '{target_namespace}'...") + response = client.documents.list_files(namespace_name=target_namespace) + + logger.info("--- API Response (200 OK) ---") + logger.info(json.dumps(response, indent=2)) + logger.info("-------------------------------") + + if response.get("success"): + count = response.get("file_count", 0) + files = response.get("files") or [] + logger.info(f"✅ Listed {count} file object(s).") + for f in files: + logger.info( + " %s | %s bytes | %s", + f.get("file_name"), + f.get("size"), + f.get("last_modified"), + ) + else: + logger.warning( + f"Unexpected response: success={response.get('success')!r}" + ) + + except NamespaceNotFound: + logger.error(f"Namespace '{target_namespace}' was not found.") + logger.info( + "Create a namespace first (see examples/01_create_namespace.py) " + "and upload a file (examples/07_upload_file.py)." + ) + except InvalidInputError as e: + logger.error(f"Invalid input: {e}") + except AuthenticationError as e: + logger.error(f"Authentication failed: {e}") + except APIError: + logger.exception("An API error occurred.") + except MoorchehError: + logger.exception("An SDK or network error occurred.") + except Exception: + logger.exception("An unexpected error occurred.") + + +if __name__ == "__main__": + main() diff --git a/llms.txt b/llms.txt index 9350d95..971b990 100644 --- a/llms.txt +++ b/llms.txt @@ -423,6 +423,10 @@ Moorcheh Python SDK. Use this file as guidance for LLMs and tooling when reading curl -X PUT "" \ -H "Content-Type: application/pdf" \ --data-binary "@/path/to/document.pdf" +- GET /namespaces/{namespace_name}/list-files (list raw file objects in storage) + - Lists file_name, size (bytes), last_modified for each object under the namespace prefix. + - No query parameters or body. + - Common responses: 200, 400, 401, 404, 429. - DELETE /namespaces/{namespace_name}/delete-file (delete file(s)) - At least one of: body fileName, body fileNames, or query fileName. - Body: @@ -650,7 +654,7 @@ Moorcheh Python SDK. Use this file as guidance for LLMs and tooling when reading - documents.delete(namespace_name, ids) - Deletes by ID (max 1000). - Raises NamespaceNotFound, InvalidInputError. -- File uploads: use the REST upload-url and delete-file endpoints with the pre-signed S3 flow. +- File uploads: use the REST upload-url and delete-file endpoints with the pre-signed S3 flow; use GET list-files to list raw objects in storage. ### Vectors - vectors.upload(namespace_name, vectors) diff --git a/moorcheh_sdk/resources/documents.py b/moorcheh_sdk/resources/documents.py index 4cc9dd1..9c23ce9 100644 --- a/moorcheh_sdk/resources/documents.py +++ b/moorcheh_sdk/resources/documents.py @@ -19,6 +19,7 @@ FetchTextDataResponse, FileDeleteResponse, FileUploadResponse, + ListFilesResponse, ) from ..utils.batching import chunk_iterable from ..utils.constants import INVALID_ID_CHARS @@ -544,6 +545,57 @@ def upload_file( if should_close and hasattr(file_obj, "close"): file_obj.close() + @required_args(["namespace_name"], types={"namespace_name": str}) + def list_files(self, namespace_name: str) -> ListFilesResponse: + """ + Lists raw file objects in document storage for a namespace. + + This returns objects uploaded via the pre-signed upload flow (for example + ``upload_file``). It is distinct from ``get``, which returns indexed text + documents by ID from the text pipeline. + + Args: + namespace_name: The name of the target namespace. + + Returns: + A dictionary with ``success``, ``namespace``, ``file_count``, and + ``files`` (each file has ``file_name``, ``size``, ``last_modified``). + + Raises: + InvalidInputError: If the API returns 400. + NamespaceNotFound: If the namespace does not exist (404). + AuthenticationError: If authentication fails (401/403). + APIError: For other API errors. + MoorchehError: For network issues. + + Example: + >>> client = MoorchehClient() + >>> response = client.documents.list_files(namespace_name="my-docs") + >>> print(response["file_count"], response["files"]) + """ + logger.info(f"Listing files in namespace '{namespace_name}'...") + + endpoint = f"/namespaces/{namespace_name}/list-files" + + response_data = self._client._request( + method="GET", + endpoint=endpoint, + json_data=None, + expected_status=200, + ) + + if not isinstance(response_data, dict): + logger.error("List files response was not a dictionary.") + raise APIError( + message="Unexpected response format from list files endpoint." + ) + + logger.info( + f"List files for '{namespace_name}' completed. Count:" + f" {response_data.get('file_count', 'unknown')}" + ) + return cast(ListFilesResponse, response_data) + @required_args( ["namespace_name", "file_names"], types={"namespace_name": str, "file_names": list}, @@ -1125,6 +1177,57 @@ async def upload_file( if should_close and hasattr(file_obj, "close"): file_obj.close() + @required_args(["namespace_name"], types={"namespace_name": str}) + async def list_files(self, namespace_name: str) -> ListFilesResponse: + """ + Lists raw file objects in document storage for a namespace (async). + + This returns objects uploaded via the pre-signed upload flow (for example + ``upload_file``). It is distinct from ``get``, which returns indexed text + documents by ID from the text pipeline. + + Args: + namespace_name: The name of the target namespace. + + Returns: + A dictionary with ``success``, ``namespace``, ``file_count``, and + ``files`` (each file has ``file_name``, ``size``, ``last_modified``). + + Raises: + InvalidInputError: If the API returns 400. + NamespaceNotFound: If the namespace does not exist (404). + AuthenticationError: If authentication fails (401/403). + APIError: For other API errors. + MoorchehError: For network issues. + + Example: + >>> client = AsyncMoorchehClient() + >>> response = await client.documents.list_files(namespace_name="my-docs") + >>> print(response["file_count"]) + """ + logger.info(f"Listing files in namespace '{namespace_name}'...") + + endpoint = f"/namespaces/{namespace_name}/list-files" + + response_data = await self._client._request( + method="GET", + endpoint=endpoint, + json_data=None, + expected_status=200, + ) + + if not isinstance(response_data, dict): + logger.error("List files response was not a dictionary.") + raise APIError( + message="Unexpected response format from list files endpoint." + ) + + logger.info( + f"List files for '{namespace_name}' completed. Count:" + f" {response_data.get('file_count', 'unknown')}" + ) + return cast(ListFilesResponse, response_data) + @required_args( ["namespace_name", "file_names"], types={"namespace_name": str, "file_names": list}, diff --git a/moorcheh_sdk/types/__init__.py b/moorcheh_sdk/types/__init__.py index fbd6866..44931a8 100644 --- a/moorcheh_sdk/types/__init__.py +++ b/moorcheh_sdk/types/__init__.py @@ -13,7 +13,9 @@ FetchTextDataResponse, FileDeleteResponse, FileDeleteResult, + FileListItem, FileUploadResponse, + ListFilesResponse, TextDataItem, TextDataStatistics, ) @@ -48,7 +50,9 @@ "TextDataStatistics", "FileDeleteResponse", "FileDeleteResult", + "FileListItem", "FileUploadResponse", + "ListFilesResponse", "Vector", "VectorUploadResponse", "VectorDeleteResponse", diff --git a/moorcheh_sdk/types/document.py b/moorcheh_sdk/types/document.py index 99cd317..28d9737 100644 --- a/moorcheh_sdk/types/document.py +++ b/moorcheh_sdk/types/document.py @@ -71,3 +71,20 @@ class FileDeleteResponse(TypedDict): message: str namespace: str results: list[FileDeleteResult] + + +class FileListItem(TypedDict): + """One object from ``GET .../list-files`` (raw storage listing).""" + + file_name: str + size: int + last_modified: str + + +class ListFilesResponse(TypedDict): + """Response from ``GET .../namespaces/{namespace}/list-files``.""" + + success: bool + namespace: str + file_count: int + files: list[FileListItem] diff --git a/tests/resources/test_documents.py b/tests/resources/test_documents.py index 5b8362e..cae8663 100644 --- a/tests/resources/test_documents.py +++ b/tests/resources/test_documents.py @@ -511,6 +511,50 @@ def test_upload_file_invalid_input_error(client, mocker, mock_response, tmp_path ) +def test_list_files_success(client, mocker, mock_response): + """Test GET list-files (raw objects in document storage).""" + expected_response = { + "success": True, + "namespace": TEST_NAMESPACE, + "file_count": 2, + "files": [ + { + "file_name": "report.pdf", + "size": 245678, + "last_modified": "2026-05-10T14:22:11.000Z", + }, + { + "file_name": "notes.txt", + "size": 1204, + "last_modified": "2026-05-09T09:01:00.000Z", + }, + ], + } + mock_resp = mock_response(200, json_data=expected_response) + client._mock_httpx_instance.request.return_value = mock_resp + + result = client.documents.list_files(namespace_name=TEST_NAMESPACE) + + client._mock_httpx_instance.request.assert_called_once_with( + method="GET", + url=f"/namespaces/{TEST_NAMESPACE}/list-files", + json=None, + params=None, + ) + assert result == expected_response + + +def test_list_files_namespace_not_found(client, mocker, mock_response): + """Test list_files when namespace is missing.""" + error_text = f"Namespace '{TEST_NAMESPACE}' not found." + mock_resp = mock_response(404, text_data=error_text) + client._mock_httpx_instance.request.return_value = mock_resp + + with pytest.raises(NamespaceNotFound, match=error_text): + client.documents.list_files(namespace_name=TEST_NAMESPACE) + client._mock_httpx_instance.request.assert_called_once() + + def test_delete_files_success_200(client, mocker, mock_response): """Test successful deletion of files.""" file_names = ["document.pdf", "report.docx"] diff --git a/tests/test_async_client.py b/tests/test_async_client.py index c44f6c4..449d23a 100644 --- a/tests/test_async_client.py +++ b/tests/test_async_client.py @@ -464,6 +464,55 @@ async def test_upload_file_api_error(client, tmp_path): assert mock_request.call_args.kwargs["path"] == "/namespaces/test/upload-url" +@pytest.mark.asyncio +async def test_list_files_success(client): + """Test successful async list_files.""" + expected_response = { + "success": True, + "namespace": "test", + "file_count": 1, + "files": [ + { + "file_name": "report.pdf", + "size": 100, + "last_modified": "2026-05-10T14:22:11.000Z", + } + ], + } + + with patch.object(client, "request", new_callable=AsyncMock) as mock_request: + mock_request.return_value = MagicMock( + status_code=200, json=lambda: expected_response + ) + + response = await client.documents.list_files(namespace_name="test") + + assert response == expected_response + mock_request.assert_called_once() + kwargs = mock_request.call_args.kwargs + assert kwargs["method"] == "GET" + assert kwargs["path"] == "/namespaces/test/list-files" + assert kwargs["json"] is None + + +@pytest.mark.asyncio +async def test_list_files_namespace_not_found(client): + """Test async list_files against a non-existent namespace.""" + error_text = "Namespace 'test' not found." + + with patch.object(client, "request", new_callable=AsyncMock) as mock_request: + mock_response_obj = MagicMock() + mock_response_obj.status_code = 404 + mock_response_obj.text = error_text + mock_response_obj.json.side_effect = Exception("Cannot decode JSON") + mock_request.return_value = mock_response_obj + + with pytest.raises(NamespaceNotFound, match=error_text): + await client.documents.list_files(namespace_name="test") + mock_request.assert_called_once() + assert mock_request.call_args.kwargs["path"] == "/namespaces/test/list-files" + + @pytest.mark.asyncio async def test_delete_files_success(client): """Test successful async deletion of files."""