Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ The `MoorchehClient` and `AsyncMoorchehClient` classes provide the same method s
| `documents.upload` | namespace_name, documents | Upload text documents to a text namespace. |
| `documents.get` | namespace_name, ids | Retrieve documents by ID. |
| `documents.upload_file` | namespace_name, file_path | Upload a file for server-side ingestion. |
| `documents.list_files` | namespace_name | List raw files in storage for a namespace. |
| `documents.delete` | namespace_name, ids | Delete documents by ID. |
| `documents.delete_files` | namespace_name, file_names | Delete uploaded files by filename. |
| `vectors.upload` | namespace_name, vectors=[{id, vector}] | Upload vectors to a vector namespace. |
Expand Down
95 changes: 95 additions & 0 deletions examples/10_list_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# examples/10_list_files.py

import json
import logging
import sys

from moorcheh_sdk import (
APIError,
AuthenticationError,
InvalidInputError,
MoorchehClient,
MoorchehError,
NamespaceNotFound,
)

# --- Configure Logging ---
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)
# -------------------------


def main():
    """
    Run the list-files example end to end.

    Builds a ``MoorchehClient``, asks for the raw file objects held in document
    storage for one namespace (GET list-files), and logs the full response plus
    one line per file. This is a storage listing (e.g. after upload_file), not
    indexed documents by ID — use documents.get or fetch_text_data for
    pipeline data.

    The process exits with status 1 if the client cannot be constructed.
    Failures during the listing call are logged and do not change the exit
    status.
    """
    logger.info("--- Moorcheh SDK: List Files Example ---")

    # Client construction is the only step that aborts the script on failure.
    try:
        client = MoorchehClient()
    except AuthenticationError as auth_err:
        logger.error(f"Authentication Error: {auth_err}")
        logger.error(
            "Please ensure the MOORCHEH_API_KEY environment variable is set correctly."
        )
        sys.exit(1)
    except MoorchehError as init_err:
        logger.error(f"Error initializing client: {init_err}", exc_info=True)
        sys.exit(1)
    else:
        logger.info("Client initialized successfully.")

    target_namespace = "test-documents"  # Change this to your namespace name
    logger.info(f"Target namespace: {target_namespace}")

    try:
        # The client is used as a context manager for the duration of the call.
        with client:
            logger.info(f"Listing files in namespace '{target_namespace}'...")
            listing = client.documents.list_files(namespace_name=target_namespace)

            logger.info("--- API Response (200 OK) ---")
            logger.info(json.dumps(listing, indent=2))
            logger.info("-------------------------------")

            succeeded = listing.get("success")
            if not succeeded:
                logger.warning(f"Unexpected response: success={succeeded!r}")
            else:
                total = listing.get("file_count", 0)
                logger.info(f"✅ Listed {total} file object(s).")
                # "files" may be missing or None; treat both as an empty list.
                for entry in listing.get("files") or []:
                    logger.info(
                        " %s | %s bytes | %s",
                        entry.get("file_name"),
                        entry.get("size"),
                        entry.get("last_modified"),
                    )

    except NamespaceNotFound:
        logger.error(f"Namespace '{target_namespace}' was not found.")
        logger.info(
            "Create a namespace first (see examples/01_create_namespace.py) "
            "and upload a file (examples/07_upload_file.py)."
        )
    except InvalidInputError as input_err:
        logger.error(f"Invalid input: {input_err}")
    except AuthenticationError as auth_err:
        logger.error(f"Authentication failed: {auth_err}")
    except APIError:
        logger.exception("An API error occurred.")
    except MoorchehError:
        logger.exception("An SDK or network error occurred.")
    except Exception:
        # Top-level boundary of an example script: log with traceback, never crash.
        logger.exception("An unexpected error occurred.")


if __name__ == "__main__":
main()
6 changes: 5 additions & 1 deletion llms.txt
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,10 @@ Moorcheh Python SDK. Use this file as guidance for LLMs and tooling when reading
curl -X PUT "<uploadUrl>" \
-H "Content-Type: application/pdf" \
--data-binary "@/path/to/document.pdf"
- GET /namespaces/{namespace_name}/list-files (list raw file objects in storage)
- Lists file_name, size (bytes), last_modified for each object under the namespace prefix.
- No query parameters or body.
- Common responses: 200, 400, 401, 404, 429.
- DELETE /namespaces/{namespace_name}/delete-file (delete file(s))
- At least one of: body fileName, body fileNames, or query fileName.
- Body:
Expand Down Expand Up @@ -650,7 +654,7 @@ Moorcheh Python SDK. Use this file as guidance for LLMs and tooling when reading
- documents.delete(namespace_name, ids)
- Deletes by ID (max 1000).
- Raises NamespaceNotFound, InvalidInputError.
- File uploads: use the REST upload-url and delete-file endpoints with the pre-signed S3 flow.
- File uploads: use the REST upload-url and delete-file endpoints with the pre-signed S3 flow; to list raw objects in storage, use GET list-files (SDK: documents.list_files(namespace_name)).

### Vectors
- vectors.upload(namespace_name, vectors)
Expand Down
103 changes: 103 additions & 0 deletions moorcheh_sdk/resources/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
FetchTextDataResponse,
FileDeleteResponse,
FileUploadResponse,
ListFilesResponse,
)
from ..utils.batching import chunk_iterable
from ..utils.constants import INVALID_ID_CHARS
Expand Down Expand Up @@ -544,6 +545,57 @@ def upload_file(
if should_close and hasattr(file_obj, "close"):
file_obj.close()

@required_args(["namespace_name"], types={"namespace_name": str})
def list_files(self, namespace_name: str) -> ListFilesResponse:
    """
    Fetch the listing of raw file objects stored for a namespace.

    The listing covers objects uploaded through the pre-signed upload flow
    (for example ``upload_file``). It is not the same as ``get``, which
    returns indexed text documents by ID from the text pipeline.

    Args:
        namespace_name: The name of the target namespace.

    Returns:
        A dictionary with ``success``, ``namespace``, ``file_count``, and
        ``files`` (each file has ``file_name``, ``size``, ``last_modified``).

    Raises:
        InvalidInputError: If the API returns 400.
        NamespaceNotFound: If the namespace does not exist (404).
        AuthenticationError: If authentication fails (401/403).
        APIError: For other API errors, or when the response body is not
            a dictionary.
        MoorchehError: For network issues.

    Example:
        >>> client = MoorchehClient()
        >>> response = client.documents.list_files(namespace_name="my-docs")
        >>> print(response["file_count"], response["files"])
    """
    logger.info(f"Listing files in namespace '{namespace_name}'...")

    # GET request with no body; only a 200 is treated as success.
    payload = self._client._request(
        method="GET",
        endpoint=f"/namespaces/{namespace_name}/list-files",
        json_data=None,
        expected_status=200,
    )

    if isinstance(payload, dict):
        file_count = payload.get("file_count", "unknown")
        logger.info(
            f"List files for '{namespace_name}' completed. Count: {file_count}"
        )
        # The dict is returned as-is; cast() only informs the type checker.
        return cast(ListFilesResponse, payload)

    logger.error("List files response was not a dictionary.")
    raise APIError(message="Unexpected response format from list files endpoint.")

@required_args(
["namespace_name", "file_names"],
types={"namespace_name": str, "file_names": list},
Expand Down Expand Up @@ -1125,6 +1177,57 @@ async def upload_file(
if should_close and hasattr(file_obj, "close"):
file_obj.close()

@required_args(["namespace_name"], types={"namespace_name": str})
async def list_files(self, namespace_name: str) -> ListFilesResponse:
    """
    Fetch the listing of raw file objects stored for a namespace (async).

    The listing covers objects uploaded through the pre-signed upload flow
    (for example ``upload_file``). It is not the same as ``get``, which
    returns indexed text documents by ID from the text pipeline.

    Args:
        namespace_name: The name of the target namespace.

    Returns:
        A dictionary with ``success``, ``namespace``, ``file_count``, and
        ``files`` (each file has ``file_name``, ``size``, ``last_modified``).

    Raises:
        InvalidInputError: If the API returns 400.
        NamespaceNotFound: If the namespace does not exist (404).
        AuthenticationError: If authentication fails (401/403).
        APIError: For other API errors, or when the response body is not
            a dictionary.
        MoorchehError: For network issues.

    Example:
        >>> client = AsyncMoorchehClient()
        >>> response = await client.documents.list_files(namespace_name="my-docs")
        >>> print(response["file_count"])
    """
    logger.info(f"Listing files in namespace '{namespace_name}'...")

    # GET request with no body; only a 200 is treated as success.
    payload = await self._client._request(
        method="GET",
        endpoint=f"/namespaces/{namespace_name}/list-files",
        json_data=None,
        expected_status=200,
    )

    if isinstance(payload, dict):
        file_count = payload.get("file_count", "unknown")
        logger.info(
            f"List files for '{namespace_name}' completed. Count: {file_count}"
        )
        # The dict is returned as-is; cast() only informs the type checker.
        return cast(ListFilesResponse, payload)

    logger.error("List files response was not a dictionary.")
    raise APIError(message="Unexpected response format from list files endpoint.")

@required_args(
["namespace_name", "file_names"],
types={"namespace_name": str, "file_names": list},
Expand Down
4 changes: 4 additions & 0 deletions moorcheh_sdk/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
FetchTextDataResponse,
FileDeleteResponse,
FileDeleteResult,
FileListItem,
FileUploadResponse,
ListFilesResponse,
TextDataItem,
TextDataStatistics,
)
Expand Down Expand Up @@ -48,7 +50,9 @@
"TextDataStatistics",
"FileDeleteResponse",
"FileDeleteResult",
"FileListItem",
"FileUploadResponse",
"ListFilesResponse",
"Vector",
"VectorUploadResponse",
"VectorDeleteResponse",
Expand Down
17 changes: 17 additions & 0 deletions moorcheh_sdk/types/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,20 @@ class FileDeleteResponse(TypedDict):
message: str
namespace: str
results: list[FileDeleteResult]


class FileListItem(TypedDict):
    """Single stored-file entry returned by ``GET .../list-files`` (raw storage listing)."""

    # Name of the file object in storage.
    file_name: str
    # Object size in bytes.
    size: int
    # Last-modified timestamp as a string, e.g. "2026-05-10T14:22:11.000Z".
    last_modified: str


class ListFilesResponse(TypedDict):
    """Body returned by ``GET .../namespaces/{namespace}/list-files``."""

    # Success flag reported by the API.
    success: bool
    # Namespace the listing belongs to.
    namespace: str
    # Number of file objects reported (presumably len(files) — confirm with API docs).
    file_count: int
    # One entry per raw file object in storage.
    files: list[FileListItem]
44 changes: 44 additions & 0 deletions tests/resources/test_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,50 @@ def test_upload_file_invalid_input_error(client, mocker, mock_response, tmp_path
)


def test_list_files_success(client, mocker, mock_response):
    """list_files sends GET .../list-files and returns the decoded body unchanged."""
    stored_files = [
        {
            "file_name": "report.pdf",
            "size": 245678,
            "last_modified": "2026-05-10T14:22:11.000Z",
        },
        {
            "file_name": "notes.txt",
            "size": 1204,
            "last_modified": "2026-05-09T09:01:00.000Z",
        },
    ]
    expected_response = {
        "success": True,
        "namespace": TEST_NAMESPACE,
        "file_count": 2,
        "files": stored_files,
    }
    http_request = client._mock_httpx_instance.request
    http_request.return_value = mock_response(200, json_data=expected_response)

    result = client.documents.list_files(namespace_name=TEST_NAMESPACE)

    # Exactly one HTTP call: a GET with neither a JSON body nor query params.
    http_request.assert_called_once_with(
        method="GET",
        url=f"/namespaces/{TEST_NAMESPACE}/list-files",
        json=None,
        params=None,
    )
    assert result == expected_response


def test_list_files_namespace_not_found(client, mocker, mock_response):
    """A 404 from list-files surfaces as NamespaceNotFound after a single request."""
    error_text = f"Namespace '{TEST_NAMESPACE}' not found."
    http_request = client._mock_httpx_instance.request
    http_request.return_value = mock_response(404, text_data=error_text)

    with pytest.raises(NamespaceNotFound, match=error_text):
        client.documents.list_files(namespace_name=TEST_NAMESPACE)

    # Checked outside the raises block so the assertion actually runs.
    http_request.assert_called_once()


def test_delete_files_success_200(client, mocker, mock_response):
"""Test successful deletion of files."""
file_names = ["document.pdf", "report.docx"]
Expand Down
49 changes: 49 additions & 0 deletions tests/test_async_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,55 @@ async def test_upload_file_api_error(client, tmp_path):
assert mock_request.call_args.kwargs["path"] == "/namespaces/test/upload-url"


@pytest.mark.asyncio
async def test_list_files_success(client):
    """Async list_files sends GET .../list-files and returns the decoded body."""
    expected_response = {
        "success": True,
        "namespace": "test",
        "file_count": 1,
        "files": [
            {
                "file_name": "report.pdf",
                "size": 100,
                "last_modified": "2026-05-10T14:22:11.000Z",
            }
        ],
    }

    # Stand-in HTTP response: status 200 whose .json() yields the expected body.
    fake_http_response = MagicMock()
    fake_http_response.status_code = 200
    fake_http_response.json = lambda: expected_response

    with patch.object(client, "request", new_callable=AsyncMock) as mock_request:
        mock_request.return_value = fake_http_response

        response = await client.documents.list_files(namespace_name="test")

        assert response == expected_response
        mock_request.assert_called_once()
        sent = mock_request.call_args.kwargs
        assert sent["method"] == "GET"
        assert sent["path"] == "/namespaces/test/list-files"
        assert sent["json"] is None


@pytest.mark.asyncio
async def test_list_files_namespace_not_found(client):
    """Async list_files raises NamespaceNotFound when the endpoint answers 404."""
    error_text = "Namespace 'test' not found."

    # Plain-text 404: the body is not JSON, so .json() is made to fail.
    not_found_response = MagicMock()
    not_found_response.status_code = 404
    not_found_response.text = error_text
    not_found_response.json.side_effect = Exception("Cannot decode JSON")

    with patch.object(client, "request", new_callable=AsyncMock) as mock_request:
        mock_request.return_value = not_found_response

        with pytest.raises(NamespaceNotFound, match=error_text):
            await client.documents.list_files(namespace_name="test")

        # Checked outside the raises block so the assertions actually run.
        mock_request.assert_called_once()
        assert mock_request.call_args.kwargs["path"] == "/namespaces/test/list-files"


@pytest.mark.asyncio
async def test_delete_files_success(client):
"""Test successful async deletion of files."""
Expand Down
Loading