diff --git a/.jules/bolt.md b/.jules/bolt.md index 0fb92dce..f996f711 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -2,3 +2,7 @@ ## 2026-02-19 - [Frontend Download Optimization] **Learning:** Using `fetch` + `blob` to download large files (500MB+) in browser JavaScript causes significant memory spikes and potential crashes. **Action:** Always prefer direct link downloads (creating a hidden `<a>` tag) for large file downloads. Implemented `triggerDownload` utility in `api.ts` to handle this centrally with backend compatibility checks. + +## 2025-05-23 - Avoid Reading Files for Size Validation in Async Python +**Learning:** Iterating over `UploadFile.file` (a `SpooledTemporaryFile`) to check file size is a synchronous operation that blocks the event loop, causing severe performance degradation (e.g., a 1s block for a 50MB file). +**Action:** Always use `file.file.seek(0, 2)` and `file.file.tell()` to determine size in O(1) time without reading content. Be sure to reset the file pointer with `await file.seek(0)` afterwards. diff --git a/backend/src/api/conversions.py b/backend/src/api/conversions.py index 54b82298..3651ab70 100644 --- a/backend/src/api/conversions.py +++ b/backend/src/api/conversions.py @@ -201,16 +201,17 @@ async def validate_file_size(file: UploadFile) -> tuple[bool, str]: Returns: Tuple of (is_valid, error_message) """ - # Read file to check size - total_size = 0 - for chunk in file.file: - total_size += len(chunk) - if total_size > MAX_UPLOAD_SIZE: - return False, f"File size exceeds {MAX_UPLOAD_SIZE // (1024 * 1024)}MB limit" + # Check size using seek/tell (O(1) instead of O(N) read) + # file.file is a SpooledTemporaryFile which supports seek/tell synchronously + file.file.seek(0, 2) + file_size = file.file.tell() # Reset file pointer await file.seek(0) + if file_size > MAX_UPLOAD_SIZE: + return False, f"File size exceeds {MAX_UPLOAD_SIZE // (1024 * 1024)}MB limit" + return True, ""