From f45834addf633a52e55b31fd6813ce95622e615b Mon Sep 17 00:00:00 2001 From: Tomer Kashi Date: Sun, 29 Mar 2026 16:13:37 +0300 Subject: [PATCH] fix: support large audio files (>25 MB) for transcription MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flask had no MAX_CONTENT_LENGTH set, causing Werkzeug to reject uploads over 25 MB before they reached the route handler — making the existing compression logic unreachable dead code. - Set MAX_CONTENT_LENGTH to 2 GB to accommodate long WAV recordings - Calculate compression bitrate dynamically based on audio duration (clamped to 8–64 kbps, targeting 24 MB) so the output fits under Whisper's 25 MB API limit for recordings up to roughly 7 hours; because of the 8 kbps floor, longer recordings can still exceed the limit (the previously hardcoded 64k caused failures on recordings longer than ~50 minutes) Co-Authored-By: Claude Sonnet 4.6 --- app.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/app.py b/app.py index 2d73b19..9e96a46 100644 --- a/app.py +++ b/app.py @@ -109,6 +109,7 @@ def get_clip_speaker(clip, transcript): from openai import OpenAI app = Flask(__name__) +app.config['MAX_CONTENT_LENGTH'] = 2 * 1024 * 1024 * 1024 # 2 GB upload limit # ─── APP CONFIG ────────────────────────────────────────── _config_path = "config.json" @@ -311,9 +312,13 @@ def do_transcribe(): if os.path.getsize(filepath) > 25 * 1024 * 1024: progress.update(phase="transcribe", current=0, total=3, message="compressing audio…") compressed = filepath.rsplit(".", 1)[0] + "_compressed.mp3" + # Calculate bitrate to keep output under 24 MB regardless of duration + duration = progress["audio_duration"] or 1 + target_bits = 24 * 1024 * 1024 * 8 + bitrate_kbps = max(8, min(64, int(target_bits / duration / 1000))) subprocess.run([ "ffmpeg", "-y", "-i", filepath, - "-ac", "1", "-ar", "16000", "-b:a", "64k", + "-ac", "1", "-ar", "16000", "-b:a", f"{bitrate_kbps}k", compressed ], capture_output=True, check=True) upload_path = compressed @@ -745,9 +750,18 @@ def do_process(): if 
os.path.getsize(filepath) > 25 * 1024 * 1024: progress.update(phase="narration", current=0, total=3, message="compressing narration…") compressed = filepath.rsplit(".", 1)[0] + "_compressed.mp3" + try: + dur_result = subprocess.run( + ["ffprobe", "-v", "quiet", "-show_entries", "format=duration", "-of", "csv=p=0", filepath], + capture_output=True, text=True) + narr_duration = float(dur_result.stdout.strip()) or 1 + except Exception: + narr_duration = 1 + target_bits = 24 * 1024 * 1024 * 8 + bitrate_kbps = max(8, min(64, int(target_bits / narr_duration / 1000))) subprocess.run([ "ffmpeg", "-y", "-i", filepath, - "-ac", "1", "-ar", "16000", "-b:a", "64k", + "-ac", "1", "-ar", "16000", "-b:a", f"{bitrate_kbps}k", compressed ], capture_output=True, check=True) upload_path = compressed