From 47787ca88c13da6475852169dcd19688f40cf151 Mon Sep 17 00:00:00 2001
From: Shaul Amsterdamski <shaulamsterdamski@mac.local>
Date: Sat, 11 Apr 2026 12:39:21 +0300
Subject: [PATCH 1/8] docs: multi-file upload design for parallel transcription

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../2026-04-11-multi-file-upload-design.md    | 68 +++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 docs/plans/2026-04-11-multi-file-upload-design.md
diff --git a/docs/plans/2026-04-11-multi-file-upload-design.md b/docs/plans/2026-04-11-multi-file-upload-design.md
new file mode 100644
index 0000000..46fd7a2
--- /dev/null
+++ b/docs/plans/2026-04-11-multi-file-upload-design.md
@@ -0,0 +1,68 @@
+# Multi-File Upload & Parallel Transcription
+
+## Problem
+
+Users often have multiple interview recordings (different speakers) for the same project. Currently they must merge files externally before uploading, which is slow and adds a manual step. Transcribing one large merged file is also slower than transcribing smaller files in parallel.
+
+## Design
+
+### Approach: Multi-file single endpoint with parallel transcription
+
+The upload zone accepts multiple files. All files are sent to `/transcribe` in one request. The backend spawns a thread per file, transcribes in parallel via Whisper, then merges the results into one unified transcript, shifting each file's timestamps by a cumulative offset so that segments from different files don't overlap.
+
+### State changes
+
+`source_file` (string) becomes `source_files` (array):
+
+```json
+{
+  "source_files": [
+    { "filename": "interview_a.wav", "path": "projects/x/uploads/interview_a.wav", "offset": 0, "duration": 300.5 },
+    { "filename": "interview_b.wav", "path": "projects/x/uploads/interview_b.wav", "offset": 300.5, "duration": 245.8 }
+  ]
+}
+```
+
+Backward compatible: old projects with `source_file` (string) still load fine.
+
+### Transcript merging
+
+- Each file is transcribed independently (compression + Whisper call per file)
+- After all complete, transcripts are merged in upload order
+- Each file's timestamps are shifted by the sum of all preceding files' durations (e.g. file B starts where file A ends)
+- Each segment and word gets a `source_index` field pointing to its entry in `source_files`
+
+### Clip cutting
+
+When cutting a clip, the system:
+1. Looks up `source_index` on the segment/word
+2. Subtracts the file's `offset` to get the real timestamp within that file
+3. Runs ffmpeg against the correct source file
+
+### Progress UX
+
+- Progress bar shows overall status: "Transcribing file 2 of 3..."
+- Compression and upload steps are tracked individually for each file
+- Errors on one file don't block others; failed files are reported at the end
+
+### UI changes
+
+- Upload zone `<input>` gets `multiple` attribute
+- File info bar shows a count ("3 files selected") instead of a single filename when more than one file is chosen
+- Transcript view unchanged — segments appear with speaker colors/labels as before
+- A subtle divider or label between file boundaries (optional, low priority)
+
+### Backend changes
+
+- `/transcribe` accepts multiple files in `request.files.getlist("file")`
+- Each file gets its own thread for compress + Whisper
+- Results collected and merged after all threads complete
+- `source_files` array written to state
+
+### What stays the same
+
+- Speaker diarization (runs per-file, results merged)
+- Clip selection UI (word clicking)
+- Assembly timeline
+- All export routes
+- Narration workflow

From 792cfe3f54542fe954ff416deb78d2a65a14b9a2 Mon Sep 17 00:00:00 2001
From: Shaul Amsterdamski <shaulamsterdamski@mac.local>
Date: Sat, 11 Apr 2026 12:44:28 +0300
Subject: [PATCH 2/8] docs: multi-file upload implementation plan

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 docs/plans/2026-04-11-multi-file-upload.md | 463 +++++++++++++++++++++
 1 file changed, 463 insertions(+)
 create mode 100644 docs/plans/2026-04-11-multi-file-upload.md

diff --git a/docs/plans/2026-04-11-multi-file-upload.md b/docs/plans/2026-04-11-multi-file-upload.md
new file mode 100644
index 0000000..ca1fb48
--- /dev/null
+++ b/docs/plans/2026-04-11-multi-file-upload.md
@@ -0,0 +1,463 @@
+# Multi-File Upload Implementation Plan
+
+> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
+
+**Goal:** Let users upload multiple interview files at once, transcribe them in parallel, and work with one unified transcript.
+
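+**Architecture:** The `/transcribe` endpoint accepts multiple files via `getlist("file")`. A worker thread per file (at most 4 running at once) handles compression + Whisper. After all threads complete, the transcripts are merged with time offsets into one unified state, and every segment and word is tagged with a `source_index`. Clip cutting resolves which source file to use by matching the clip's start time against each file's `offset`/`duration` range.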
+
+**Tech Stack:** Python/Flask, threading, ffmpeg, OpenAI Whisper API, vanilla JS
+
+---
+
+### Task 1: Backend — `source_files` state + backward compat
+
+**Files:**
+- Modify: `app.py:232-234` (initial state shape)
+- Modify: `app.py:516-564` (cut_clips — source file lookup)
+
+**Step 1: Update initial state shape**
+
+In the default state dict returned by `load_state()` (line 232), add `source_files` and keep `source_file` for backward compat:
+
+```python
+return {"transcript": [], "words": [], "clips": [], "text_clips": [],
+        "narration_transcript": [], "narration_words": [], "narr_text_clips": [],
+        "narration": [], "assembly": [], "source_file": None, "source_files": [], "phase": 1}
+```
+
+**Step 2: Add helper to resolve source file for a clip**
+
+Add this function after `get_clip_speaker()` (after line 108):
+
+```python
+def resolve_source_for_clip(clip, state):
+    """Given a clip with start/end times, find the correct source file and real timestamps."""
+    source_files = state.get("source_files", [])
+    if not source_files:
+        # Legacy single-file project
+        sf = state.get("source_file", "")
+        return sf, clip["start"], clip["end"]
+
+    # Find which source file this clip belongs to by checking source_index on words/segments
+    # or by timestamp range
+    for i, sf in enumerate(source_files):
+        offset = sf["offset"]
+        end_time = offset + sf["duration"]
+        if clip["start"] >= offset and clip["start"] < end_time:
+            real_start = clip["start"] - offset
+            real_end = clip["end"] - offset
+            return sf["path"], real_start, real_end
+    # Fallback to last file
+    sf = source_files[-1]
+    return sf["path"], clip["start"] - sf["offset"], clip["end"] - sf["offset"]
+```
+
+**Step 3: Update `cut_clips` to use resolver**
+
+In `do_cut()` (line 530), replace the single `source_path` lookup with per-clip resolution:
+
+```python
+def do_cut():
+  try:
+    st = load_state()
+    # Validate we have source files
+    source_files = st.get("source_files", [])
+    source_file = st.get("source_file", "")
+    if not source_files and not source_file:
+        st["status"] = "error: no source file found"
+        save_state(st)
+        progress.update(phase=None, message="")
+        return
+    # For legacy single-file: check it exists
+    if not source_files and source_file and not os.path.exists(source_file):
+        st["status"] = f"error: source file not found — {source_file}"
+        save_state(st)
+        progress.update(phase=None, message="")
+        return
+
+    clips = st.get("text_clips", [])
+    progress.update(phase="cut", current=0, total=len(clips), message=f"cutting 0/{len(clips)} clips…")
+
+    cut_files = []
+    for i, clip in enumerate(clips):
+        progress.update(current=i, message=f"cutting {clip['id']}… ({i+1}/{len(clips)})")
+        source_path, real_start, real_end = resolve_source_for_clip(clip, st)
+        if not os.path.exists(source_path):
+            continue
+        out_path = os.path.join(pdir("clips"), f"{clip['id']}.wav")
+        duration = real_end - real_start
+        cmd = [
+            "ffmpeg", "-y",
+            "-i", source_path,
+            "-ss", str(real_start),
+            "-t", str(duration),
+            "-c:a", "pcm_s16le", "-ar", "44100", "-ac", "1",
+            out_path
+        ]
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        if result.returncode == 0:
+            cut_files.append({
+                "id": clip["id"],
+                "path": out_path,
+                "start": clip["start"],
+                "end": clip["end"],
+                "duration": round(duration, 2)
+            })
+```
+
+**Step 4: Commit**
+
+```bash
+git add app.py
+git commit -m "feat: add source_files state and clip source resolver"
+```
+
+---
+
+### Task 2: Backend — multi-file `/transcribe` endpoint
+
+**Files:**
+- Modify: `app.py:279-393` (transcribe route + do_transcribe)
+
+**Step 1: Extract single-file transcription into a reusable function**
+
+Add this function before the `/transcribe` route:
+
+```python
+def transcribe_single_file(filepath, whisper_lang, diarize, file_index, total_files):
+    """Transcribe a single file. Returns (segments, words, duration) or raises."""
+    # Get duration
+    try:
+        dur_result = subprocess.run(
+            ["ffprobe", "-v", "quiet", "-show_entries", "format=duration", "-of", "csv=p=0", filepath],
+            capture_output=True, text=True)
+        duration = float(dur_result.stdout.strip())
+    except Exception:
+        duration = 0
+
+    # Compress if needed
+    upload_path = filepath
+    if os.path.getsize(filepath) > 25 * 1024 * 1024:
+        progress.update(phase="transcribe", current=file_index,
+                       total=total_files, message=f"compressing file {file_index+1}/{total_files}…")
+        compressed = filepath.rsplit(".", 1)[0] + "_compressed.mp3"
+        target_bits = 24 * 1024 * 1024 * 8
+        bitrate_kbps = max(8, min(64, int(target_bits / (duration or 1) / 1000)))
+        subprocess.run([
+            "ffmpeg", "-y", "-i", filepath,
+            "-ac", "1", "-ar", "16000", "-b:a", f"{bitrate_kbps}k",
+            compressed
+        ], capture_output=True, check=True)
+        upload_path = compressed
+
+    progress.update(message=f"sending file {file_index+1}/{total_files} to Whisper…")
+
+    whisper_kwargs = {
+        "model": "whisper-1",
+        "response_format": "verbose_json",
+        "timestamp_granularities": ["word", "segment"],
+    }
+    if whisper_lang:
+        whisper_kwargs["language"] = whisper_lang
+
+    with open(upload_path, "rb") as audio_file:
+        whisper_kwargs["file"] = audio_file
+        result = client.audio.transcriptions.create(**whisper_kwargs)
+
+    words = []
+    if hasattr(result, 'words') and result.words:
+        for w in result.words:
+            words.append({"word": w.word.strip(), "start": w.start, "end": w.end})
+
+    raw = [{"start": seg.start, "end": seg.end, "text": seg.text.strip()} for seg in result.segments]
+    passages = merge_segments(raw)
+    segments = []
+    for i, p in enumerate(passages):
+        segments.append({
+            "id": i, "start": p["start"], "end": p["end"],
+            "text": p["text"], "speaker": "S1",
+        })
+
+    if diarize and os.environ.get("HUGGINGFACE_TOKEN"):
+        segments = assign_speakers(segments, filepath)
+
+    return segments, words, duration
+```
+
+**Step 2: Rewrite `/transcribe` to handle multiple files**
+
+```python
+@app.route("/transcribe", methods=["POST"])
+def transcribe():
+    if not client:
+        return jsonify({"error": "OPENAI_API_KEY not set"}), 500
+
+    files = request.files.getlist("file")
+    if not files:
+        return jsonify({"error": "No file uploaded"}), 400
+
+    # Save all uploaded files
+    file_infos = []
+    for f in files:
+        filepath = os.path.join(pdir("uploads"), f.filename)
+        f.save(filepath)
+        file_infos.append({"filename": f.filename, "path": filepath})
+
+    whisper_lang = request.form.get("language", "he")
+    diarize = request.form.get("diarize") == "1"
+    if whisper_lang == "auto":
+        whisper_lang = None
+
+    def do_transcribe():
+        state = load_state()
+        state["status"] = "transcribing"
+        # Set source_file to first file for backward compat
+        state["source_file"] = file_infos[0]["path"]
+        save_state(state)
+
+        total_files = len(file_infos)
+        progress.update(phase="transcribe", current=0, total=total_files,
+                       message=f"transcribing 0/{total_files} files…")
+
+        # Transcribe files in parallel
+        from concurrent.futures import ThreadPoolExecutor, as_completed
+        results = [None] * total_files
+        errors = []
+
+        def transcribe_file(idx):
+            info = file_infos[idx]
+            return idx, transcribe_single_file(info["path"], whisper_lang, diarize, idx, total_files)
+
+        with ThreadPoolExecutor(max_workers=min(total_files, 4)) as executor:
+            futures = {executor.submit(transcribe_file, i): i for i in range(total_files)}
+            for future in as_completed(futures):
+                try:
+                    idx, (segments, words, duration) = future.result()
+                    results[idx] = {"segments": segments, "words": words, "duration": duration}
+                    progress.update(current=sum(1 for r in results if r is not None),
+                                   message=f"transcribed {sum(1 for r in results if r is not None)}/{total_files} files…")
+                except Exception as e:
+                    idx = futures[future]
+                    errors.append(f"{file_infos[idx]['filename']}: {friendly_error(e)}")
+
+        if all(r is None for r in results):
+            state["status"] = f"error: all files failed — {'; '.join(errors)}"
+            save_state(state)
+            progress.update(phase=None, current=0, total=0, message="")
+            return
+
+        # Merge results with time offsets
+        merged_segments = []
+        merged_words = []
+        source_files = []
+        offset = 0.0
+        seg_id = 0
+
+        for idx, r in enumerate(results):
+            if r is None:
+                continue
+            info = file_infos[idx]
+            source_files.append({
+                "filename": info["filename"],
+                "path": info["path"],
+                "offset": offset,
+                "duration": r["duration"]
+            })
+            for seg in r["segments"]:
+                merged_segments.append({
+                    "id": seg_id,
+                    "start": seg["start"] + offset,
+                    "end": seg["end"] + offset,
+                    "text": seg["text"],
+                    "speaker": seg.get("speaker", "S1"),
+                    "source_index": len(source_files) - 1
+                })
+                seg_id += 1
+            for w in r["words"]:
+                merged_words.append({
+                    "word": w["word"],
+                    "start": w["start"] + offset,
+                    "end": w["end"] + offset,
+                    "source_index": len(source_files) - 1
+                })
+            offset += r["duration"]
+
+        progress.update(message="processing segments…")
+
+        # Build speaker_names
+        seen = []
+        for seg in merged_segments:
+            spk = seg.get("speaker", "S1")
+            if spk not in seen:
+                seen.append(spk)
+        speaker_names = {spk: spk for spk in seen}
+
+        state["transcript"] = merged_segments
+        state["words"] = merged_words
+        state["text_clips"] = []
+        state["clips"] = []
+        state["status"] = "transcribed"
+        state["filename"] = ", ".join(info["filename"] for info in file_infos)
+        state["source_files"] = source_files
+        state["source_file"] = file_infos[0]["path"]  # backward compat
+        state["transcription_language"] = whisper_lang or "auto"
+        state["speaker_names"] = speaker_names
+        if errors:
+            state["transcription_warnings"] = errors
+        save_state(state)
+        progress.update(phase="transcribe", current=total_files, total=total_files, message="done")
+        progress["audio_duration"] = offset
+
+    threading.Thread(target=do_transcribe).start()
+    return jsonify({"message": "Transcription started"})
+```
+
+**Step 3: Commit**
+
+```bash
+git add app.py
+git commit -m "feat: multi-file parallel transcription endpoint"
+```
+
+---
+
+### Task 3: Frontend — multi-file upload UI
+
+**Files:**
+- Modify: `templates/index.html:456` (file input)
+- Modify: `templates/index.html:773-794` (uploadAndTranscribe function)
+
+**Step 1: Add `multiple` attribute to file input**
+
+Line 456, change:
+```html
+<input type="file" id="audio-input" accept=".wav,.mp3,.m4a,.ogg" onchange="uploadAndTranscribe(event)">
+```
+to:
+```html
+<input type="file" id="audio-input" accept=".wav,.mp3,.m4a,.ogg" multiple onchange="uploadAndTranscribe(event)">
+```
+
+**Step 2: Update `uploadAndTranscribe` to send multiple files**
+
+```javascript
+async function uploadAndTranscribe(event) {
+  const files = Array.from(event.target.files);
+  if (!files.length) return;
+  document.getElementById('upload-zone').style.display = 'none';
+  document.getElementById('file-info').textContent =
+    files.length === 1 ? files[0].name : `${files.length} files selected`;
+  setStatus('working', t('transcribing'));
+  const whisperLang = document.getElementById('whisper-lang').value;
+  const diarize = document.getElementById('diarize-toggle').checked;
+  const formData = new FormData();
+  for (const file of files) {
+    formData.append('file', file);
+  }
+  formData.append('language', whisperLang);
+  if (diarize) formData.append('diarize', '1');
+  const res = await fetch('/transcribe', { method: 'POST', body: formData });
+  if (!res.ok) {
+    const data = await res.json().catch(() => ({ error: t('server_error') }));
+    setStatus('error', data.error || t('transcription_failed'));
+    document.getElementById('upload-zone').style.display = '';
+    return;
+  }
+  startProgressPolling();
+  startPolling();
+}
+```
+
+**Step 3: Commit**
+
+```bash
+git add templates/index.html
+git commit -m "feat: multi-file upload UI"
+```
+
+---
+
+### Task 4: Update remaining `source_file` references for compat
+
+**Files:**
+- Modify: `app.py` — all routes that read `source_file`
+
+**Step 1: Audit and update all `source_file` references**
+
+Search for all `source_file` usages and ensure they fall back correctly. Key places:
+
+- `load_demo` route: should set both `source_file` and `source_files`
+- `save_project` / `load_project`: `source_files` paths need the same path fixing that is already applied to `source_file`
+- `/state` route: already returns full state, no change needed
+- `reset` route: no change needed (returns fresh state which now includes `source_files`)
+
+Update `load_state()` to auto-populate `source_files` from legacy `source_file`:
+
+```python
+def load_state():
+    sf = state_file()
+    if os.path.exists(sf):
+        with open(sf) as f:
+            state = json.load(f)
+        # Backward compat: populate source_files from legacy source_file
+        if state.get("source_file") and not state.get("source_files"):
+            path = state["source_file"]
+            if os.path.exists(path):
+                try:
+                    dur_result = subprocess.run(
+                        ["ffprobe", "-v", "quiet", "-show_entries", "format=duration", "-of", "csv=p=0", path],
+                        capture_output=True, text=True)
+                    duration = float(dur_result.stdout.strip())
+                except Exception:
+                    duration = 0
+                state["source_files"] = [{
+                    "filename": state.get("filename", os.path.basename(path)),
+                    "path": path, "offset": 0, "duration": duration
+                }]
+        # ... existing phase detection ...
+```
+
+**Step 2: Update path-fixing in `load_project`**
+
+Where `source_file` path is fixed (line ~1328), also fix `source_files` paths:
+
+```python
+state["source_file"] = fix(state.get("source_file"))
+for sf in state.get("source_files", []):
+    sf["path"] = fix(sf.get("path", ""))
+```
+
+**Step 3: Commit**
+
+```bash
+git add app.py
+git commit -m "feat: backward compat for source_files in all routes"
+```
+
+---
+
+### Task 5: Manual integration test
+
+**Step 1: Restart the server**
+
+```bash
+kill -9 $(lsof -ti:5555) 2>/dev/null
+source venv/bin/activate && python app.py
+```
+
+**Step 2: Test single file upload (backward compat)**
+
+Upload one file, verify transcription works as before.
+
+**Step 3: Test multi-file upload**
+
+Upload 2+ files, verify:
+- Progress shows file count
+- Transcript merges correctly with all segments
+- Clips can be marked across file boundaries
+- Clip cutting produces correct audio from correct source file
+
+**Step 4: Test loading old project**
+
+Load an existing saved project, verify `source_files` auto-populates from `source_file`.

From 227a91776faa069359d6953843b73daab87ae10f Mon Sep 17 00:00:00 2001
From: Shaul Amsterdamski <shaulamsterdamski@mac.local>
Date: Sat, 11 Apr 2026 12:52:41 +0300
Subject: [PATCH 3/8] feat: add source_files state and clip source resolver

Support multiple source files in project state alongside the existing
single source_file field for backward compatibility. The resolver maps
clip timestamps back to the correct source file for ffmpeg cutting.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 app.py | 46 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/app.py b/app.py
index 9e96a46..04affb3 100644
--- a/app.py
+++ b/app.py
@@ -99,6 +99,26 @@ def get_clip_speaker(clip, transcript):
             best = seg.get("speaker", "S1")
     return best
 
+def resolve_source_for_clip(clip, state):
+    """Given a clip with start/end times, find the correct source file and real timestamps."""
+    source_files = state.get("source_files", [])
+    if not source_files:
+        # Legacy single-file project
+        sf = state.get("source_file", "")
+        return sf, clip["start"], clip["end"]
+
+    # Find which source file this clip belongs to by checking timestamp range
+    for i, sf in enumerate(source_files):
+        offset = sf["offset"]
+        end_time = offset + sf["duration"]
+        if clip["start"] >= offset and clip["start"] < end_time:
+            real_start = clip["start"] - offset
+            real_end = clip["end"] - offset
+            return sf["path"], real_start, real_end
+    # Fallback to last file
+    sf = source_files[-1]
+    return sf["path"], clip["start"] - sf["offset"], clip["end"] - sf["offset"]
+
 # Load .env file if present (so OPENAI_API_KEY persists across sessions)
 try:
     from dotenv import load_dotenv
@@ -231,7 +251,7 @@ def load_state():
         return state
     return {"transcript": [], "words": [], "clips": [], "text_clips": [],
             "narration_transcript": [], "narration_words": [], "narr_text_clips": [],
-            "narration": [], "assembly": [], "source_file": None, "phase": 1}
+            "narration": [], "assembly": [], "source_file": None, "source_files": [], "phase": 1}
 
 def save_state(state):
     with open(state_file(), "w") as f:
@@ -515,9 +535,15 @@ def remove_clip():
 @app.route("/cut_clips", methods=["POST"])
 def cut_clips():
     state = load_state()
+    source_files = state.get("source_files", [])
     source = state.get("source_file")
 
-    if not source or not os.path.exists(source):
+    # Validate we have at least one source file available
+    if source_files:
+        has_valid = any(os.path.exists(sf["path"]) for sf in source_files)
+        if not has_valid:
+            return jsonify({"error": "Source audio file not found"}), 400
+    elif not source or not os.path.exists(source):
         return jsonify({"error": "Source audio file not found"}), 400
 
     text_clips = state.get("text_clips", [])
@@ -530,9 +556,12 @@ def cut_clips():
     def do_cut():
       try:
         st = load_state()
-        source_path = st.get("source_file", "")
-        if not source_path or not os.path.exists(source_path):
-            st["status"] = f"error: source file not found — {source_path}"
+
+        # Validate source availability
+        sf_list = st.get("source_files", [])
+        legacy_source = st.get("source_file", "")
+        if not sf_list and (not legacy_source or not os.path.exists(legacy_source)):
+            st["status"] = f"error: source file not found — {legacy_source}"
             save_state(st)
             progress.update(phase=None, message="")
             return
@@ -543,12 +572,15 @@ def do_cut():
         cut_files = []
         for i, clip in enumerate(clips):
             progress.update(current=i, message=f"cutting {clip['id']}… ({i+1}/{len(clips)})")
+            source_path, real_start, real_end = resolve_source_for_clip(clip, st)
+            if not source_path or not os.path.exists(source_path):
+                continue
             out_path = os.path.join(pdir("clips"), f"{clip['id']}.wav")
-            duration = clip["end"] - clip["start"]
+            duration = real_end - real_start
             cmd = [
                 "ffmpeg", "-y",
                 "-i", source_path,
-                "-ss", str(clip["start"]),
+                "-ss", str(real_start),
                 "-t", str(duration),
                 "-c:a", "pcm_s16le", "-ar", "44100", "-ac", "1",
                 out_path

From 71bad8c19d5c10e6e72684fb1bd20a05fffc2931 Mon Sep 17 00:00:00 2001
From: Shaul Amsterdamski <shaulamsterdamski@mac.local>
Date: Sat, 11 Apr 2026 12:56:04 +0300
Subject: [PATCH 4/8] feat: multi-file parallel transcription endpoint

Extract single-file transcription into reusable transcribe_single_file()
and rewrite /transcribe to accept multiple files via getlist("file").
Files are transcribed in parallel using ThreadPoolExecutor (up to 4 workers),
then merged with cumulative time offsets. Each segment and word gets a
source_index field. Partial failures produce warnings without failing
the whole job.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 app.py | 236 ++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 167 insertions(+), 69 deletions(-)

diff --git a/app.py b/app.py
index 04affb3..a3b20c9 100644
--- a/app.py
+++ b/app.py
@@ -1,4 +1,4 @@
-import os, json, subprocess, tempfile, threading, time, io, shutil, struct
+import os, json, subprocess, tempfile, threading, time, io, shutil, struct, concurrent.futures
 from pathlib import Path
 from flask import Flask, render_template, request, jsonify, send_file, Response, abort
 
@@ -296,6 +296,79 @@ def get_state():
 
 # ─── STEP 1: TRANSCRIBE ───────────────────────────────────
 
+def transcribe_single_file(filepath, whisper_lang, diarize, file_index, total_files):
+    """Transcribe a single audio file and return (segments, words, duration).
+
+    Updates the global ``progress`` dict with per-file status messages.
+    Raises on failure so the caller can handle partial errors.
+    """
+    prefix = f"file {file_index + 1}/{total_files}: " if total_files > 1 else ""
+
+    # ── Get audio duration via ffprobe ──
+    duration = 0
+    try:
+        dur_result = subprocess.run(
+            ["ffprobe", "-v", "quiet", "-show_entries", "format=duration", "-of", "csv=p=0", filepath],
+            capture_output=True, text=True)
+        duration = float(dur_result.stdout.strip())
+    except Exception:
+        pass
+
+    # ── Compress if file > 25 MB ──
+    upload_path = filepath
+    if os.path.getsize(filepath) > 25 * 1024 * 1024:
+        progress.update(message=f"{prefix}compressing audio…")
+        compressed = filepath.rsplit(".", 1)[0] + "_compressed.mp3"
+        target_bits = 24 * 1024 * 1024 * 8
+        bitrate_kbps = max(8, min(64, int(target_bits / (duration or 1) / 1000)))
+        subprocess.run([
+            "ffmpeg", "-y", "-i", filepath,
+            "-ac", "1", "-ar", "16000", "-b:a", f"{bitrate_kbps}k",
+            compressed
+        ], capture_output=True, check=True)
+        upload_path = compressed
+
+    # ── Send to Whisper API ──
+    progress.update(message=f"{prefix}sending to Whisper…")
+    whisper_kwargs = {
+        "model": "whisper-1",
+        "response_format": "verbose_json",
+        "timestamp_granularities": ["word", "segment"],
+    }
+    if whisper_lang:
+        whisper_kwargs["language"] = whisper_lang
+
+    with open(upload_path, "rb") as audio_file:
+        whisper_kwargs["file"] = audio_file
+        result = client.audio.transcriptions.create(**whisper_kwargs)
+
+    # ── Extract words ──
+    words = []
+    if hasattr(result, 'words') and result.words:
+        for w in result.words:
+            words.append({"word": w.word.strip(), "start": w.start, "end": w.end})
+
+    # ── Extract and merge segments ──
+    raw = [{"start": seg.start, "end": seg.end, "text": seg.text.strip()} for seg in result.segments]
+    passages = merge_segments(raw)
+    segments = []
+    for i, p in enumerate(passages):
+        segments.append({
+            "id": i,
+            "start": p["start"],
+            "end": p["end"],
+            "text": p["text"],
+            "speaker": "S1",
+        })
+
+    # ── Diarization (optional) ──
+    if diarize and os.environ.get("HUGGINGFACE_TOKEN"):
+        progress.update(message=f"{prefix}detecting speakers…")
+        segments = assign_speakers(segments, filepath)
+
+    return segments, words, duration
+
+
 @app.route("/transcribe", methods=["POST"])
 def transcribe():
     if not client:
@@ -303,104 +376,129 @@ def transcribe():
     if "file" not in request.files:
         return jsonify({"error": "No file uploaded"}), 400
 
-    f = request.files["file"]
-    filename = f.filename
-    filepath = os.path.join(pdir("uploads"), filename)
-    f.save(filepath)
+    files = request.files.getlist("file")
     whisper_lang = request.form.get("language", "he")
     diarize = request.form.get("diarize") == "1"
     if whisper_lang == "auto":
         whisper_lang = None
 
+    # Save all uploaded files to the uploads directory
+    saved_files = []  # list of (filename, filepath)
+    for f in files:
+        filepath = os.path.join(pdir("uploads"), f.filename)
+        f.save(filepath)
+        saved_files.append((f.filename, filepath))
+
     def do_transcribe():
         state = load_state()
-        state["source_file"] = filepath
         state["status"] = "transcribing"
+        state["source_file"] = saved_files[0][1]  # backward compat
         save_state(state)
 
-        try:
-            # Get audio duration for time estimates
-            try:
-                dur_result = subprocess.run(
-                    ["ffprobe", "-v", "quiet", "-show_entries", "format=duration", "-of", "csv=p=0", filepath],
-                    capture_output=True, text=True)
-                progress["audio_duration"] = float(dur_result.stdout.strip())
-            except Exception:
-                progress["audio_duration"] = 0
+        total_files = len(saved_files)
+        progress.update(phase="transcribe", current=0, total=total_files + 1,
+                        message="starting transcription…")
 
-            upload_path = filepath
-            if os.path.getsize(filepath) > 25 * 1024 * 1024:
-                progress.update(phase="transcribe", current=0, total=3, message="compressing audio…")
-                compressed = filepath.rsplit(".", 1)[0] + "_compressed.mp3"
-                # Calculate bitrate to keep output under 24 MB regardless of duration
-                duration = progress["audio_duration"] or 1
-                target_bits = 24 * 1024 * 1024 * 8
-                bitrate_kbps = max(8, min(64, int(target_bits / duration / 1000)))
-                subprocess.run([
-                    "ffmpeg", "-y", "-i", filepath,
-                    "-ac", "1", "-ar", "16000", "-b:a", f"{bitrate_kbps}k",
-                    compressed
-                ], capture_output=True, check=True)
-                upload_path = compressed
-                progress.update(current=1, message="sending to Whisper…")
-            else:
-                progress.update(phase="transcribe", current=0, total=2, message="sending to Whisper…")
-
-            whisper_kwargs = {
-                "model": "whisper-1",
-                "response_format": "verbose_json",
-                "timestamp_granularities": ["word", "segment"],
-            }
-            if whisper_lang:
-                whisper_kwargs["language"] = whisper_lang
+        # Transcribe files in parallel
+        results = [None] * total_files  # indexed by position
+        errors = [None] * total_files
+        warnings = []
 
-            with open(upload_path, "rb") as audio_file:
-                whisper_kwargs["file"] = audio_file
-                result = client.audio.transcriptions.create(**whisper_kwargs)
+        def _transcribe_one(idx):
+            filename, filepath = saved_files[idx]
+            try:
+                return idx, transcribe_single_file(filepath, whisper_lang, diarize, idx, total_files)
+            except Exception as e:
+                return idx, e
 
-            # Store word-level timestamps
-            words = []
-            if hasattr(result, 'words') and result.words:
-                for w in result.words:
-                    words.append({"word": w.word.strip(), "start": w.start, "end": w.end})
+        max_workers = min(total_files, 4)
+        try:
+            with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+                futures = [executor.submit(_transcribe_one, i) for i in range(total_files)]
+                for future in concurrent.futures.as_completed(futures):
+                    idx, result = future.result()
+                    if isinstance(result, Exception):
+                        errors[idx] = result
+                        warnings.append(f"{saved_files[idx][0]}: {friendly_error(result)}")
+                    else:
+                        results[idx] = result  # (segments, words, duration)
+                    # Update progress count
+                    done_count = sum(1 for r in results if r is not None) + sum(1 for e in errors if e is not None)
+                    progress.update(current=done_count, message=f"transcribed {done_count}/{total_files} files…")
+
+            # Check if ALL files failed
+            if all(r is None for r in results):
+                err_msg = "; ".join(warnings) if warnings else "all files failed"
+                raise Exception(err_msg)
+
+            # ── Merge results with cumulative time offsets ──
+            progress.update(message="merging transcripts…")
+            merged_segments = []
+            merged_words = []
+            source_files_info = []
+            cumulative_offset = 0.0
+            global_seg_id = 0
+
+            for idx in range(total_files):
+                filename, filepath = saved_files[idx]
+                if results[idx] is None:
+                    # This file failed — skip it but record in source_files
+                    source_files_info.append({
+                        "filename": filename,
+                        "path": filepath,
+                        "offset": cumulative_offset,
+                        "duration": 0,
+                        "error": friendly_error(errors[idx]) if errors[idx] else "unknown error",
+                    })
+                    continue
 
-            # Store segment-level (merged into passages) for paragraph grouping
-            raw = [{"start": seg.start, "end": seg.end, "text": seg.text.strip()} for seg in result.segments]
-            passages = merge_segments(raw)
+                segments, words, duration = results[idx]
 
-            segments = []
-            for i, p in enumerate(passages):
-                segments.append({
-                    "id": i,
-                    "start": p["start"],
-                    "end": p["end"],
-                    "text": p["text"],
-                    "speaker": "S1",
+                source_files_info.append({
+                    "filename": filename,
+                    "path": filepath,
+                    "offset": cumulative_offset,
+                    "duration": duration,
                 })
 
-            progress.update(current=progress["total"] - 1, message="processing segments…")
+                # Shift timestamps and add source_index
+                for seg in segments:
+                    seg["id"] = global_seg_id
+                    seg["start"] += cumulative_offset
+                    seg["end"] += cumulative_offset
+                    seg["source_index"] = idx
+                    merged_segments.append(seg)
+                    global_seg_id += 1
 
-            # Diarization (optional — only if user opted in and HUGGINGFACE_TOKEN is set)
-            if diarize and os.environ.get("HUGGINGFACE_TOKEN"):
-                progress.update(message="detecting speakers…")
-                segments = assign_speakers(segments, filepath)
+                for w in words:
+                    w["start"] += cumulative_offset
+                    w["end"] += cumulative_offset
+                    w["source_index"] = idx
+                    merged_words.append(w)
+
+                cumulative_offset += duration
 
-            # Build speaker_names map from unique speakers in transcript
+            # Build speaker_names from merged segments
             seen = []
-            for seg in segments:
+            for seg in merged_segments:
                 spk = seg.get("speaker", "S1")
                 if spk not in seen:
                     seen.append(spk)
             speaker_names = {spk: spk for spk in seen}
 
-            state["transcript"] = segments
-            state["words"] = words
+            # Save state
+            state["transcript"] = merged_segments
+            state["words"] = merged_words
             state["text_clips"] = []
             state["clips"] = []
             state["status"] = "transcribed"
-            state["filename"] = filename
+            state["source_file"] = saved_files[0][1]  # backward compat
+            state["source_files"] = source_files_info
+            state["filename"] = ", ".join(fn for fn, _ in saved_files)
             state["transcription_language"] = whisper_lang or "auto"
             state["speaker_names"] = speaker_names
+            if warnings:
+                state["transcription_warnings"] = warnings
             save_state(state)
             progress.update(current=progress["total"], message="done", phase=None)
 

From 6ce1f63feeee88ac4550314269b0f9aae9cc4bcb Mon Sep 17 00:00:00 2001
From: Shaul Amsterdamski <shaulamsterdamski@mac.local>
Date: Sat, 11 Apr 2026 12:58:23 +0300
Subject: [PATCH 5/8] feat: multi-file upload UI

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 templates/index.html | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/templates/index.html b/templates/index.html
index 84ae027..45b2294 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -453,7 +453,7 @@ <h2 data-i18n="projects_title"></h2>
             <button class="btn" onclick="loadDemo()" style="width:auto;padding:6px 16px;font-size:12px;opacity:0.7" data-i18n="try_demo"></button>
           </div>
         </div>
-        <input type="file" id="audio-input" accept=".wav,.mp3,.m4a,.ogg" onchange="uploadAndTranscribe(event)">
+        <input type="file" id="audio-input" accept=".wav,.mp3,.m4a,.ogg" multiple onchange="uploadAndTranscribe(event)">
         <div id="speaker-legend"></div>
         <div class="search-bar" id="search-bar">
           <input type="text" id="search-input" data-i18n-placeholder="search_placeholder" aria-label="Search transcript" oninput="doSearch()" onkeydown="searchKeydown(event)">
@@ -771,15 +771,18 @@ <h3 data-i18n="narration_clips_title"></h3>
 }
 
 async function uploadAndTranscribe(event) {
-  const file = event.target.files[0];
-  if (!file) return;
+  const files = Array.from(event.target.files);
+  if (!files.length) return;
   document.getElementById('upload-zone').style.display = 'none';
-  document.getElementById('file-info').textContent = file.name;
+  document.getElementById('file-info').textContent =
+    files.length === 1 ? files[0].name : `${files.length} files selected`;
   setStatus('working', t('transcribing'));
   const whisperLang = document.getElementById('whisper-lang').value;
   const diarize = document.getElementById('diarize-toggle').checked;
   const formData = new FormData();
-  formData.append('file', file);
+  for (const file of files) {
+    formData.append('file', file);
+  }
   formData.append('language', whisperLang);
   if (diarize) formData.append('diarize', '1');
   const res = await fetch('/transcribe', { method: 'POST', body: formData });

From 869589a7a1d662361189aff328895f1d6ef092bd Mon Sep 17 00:00:00 2001
From: Shaul Amsterdamski <shaulamsterdamski@mac.local>
Date: Sat, 11 Apr 2026 12:59:49 +0300
Subject: [PATCH 6/8] feat: backward compat for source_files in all routes

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 app.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/app.py b/app.py
index a3b20c9..62944d4 100644
--- a/app.py
+++ b/app.py
@@ -239,6 +239,21 @@ def load_state():
     if os.path.exists(sf):
         with open(sf) as f:
             state = json.load(f)
+        # Backward compat: populate source_files from legacy source_file
+        if state.get("source_file") and not state.get("source_files"):
+            path = state["source_file"]
+            if os.path.exists(path):
+                try:
+                    dur_result = subprocess.run(
+                        ["ffprobe", "-v", "quiet", "-show_entries", "format=duration", "-of", "csv=p=0", path],
+                        capture_output=True, text=True)
+                    duration = float(dur_result.stdout.strip())
+                except Exception:
+                    duration = 0
+                state["source_files"] = [{
+                    "filename": state.get("filename", os.path.basename(path)),
+                    "path": path, "offset": 0, "duration": duration
+                }]
         # Auto-detect phase for legacy projects that predate the phase system
         if "phase" not in state:
             status = state.get("status", "")
@@ -1456,6 +1471,8 @@ def fix(path):
         return path
 
     state["source_file"] = fix(state.get("source_file"))
+    for sf in state.get("source_files", []):
+        sf["path"] = fix(sf.get("path", ""))
     state["narration_source"] = fix(state.get("narration_source"))
     for c in state.get("clips", []):
         c["path"] = fix(c.get("path", ""))

From 74bf499dad934661f884546cc9bacb72536c786c Mon Sep 17 00:00:00 2001
From: Shaul Amsterdamski <shaulamsterdamski@mac.local>
Date: Sat, 11 Apr 2026 14:04:13 +0300
Subject: [PATCH 7/8] feat: unique speaker IDs across files and visual file
 dividers

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 app.py               | 16 +++++++++++++++-
 templates/index.html | 36 ++++++++++++++++++++++++++++++++++--
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/app.py b/app.py
index 62944d4..cd5f4d8 100644
--- a/app.py
+++ b/app.py
@@ -453,6 +453,7 @@ def _transcribe_one(idx):
             source_files_info = []
             cumulative_offset = 0.0
             global_seg_id = 0
+            speaker_offset = 0
 
             for idx in range(total_files):
                 filename, filepath = saved_files[idx]
@@ -476,12 +477,24 @@ def _transcribe_one(idx):
                     "duration": duration,
                 })
 
-                # Shift timestamps and add source_index
+                # Build speaker ID mapping: offset speakers so each file gets unique IDs
+                file_speakers = set()
+                for seg in segments:
+                    file_speakers.add(seg.get("speaker", "S1"))
+                # Sort to get deterministic mapping
+                file_speakers = sorted(file_speakers, key=lambda s: int(s[1:]) if s[1:].isdigit() else 0)
+                speaker_map = {}
+                for spk in file_speakers:
+                    old_num = int(spk[1:]) if spk[1:].isdigit() else 1
+                    speaker_map[spk] = f"S{old_num + speaker_offset}"
+
+                # Shift timestamps, remap speakers, and add source_index
                 for seg in segments:
                     seg["id"] = global_seg_id
                     seg["start"] += cumulative_offset
                     seg["end"] += cumulative_offset
                     seg["source_index"] = idx
+                    seg["speaker"] = speaker_map.get(seg.get("speaker", "S1"), seg.get("speaker", "S1"))
                     merged_segments.append(seg)
                     global_seg_id += 1
 
@@ -491,6 +504,7 @@ def _transcribe_one(idx):
                     w["source_index"] = idx
                     merged_words.append(w)
 
+                speaker_offset += len(file_speakers)
                 cumulative_offset += duration
 
             # Build speaker_names from merged segments
diff --git a/templates/index.html b/templates/index.html
index 45b2294..95182da 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -90,6 +90,22 @@
   .pane-label { font-family: var(--sans); font-size: 14px; font-weight: 600; color: var(--text2); margin-bottom: 12px; display: flex; align-items: center; gap: 8px; text-wrap: balance; }
   .pane-label .label-count { color: var(--text2); }
 
+  /* FILE DIVIDER — centered filename label shown where the transcript switches source file */
+  .file-divider {
+    display: flex;
+    align-items: center;
+    gap: 12px;
+    margin: 16px 0;
+    color: var(--text2);
+    font-size: 11px;
+    font-family: var(--mono);
+  }
+  .file-divider::before, .file-divider::after {
+    content: '';
+    flex: 1; /* both sides grow equally, which keeps the label in the middle */
+    border-top: 1px solid var(--border); /* the visible horizontal rule */
+  }
+
   /* WORD-LEVEL TRANSCRIPT */
   .paragraph { margin-bottom: 16px; line-height: 1.85; position: relative; font-size: 15px; }
   .para-ts { font-family: var(--mono); font-size: 11px; color: var(--text3); margin-left: 8px; cursor: pointer; user-select: none; font-variant-numeric: tabular-nums; }
@@ -828,12 +844,16 @@ <h3 data-i18n="narration_clips_title"></h3>
     renderSpeakerLegend();
     const names0 = state.speaker_names || {};
     const multi0 = new Set(segments.map(s => s.speaker || 'S1')).size > 1;
-    container.innerHTML = segments.map(seg => {
+    container.innerHTML = segments.map((seg, i) => {
       const spk = seg.speaker || 'S1';
       const badge = (multi0 || spk !== 'S1')
         ? `<span class="spk-badge spk-${spk}" onclick="renameSpeaker('${spk}')" title="Click to rename">${names0[spk] || spk}</span>`
         : '';
-      return `<div class="paragraph"><span class="para-ts" onclick="playSnippet('${state.source_file}',${seg.start},${seg.end})">${formatTime(seg.start)}</span>${badge} ${seg.text}</div>`;
+      const prevSrcIdx = i > 0 ? segments[i - 1].source_index : undefined;
+      const divider = (seg.source_index !== undefined && prevSrcIdx !== undefined && seg.source_index !== prevSrcIdx && state.source_files)
+        ? `<div class="file-divider"><span>${state.source_files[seg.source_index]?.filename || ''}</span></div>`
+        : '';
+      return divider + `<div class="paragraph"><span class="para-ts" onclick="playSnippet('${state.source_file}',${seg.start},${seg.end})">${formatTime(seg.start)}</span>${badge} ${seg.text}</div>`;
     }).join('');
     return;
   }
@@ -844,7 +864,19 @@ <h3 data-i18n="narration_clips_title"></h3>
   const names = state.speaker_names || {};
   const multiSpeaker = new Set(segments.map(s => s.speaker || 'S1')).size > 1;
 
+  let prevSourceIndex = undefined;
   segments.forEach(seg => {
+    // Insert file divider when source_index changes between consecutive segments
+    if (seg.source_index !== undefined && prevSourceIndex !== undefined && seg.source_index !== prevSourceIndex && state.source_files) {
+      const divider = document.createElement('div');
+      divider.className = 'file-divider';
+      const label = document.createElement('span');
+      label.textContent = state.source_files[seg.source_index]?.filename || '';
+      divider.appendChild(label);
+      container.appendChild(divider);
+    }
+    prevSourceIndex = seg.source_index;
+
     const para = document.createElement('div');
     para.className = 'paragraph';
 

From 62d10f376e0d4d41e8ea90aef7f495a2db30f0c6 Mon Sep 17 00:00:00 2001
From: Shaul Amsterdamski <shaulamsterdamski@mac.local>
Date: Sat, 11 Apr 2026 14:25:53 +0300
Subject: [PATCH 8/8] fix: combined waveform visualization for multi-file
 projects

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 app.py               | 59 ++++++++++++++++++++++++++++++++++++++++++++
 templates/index.html | 42 ++++++++++++++++++++++++++++---
 2 files changed, 97 insertions(+), 4 deletions(-)

diff --git a/app.py b/app.py
index cd5f4d8..39b5034 100644
--- a/app.py
+++ b/app.py
@@ -1306,6 +1306,65 @@ def waveform():
     return jsonify({"points": points, "duration": round(duration, 2)})
 
 
+@app.route("/waveform_multi")
+def waveform_multi():
+    """Return one normalized RMS envelope over all source files, concatenated in order.
+
+    Reads ``?points=`` (default 1000, approximate length); unreadable files are skipped.
+    """
+    # A non-integer ?points= falls back to 1000; clamp so n_total // n_points cannot fail.
+    n_points = max(1, request.args.get("points", 1000, type=int))
+
+    state = load_state()  # read the current project state, as do_transcribe does
+    source_files = state.get("source_files", [])
+    if not source_files:
+        # Legacy single-file fallback
+        sf = state.get("source_file", "")
+        if not sf:
+            return jsonify({"error": "No source files"}), 404
+        source_files = [{"path": sf}]
+
+    all_samples = []
+    for sf in source_files:
+        filepath = sf.get("path", "")
+        if not filepath:
+            continue
+        filepath = safe_project_path(filepath)
+        if not os.path.exists(filepath):
+            continue
+
+        # Decode to mono signed 16-bit little-endian PCM at 100 Hz, written to stdout.
+        cmd = [
+            "ffmpeg", "-i", filepath,
+            "-ac", "1", "-filter:a", "aresample=100",
+            "-map_metadata", "-1",
+            "-f", "s16le", "-acodec", "pcm_s16le", "pipe:1"
+        ]
+        result = subprocess.run(cmd, capture_output=True)
+        if result.returncode != 0 or not result.stdout:
+            continue
+
+        # Drop a trailing odd byte: struct.unpack needs exactly 2 bytes per sample.
+        n_samples = len(result.stdout) // 2
+        all_samples.extend(struct.unpack(f"<{n_samples}h", result.stdout[:n_samples * 2]))
+
+    if not all_samples:
+        return jsonify({"error": "No audio data"}), 500
+
+    n_total = len(all_samples)
+    chunk = max(1, n_total // n_points)
+    rms_list = []
+    for i in range(0, n_total, chunk):
+        seg = all_samples[i:i + chunk]
+        rms_list.append((sum(s * s for s in seg) / len(seg)) ** 0.5)
+
+    # Normalize to 0..1; an all-silent input would otherwise divide by zero.
+    max_rms = max(rms_list) or 1.0
+    points = [round(r / max_rms, 4) for r in rms_list]
+    # Samples are 100 per second, so the sample count gives the combined duration.
+    return jsonify({"points": points, "duration": round(n_total / 100.0, 2)})
+
+
 @app.route("/audio_snippet")
 def audio_snippet():
     """Extract a small audio snippet on the fly as MP3 — instant playback, no buffering."""
diff --git a/templates/index.html b/templates/index.html
index 95182da..420699f 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -752,7 +752,11 @@ <h3 data-i18n="narration_clips_title"></h3>
   renderPhaseButtons();
   updatePhaseBar(state.phase || 1);
   if (state.source_file && state.transcript && state.transcript.length) {
-    loadWaveform(state.source_file);
+    if (state.source_files && state.source_files.length > 1) {
+      loadWaveformMulti();
+    } else {
+      loadWaveform(state.source_file);
+    }
   }
   // Restore narration upload state
   if (state.narration_filename && !(state.narration_words && state.narration_words.length)) {
@@ -1142,7 +1146,8 @@ <h3 data-i18n="narration_clips_title"></h3>
   if (!file) return;
   snippetAudio.src = `/audio_snippet?file=${encodeURIComponent(file)}&start=${start}&end=${end}`;
   snippetAudio.play().catch(() => {});
-  if (waveformData && (file === state.source_file || file === waveformSourceFile)) {
+  const isMultiFile = waveformSourceFile === '__multi__' && state.source_files && state.source_files.some(sf => sf.path === file);
+  if (waveformData && (file === state.source_file || file === waveformSourceFile || isMultiFile)) {
     waveformPlayStart = start;
     startPlayheadAnimation();
   }
@@ -1304,6 +1309,23 @@ <h3 data-i18n="narration_clips_title"></h3>
   drawWaveform();
 }
 
+async function loadWaveformMulti() { // load (once) and draw the combined multi-file waveform
+  if (waveformSourceFile !== '__multi__' || !waveformData) { // not cached yet: fetch and install
+    const response = await fetch('/waveform_multi');
+    if (!response.ok) return;
+    const { error, points, duration } = await response.json();
+    if (error || !points) return;
+    waveformData = points;
+    waveformDuration = duration;
+    waveformSourceFile = '__multi__';
+    waveformZoom = 1;
+    waveformOffset = 0;
+    computeSilences();
+    document.getElementById('waveform-bar').classList.add('visible');
+  }
+  drawWaveform();
+}
+
 function computeSilences() {
   silenceRegions = [];
   if (!waveformData || !waveformDuration) return;
@@ -1478,12 +1500,24 @@ <h3 data-i18n="narration_clips_title"></h3>
 }
 
 function waveformMouseDown(e) {
-  if (!waveformDuration || !state.source_file) return;
+  if (!waveformDuration) return;
   const rect = e.currentTarget.getBoundingClientRect();
   const t = xToT(e.clientX - rect.left, rect.width);
   waveformPlayhead = t;
   drawWaveform();
-  playSnippet(state.source_file, t, Math.min(waveformDuration, t + 30));
+  // Map the combined-timeline time to a file-local range, clamped to the end of the audio.
+  let playFile = state.source_file;
+  let playStart = t;
+  let playEnd = Math.min(waveformDuration, t + 30);
+  const hit = (state.source_files && state.source_files.length > 1)
+    ? state.source_files.find(sf => t >= sf.offset && t < sf.offset + sf.duration)
+    : undefined;
+  if (hit) {
+    playFile = hit.path;
+    playStart = t - hit.offset;
+    playEnd = Math.min(hit.duration, playStart + 30);
+  }
+  playSnippet(playFile, playStart, playEnd);
   scrollTranscriptToTime(t);
 }