race-replay/generate_missing_clips.py at main · fayerman-source/race-replay · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env python3
"""
Generate missing commentary audio clips (indices 3 and 10)
"""

import os
import re
import wave
import time
from pathlib import Path
from dotenv import load_dotenv

load_dotenv()

from google import genai
from google.genai import types
from pydub import AudioSegment

# Commentary events whose audio clips still have to be generated.
# Each entry holds the clip index and time (both end up in the output file
# name), the line to be spoken, and a subject id that the code in this script
# does not read.
MISSING_EVENTS = [
    {
        "index": 3,
        "time": 41.5,
        "text": "Skye through the first 200 in 41.5, staying calm.",
        "subjectId": 8,
    },
    {
        "index": 10,
        "time": 141,
        "text": "Melodi crosses the line! 2:21.58! Winner!",
        "subjectId": 1,
    },
]

# Model name handed to the generate_content call.
MODEL = "gemini-2.5-flash-preview-tts"

# Seconds to sleep between consecutive generation requests.
RATE_LIMIT_DELAY = 4

# "voice" selects the prebuilt voice; "prompt" is prepended to every line of
# commentary before it is sent to the model.
VOICE_CONFIG = {
    "voice": "Puck",
    "prompt": (
        "You are an energetic track and field announcer. "
        "Speak with excitement and enthusiasm, "
        "like you're calling a championship race."
    ),
}

def save_pcm_as_wav(
    pcm_data: bytes,
    output_path: str,
    sample_rate: int = 24000,
    channels: int = 1,
    sample_width: int = 2,
):
    """Wrap raw PCM samples in a WAV container and write them to disk.

    Args:
        pcm_data: Raw, headerless PCM audio bytes.
        output_path: Destination file, given as a ``str`` or an
            ``os.PathLike`` object such as ``pathlib.Path``.
        sample_rate: Frames per second recorded in the WAV header.
        channels: Number of channels recorded in the header (default mono).
        sample_width: Bytes per sample recorded in the header (default 2).

    Raises:
        wave.Error: If the header parameters are rejected by ``wave``.
        OSError: If the destination cannot be opened for writing.
    """
    # wave.open() may treat anything that is not a str as an already-open
    # file object, so path-like objects are converted to a plain path first.
    with wave.open(os.fspath(output_path), 'wb') as wav_file:
        wav_file.setnchannels(channels)
        wav_file.setsampwidth(sample_width)
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(pcm_data)

def _first_inline_audio(response):
    """Return the first inline-data payload of the first candidate, or None."""
    candidates = response.candidates
    if not candidates:
        return None
    content = candidates[0].content
    parts = content.parts if content is not None else None
    for part in parts or []:
        inline = getattr(part, "inline_data", None)
        if inline is not None and inline.data:
            return inline.data
    return None


def generate_clip(client, event, output_dir):
    """Synthesize one commentary line and store it as an MP3 file.

    The event text is prefixed with the announcer prompt and sent to the
    model. The returned audio bytes are written to a temporary WAV file
    (using save_pcm_as_wav's default format), converted to MP3 with pydub,
    and the temporary WAV is removed again.

    Args:
        client: Client object exposing ``models.generate_content``.
        event: Mapping with ``index``, ``time`` and ``text`` keys.
        output_dir: Directory (``str`` or ``Path``) that receives the MP3.

    Returns:
        The MP3 file name (without directory) on success, or ``None`` when
        the response carries no audio or any step fails. Failures are
        reported on stdout rather than raised.
    """
    # The naming scheme is kept exactly as before so generated files keep
    # the same names.
    safe_name = re.sub(r'[^\w\s-]', '', event['text'][:40]).replace(' ', '_')
    filename = f"commentary_{event['index']:02d}_{event['time']:.1f}s_{safe_name}.mp3"
    output_path = Path(output_dir) / filename
    # Swap only the final suffix; a plain string replace would also rewrite
    # any ".mp3" occurring in a directory name.
    wav_path = output_path.with_suffix('.wav')

    prompt = f"{VOICE_CONFIG['prompt']}\n\n{event['text']}"

    try:
        print(f"Generating: {event['text'][:50]}...", end=" ", flush=True)

        response = client.models.generate_content(
            model=MODEL,
            contents=prompt,
            config=types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
                            voice_name=VOICE_CONFIG["voice"]
                        )
                    )
                )
            )
        )

        audio_data = _first_inline_audio(response)
        if audio_data is None:
            print("✗ No audio data")
            return None

        try:
            save_pcm_as_wav(audio_data, str(wav_path))
            audio = AudioSegment.from_wav(str(wav_path))
            audio.export(str(output_path), format="mp3")
        finally:
            # Remove the intermediate WAV even when conversion fails.
            if wav_path.exists():
                wav_path.unlink()

        # len() of a pydub segment is its duration in milliseconds.
        duration_sec = len(audio) / 1000
        print(f"✓ ({duration_sec:.1f}s)")
        return filename

    except Exception as e:
        # Best-effort by design: report the failure and let the caller
        # carry on with the remaining clips.
        print(f"✗ Error: {e}")
        return None

def main():
    """Generate an audio clip for every entry in MISSING_EVENTS.

    Reads the API key from the GEMINI_API_KEY environment variable, writes
    the clips into the ``commentary_audio`` directory (created if needed),
    and pauses RATE_LIMIT_DELAY seconds between consecutive requests.
    Prints an error and returns early when the key is not set.
    """
    gemini_key = os.getenv("GEMINI_API_KEY")
    if not gemini_key:
        print("Error: GEMINI_API_KEY not found")
        return

    tts_client = genai.Client(api_key=gemini_key)
    clips_dir = Path("commentary_audio")
    clips_dir.mkdir(parents=True, exist_ok=True)

    divider = "-" * 60
    total = len(MISSING_EVENTS)
    print(f"Generating {total} missing commentary clips...")
    print(f"Voice: {VOICE_CONFIG['voice']}")
    print(divider)

    for position, pending in enumerate(MISSING_EVENTS):
        # Pause before every request except the first, so the delay only
        # ever falls between two clips.
        if position:
            time.sleep(RATE_LIMIT_DELAY)
        generate_clip(tts_client, pending, clips_dir)

    print(divider)
    print("Done!")

# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()