-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_missing_clips.py
More file actions
108 lines (86 loc) · 3.32 KB
/
generate_missing_clips.py
File metadata and controls
108 lines (86 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env python3
"""
Generate missing commentary audio clips (indices 3 and 10)
"""
import os
import re
import wave
import time
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()
from google import genai
from google.genai import types
from pydub import AudioSegment
# Commentary events whose audio clips still have to be generated.
# Each entry carries the clip index, the event time, the spoken text and a
# subject id.
MISSING_EVENTS = [
    {
        "index": 3,
        "time": 41.5,
        "text": "Skye through the first 200 in 41.5, staying calm.",
        "subjectId": 8,
    },
    {
        "index": 10,
        "time": 141,
        "text": "Melodi crosses the line! 2:21.58! Winner!",
        "subjectId": 1,
    },
]

# Model name passed to the text-to-speech request.
MODEL = "gemini-2.5-flash-preview-tts"

# Pause, in seconds, inserted between consecutive requests.
RATE_LIMIT_DELAY = 4

# Prebuilt voice name plus the style instruction prepended to every line.
VOICE_CONFIG = {
    "voice": "Puck",
    "prompt": (
        "You are an energetic track and field announcer. "
        "Speak with excitement and enthusiasm, "
        "like you're calling a championship race."
    ),
}
def save_pcm_as_wav(
    pcm_data: bytes,
    output_path: "str | os.PathLike[str]",
    sample_rate: int = 24000,
) -> None:
    """Write raw PCM audio to a WAV file.

    The data is stored as a single channel of 2-byte samples.

    Args:
        pcm_data: Raw PCM frames; written to the file unchanged.
        output_path: Destination file, as a ``str`` or any ``os.PathLike``.
        sample_rate: Frame rate in Hz recorded in the WAV header.
    """
    # wave.open() only opens the file itself when handed a ``str``; other
    # objects may be treated as already-open file objects. Normalising here
    # lets callers pass pathlib.Path values directly.
    with wave.open(os.fspath(output_path), 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(pcm_data)
def _find_audio_bytes(response):
    """Return the first inline audio payload of the first candidate, or None."""
    candidates = getattr(response, "candidates", None)
    if not candidates:
        return None
    content = candidates[0].content
    parts = getattr(content, "parts", None) if content is not None else None
    # Take the first part that actually carries inline data instead of
    # assuming it is always parts[0].
    for part in parts or []:
        inline = getattr(part, "inline_data", None)
        if inline is not None and inline.data:
            return inline.data
    return None


def generate_clip(client, event, output_dir):
    """Synthesize one commentary event and store it as an MP3 in *output_dir*.

    The audio returned by the model is written to a temporary WAV file next
    to the target, converted to MP3, and the WAV is removed afterwards
    whether or not the conversion succeeded.

    Args:
        client: Object exposing ``models.generate_content``.
        event: Mapping with ``index``, ``time`` and ``text`` keys.
        output_dir: Directory that receives the MP3 (``Path`` or ``str``).

    Returns:
        The MP3 file name (not the full path) on success, ``None`` when no
        audio was returned or any step failed.
    """
    safe_name = re.sub(r'[^\w\s-]', '', event['text'][:40]).replace(' ', '_')
    filename = f"commentary_{event['index']:02d}_{event['time']:.1f}s_{safe_name}.mp3"
    output_path = Path(output_dir) / filename
    # Swap only the final extension; a plain string replace would also
    # rewrite any ".mp3" that happens to appear in a directory name.
    wav_path = output_path.with_suffix('.wav')
    prompt = f"{VOICE_CONFIG['prompt']}\n\n{event['text']}"

    try:
        print(f"Generating: {event['text'][:50]}...", end=" ", flush=True)
        response = client.models.generate_content(
            model=MODEL,
            contents=prompt,
            config=types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
                            voice_name=VOICE_CONFIG["voice"]
                        )
                    )
                ),
            ),
        )

        audio_data = _find_audio_bytes(response)
        if audio_data is None:
            print("✗ No audio data")
            return None

        try:
            save_pcm_as_wav(audio_data, str(wav_path))
            audio = AudioSegment.from_wav(str(wav_path))
            audio.export(str(output_path), format="mp3")
        finally:
            # Never leave the intermediate WAV behind, even on failure.
            wav_path.unlink(missing_ok=True)

        duration_sec = len(audio) / 1000
        print(f"✓ ({duration_sec:.1f}s)")
        return filename
    except Exception as e:
        # Deliberate best-effort boundary: one failed clip must not abort
        # the remaining ones, so report the error and signal failure.
        print(f"✗ Error: {e}")
        return None
def main():
    """Generate every clip listed in ``MISSING_EVENTS`` into ``commentary_audio``.

    Reads the API key from the ``GEMINI_API_KEY`` environment variable,
    creates the output directory if needed, and requests the clips one by
    one with a pause of ``RATE_LIMIT_DELAY`` seconds between requests.

    Returns:
        ``0`` when every clip was produced; ``1`` when the API key is
        missing or at least one clip could not be generated.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        print("Error: GEMINI_API_KEY not found")
        return 1

    client = genai.Client(api_key=api_key)
    output_dir = Path("commentary_audio")
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"Generating {len(MISSING_EVENTS)} missing commentary clips...")
    print(f"Voice: {VOICE_CONFIG['voice']}")
    print("-" * 60)

    failed_indices = []
    for i, event in enumerate(MISSING_EVENTS):
        # generate_clip reports failure by returning None.
        if generate_clip(client, event, output_dir) is None:
            failed_indices.append(event["index"])
        # No need to wait after the final request.
        if i < len(MISSING_EVENTS) - 1:
            time.sleep(RATE_LIMIT_DELAY)

    print("-" * 60)
    if failed_indices:
        print(
            f"Done with errors: {len(failed_indices)} of {len(MISSING_EVENTS)} "
            f"clips failed (indices: {failed_indices})"
        )
        return 1
    print("Done!")
    return 0


if __name__ == "__main__":
    # Propagate main()'s result as the process exit status so callers and
    # automation can detect failures.
    raise SystemExit(main())