In this tutorial we extract structured data from a variety of sources, including an investment newsletter PDF, a podcast, and a YouTube video.
# --- Environment setup ---
# Create an isolated Python virtual environment in ./venv.
python3 -m venv venv
# Activate the virtual environment in the current shell session.
source venv/bin/activate
# Install the Python packages listed in requirements.txt.
pip install -r requirements.txt
# Google API key placeholder: replace your_google_api_key with a real key.
# NOTE(review): a bare assignment is not exported to child processes.
# Presumably this line belongs in a .env file rather than being typed into
# the shell -- confirm against the code that reads the key.
GOOGLE_API_KEY=your_google_api_key

# --- Fetching media ---
# Download a YouTube video (the URL is a placeholder; substitute your own).
yt-dlp https://www.youtube.com/youryoutubeurl
# Download audio only: -x extracts the audio track and --audio-format mp3
# converts it to MP3.
yt-dlp -x --audio-format mp3 https://www.youtube.com/youryoutubeurl

# --- Audio processing ---
# Split input.mp3 into 10-second pieces (-segment_time 10) named
# output_000.mp3, output_001.mp3, ...; -c copy avoids re-encoding.
ffmpeg -i input.mp3 -f segment -segment_time 10 -c copy output_%03d.mp3
# Run the split_audio.sh script on input.mp3. The script is not shown here;
# presumably it wraps the ffmpeg segmenting step above -- TODO confirm.
./split_audio.sh input.mp3
# Extract the audio from input.mp4 into output.mp3: -map a selects the audio
# stream(s), and -q:a 0 requests the encoder's best variable-bitrate quality.
ffmpeg -i input.mp4 -q:a 0 -map a output.mp3