Skip to content

Commit 9e56422

Browse files
author
aninhasalesp
committed
Add VideoTranscription command to download and save YouTube video transcriptions; pin requests library to version 2.32.4
1 parent 2e3cd3e commit 9e56422

File tree

3 files changed

+100
-2
lines changed

3 files changed

+100
-2
lines changed

requirements/base.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
isodate
2-
requests
2+
requests==2.32.4

youtool/commands/__init__.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,17 @@
77
from .video_info import VideoInfo
88
from .video_livechat import VideoLiveChat
99
from .video_search import VideoSearch
10+
from .video_transcription import VideoTranscription
1011

11-
COMMANDS: List[Command] = [ChannelId, ChannelInfo, VideoInfo, VideoSearch, VideoComments, VideoLiveChat]
12+
COMMANDS: List[Command] = [
13+
ChannelId,
14+
ChannelInfo,
15+
VideoInfo,
16+
VideoSearch,
17+
VideoComments,
18+
VideoLiveChat,
19+
VideoTranscription,
20+
]
1221

1322
__all__ = [
1423
"Command",
@@ -19,4 +28,5 @@
1928
"VideoSearch",
2029
"VideoComments",
2130
"VideoLiveChat",
31+
"VideoTranscription",
2232
]
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
from pathlib import Path
2+
3+
from youtool import YouTube
4+
5+
from .base import Command
6+
7+
8+
class VideoTranscription(Command):
    """Download video transcriptions from YouTube videos based on IDs or URLs (or CSV filename with URLs/IDs inside), and save them to files."""

    name = "video-transcription"
    arguments = [
        {"name": "--ids", "type": str, "help": "Video IDs", "nargs": "*", "mutually_exclusive_group": "input_source"},
        {"name": "--urls", "type": str, "help": "Video URLs", "nargs": "*", "mutually_exclusive_group": "input_source"},
        {
            "name": "--urls-file-path",
            "type": Path,
            "help": "Video URLs CSV file path",
            "mutually_exclusive_group": "input_source",
        },
        {
            "name": "--ids-file-path",
            "type": Path,
            "help": "Video IDs CSV file path",
            "mutually_exclusive_group": "input_source",
        },
        {"name": "--output-dir", "type": Path, "help": "Output directory to save transcriptions", "required": True},
        {"name": "--language-code", "type": str, "help": "Language code for transcription", "required": True},
        {"name": "--url_column_name", "type": str, "help": "URL column name on CSV input files"},
        {"name": "--id_column_name", "type": str, "help": "ID column name on CSV input files"},
    ]

    # Column names used for CSV input files when none is given on the command line.
    ID_COLUMN_NAME: str = "video_id"
    URL_COLUMN_NAME: str = "video_url"

    @classmethod
    def execute(cls, **kwargs) -> str:
        """Download transcriptions for the requested videos into ``output_dir``.

        The videos can be given as:

        - a list of YouTube video IDs (`--ids`), or
        - a list of YouTube video URLs (`--urls`), or
        - a CSV file containing those URLs (`--urls-file-path`) or IDs (`--ids-file-path`).

        Args:
            ids (list[str], optional): List of YouTube video IDs.
            urls (list[str], optional): List of YouTube video URLs.
            urls_file_path (Path, optional): Path to a CSV file containing YouTube video URLs.
            ids_file_path (Path, optional): Path to a CSV file containing YouTube video IDs.
            output_dir (Path): Directory where the transcription files are saved.
            language_code (str): Language code for the transcription language.
            api_key (str): API key handed to the `YouTube` client.
            url_column_name (str, optional): Column name for URLs in the CSV input file.
                Defaults to "video_url".
            id_column_name (str, optional): Column name for IDs in the CSV input file.
                Defaults to "video_id".

        Returns:
            str: Newline-separated paths of the ``<video_id>.<language_code>.vtt`` files
            that exist in ``output_dir`` after the download, in the order the IDs were
            first given. Videos without such a file are left out.

        Raises:
            Exception: If no video ID or URL was provided by any input source.
        """
        # Copy the incoming lists so extending them never mutates the caller's objects.
        ids = list(kwargs.get("ids") or [])
        urls = list(kwargs.get("urls") or [])
        ids_file_path = kwargs.get("ids_file_path")
        urls_file_path = kwargs.get("urls_file_path")
        output_dir = kwargs.get("output_dir")
        language_code = kwargs.get("language_code")
        api_key = kwargs.get("api_key")

        # Read defaults through `cls` so subclasses can override the column names.
        url_column_name = kwargs.get("url_column_name") or cls.URL_COLUMN_NAME
        id_column_name = kwargs.get("id_column_name") or cls.ID_COLUMN_NAME

        if ids_file_path:
            ids.extend(cls.data_from_csv(ids_file_path, id_column_name))
        if urls_file_path:
            urls.extend(cls.data_from_csv(urls_file_path, url_column_name))

        # Fail before building the API client when there is nothing to download.
        if not ids and not urls:
            raise Exception("Either 'ids' or 'urls' must be provided for the video-transcription command")

        for url in urls:
            extracted = cls.video_id_from_url(url)
            # NOTE(review): the return shape of `video_id_from_url` is not visible from
            # this file; a list of IDs, a single ID string and None are all accepted.
            if extracted is None:
                continue
            if isinstance(extracted, str):
                ids.append(extracted)
            else:
                ids.extend(extracted)

        # Remove duplicates while keeping first-seen order, so the result is deterministic.
        ids = list(dict.fromkeys(ids))

        youtube = YouTube([api_key], disable_ipv6=True)
        youtube.videos_transcriptions(ids, language_code, output_dir)

        # Report only the transcription files that are actually present on disk.
        output_dir_path = Path(output_dir)
        candidate_paths = (output_dir_path / f"{v_id}.{language_code}.vtt" for v_id in ids)
        saved_transcriptions = [str(path) for path in candidate_paths if path.is_file()]
        return "\n".join(saved_transcriptions)

0 commit comments

Comments
 (0)