Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
4846c76
Fix regex for channel ID extraction
Nov 7, 2025
2d7b47f
Implement YouTube CLI Tool with command structure and channel ID extr…
Nov 7, 2025
89983c7
Make lint
Nov 7, 2025
62896c4
Add entry points for youtool CLI in setup configuration
Nov 7, 2025
660edaf
Update execute method documentation for channel-id command to clarify…
Nov 7, 2025
c1a0f33
Add debug mode option to CLI and update argument types for channel ID…
Nov 8, 2025
b6a65a7
Add ChannelInfo command to retrieve YouTube channel information and u…
Nov 8, 2025
0c1db6d
Make lint
Nov 8, 2025
3d44ac9
Refactor ChannelInfo command to improve argument handling and ensure …
Nov 12, 2025
65b07e1
Add VideoInfo command to retrieve YouTube video information and updat…
Nov 12, 2025
95ff0bc
Refactor filter_fields from ChannelInfo to base.py for reuse across c…
Nov 12, 2025
bcc65a5
Add VideoSearch command to retrieve and export YouTube video information
Nov 13, 2025
bf0eebe
Add VideoComments command to retrieve and export YouTube video comments
Nov 13, 2025
08e5b3b
Update output file path type to Path in VideoComments command
Nov 14, 2025
598f4e4
Make lint
Nov 21, 2025
08cc8d4
Add VideoLiveChat command to retrieve and export live chat comments f…
Nov 14, 2025
049ac90
Add VideoTranscription command to download and save YouTube video tra…
Nov 21, 2025
e40301c
Make lint
Nov 21, 2025
3bae4cc
Refactor argument parser to improve API key handling and add default …
Nov 22, 2025
2f2b45e
Enhance argument parser to support multiple argument names for comman…
Nov 22, 2025
c20ea80
Add short options for command arguments in ChannelId
Nov 22, 2025
305a9fe
Add short options for arguments in ChannelInfo command
Nov 22, 2025
0de9928
Add short options for arguments in VideoComments command
Nov 22, 2025
4c36066
Add short options for arguments in VideoInfo command
Nov 22, 2025
06bdd2e
Add short options for arguments in VideoLiveChat command
Nov 22, 2025
98a7720
Add short options for arguments in VideoSearch command
Nov 22, 2025
af9dde6
Add short options for arguments in VideoTranscription command
Nov 22, 2025
1133cf7
Add YouToolChatDownloader and YouTubeCD classes for enhanced chat dat…
Nov 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion requirements/base.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
isodate
requests
requests==2.32.4
4 changes: 4 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ packages = find:
python_requires = >=3.7
install_requires = file: requirements/base.txt

[options.entry_points]
console_scripts =
youtool = youtool.cli:main

[options.extras_require]
cli = file: requirements/cli.txt
dev = file: requirements/dev.txt
Expand Down
7 changes: 4 additions & 3 deletions youtool/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import isodate # TODO: implement duration parser to remove dependency?
import requests

REGEXP_CHANNEL_ID = re.compile('"externalId":"([^"]+)"')
REGEXP_CHANNEL_ID = re.compile('"channelId":"([^"]+)"')
REGEXP_LOCATION_RADIUS = re.compile(r"^[0-9.]+(?:m|km|ft|mi)$")
REGEXP_NAIVE_DATETIME = re.compile(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}[T ][0-9]{2}:[0-9]{2}:[0-9]{2}$")
REGEXP_DATETIME_MILLIS = re.compile(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}[T ][0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+")
Expand Down Expand Up @@ -519,10 +519,11 @@ def video_comments(self, video_id: str):
yield parse_comment_data(reply)

def video_livechat(self, video_id: str, expand_emojis=True):
from chat_downloader import ChatDownloader
from chat_downloader.errors import ChatDisabled, LoginRequired, NoChatReplay

downloader = ChatDownloader()
from youtool.chat_downloader import YouToolChatDownloader

downloader = YouToolChatDownloader()
video_url = f"https://youtube.com/watch?v={video_id}"
try:
live = downloader.get_chat(video_url, message_groups=["messages", "superchat"])
Expand Down
142 changes: 142 additions & 0 deletions youtool/chat_downloader/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import itertools
import time
from urllib.parse import urlparse

from chat_downloader.chat_downloader import ChatDownloader
from chat_downloader.debugging import log
from chat_downloader.errors import ChatGeneratorError, InvalidURL, SiteNotSupported, URLNotProvided
from chat_downloader.formatting.format import ItemFormatter
from chat_downloader.output.continuous_write import ContinuousWriter
from chat_downloader.sites.common import SiteDefault
from chat_downloader.utils.timed_utils import TimedGenerator

from .youtube import YouTubeCD


class YouToolChatDownloader(ChatDownloader):
    """YouTool Chat Downloader subclass to fix YouTube data parsing issues.

    Only ``get_chat`` is overridden, so that ``YouTubeCD`` is used as the site
    parser instead of ``YouTubeChatDownloader``.
    """

    def get_chat(
        self,
        url=None,
        start_time=None,
        end_time=None,
        max_attempts=15,
        retry_timeout=None,
        interruptible_retry=True,
        timeout=None,
        inactivity_timeout=None,
        max_messages=None,
        message_groups=SiteDefault("message_groups"),
        message_types=None,
        output=None,
        overwrite=True,
        sort_keys=True,
        indent=4,
        format=SiteDefault("format"),
        format_file=None,
        chat_type="live",
        ignore=None,
        message_receive_timeout=0.1,
        buffer_size=4096,
    ):
        """Override get_chat to use YouTubeCD instead of YouTubeChatDownloader.

        Every keyword argument is collected into a ``params`` dict (after
        resolving ``SiteDefault`` placeholders through the site object) and handed
        to the ``YouTubeCD`` method selected by ``YouTubeCD.matches(url)``. The
        arguments interpreted directly by this method are:

        Args:
            url: Address of the chat to retrieve. Required.
            timeout / inactivity_timeout: When either is not ``None`` the chat
                iterator is wrapped in a ``TimedGenerator``.
            max_messages: When an ``int``, the chat iterator is truncated to that
                many items.
            format / format_file: Passed to ``ItemFormatter`` to build
                ``chat.format``.
            output, indent, sort_keys, overwrite: When ``output`` is truthy a
                ``ContinuousWriter`` configured with these values is attached.

        Returns:
            The chat object produced by the site parser, with formatter, optional
            writer and ``site`` attached. ``None`` can be returned when the URL had
            no scheme and the retry with ``https://`` yields a falsy chat.

        Raises:
            URLNotProvided: ``url`` is empty.
            NotImplementedError: The matched function name does not exist on the
                site object.
            ChatGeneratorError: The site function returned ``None``.
            SiteNotSupported: ``url`` did not match and has a network location.
            InvalidURL: ``url`` did not match, has a scheme but no network location.
        """
        if not url:
            raise URLNotProvided("No URL provided.")

        # Snapshot the call arguments before any other local is bound. The copy
        # matters: the mapping returned by locals() must not be mutated, and on
        # interpreters older than 3.13 it is the live frame dict, which a
        # debugger/tracer may refresh with locals defined further down.
        original_params = dict(locals())
        original_params.pop("self")

        # Unlike the parent implementation there is a single candidate parser.
        site = YouTubeCD
        match_info = site.matches(url)
        if match_info:  # match found
            function_name, match = match_info

            # Create new session
            self.create_session(site)
            site_object = self.sessions[site.__name__]

            # Parse site-defaults
            params = {k: site_object.get_site_value(v) for k, v in original_params.items()}

            log("info", f"Site: {site_object._NAME}")
            log("debug", f"Program parameters: {params}")

            get_chat = getattr(site_object, function_name, None)
            if not get_chat:
                raise NotImplementedError(f"{function_name} has not been implemented in {site.__name__}.")

            chat = get_chat(match, params)
            log("debug", f'Match found: "{match}". Running "{function_name}" function in "{site.__name__}".')

            if chat is None:
                raise ChatGeneratorError(f'No valid generator found in {site.__name__} for url "{url}"')

            # TODO throw error when max_messages is neither None nor an int
            if isinstance(params["max_messages"], int):
                chat.chat = itertools.islice(chat.chat, params["max_messages"])

            if params["timeout"] is not None or params["inactivity_timeout"] is not None:
                # Generator requires timing functionality
                chat.chat = TimedGenerator(chat.chat, params["timeout"], params["inactivity_timeout"])

                if isinstance(params["timeout"], (float, int)):
                    start = time.time()

                    def log_on_timeout():
                        log("debug", f"Timeout occurred after {time.time() - start} seconds.")

                    chat.chat.on_timeout = log_on_timeout

                if isinstance(params["inactivity_timeout"], (float, int)):

                    def log_on_inactivity_timeout():
                        log("debug", f"Inactivity timeout occurred after {params['inactivity_timeout']} seconds.")

                    chat.chat.on_inactivity_timeout = log_on_inactivity_timeout

            formatter = ItemFormatter(params["format_file"])
            chat.format = lambda x: formatter.format(x, format_name=params["format"])

            if params["output"]:
                chat.attach_writer(
                    ContinuousWriter(
                        params["output"],
                        indent=params["indent"],
                        sort_keys=params["sort_keys"],
                        overwrite=params["overwrite"],
                        lazy_initialise=True,
                    )
                )

            chat.site = site_object

            log("debug", f"Chat information: {chat.__dict__}")
            log("info", f'Retrieving chat for "{chat.title}".')

            return chat

        # No match: work out why and either retry or report the problem.
        parsed = urlparse(url)
        log("debug", str(parsed))

        if parsed.netloc:
            raise SiteNotSupported(f"Site not supported: {parsed.netloc}")
        elif not parsed.scheme:  # No scheme, try to correct
            # The retried URL always has a scheme and network location, so the
            # recursion ends after one extra call (match or SiteNotSupported).
            original_params["url"] = "https://" + url
            chat = self.get_chat(**original_params)
            if chat:
                return chat
        else:
            raise InvalidURL(f'Invalid URL: "{url}"')
123 changes: 123 additions & 0 deletions youtool/chat_downloader/youtube.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
from chat_downloader.sites.youtube import YouTubeChatDownloader
from chat_downloader.utils.core import float_or_none, multi_get, parse_iso8601, regex_search, try_parse_json


class YouTubeCD(YouTubeChatDownloader):
    """YouTube Chat Downloader subclass to fix YouTube data parsing issues."""

    def _parse_video_data(self, video_id, params=None, video_type="video"):
        """Collect video metadata and chat continuation tokens for a video or clip.

        Args:
            video_id: Identifier formatted into ``_YT_CLIP_TEMPLATE`` or
                ``_YT_VIDEO_TEMPLATE`` to build the page URL.
            params: Forwarded unchanged to ``_get_initial_info``.
            video_type: ``"clip"`` selects the clip URL template; any other value
                selects the video template.

        Returns:
            A tuple ``(details, player_response_info, yt_initial_data, ytcfg)``.
            ``details`` holds ``title``, ``author``, ``author_id``,
            ``original_video_id``, ``video_type``, ``start_time``, ``end_time``,
            ``duration``, ``continuation_info`` and ``status`` (plus
            ``clip_start_time``/``clip_end_time`` for clips).
        """
        details = {}

        if video_type == "clip":
            original_url = self._YT_CLIP_TEMPLATE.format(video_id)
        else:  # video_type == 'video'
            original_url = self._YT_VIDEO_TEMPLATE.format(video_id)

        yt_initial_data, ytcfg, player_response_info = self._get_initial_info(original_url, params)

        streaming_data = player_response_info.get("streamingData") or {}
        first_format = multi_get(streaming_data, "adaptiveFormats", 0) or multi_get(streaming_data, "formats", 0) or {}

        # Live streaming details
        player_renderer = multi_get(player_response_info, "microformat", "playerMicroformatRenderer") or {}
        live_details = player_renderer.get("liveBroadcastDetails") or {}

        # Video info
        video_details = player_response_info.get("videoDetails") or {}
        details["title"] = video_details.get("title")
        details["author"] = video_details.get("author")
        details["author_id"] = video_details.get("channelId")
        details["original_video_id"] = video_details.get("videoId")

        # Clip info
        clip_details = player_response_info.get("clipConfig")
        if clip_details:
            details["clip_start_time"] = float_or_none(clip_details.get("startTimeMs", 0)) / 1e3
            details["clip_end_time"] = float_or_none(clip_details.get("endTimeMs", 0)) / 1e3
            details["video_type"] = "clip"
        elif not video_details.get("isLiveContent"):
            details["video_type"] = "premiere"
        else:
            details["video_type"] = "video"

        start_timestamp = live_details.get("startTimestamp")
        end_timestamp = live_details.get("endTimestamp")
        details["start_time"] = parse_iso8601(start_timestamp) if start_timestamp else None
        details["end_time"] = parse_iso8601(end_timestamp) if end_timestamp else None

        # First truthy source wins: stream format, then video details, then microformat.
        details["duration"] = (
            (float_or_none(first_format.get("approxDurationMs", 0)) / 1e3)
            or float_or_none(video_details.get("lengthSeconds"))
            or float_or_none(player_renderer.get("lengthSeconds"))
        )

        if not details["duration"] and details["start_time"] and details["end_time"]:
            # NOTE(review): the 1e6 divisor presumes parse_iso8601 yields microseconds - confirm.
            details["duration"] = (details["end_time"] - details["start_time"]) / 1e6

        # Parse continuation info
        sub_menu_items = (
            multi_get(
                yt_initial_data,
                "contents",
                "twoColumnWatchNextResults",
                "conversationBar",
                "liveChatRenderer",
                "header",
                "liveChatHeaderRenderer",
                "viewSelector",
                "sortFilterSubMenuRenderer",
                "subMenuItems",
            )
            or []
        )
        details["continuation_info"] = {
            x["title"]: x["continuation"]["reloadContinuationData"]["continuation"] for x in sub_menu_items
        }

        # live, upcoming or past
        if video_details.get("isLive") or live_details.get("isLiveNow"):
            details["status"] = "live"
        elif video_details.get("isUpcoming"):
            details["status"] = "upcoming"
        else:
            details["status"] = "past"

        # Second source of continuation tokens: load the chat page itself and read
        # the view selector from its embedded initial data. Entries found here
        # are added to (or overwrite) the ones collected above.
        is_replay = details["status"] == "past"
        try:
            client_continuation = yt_initial_data["contents"]["twoColumnWatchNextResults"]["conversationBar"][
                "liveChatRenderer"
            ]["continuations"][0]["reloadContinuationData"]["continuation"]

            endpoint = "live_chat_replay" if is_replay else "live_chat"
            response = self._session_get(f"https://www.youtube.com/{endpoint}?continuation={client_continuation}")

            html = response.text
            yt = regex_search(html, self._YT_INITIAL_DATA_RE)
            live_chat_data = try_parse_json(yt)

            continuations = live_chat_data["continuationContents"]["liveChatContinuation"]["header"][
                "liveChatHeaderRenderer"
            ]["viewSelector"]["sortFilterSubMenuRenderer"]["subMenuItems"]

            top_continuation = continuations[0]["continuation"]["reloadContinuationData"]["continuation"]
            live_continuation = continuations[1]["continuation"]["reloadContinuationData"]["continuation"]

            if is_replay:
                details["continuation_info"]["Top chat replay"] = top_continuation
                details["continuation_info"]["Live chat replay"] = live_continuation
            else:
                details["continuation_info"]["Top chat"] = top_continuation
                details["continuation_info"]["Live chat"] = live_continuation
        except Exception:
            # Best effort: missing keys, a failed request or unparsable data just
            # leave continuation_info as collected above. Catching Exception
            # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
            pass

        return details, player_response_info, yt_initial_data, ytcfg
53 changes: 53 additions & 0 deletions youtool/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import argparse
import os

from youtool.commands import COMMANDS


def main(argv=None):
    """Run the youtool command-line interface.

    Builds the top-level parser (``--api-key`` and ``--debug``), lets every entry
    of ``COMMANDS`` register its own sub-command, parses the arguments and prints
    the value returned by the selected command's ``func``, which is called with
    every parsed argument as a keyword argument.

    The API key is taken from ``--api-key`` or, failing that, from the
    ``YOUTUBE_API_KEY`` environment variable.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which is how the console script invokes this function.

    Raises:
        SystemExit: Raised by ``parser.error`` (exit status 2) when no API key is
            available, or when the command fails and ``--debug`` is not set.
        Exception: Whatever the command raised, propagated unchanged when
            ``--debug`` is set.
    """
    parser = argparse.ArgumentParser(description="CLI Tool for managing YouTube videos and playlists")
    parser.add_argument(
        "-k",
        "--api-key",
        type=str,
        help="YouTube API key (defaults to environment variable YOUTUBE_API_KEY)",
        default=os.getenv("YOUTUBE_API_KEY"),
        dest="api_key",
    )
    parser.add_argument("-d", "--debug", help="Debug mode", dest="debug", default=False, action="store_true")

    subparsers = parser.add_subparsers(required=True, dest="command", title="Command", help="Command to be executed")

    for command in COMMANDS:
        command.parse_arguments(subparsers)

    args = parser.parse_args(argv)
    # Also covers an explicitly empty --api-key value, for which the parser
    # default (already the environment variable) is not applied.
    args.api_key = args.api_key or os.environ.get("YOUTUBE_API_KEY")

    if not args.api_key:
        parser.error("YouTube API Key is required")

    try:
        print(args.func(**vars(args)))
    except Exception as error:
        if args.debug:
            # Bare raise keeps the original traceback for debugging.
            raise
        parser.error(str(error))


if __name__ == "__main__":
    main()
32 changes: 32 additions & 0 deletions youtool/commands/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from typing import List

from .base import Command
from .channel_id import ChannelId
from .channel_info import ChannelInfo
from .video_comments import VideoComments
from .video_info import VideoInfo
from .video_livechat import VideoLiveChat
from .video_search import VideoSearch
from .video_transcription import VideoTranscription

# Commands made available by the CLI: youtool.cli iterates this list and calls
# ``parse_arguments(subparsers)`` on each entry to register its sub-command.
# NOTE(review): the entries look like the command classes themselves rather than
# instances, so ``List[Type[Command]]`` may be the more precise annotation - confirm.
COMMANDS: List[Command] = [
    ChannelId,
    ChannelInfo,
    VideoInfo,
    VideoSearch,
    VideoComments,
    VideoLiveChat,
    VideoTranscription,
]

# Public names of the package: the base class, the registry above and every
# individual command.
__all__ = [
    "Command",
    "COMMANDS",
    "ChannelId",
    "ChannelInfo",
    "VideoInfo",
    "VideoSearch",
    "VideoComments",
    "VideoLiveChat",
    "VideoTranscription",
]
Loading