diff --git a/setup.cfg b/setup.cfg
index 77478cb..36ff81d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -24,6 +24,10 @@ packages = find:
 python_requires = >=3.7
 install_requires = file: requirements/base.txt
 
+[options.entry_points]
+console_scripts =
+    youtool = youtool.cli:main
+
 [options.extras_require]
 cli = file: requirements/cli.txt
 dev = file: requirements/dev.txt
diff --git a/youtool/__init__.py b/youtool/__init__.py
index 28bbe83..a8cca7f 100644
--- a/youtool/__init__.py
+++ b/youtool/__init__.py
@@ -11,7 +11,7 @@ import isodate  # TODO: implement duration parser to remove dependency?
 import requests
 
 
-REGEXP_CHANNEL_ID = re.compile('"externalId":"([^"]+)"')
+REGEXP_CHANNEL_ID = re.compile('"channelId":"([^"]+)"')
 REGEXP_LOCATION_RADIUS = re.compile(r"^[0-9.]+(?:m|km|ft|mi)$")
 REGEXP_NAIVE_DATETIME = re.compile(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}[T ][0-9]{2}:[0-9]{2}:[0-9]{2}$")
 REGEXP_DATETIME_MILLIS = re.compile(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}[T ][0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+")
diff --git a/youtool/cli.py b/youtool/cli.py
new file mode 100644
index 0000000..517c150
--- /dev/null
+++ b/youtool/cli.py
@@ -0,0 +1,46 @@
+import argparse
+import os
+
+from youtool.commands import COMMANDS
+
+
+def main():
+    """Run the youtool command-line interface.
+
+    Builds the argument parser (global ``--api-key`` and ``--debug`` options plus one sub-parser per entry in
+    ``COMMANDS``), parses the command line and resolves the YouTube API key from ``--api-key`` or, failing
+    that, from the ``YOUTUBE_API_KEY`` environment variable. The selected command is then executed with the
+    parsed arguments as keyword arguments and its return value is printed.
+
+    Raises:
+        SystemExit: Raised by ``parser.error`` (exit status 2) when the API key is missing or when the command
+            fails and ``--debug`` is not set.
+        Exception: Whatever the command raised, re-raised unchanged when ``--debug`` is set.
+    """
+    parser = argparse.ArgumentParser(description="CLI Tool for managing YouTube videos and playlists")
+    parser.add_argument("--api-key", type=str, help="YouTube API Key", dest="api_key")
+    parser.add_argument("--debug", help="Debug mode", dest="debug", default=False, action="store_true")
+
+    subparsers = parser.add_subparsers(required=True, dest="command", title="Command", help="Command to be executed")
+
+    for command in COMMANDS:
+        command.parse_arguments(subparsers)
+
+    args = parser.parse_args()
+    args.api_key = args.api_key or os.environ.get("YOUTUBE_API_KEY")
+
+    if not args.api_key:
+        parser.error("YouTube API Key is required")
+
+    try:
+        print(args.func(**vars(args)))
+    except Exception as error:
+        if args.debug:
+            # Bare ``raise`` keeps the original traceback intact for debugging.
+            raise
+        # ``parser.error`` expects a message string, not an exception instance.
+        parser.error(str(error))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/youtool/commands/__init__.py b/youtool/commands/__init__.py
new file mode 100644
index 0000000..f827894
--- /dev/null
+++ b/youtool/commands/__init__.py
@@ -0,0 +1,12 @@
+from .base import Command
+from .channel_id import ChannelId
+from .channel_info import ChannelInfo
+
+COMMANDS = [ChannelId, ChannelInfo]
+
+__all__ = [
+    "Command",
+    "COMMANDS",
+    "ChannelId",
+    "ChannelInfo",
+]
diff --git a/youtool/commands/base.py b/youtool/commands/base.py
new file mode 100644
index 0000000..cf3e7a9
--- /dev/null
+++ b/youtool/commands/base.py
@@ -0,0 +1,129 @@
+import argparse
+import csv
+from datetime import datetime
+from io import StringIO
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+
+
+class Command:
+    """Base class that every CLI command inherits from.
+
+    Attributes:
+        name (str): The sub-command name used on the command line.
+        arguments (List[Dict[str, Any]]): One dictionary per command-line argument. The ``name`` key holds the
+            option string, the optional ``mutually_exclusive_group`` key names the argument group the option is
+            listed under, and every other key is forwarded to ``add_argument``.
+    """
+
+    name: str
+    arguments: List[Dict[str, Any]]
+
+    @classmethod
+    def generate_parser(cls, subparsers: argparse._SubParsersAction):
+        """Creates a parser for the command and adds it to the subparsers.
+
+        Args:
+            subparsers (argparse._SubParsersAction): The subparsers action to add the parser to.
+
+        Returns:
+            argparse.ArgumentParser: The parser for the command.
+        """
+        return subparsers.add_parser(cls.name, help=cls.__doc__)
+
+    @classmethod
+    def parse_arguments(cls, subparsers: argparse._SubParsersAction) -> None:
+        """Registers the command's arguments and sets ``cls.execute`` as the function to call for the command.
+
+        Args:
+            subparsers (argparse._SubParsersAction): The subparsers action to add the parser to.
+        """
+        parser = cls.generate_parser(subparsers)
+        groups = {}
+
+        for argument in cls.arguments:
+            argument_copy = {**argument}
+            argument_name = argument_copy.pop("name")
+
+            group_name = argument_copy.pop("mutually_exclusive_group", None)
+            if group_name:
+                # NOTE: this is a plain argument group; exclusivity is not enforced by argparse here, and the
+                # commands merge values given through both options of a group.
+                if group_name not in groups:
+                    groups[group_name] = parser.add_argument_group(group_name)
+                groups[group_name].add_argument(argument_name, **argument_copy)
+            else:
+                parser.add_argument(argument_name, **argument_copy)
+        parser.set_defaults(func=cls.execute)
+
+    @classmethod
+    def execute(cls, **kwargs) -> str:  # noqa: D417
+        """Executes the command.
+
+        This method should be overridden by subclasses to define the command's behavior.
+
+        Args:
+            **kwargs: The parsed command-line arguments, passed as keyword arguments.
+
+        Returns:
+            str: The text to be printed as the command's result.
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    def data_from_csv(file_path: Path, data_column_name: Optional[str] = None) -> List[str]:
+        """Extracts the values of one column from a CSV file.
+
+        Args:
+            file_path: The path to the CSV file.
+            data_column_name: The name of the column to read. There is no default column: the name must be one
+                of the header fields of the file.
+
+        Returns:
+            The values found in the column, as strings, in file order.
+
+        Raises:
+            FileNotFoundError: If `file_path` is not an existing file.
+            ValueError: If the file has no header row.
+            Exception: If the column is not present in the header.
+        """
+        if not file_path.is_file():
+            raise FileNotFoundError(f"Invalid file path: {file_path}")
+
+        with file_path.open("r", newline="") as csv_file:
+            reader = csv.DictReader(csv_file)
+            fieldnames = reader.fieldnames
+
+            if fieldnames is None:
+                raise ValueError("Fieldnames is None")
+
+            if data_column_name not in fieldnames:
+                raise Exception(f"Column {data_column_name} not found on {file_path}")
+            return [str(row[data_column_name]) for row in reader if row.get(data_column_name) is not None]
+
+    @classmethod
+    def data_to_csv(cls, data: List[Dict], output_file_path: Optional[Union[str, Path]] = None) -> str:
+        """Serializes a list of dictionaries as CSV.
+
+        Args:
+            data: The rows to write. The keys of the first row are used as the CSV header.
+            output_file_path: Where to save the CSV. If it is an existing directory, a file named after the
+                command plus a timestamp is created inside it. If not provided, nothing is written to disk.
+
+        Returns:
+            str: The path of the created CSV file or, if no path is provided, the contents of the CSV as a string.
+        """
+        if output_file_path:
+            output_path = Path(output_file_path)
+            if output_path.is_dir():
+                command_name = cls.name.replace("-", "_")
+                # Include date and hour so names do not repeat whenever minute/second/microsecond coincide.
+                timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
+                output_file_path = output_path / f"{command_name}_{timestamp}.csv"
+
+        with (Path(output_file_path).open("w", newline="") if output_file_path else StringIO()) as csv_file:
+            writer = csv.DictWriter(csv_file, fieldnames=list(data[0].keys()) if data else [])
+            writer.writeheader()
+            writer.writerows(data)
+            # Read the in-memory buffer before the ``with`` block closes it.
+            return str(output_file_path) if output_file_path else csv_file.getvalue()
diff --git a/youtool/commands/channel_id.py b/youtool/commands/channel_id.py
new file mode 100644
index 0000000..5bf45ad
--- /dev/null
+++ b/youtool/commands/channel_id.py
@@ -0,0 +1,95 @@
+from pathlib import Path
+
+from youtool import YouTube
+
+from .base import Command
+
+
+class ChannelId(Command):
+    """Get channel IDs from a list of URLs (or CSV filename with URLs inside), generate CSV output (just the IDs)."""
+
+    name = "channel-id"
+    arguments = [
+        {
+            "name": "--urls",
+            "type": str,
+            "help": "Channels urls",
+            "nargs": "*",
+            "mutually_exclusive_group": "input_source",
+        },
+        {
+            "name": "--urls-file-path",
+            "type": Path,
+            "help": "Channels urls csv file path",
+            "mutually_exclusive_group": "input_source",
+        },
+        {"name": "--output-file-path", "type": Path, "help": "Output csv file path"},
+        {"name": "--url-column-name", "type": str, "help": "URL column name on csv input files"},
+        {"name": "--id-column-name", "type": str, "help": "Channel ID column name on csv output files"},
+    ]
+
+    URL_COLUMN_NAME: str = "channel_url"
+    CHANNEL_ID_COLUMN_NAME: str = "channel_id"
+
+    @classmethod
+    def execute(cls, **kwargs) -> str:
+        """Execute the channel-id command to fetch YouTube channel IDs from URLs and output them as CSV.
+
+        The URLs come from `--urls`, from the CSV file given by `--urls-file-path`, or from both (the two
+        lists are concatenated).
+
+        Args:
+            urls (list[str], optional): List of YouTube channel URLs.
+            urls_file_path (Path, optional): Path to a CSV file containing YouTube channel URLs.
+            output_file_path (Path, optional): Path to the output CSV file where channel IDs will be saved.
+                If not provided, the CSV contents are returned as a string.
+            api_key (str): The API key to authenticate with the YouTube Data API.
+            url_column_name (str, optional): The name of the column in the urls_file_path CSV file that contains
+                the URLs. Default is "channel_url".
+            id_column_name (str, optional): The name of the column for channel IDs in the output CSV file.
+                Default is "channel_id".
+
+        Returns:
+            str: The path of the generated CSV file if output_file_path is given, otherwise the CSV contents.
+
+        Raises:
+            ValueError: If no URL is provided through `urls` or `urls_file_path`.
+        """
+        urls = cls.resolve_urls(
+            kwargs.get("urls") or [],
+            kwargs.get("urls_file_path"),
+            kwargs.get("url_column_name"),
+        )
+        id_column_name = kwargs.get("id_column_name") or cls.CHANNEL_ID_COLUMN_NAME
+
+        youtube = YouTube([kwargs.get("api_key")], disable_ipv6=True)
+        channels_ids = [youtube.channel_id_from_url(url) for url in urls if url]
+
+        return cls.data_to_csv(
+            data=[{id_column_name: channel_id} for channel_id in channels_ids],
+            output_file_path=kwargs.get("output_file_path"),
+        )
+
+    @classmethod
+    def resolve_urls(cls, urls, urls_file_path, url_column_name):
+        """Merge the URLs given directly with the ones read from a CSV file.
+
+        Args:
+            urls (list[str]): URLs given on the command line (may be empty).
+            urls_file_path (Path, optional): CSV file to read additional URLs from.
+            url_column_name (str, optional): Column holding the URLs; defaults to `URL_COLUMN_NAME`.
+
+        Returns:
+            list[str]: A new list with every URL; the `urls` argument is not modified.
+
+        Raises:
+            ValueError: If no URL is provided through either source.
+        """
+        urls = list(urls or [])
+        if urls_file_path:
+            urls += cls.data_from_csv(
+                file_path=Path(urls_file_path), data_column_name=url_column_name or cls.URL_COLUMN_NAME
+            )
+        if not urls:
+            raise ValueError("Either 'urls' or 'urls_file_path' must be provided for the channel-id command")
+        return urls
diff --git a/youtool/commands/channel_info.py b/youtool/commands/channel_info.py
new file mode 100644
index 0000000..5962774
--- /dev/null
+++ b/youtool/commands/channel_info.py
@@ -0,0 +1,147 @@
+from pathlib import Path
+from typing import Dict, List, Optional
+
+from youtool import YouTube
+
+from .base import Command
+
+
+class ChannelInfo(Command):
+    """Get channel info from a list of IDs (or CSV filename with IDs inside), generate CSV output
+    (same schema for `channel` dicts)
+    """
+
+    name = "channel-info"
+    arguments = [
+        {
+            "name": "--urls",
+            "type": str,
+            "help": "Channel URLs",
+            "nargs": "*",
+            "mutually_exclusive_group": "input_source",
+        },
+        {"name": "--usernames", "type": str, "help": "Channel usernames", "nargs": "*"},
+        {"name": "--ids", "type": str, "help": "Channel IDs", "nargs": "*"},
+        {
+            "name": "--urls-file-path",
+            "type": Path,
+            "help": "Channel URLs CSV file path",
+            "mutually_exclusive_group": "input_source",
+        },
+        {"name": "--usernames-file-path", "type": Path, "help": "Channel usernames CSV file path"},
+        {"name": "--ids-file-path", "type": Path, "help": "Channel IDs CSV file path"},
+        {"name": "--output-file-path", "type": Path, "help": "Output CSV file path"},
+        {"name": "--url-column-name", "type": str, "help": "URL column name on CSV input files"},
+        {"name": "--username-column-name", "type": str, "help": "Username column name on CSV input files"},
+        {"name": "--id-column-name", "type": str, "help": "ID column name on CSV input files"},
+    ]
+
+    URL_COLUMN_NAME: str = "channel_url"
+    USERNAME_COLUMN_NAME: str = "channel_username"
+    ID_COLUMN_NAME: str = "channel_id"
+    INFO_COLUMNS: List[str] = [
+        "id",
+        "title",
+        "description",
+        "published_at",
+        "view_count",
+        "subscriber_count",
+        "video_count",
+    ]
+
+    @staticmethod
+    def filter_fields(channel_info: Dict, info_columns: Optional[List] = None):
+        """Filters the fields of a dictionary containing channel information based on
+        specified columns.
+
+        Args:
+            channel_info (Dict): A dictionary containing channel information.
+            info_columns (Optional[List], optional): A list specifying which fields
+                to include in the filtered output. If None or empty, returns the entire
+                channel_info dictionary. Defaults to None.
+
+        Returns:
+            Dict: A dictionary containing only the fields specified in info_columns
+                (if provided) or the entire channel_info dictionary otherwise.
+        """
+        return (
+            {field: value for field, value in channel_info.items() if field in info_columns}
+            if info_columns
+            else channel_info
+        )
+
+    @classmethod
+    def execute(cls, **kwargs) -> str:
+        """Execute the channel-info command to fetch YouTube channel information from URLs,
+        usernames or IDs and output it as CSV.
+
+        Args:
+            urls (list[str], optional): A list of YouTube channel URLs.
+            usernames (list[str], optional): A list of YouTube channel usernames.
+            ids (list[str], optional): A list of YouTube channel IDs.
+            urls_file_path (Path, optional): Path to a CSV file containing YouTube channel URLs.
+            usernames_file_path (Path, optional): Path to a CSV file containing YouTube channel usernames.
+            ids_file_path (Path, optional): Path to a CSV file containing YouTube channel IDs.
+            output_file_path (Path, optional): Path to the output CSV file where channel information will be saved.
+                If not provided, the CSV contents are returned as a string.
+            api_key (str): The API key to authenticate with the YouTube Data API.
+            url_column_name (str, optional): The name of the column in the `urls_file_path` CSV file with the URLs.
+                Default is "channel_url".
+            username_column_name (str, optional): The name of the column in the `usernames_file_path` CSV file with the
+                usernames. Default is "channel_username".
+            id_column_name (str, optional): The name of the column in the `ids_file_path` CSV file with the IDs.
+                Default is "channel_id".
+            info_columns (str, optional): Comma-separated list of columns to include in the output CSV.
+                Default is the class attribute `INFO_COLUMNS`.
+
+        Returns:
+            str: The path of the generated CSV file if `output_file_path` is given, otherwise the CSV contents.
+
+        Raises:
+            Exception: If no URL, username or ID is provided, directly or through a CSV file.
+        """
+        urls = list(kwargs.get("urls") or [])
+        usernames = list(kwargs.get("usernames") or [])
+        ids = list(kwargs.get("ids") or [])
+        urls_file_path = kwargs.get("urls_file_path")
+        usernames_file_path = kwargs.get("usernames_file_path")
+        ids_file_path = kwargs.get("ids_file_path")
+        output_file_path = kwargs.get("output_file_path")
+        api_key = kwargs.get("api_key")
+
+        url_column_name = kwargs.get("url_column_name") or cls.URL_COLUMN_NAME
+        username_column_name = kwargs.get("username_column_name") or cls.USERNAME_COLUMN_NAME
+        id_column_name = kwargs.get("id_column_name") or cls.ID_COLUMN_NAME
+        info_columns = kwargs.get("info_columns")
+
+        info_columns = (
+            [column.strip() for column in info_columns.split(",")] if info_columns else cls.INFO_COLUMNS
+        )
+
+        if urls_file_path:
+            urls += cls.data_from_csv(urls_file_path, url_column_name)
+        if usernames_file_path:
+            usernames += cls.data_from_csv(usernames_file_path, username_column_name)
+        if ids_file_path:
+            ids += cls.data_from_csv(ids_file_path, id_column_name)
+
+        if not urls and not usernames and not ids:
+            raise Exception("Either 'urls', 'usernames', or 'ids' must be provided for the channel-info command")
+
+        youtube = YouTube([api_key], disable_ipv6=True)
+
+        channels_ids = (
+            [youtube.channel_id_from_url(url) for url in urls if url]
+            + [youtube.channel_id_from_username(username) for username in usernames if username]
+            + ids
+        )
+        # ``dict.fromkeys`` drops duplicates while keeping first-seen order, so IDs are requested deterministically.
+        channel_ids = list(dict.fromkeys(channel_id for channel_id in channels_ids if channel_id))
+        return cls.data_to_csv(
+            data=[
+                cls.filter_fields(channel_info, info_columns)
+                for channel_info in (youtube.channels_infos(channel_ids) or [])
+                if channel_info
+            ],
+            output_file_path=output_file_path,
+        )