diff --git a/internetarchive/__init__.py b/internetarchive/__init__.py
index 3a1e9df3..8f720399 100644
--- a/internetarchive/__init__.py
+++ b/internetarchive/__init__.py
@@ -50,6 +50,7 @@
     get_user_info,
     get_username,
     modify_metadata,
+    rerun_task,
     search_items,
     upload,
 )
diff --git a/internetarchive/api.py b/internetarchive/api.py
index f97d5d41..aeba7891 100644
--- a/internetarchive/api.py
+++ b/internetarchive/api.py
@@ -535,6 +535,26 @@ def search_items(
     )
 
 
+def rerun_task(
+    identifier: str,
+    task_id: int,
+) -> requests.Request | requests.Response | None:
+    """Rerun a task.
+
+    The session used for the request is obtained from :func:`get_session`.
+
+    :param identifier: The Archive.org identifier for which to rerun the task.
+
+    :param task_id: The task ID to rerun.
+
+    :returns: A :class:`requests.Response` object, or ``None`` if no task
+              with the given ID is found in the item's catalog.
+    """
+    archive_session = get_session()
+    item = archive_session.get_item(identifier)
+    return item.rerun_task(task_id)
+
+
 def configure(  # nosec: hardcoded_password_default
     username: str = "",
     password: str = "",
diff --git a/internetarchive/catalog.py b/internetarchive/catalog.py
index a5a7e42e..36180500 100644
--- a/internetarchive/catalog.py
+++ b/internetarchive/catalog.py
@@ -266,6 +266,7 @@ class CatalogTask:
     """
     def __init__(self, task_dict: Mapping, catalog_obj: Catalog):
         self.session = catalog_obj.session
+        self.auth = catalog_obj.auth
         self.request_kwargs = catalog_obj.request_kwargs
         self.color = None
         self.task_dict = task_dict
@@ -329,3 +330,34 @@ def get_task_log(
         r = session.get(url, params=params, auth=_auth, **request_kwargs)
         r.raise_for_status()
         return r.content.decode('utf-8', errors='surrogateescape')
+
+    def rerun(self):
+        """Rerun the task.
+
+        Sends a PUT request to rerun the task and returns the response.
+
+        :returns: The response object returned by the session's PUT request.
+
+        :raises ValueError: If this task has no ``task_id``.
+        :raises requests.exceptions.HTTPError: If the server returns an error status.
+        """
+        task_id = self.task_id  # type: ignore
+        if task_id is None:
+            raise ValueError('task_id is None')
+
+        url = f"{self.session.protocol}//{self.session.host}/services/tasks.php"
+
+        data = json.dumps({
+            "op": "rerun",
+            "task_id": task_id
+        })
+
+        response = self.session.put(
+            url,
+            data=data,
+            auth=self.auth,
+            **self.request_kwargs
+        )
+
+        response.raise_for_status()
+        return response
diff --git a/internetarchive/cli/ia_tasks.py b/internetarchive/cli/ia_tasks.py
index 5d6f6f96..232a143b 100644
--- a/internetarchive/cli/ia_tasks.py
+++ b/internetarchive/cli/ia_tasks.py
@@ -23,6 +23,8 @@
 import sys
 import warnings
 
+from requests.exceptions import HTTPError
+
 from internetarchive.cli.cli_utils import PostDataAction, QueryStringAction
 from internetarchive.utils import json
 
@@ -40,8 +42,10 @@ def setup(subparsers):
 
     parser.add_argument("-t", "--task",
                         nargs="*",
+                        metavar="TASK_ID",
                         help="Return information about the given task.")
     parser.add_argument("-G", "--get-task-log",
+                        metavar="TASK_ID",
                         help="Return the given tasks task log.")
     parser.add_argument("-p", "--parameter",
                         nargs="+",
@@ -80,24 +84,14 @@ def setup(subparsers):
                         type=str,
                         nargs="?",
                         help="Identifier for tasks specific operations.")
+    parser.add_argument("-R", "--rerun",
+                        type=int,
+                        metavar="TASK_ID",
+                        help="Rerun the specified task.")
 
     parser.set_defaults(func=lambda args: main(args, parser))
 
 
-def handle_task_submission_result(result, cmd):
-    """
-    Handle the result of a task submission.
-    """
-    if result.get("success"):
-        task_log_url = result.get("value", {}).get("log")
-        print(f"success: {task_log_url}", file=sys.stderr)
-    elif "already queued/running" in result.get("error", ""):
-        print(f"success: {cmd} task already queued/running", file=sys.stderr)
-    else:
-        print(f"error: {result.get('error')}", file=sys.stderr)
-    sys.exit(0 if result.get("success") else 1)
-
-
 def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
     """
     Main entry point for 'ia tasks'.
@@ -115,7 +109,42 @@ def main(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
                                      priority=int(args.data.get("priority", 0)),
                                      reduced_priority=args.reduced_priority,
                                      data=args.data)
-        handle_task_submission_result(r.json(), args.cmd)
+        j = r.json()
+        if j.get("success"):
+            task_log_url = j.get("value", {}).get("log")
+            print(f"success: {task_log_url}", file=sys.stderr)
+        elif "already queued/running" in j.get("error", ""):
+            print(f"success: {args.cmd} task already queued/running", file=sys.stderr)
+        else:
+            print(f"error: {j.get('error')}", file=sys.stderr)
+        sys.exit(0 if j.get("success") else 1)
+    elif args.rerun:
+        if not args.identifier:
+            parser.error('The positional argument `identifier` '
+                         'is required when using `--rerun`.')
+        item = args.session.get_item(args.identifier)
+        try:
+            r = item.rerun_task(args.rerun)
+        except HTTPError as exc:
+            if exc.response.status_code == 409:
+                print(f"warning: task {args.rerun} "
+                      f"for item '{args.identifier}' "
+                      "does not need to be reran")
+                sys.exit(0)
+            # Any other HTTP error is a real failure; report it rather than
+            # falling through to `r.json()` with `r` unbound.
+            print(f"error: {exc}", file=sys.stderr)
+            sys.exit(1)
+        if r is None:
+            # `Item.rerun_task` returns None when the task is not in the catalog.
+            print(f"error: task {args.rerun} not found "
+                  f"for item '{args.identifier}'", file=sys.stderr)
+            sys.exit(1)
+        j = r.json()
+        if not j.get("success"):
+            print(f"error: {j.get('error')}", file=sys.stderr)
+            sys.exit(1)
+        print(f"success: Reran task {args.rerun} for item '{args.identifier}'")
         sys.exit(0)
 
     # Tasks read API.
diff --git a/internetarchive/item.py b/internetarchive/item.py
index e695df72..7b4f83cb 100644
--- a/internetarchive/item.py
+++ b/internetarchive/item.py
@@ -485,6 +485,22 @@ def dark(self,
         r.raise_for_status()
         return r
 
+    def rerun_task(self, task_id: int) -> Response:  # type: ignore
+        """Rerun a task.
+
+        :param task_id: The ID of the task to rerun. A string is converted
+                        to ``int`` before matching.
+
+        :returns: :class:`requests.Response` if the task is found, otherwise None.
+        """
+        if isinstance(task_id, str):
+            task_id = int(task_id)
+        for t in self.get_catalog():
+            if t.task_id == task_id:  # type: ignore[attr-defined]
+                return t.rerun()
+        # No task with this ID in the item's catalog.
+        return None
+
     def get_review(self) -> Response:
         u = f'{self.session.protocol}//{self.session.host}/services/reviews.php'
         p = {'identifier': self.identifier}