From 924c96c8a28017b940343751b0c46267c7f99210 Mon Sep 17 00:00:00 2001 From: Cory Francis Myers Date: Fri, 6 Sep 2024 12:36:27 -0700 Subject: [PATCH 1/2] [WIP] feat(/sources): add bulk DELETE endpoint Strict addition to the v1 Journalist API for deleting sources in bulk with O(1) instead O(n) latency from the perspective of a remote (over Tor) client. No breaking changes; no side effects. Deliberately overdocumented for now. You can test with "make dev" with (e.g.): $ curl -X DELETE -d "$(curl http://localhost:8081/api/v1/sources | jq ['.sources[] | .uuid]')" -H "Content-Type: application/json" http://localhost:8081/api/v1/sources % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-100 16231 100 16231 0 0 489k 0 --:--:-- --:--:-- --:--:-- 495k { "failed": [], "succeeded": [ "cf7bd746-80ce-4e1f-8c94-668fde45347b", "57d67752-4a63-4d72-8177-4e883248abe7", "bd900dbd-7bfa-4a50-b485-b969a2f0351e" ] } --- securedrop/journalist_app/api.py | 52 +++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/securedrop/journalist_app/api.py b/securedrop/journalist_app/api.py index a811e72adf..2b68a8ae91 100644 --- a/securedrop/journalist_app/api.py +++ b/securedrop/journalist_app/api.py @@ -8,7 +8,7 @@ import flask import werkzeug from db import db -from flask import Blueprint, abort, jsonify, request +from flask import Blueprint, abort, current_app, jsonify, request from journalist_app import utils from journalist_app.sessions import session from models import ( @@ -23,6 +23,7 @@ ) from sqlalchemy import Column from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm.exc import NoResultFound from store import NotEncrypted, Storage from two_factor import OtpSecretInvalid, OtpTokenInvalid from werkzeug.exceptions import default_exceptions @@ -124,6 +125,55 @@ def get_all_sources() -> Tuple[flask.Response, int]: sources = Source.query.filter_by(pending=False, deleted_at=None).all() return jsonify({"sources": [source.to_json() for source in sources]}), 200 + @api.route("/sources", methods=["DELETE"]) + def delete_sources() -> Tuple[flask.Response, int]: + """ + Given a list of `Source` UUIDs, iterate over the list and try to delete + each `Source`. Return HTTP 200 "Success" if all `Source`s were deleted + or HTTP 207 "Multi-Status" if some failed. (There's an argument for + HTTP 202 "Accepted", since filesystem-level deletion is still deferred + to the shredder; but that's an implementation detail that we can hide + from the client.) + + NB. According to RFC 9110 §9.3.5, a client may not assume that a DELETE + endpoint will accept a request body, but a DELETE endpoint may do so, + and in our case we can rule out middleboxes that might mangle it in + transit. + """ + if not isinstance(request.json, list): + abort(400, "no sources specified") + + succeeded = [] + failed = [] + for source_uuid in request.json: + try: + # Don't use `get_or_404()`: we'll handle the `NoResultFound` + # case ourselves, rather than abort with HTTP 404. + source = Source.query.filter(Source.uuid == source_uuid).one() + utils.delete_collection(source.filesystem_id) + succeeded.append(source_uuid) + + # Deletion is idempotent, so count nonexistent `Source`s as + # successes. + except NoResultFound: + succeeded.append(source_uuid) + + except Exception as exc: + current_app.logger.error(f"Failed to delete source {source_uuid}: {exc}") + failed.append(source_uuid) + + # Return the lists of both failed and succeeded deletion operations no + # matter what, so that the client can act directly on the results. + return ( + jsonify( + { + "failed": failed, + "succeeded": succeeded, + } + ), + 200 if len(failed) == 0 else 207, # Success or Multi-Status + ) + @api.route("/sources/", methods=["GET", "DELETE"]) def single_source(source_uuid: str) -> Tuple[flask.Response, int]: if request.method == "GET": From 6253662f6ad0ba028ed3a851b36bf94318b1b198 Mon Sep 17 00:00:00 2001 From: Cory Francis Myers Date: Thu, 12 Sep 2024 14:40:52 -0700 Subject: [PATCH 2/2] feat(/sources): limit DELETE requests to MAX_BATCH_SIZE --- securedrop/journalist_app/api.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/securedrop/journalist_app/api.py b/securedrop/journalist_app/api.py index 2b68a8ae91..9816bf9907 100644 --- a/securedrop/journalist_app/api.py +++ b/securedrop/journalist_app/api.py @@ -28,6 +28,8 @@ from two_factor import OtpSecretInvalid, OtpTokenInvalid from werkzeug.exceptions import default_exceptions +MAX_BATCH_SIZE = 50 # requests + def get_or_404(model: db.Model, object_id: str, column: Column) -> db.Model: result = model.query.filter(column == object_id).one_or_none() @@ -128,24 +130,30 @@ def get_all_sources() -> Tuple[flask.Response, int]: @api.route("/sources", methods=["DELETE"]) def delete_sources() -> Tuple[flask.Response, int]: """ - Given a list of `Source` UUIDs, iterate over the list and try to delete - each `Source`. Return HTTP 200 "Success" if all `Source`s were deleted - or HTTP 207 "Multi-Status" if some failed. (There's an argument for - HTTP 202 "Accepted", since filesystem-level deletion is still deferred - to the shredder; but that's an implementation detail that we can hide - from the client.) + Given a list of at most `MAX_BATCH_SIZE `Source` UUIDs, iterate over the + list and try to delete each `Source`. Return HTTP 200 "Success" if all + `Source`s were deleted or HTTP 207 "Multi-Status" if some failed. + (There's an argument for HTTP 202 "Accepted", since filesystem-level + deletion is still deferred to the shredder; but that's an implementation + detail that we can hide from the client.) + + Batching (under `MAX_BATCH_SIZE`) and retrying (of `Source`s returned as + `failed`) are responsibilities of the client. NB. According to RFC 9110 §9.3.5, a client may not assume that a DELETE endpoint will accept a request body, but a DELETE endpoint may do so, and in our case we can rule out middleboxes that might mangle it in transit. """ - if not isinstance(request.json, list): + data = request.json + if not isinstance(data, list): abort(400, "no sources specified") + elif len(data) > MAX_BATCH_SIZE: + abort(413, f"bulk requests may have at most {MAX_BATCH_SIZE} items") succeeded = [] failed = [] - for source_uuid in request.json: + for source_uuid in data: try: # Don't use `get_or_404()`: we'll handle the `NoResultFound` # case ourselves, rather than abort with HTTP 404.