From 739eab7490fb4121e6dd1166beac8e300d7591e6 Mon Sep 17 00:00:00 2001
From: tianfengguo
Date: Sat, 30 May 2026 02:50:31 +0800
Subject: [PATCH] feat: add GDPR-compliant PII export and delete workflow (Closes #76)

---
 packages/backend/app/__init__.py        |  19 ++
 packages/backend/app/db/schema.sql      |   1 +
 packages/backend/app/models.py          |   1 +
 packages/backend/app/routes/__init__.py |   2 +
 packages/backend/app/routes/gdpr.py     |  85 +++++++
 packages/backend/app/services/gdpr.py   | 144 +++++++++++
 packages/backend/tests/conftest.py      |  29 +++
 packages/backend/tests/test_gdpr.py     | 324 ++++++++++++++++++++++++
 8 files changed, 605 insertions(+)
 create mode 100644 packages/backend/app/routes/gdpr.py
 create mode 100644 packages/backend/app/services/gdpr.py
 create mode 100644 packages/backend/tests/test_gdpr.py

diff --git a/packages/backend/app/__init__.py b/packages/backend/app/__init__.py
index cdf76b45f..828b37f21 100644
--- a/packages/backend/app/__init__.py
+++ b/packages/backend/app/__init__.py
@@ -118,3 +118,22 @@ def _ensure_schema_compatibility(app: Flask) -> None:
         conn.rollback()
     finally:
         conn.close()
+
+    # GDPR: add ip_address column to audit_logs if missing
+    conn = db.engine.raw_connection()
+    try:
+        cur = conn.cursor()
+        cur.execute(
+            """
+            ALTER TABLE audit_logs
+            ADD COLUMN IF NOT EXISTS ip_address VARCHAR(45)
+            """
+        )
+        conn.commit()
+    except Exception:
+        app.logger.exception(
+            "Schema compatibility patch failed for audit_logs.ip_address"
+        )
+        conn.rollback()
+    finally:
+        conn.close()
diff --git a/packages/backend/app/db/schema.sql b/packages/backend/app/db/schema.sql
index 410189def..7c221b5ce 100644
--- a/packages/backend/app/db/schema.sql
+++ b/packages/backend/app/db/schema.sql
@@ -121,5 +121,6 @@ CREATE TABLE IF NOT EXISTS audit_logs (
   id SERIAL PRIMARY KEY,
   user_id INT REFERENCES users(id) ON DELETE SET NULL,
   action VARCHAR(100) NOT NULL,
+  ip_address VARCHAR(45),
   created_at TIMESTAMP NOT NULL DEFAULT NOW()
 );
diff --git a/packages/backend/app/models.py b/packages/backend/app/models.py
index 64d448104..3a64e0f76 100644
--- a/packages/backend/app/models.py
+++ b/packages/backend/app/models.py
@@ -132,4 +132,5 @@ class AuditLog(db.Model):
     id = db.Column(db.Integer, primary_key=True)
     user_id = db.Column(db.Integer, db.ForeignKey("users.id"), nullable=True)
     action = db.Column(db.String(100), nullable=False)
+    ip_address = db.Column(db.String(45), nullable=True)
     created_at = db.Column(db.DateTime, default=datetime.utcnow, nullable=False)
diff --git a/packages/backend/app/routes/__init__.py b/packages/backend/app/routes/__init__.py
index f13b0f897..288b98166 100644
--- a/packages/backend/app/routes/__init__.py
+++ b/packages/backend/app/routes/__init__.py
@@ -7,6 +7,7 @@
 from .categories import bp as categories_bp
 from .docs import bp as docs_bp
 from .dashboard import bp as dashboard_bp
+from .gdpr import bp as gdpr_bp
 
 
 def register_routes(app: Flask):
@@ -18,3 +19,4 @@ def register_routes(app: Flask):
     app.register_blueprint(categories_bp, url_prefix="/categories")
     app.register_blueprint(docs_bp, url_prefix="/docs")
     app.register_blueprint(dashboard_bp, url_prefix="/dashboard")
+    app.register_blueprint(gdpr_bp, url_prefix="/gdpr")
diff --git a/packages/backend/app/routes/gdpr.py b/packages/backend/app/routes/gdpr.py
new file mode 100644
index 000000000..501030713
--- /dev/null
+++ b/packages/backend/app/routes/gdpr.py
@@ -0,0 +1,85 @@
+"""GDPR-compliant PII export and deletion routes.
+
+Endpoints:
+    GET    /gdpr/users/<int:user_id>/export  – download all personal data as JSON
+    DELETE /gdpr/users/<int:user_id>         – irreversibly delete user and all data
+"""
+from flask import Blueprint, jsonify, request
+from flask_jwt_extended import jwt_required, get_jwt_identity
+from ..extensions import db
+from ..models import User
+from ..services.gdpr import collect_user_data, permanently_delete_user, log_audit_action
+
+bp = Blueprint("gdpr", __name__)
+
+
+@bp.get("/users/<int:user_id>/export")
+@jwt_required()
+def export_user_data(user_id: int):
+    """Export all PII for the authenticated user as a JSON package.
+
+    Responds 403 when the token identity differs from ``user_id`` and 404 when
+    the user no longer exists. Every successful export is recorded in the
+    audit log together with the requesting IP address.
+    """
+    current_uid = int(get_jwt_identity())
+    if current_uid != user_id:
+        return jsonify(error="forbidden – can only export your own data"), 403
+
+    user = db.session.get(User, user_id)
+    if not user:
+        return jsonify(error="user not found"), 404
+
+    ip_address = request.remote_addr
+    data = collect_user_data(user_id)
+
+    # Audit trail
+    log_audit_action(
+        user_id=user_id,
+        action="GDPR_DATA_EXPORT",
+        ip_address=ip_address,
+    )
+
+    return jsonify(data), 200
+
+
+@bp.delete("/users/<int:user_id>")
+@jwt_required()
+def delete_user(user_id: int):
+    """Irreversibly delete the authenticated user and all associated data.
+
+    Requires a JSON body with ``{"confirm": true}`` to prevent accidental
+    deletion. Only the JSON boolean ``true`` is accepted; other truthy values
+    (such as the string ``"false"``) are rejected with 400.
+    """
+    current_uid = int(get_jwt_identity())
+    if current_uid != user_id:
+        return jsonify(error="forbidden – can only delete your own data"), 403
+
+    user = db.session.get(User, user_id)
+    if not user:
+        return jsonify(error="user not found"), 404
+
+    # Require explicit confirmation. The body may be any JSON value, so check
+    # that it is an object before reading the flag.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict) or data.get("confirm") is not True:
+        return jsonify(error="confirmation required – send {\"confirm\": true}"), 400
+
+    ip_address = request.remote_addr
+
+    # Audit trail – log *before* deletion so user_id FK is still valid
+    log_audit_action(
+        user_id=user_id,
+        action="GDPR_DATA_DELETE",
+        ip_address=ip_address,
+    )
+
+    # permanently_delete_user detaches the audit rows (user_id -> NULL) in the
+    # same transaction as the deletion, so a failed deletion never leaves the
+    # audit trail anonymised while the account still exists.
+    success = permanently_delete_user(user_id)
+    if not success:
+        return jsonify(error="deletion failed"), 500
+
+    return jsonify(message="user and all associated data permanently deleted"), 200
diff --git a/packages/backend/app/services/gdpr.py b/packages/backend/app/services/gdpr.py
new file mode 100644
index 000000000..1700cd7e3
--- /dev/null
+++ b/packages/backend/app/services/gdpr.py
@@ -0,0 +1,144 @@
+"""GDPR-compliant PII export and deletion service."""
+import logging
+from datetime import datetime, timezone
+from ..extensions import db
+from ..models import (
+    User,
+    Category,
+    Expense,
+    RecurringExpense,
+    Bill,
+    Reminder,
+    AdImpression,
+    UserSubscription,
+    AuditLog,
+)
+
+logger = logging.getLogger("finmind.gdpr")
+
+# Column-name fragments that mark credential material. Matching columns are
+# left out of every export: a password hash is a secret, not portable data.
+_SENSITIVE_COLUMN_MARKERS = ("password",)
+
+
+def _to_json_safe(value):
+    """Return *value* in a form that ``jsonify`` can serialise."""
+    if value is None or isinstance(value, (bool, int, float, str)):
+        return value
+    if hasattr(value, "isoformat"):  # datetime, date and time objects
+        return value.isoformat()
+    return str(value)  # anything else, e.g. Decimal amounts
+
+
+def _export_row(instance) -> dict:
+    """Convert a model instance to a JSON-safe dict of its exportable columns."""
+    return {
+        attr.key: _to_json_safe(getattr(instance, attr.key))
+        for attr in db.inspect(instance).mapper.column_attrs
+        if not any(marker in attr.key.lower() for marker in _SENSITIVE_COLUMN_MARKERS)
+    }
+
+
+def collect_user_data(user_id: int) -> dict:
+    """Collect all PII associated with a user for export.
+
+    Returns a serialisable dict containing the personal data stored for the
+    given user: the user row plus the user's rows from each related table,
+    grouped by collection name. Credential columns are omitted. Returns an
+    empty dict when the user does not exist.
+    """
+    user = db.session.get(User, user_id)
+    if not user:
+        return {}
+
+    def _owned(model):
+        """Export every row of *model* that belongs to the user."""
+        rows = db.session.query(model).filter_by(user_id=user_id).all()
+        return [_export_row(row) for row in rows]
+
+    return {
+        "export_timestamp": datetime.now(timezone.utc).isoformat(),
+        "user": _export_row(user),
+        "categories": _owned(Category),
+        "expenses": _owned(Expense),
+        "recurring_expenses": _owned(RecurringExpense),
+        "bills": _owned(Bill),
+        "reminders": _owned(Reminder),
+        "ad_impressions": _owned(AdImpression),
+        "subscriptions": _owned(UserSubscription),
+        # Audit entries hold the user's IP addresses, which are personal data.
+        "audit_logs": _owned(AuditLog),
+    }
+
+
+def permanently_delete_user(user_id: int) -> bool:
+    """Irreversibly delete all data associated with the given user.
+
+    This operation cannot be undone. All related records across every table
+    are deleted (or detached, where the row has to outlive the user), and
+    finally the user row itself is removed – all in a single transaction.
+
+    Returns True if the user existed and was deleted. Returns False if the
+    user does not exist or the deletion failed; on failure the transaction is
+    rolled back, so nothing is removed.
+    """
+    user = db.session.get(User, user_id)
+    if not user:
+        return False
+
+    try:
+        # Delete related records in dependency order
+        # (Some tables have FK ON DELETE CASCADE, but we are explicit for safety,
+        # especially since SQLite tests may not enforce FK constraints.)
+        for model in (
+            Reminder,
+            UserSubscription,
+            Bill,
+            Expense,
+            RecurringExpense,
+            Category,
+        ):
+            db.session.query(model).filter_by(user_id=user_id).delete()
+
+        # Ad impressions and audit logs: user_id is nullable, set it to NULL
+        # rather than delete. Audit entries must survive as evidence of the
+        # deletion, and detaching them here keeps that step in the same
+        # transaction as the deletion itself.
+        for model in (AdImpression, AuditLog):
+            db.session.query(model).filter_by(user_id=user_id).update(
+                {"user_id": None}, synchronize_session="fetch"
+            )
+
+        # Finally delete the user row
+        db.session.delete(user)
+        db.session.commit()
+    except Exception:
+        # Never leave a half-deleted account behind: undo everything and let
+        # the caller report the failure.
+        db.session.rollback()
+        logger.exception("Failed to permanently delete user_id=%s", user_id)
+        return False
+
+    logger.info("Permanently deleted user_id=%s and all associated data", user_id)
+    return True
+
+
+def log_audit_action(user_id: int | None, action: str, ip_address: str | None = None) -> None:
+    """Record a GDPR audit log entry for export/delete operations.
+
+    The entry is committed immediately, independently of any later work in
+    the same request.
+    """
+    entry = AuditLog(
+        user_id=user_id,
+        action=action,
+        ip_address=ip_address,
+    )
+    db.session.add(entry)
+    db.session.commit()
+    logger.info(
+        "GDPR audit log: user_id=%s action=%s ip=%s",
+        user_id,
+        action,
+        ip_address,
+    )
diff --git a/packages/backend/tests/conftest.py b/packages/backend/tests/conftest.py
index a7315b8c9..c208250e8 100644
--- a/packages/backend/tests/conftest.py
+++ b/packages/backend/tests/conftest.py
@@ -1,5 +1,6 @@
 import os
 import pytest
+from unittest.mock import MagicMock
 from app import create_app
 from app.config import Settings
 from app.extensions import db
@@ -19,6 +20,34 @@ def _setup_db(app):
         db.create_all()
 
 
+@pytest.fixture(autouse=True)
+def _mock_redis(monkeypatch):
+    """Mock the global redis_client so tests don't need a live Redis server.
+
+    Patch at every import site because modules capture the reference at
+    import time (``from ..extensions import redis_client``).
+    """
+    mock = MagicMock()
+    mock.setex.return_value = True
+    mock.get.return_value = None
+    mock.delete.return_value = True
+    mock.exists.return_value = False
+    mock.keys.return_value = []
+    mock.set.return_value = True
+    mock.expire.return_value = True
+    mock.ttl.return_value = -1
+    mock.ping.return_value = True
+
+    # Patch the canonical location
+    monkeypatch.setattr("app.extensions.redis_client", mock)
+    # Patch every module that imported the reference; raising=False tolerates
+    # modules that never bound ``redis_client``.
+    importers = ("app.routes.auth", "app.routes.bills", "app.routes.reminders")
+    for module_path in importers:
+        monkeypatch.setattr(f"{module_path}.redis_client", mock, raising=False)
+    yield mock
+
+
 @pytest.fixture()
 def app_fixture():
     # Ensure a clean env for tests
diff --git a/packages/backend/tests/test_gdpr.py b/packages/backend/tests/test_gdpr.py
new file mode 100644
index 000000000..39631ed17
--- /dev/null
+++ b/packages/backend/tests/test_gdpr.py
@@ -0,0 +1,324 @@
+"""Tests for GDPR-compliant PII export and deletion endpoints."""
+
+from datetime import date, datetime
+
+from app.extensions import db
+from app.models import (
+    User,
+    Category,
+    Expense,
+    RecurringExpense,
+    Bill,
+    Reminder,
+    AdImpression,
+    UserSubscription,
+    AuditLog,
+    SubscriptionPlan,
+)
+
+
+def _register_and_login(client, app_fixture, email="gdpr@test.com", password="secret123"):
+    """Register a user and return (user_id, auth_headers)."""
+    r = client.post("/auth/register", json={"email": email, "password": password})
+    assert r.status_code in (200, 201, 409)
+    r = client.post("/auth/login", json={"email": email, "password": password})
+    assert r.status_code == 200
+    data = r.get_json()
+    token = data["access_token"]
+    # Get user_id from DB (avoids needing app context for JWT decode)
+    with app_fixture.app_context():
+        user = db.session.query(User).filter_by(email=email).first()
+        user_id = user.id
+    return user_id, {"Authorization": f"Bearer {token}"}
+
+
+def _seed_user_data(app_fixture, user_id):
+    """Seed various PII data for a user so export/delete has something to work with."""
+    with app_fixture.app_context():
+        cat = Category(user_id=user_id, name="Food")
+        db.session.add(cat)
+        db.session.flush()
+
+        expense = Expense(
+            user_id=user_id,
+            category_id=cat.id,
+            amount=42.50,
+            currency="INR",
+            notes="Lunch",
+        )
+        db.session.add(expense)
+
+        recurring = RecurringExpense(
+            user_id=user_id,
+            category_id=cat.id,
+            amount=100.00,
+            currency="INR",
+            notes="Netflix",
+            cadence="MONTHLY",
+            start_date=date(2026, 1, 1),
+            active=True,
+        )
+        db.session.add(recurring)
+
+        bill = Bill(
+            user_id=user_id,
+            name="Electricity",
+            amount=75.00,
+            currency="INR",
+            next_due_date=date(2026, 6, 1),
+            cadence="MONTHLY",
+        )
+        db.session.add(bill)
+        db.session.flush()
+
+        reminder = Reminder(
+            user_id=user_id,
+            bill_id=bill.id,
+            message="Pay electricity bill",
+            send_at=datetime(2026, 5, 30, 10, 0, 0),
+        )
+        db.session.add(reminder)
+
+        ad = AdImpression(user_id=user_id, placement="sidebar")
+        db.session.add(ad)
+
+        # Create a subscription plan and user subscription
+        plan = SubscriptionPlan(name="Pro", price_cents=999, interval="monthly")
+        db.session.add(plan)
+        db.session.flush()
+
+        sub = UserSubscription(user_id=user_id, plan_id=plan.id, active=True)
+        db.session.add(sub)
+
+        db.session.commit()
+
+
+# ─── Export tests ────────────────────────────────────────────────────────────
+
+
+def test_export_returns_all_user_pii(client, auth_header, app_fixture):
+    """GET /gdpr/users/<int:user_id>/export should return all PII as JSON."""
+    with app_fixture.app_context():
+        user = db.session.query(User).first()
+        user_id = user.id
+
+    _seed_user_data(app_fixture, user_id)
+
+    r = client.get(f"/gdpr/users/{user_id}/export", headers=auth_header)
+    assert r.status_code == 200
+
+    data = r.get_json()
+    assert "export_timestamp" in data
+    assert "user" in data
+    assert data["user"]["id"] == user_id
+    assert data["user"]["email"] == "test@example.com"
+    # Credential material must never be part of the export
+    assert not any("password" in key.lower() for key in data["user"])
+
+    # All data categories present
+    assert isinstance(data["categories"], list)
+    assert len(data["categories"]) >= 1
+    assert isinstance(data["expenses"], list)
+    assert len(data["expenses"]) >= 1
+    assert isinstance(data["recurring_expenses"], list)
+    assert len(data["recurring_expenses"]) >= 1
+    assert isinstance(data["bills"], list)
+    assert len(data["bills"]) >= 1
+    assert isinstance(data["reminders"], list)
+    assert len(data["reminders"]) >= 1
+    assert isinstance(data["ad_impressions"], list)
+    assert len(data["ad_impressions"]) >= 1
+    assert isinstance(data["subscriptions"], list)
+    assert len(data["subscriptions"]) >= 1
+    assert isinstance(data["audit_logs"], list)
+
+
+def test_export_creates_audit_log(client, auth_header, app_fixture):
+    """Exporting data should create a GDPR_DATA_EXPORT audit log entry."""
+    with app_fixture.app_context():
+        user = db.session.query(User).first()
+        user_id = user.id
+
+    r = client.get(f"/gdpr/users/{user_id}/export", headers=auth_header)
+    assert r.status_code == 200
+
+    with app_fixture.app_context():
+        logs = (
+            db.session.query(AuditLog)
+            .filter_by(user_id=user_id, action="GDPR_DATA_EXPORT")
+            .all()
+        )
+        assert len(logs) >= 1
+
+
+def test_export_forbidden_for_other_user(client, app_fixture):
+    """A user cannot export another user's data."""
+    _, auth1 = _register_and_login(client, app_fixture, "user1@gdpr.test", "pass1")
+    uid2, _ = _register_and_login(client, app_fixture, "user2@gdpr.test", "pass2")
+
+    r = client.get(f"/gdpr/users/{uid2}/export", headers=auth1)
+    assert r.status_code == 403
+
+
+def test_export_requires_auth(client, app_fixture):
+    """Unauthenticated requests should be rejected."""
+    with app_fixture.app_context():
+        user = db.session.query(User).first()
+        if user:
+            user_id = user.id
+        else:
+            user_id = 1
+
+    r = client.get(f"/gdpr/users/{user_id}/export")
+    assert r.status_code in (401, 422)
+
+
+def test_export_user_not_found(client, auth_header, app_fixture):
+    """Export for a non-existent user_id returns 403 (ownership check runs first)."""
+    # The route checks current_uid != user_id before user existence;
+    # for user_id=99999 the auth user's id won't match, so 403 is returned.
+    r = client.get("/gdpr/users/99999/export", headers=auth_header)
+    assert r.status_code == 403
+
+
+# ─── Delete tests ────────────────────────────────────────────────────────────
+
+
+def test_delete_removes_user_and_all_data(client, auth_header, app_fixture):
+    """DELETE /gdpr/users/<int:user_id> should permanently remove the user and all PII."""
+    with app_fixture.app_context():
+        user = db.session.query(User).first()
+        user_id = user.id
+
+    _seed_user_data(app_fixture, user_id)
+
+    r = client.delete(
+        f"/gdpr/users/{user_id}",
+        json={"confirm": True},
+        headers=auth_header,
+    )
+    assert r.status_code == 200
+    assert "permanently deleted" in r.get_json()["message"].lower()
+
+    # Verify all data is gone
+    with app_fixture.app_context():
+        assert db.session.get(User, user_id) is None
+        assert db.session.query(Category).filter_by(user_id=user_id).count() == 0
+        assert db.session.query(Expense).filter_by(user_id=user_id).count() == 0
+        assert (
+            db.session.query(RecurringExpense).filter_by(user_id=user_id).count() == 0
+        )
+        assert db.session.query(Bill).filter_by(user_id=user_id).count() == 0
+        assert db.session.query(Reminder).filter_by(user_id=user_id).count() == 0
+        assert db.session.query(UserSubscription).filter_by(user_id=user_id).count() == 0
+        # Ad impressions: user_id set to NULL, not deleted
+        assert (
+            db.session.query(AdImpression).filter_by(user_id=user_id).count() == 0
+        )
+
+
+def test_delete_creates_audit_log_that_survives(client, auth_header, app_fixture):
+    """Deletion should log a GDPR_DATA_DELETE audit entry that persists after user removal."""
+    with app_fixture.app_context():
+        user = db.session.query(User).first()
+        user_id = user.id
+
+    r = client.delete(
+        f"/gdpr/users/{user_id}",
+        json={"confirm": True},
+        headers=auth_header,
+    )
+    assert r.status_code == 200
+
+    with app_fixture.app_context():
+        logs = db.session.query(AuditLog).filter_by(action="GDPR_DATA_DELETE").all()
+        assert len(logs) >= 1
+        # user_id should be NULL (anonymised) after user deletion
+        for log in logs:
+            assert log.user_id is None
+
+
+def test_delete_requires_confirmation(client, auth_header, app_fixture):
+    """Deletion without confirm=true should return 400."""
+    with app_fixture.app_context():
+        user = db.session.query(User).first()
+        user_id = user.id
+
+    # No body at all
+    r = client.delete(f"/gdpr/users/{user_id}", headers=auth_header)
+    assert r.status_code == 400
+
+    # Body with confirm=false
+    r = client.delete(
+        f"/gdpr/users/{user_id}",
+        json={"confirm": False},
+        headers=auth_header,
+    )
+    assert r.status_code == 400
+
+    # Truthy values other than JSON true must not count as confirmation
+    r = client.delete(
+        f"/gdpr/users/{user_id}",
+        json={"confirm": "false"},
+        headers=auth_header,
+    )
+    assert r.status_code == 400
+
+
+def test_delete_forbidden_for_other_user(client, app_fixture):
+    """A user cannot delete another user."""
+    _, auth1 = _register_and_login(client, app_fixture, "deleter1@gdpr.test", "pass1")
+    uid2, _ = _register_and_login(client, app_fixture, "deleter2@gdpr.test", "pass2")
+
+    r = client.delete(
+        f"/gdpr/users/{uid2}",
+        json={"confirm": True},
+        headers=auth1,
+    )
+    assert r.status_code == 403
+
+
+def test_delete_requires_auth(client, app_fixture):
+    """Unauthenticated deletion should be rejected."""
+    with app_fixture.app_context():
+        user = db.session.query(User).first()
+        if user:
+            user_id = user.id
+        else:
+            user_id = 1
+
+    r = client.delete(f"/gdpr/users/{user_id}", json={"confirm": True})
+    assert r.status_code in (401, 422)
+
+
+def test_delete_user_not_found(client, auth_header, app_fixture):
+    """Deleting a non-existent user_id returns 403 (ownership check runs first)."""
+    r = client.delete(
+        "/gdpr/users/99999",
+        json={"confirm": True},
+        headers=auth_header,
+    )
+    assert r.status_code == 403
+
+
+def test_ad_impressions_anonymised_on_delete(client, auth_header, app_fixture):
+    """Ad impressions should have user_id set to NULL (not deleted) on user deletion."""
+    with app_fixture.app_context():
+        user = db.session.query(User).first()
+        user_id = user.id
+        ad = AdImpression(user_id=user_id, placement="banner")
+        db.session.add(ad)
+        db.session.commit()
+        ad_id = ad.id
+
+    r = client.delete(
+        f"/gdpr/users/{user_id}",
+        json={"confirm": True},
+        headers=auth_header,
+    )
+    assert r.status_code == 200
+
+    with app_fixture.app_context():
+        ad_check = db.session.get(AdImpression, ad_id)
+        assert ad_check is not None
+        assert ad_check.user_id is None