From 072d202415730ec9957d5202a9b503af54285f59 Mon Sep 17 00:00:00 2001 From: GeiserX <9169332+GeiserX@users.noreply.github.com> Date: Sat, 14 Mar 2026 20:19:54 +0100 Subject: [PATCH 1/5] =?UTF-8?q?feat:=20v7.3.0=20=E2=80=94=20gap-fill=20rec?= =?UTF-8?q?overy,=20token=20URL=20auto-login,=20UX=20improvements?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gap-fill: detect and recover skipped messages from interrupted backups - SQL LAG() window function detects gaps in message ID sequences - iter_messages(min_id, max_id) recovers missing messages from Telegram - FILL_GAPS=true / GAP_THRESHOLD=50 env vars for automated post-backup runs - CLI subcommand: fill-gaps --chat-id --threshold - Scheduler integration: runs automatically after each backup when enabled - 31 functional tests (real SQLite + mocked Telegram client) Frontend UX improvements: - Token URL auto-login (?token=XXX) for shareable one-click access - Shareable link UI in admin panel (copy token or full URL) - @username display in chat list --- src/__init__.py | 2 +- src/__main__.py | 50 +++ src/config.py | 6 + src/db/adapter.py | 43 +++ src/scheduler.py | 20 ++ src/telegram_backup.py | 126 +++++++ src/web/templates/index.html | 46 ++- tests/test_gap_fill.py | 642 +++++++++++++++++++++++++++++++++++ 8 files changed, 931 insertions(+), 4 deletions(-) create mode 100644 tests/test_gap_fill.py diff --git a/src/__init__.py b/src/__init__.py index 5621f19b..4ec82691 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -2,4 +2,4 @@ Telegram Backup Automation - Main Package """ -__version__ = "7.2.1" +__version__ = "7.3.0" diff --git a/src/__main__.py b/src/__main__.py index 0daca9c1..6b84fbaf 100644 --- a/src/__main__.py +++ b/src/__main__.py @@ -144,6 +144,25 @@ def create_parser() -> argparse.ArgumentParser: "--merge", action="store_true", help="Allow importing into a chat that already has messages" ) + # Fill gaps command + fill_gaps_parser = subparsers.add_parser( + 
"fill-gaps", + help="Detect and fill message gaps from failed backups", + description=( + "Scans backed-up chats for gaps in message ID sequences " + "and recovers skipped messages from Telegram. " + "Gaps are caused by API errors, rate limits, or interruptions " + "during previous backup runs." + ), + ) + fill_gaps_parser.add_argument( + "-c", "--chat-id", type=int, help="Fill gaps only for this specific chat ID" + ) + fill_gaps_parser.add_argument( + "-t", "--threshold", type=int, default=None, + help="Minimum gap size to investigate (overrides GAP_THRESHOLD env var)", + ) + return parser @@ -207,6 +226,35 @@ async def run_list_chats(args) -> int: return 1 +async def run_fill_gaps_cmd(args) -> int: + """Run fill-gaps command.""" + from .config import Config, setup_logging + from .telegram_backup import run_fill_gaps + + try: + config = Config() + if args.threshold is not None: + config.gap_threshold = args.threshold + setup_logging(config) + + summary = await run_fill_gaps(config, chat_id=args.chat_id) + print("\nGap-fill complete:") + print(f" Chats scanned: {summary['chats_scanned']}") + print(f" Chats with gaps: {summary['chats_with_gaps']}") + print(f" Total gaps found: {summary['total_gaps']}") + print(f" Messages recovered: {summary['total_recovered']}") + if summary["details"]: + for detail in summary["details"]: + print( + f" - {detail['chat_name']} (ID {detail['chat_id']}): " + f"{detail['gaps']} gaps, {detail['recovered']} recovered" + ) + return 0 + except Exception as e: + print(f"Gap-fill failed: {e}", file=sys.stderr) + return 1 + + async def run_import(args) -> int: """Run import command.""" from .config import Config, setup_logging @@ -304,6 +352,8 @@ def main() -> int: return asyncio.run(run_list_chats(args)) elif args.command == "import": return asyncio.run(run_import(args)) + elif args.command == "fill-gaps": + return asyncio.run(run_fill_gaps_cmd(args)) else: parser.print_help() return 0 diff --git a/src/config.py b/src/config.py index 
28ffab7f..de0bbb22 100644 --- a/src/config.py +++ b/src/config.py @@ -147,6 +147,12 @@ def __init__(self): # Useful for recovering from interrupted backups or deleted media files self.verify_media = os.getenv("VERIFY_MEDIA", "false").lower() == "true" + # Gap-fill mode: detect and recover skipped messages + # When enabled, runs after each scheduled backup to find and fill gaps + # in message ID sequences caused by API errors or interruptions + self.fill_gaps = os.getenv("FILL_GAPS", "false").lower() == "true" + self.gap_threshold = int(os.getenv("GAP_THRESHOLD", "50")) + # Real-time listener mode # When enabled, runs a background listener that catches message edits and deletions # in real-time instead of batch-checking on each backup run diff --git a/src/db/adapter.py b/src/db/adapter.py index 0d5308b5..c27f2631 100644 --- a/src/db/adapter.py +++ b/src/db/adapter.py @@ -890,6 +890,49 @@ async def update_sync_status(self, chat_id: int, last_message_id: int, message_c await session.execute(stmt) await session.commit() + # ========== Gap Detection ========== + + async def detect_message_gaps(self, chat_id: int, threshold: int = 50) -> list[tuple[int, int, int]]: + """Detect gaps in message ID sequences for a chat. + + Uses a SQL LAG() window function to find gaps larger than threshold. + + Returns: + List of (gap_start_id, gap_end_id, gap_size) tuples where + gap_start is the last message ID before the gap and + gap_end is the first message ID after the gap. 
+ """ + async with self.db_manager.async_session_factory() as session: + result = await session.execute( + text( + """ + SELECT gap_start, gap_end, gap_size FROM ( + SELECT + LAG(id) OVER (ORDER BY id) AS gap_start, + id AS gap_end, + id - LAG(id) OVER (ORDER BY id) AS gap_size + FROM messages + WHERE chat_id = :chat_id + ) gaps + WHERE gap_size > :threshold + ORDER BY gap_start + """ + ), + {"chat_id": chat_id, "threshold": threshold}, + ) + return [(row[0], row[1], row[2]) for row in result.fetchall()] + + async def get_chats_with_messages(self) -> list[int]: + """Get all chat IDs that exist in the chats table. + + Queries the chats table directly instead of scanning the messages table, + which would be extremely slow on large databases. + """ + async with self.db_manager.async_session_factory() as session: + stmt = select(Chat.id) + result = await session.execute(stmt) + return [row[0] for row in result.fetchall()] + # ========== Statistics ========== async def get_statistics(self) -> dict[str, Any]: diff --git a/src/scheduler.py b/src/scheduler.py index 6fc469bc..8dba6623 100644 --- a/src/scheduler.py +++ b/src/scheduler.py @@ -76,6 +76,16 @@ async def _run_backup_job(self): # Run backup using shared client await run_backup(self.config, client=client) + # Run gap-fill if enabled + if self.config.fill_gaps: + try: + from .telegram_backup import run_fill_gaps + + logger.info("Running post-backup gap-fill...") + await run_fill_gaps(self.config, client=client) + except Exception as e: + logger.error(f"Gap-fill failed: {e}", exc_info=True) + # Reload tracked chats in listener after backup # (new chats may have been added) if self._listener: @@ -214,6 +224,16 @@ async def run_forever(self): await run_backup(self.config, client=self._connection.client) logger.info("Initial backup completed") + # Run gap-fill if enabled + if self.config.fill_gaps: + try: + from .telegram_backup import run_fill_gaps + + logger.info("Running post-backup gap-fill...") + await 
run_fill_gaps(self.config, client=self._connection.client) + except Exception as e: + logger.error(f"Gap-fill failed: {e}", exc_info=True) + # Reload tracked chats in listener after initial backup if self._listener: await self._listener._load_tracked_chats() diff --git a/src/telegram_backup.py b/src/telegram_backup.py index 4df6c578..aa3cb8e2 100644 --- a/src/telegram_backup.py +++ b/src/telegram_backup.py @@ -692,6 +692,109 @@ async def _commit_batch(self, batch_data: list[dict], chat_id: int) -> None: if reactions_list: await self.db.insert_reactions(msg["id"], chat_id, reactions_list) + async def _fill_gap_range(self, entity, chat_id: int, gap_start: int, gap_end: int) -> int: + """ + Fetch and store messages for a single gap range. + + Args: + entity: Telegram entity for the chat + chat_id: Chat identifier + gap_start: Last message ID before the gap + gap_end: First message ID after the gap + + Returns: + Number of recovered messages + """ + batch_data: list[dict] = [] + batch_size = self.config.batch_size + recovered = 0 + + async for message in self.client.iter_messages(entity, min_id=gap_start, max_id=gap_end, reverse=True): + msg_data = await self._process_message(message, chat_id) + batch_data.append(msg_data) + + if len(batch_data) >= batch_size: + await self._commit_batch(batch_data, chat_id) + recovered += len(batch_data) + batch_data = [] + + # Flush remaining messages + if batch_data: + await self._commit_batch(batch_data, chat_id) + recovered += len(batch_data) + + return recovered + + async def _fill_gaps(self, chat_id: int | None = None) -> dict: + """ + Detect and fill gaps in message ID sequences. + + Scans chats for missing message ID ranges and fetches them from Telegram. + + Args: + chat_id: If provided, scan only this chat. Otherwise scan all chats. + + Returns: + Summary dict with gap-fill statistics. 
+ """ + threshold = self.config.gap_threshold + summary = { + "chats_scanned": 0, + "chats_with_gaps": 0, + "total_gaps": 0, + "total_recovered": 0, + "details": [], + } + + if chat_id is not None: + chat_ids = [chat_id] + else: + chat_ids = await self.db.get_chats_with_messages() + + logger.info(f"Gap-fill: scanning {len(chat_ids)} chat(s) with threshold={threshold}") + + for cid in chat_ids: + summary["chats_scanned"] += 1 + + try: + entity = await self.client.get_entity(cid) + except (ChannelPrivateError, ChatForbiddenError, UserBannedInChannelError) as e: + logger.warning(f"Gap-fill: skipping chat {cid} (no access): {e.__class__.__name__}") + continue + + chat_name = self._get_chat_name(entity) + gaps = await self.db.detect_message_gaps(cid, threshold) + + if not gaps: + continue + + summary["chats_with_gaps"] += 1 + chat_recovered = 0 + + logger.info(f"Gap-fill: {chat_name} (ID: {cid}) has {len(gaps)} gap(s)") + + for gap_start, gap_end, gap_size in gaps: + logger.info(f" → Filling gap: {gap_start}..{gap_end} (size {gap_size})") + recovered = await self._fill_gap_range(entity, cid, gap_start, gap_end) + chat_recovered += recovered + logger.info(f" Recovered {recovered} messages") + + summary["total_gaps"] += len(gaps) + summary["total_recovered"] += chat_recovered + summary["details"].append({ + "chat_id": cid, + "chat_name": chat_name, + "gaps": len(gaps), + "recovered": chat_recovered, + }) + + logger.info( + f"Gap-fill complete: {summary['chats_scanned']} chats scanned, " + f"{summary['total_gaps']} gaps found, {summary['total_recovered']} messages recovered" + ) + + return summary + async def _sync_deletions_and_edits(self, chat_id: int, entity): """ Sync deletions and edits for existing messages in the database. 
@@ -1640,6 +1743,29 @@ async def run_backup(config: Config, client: TelegramClient | None = None): await backup.db.close() +async def run_fill_gaps(config: Config, client: TelegramClient | None = None, chat_id: int | None = None) -> dict: + """ + Run gap-fill to recover missing messages in backed-up chats. + + Args: + config: Configuration object + client: Optional existing TelegramClient to use (for shared connection). + If provided, the operation will use this client instead of creating + its own, avoiding session file lock conflicts. + chat_id: If provided, scan only this chat. Otherwise scan all chats. + + Returns: + Summary dict with gap-fill statistics. + """ + backup = await TelegramBackup.create(config, client=client) + try: + await backup.connect() + return await backup._fill_gaps(chat_id=chat_id) + finally: + await backup.disconnect() + await backup.db.close() + + def main(): """Main entry point for CLI.""" import asyncio diff --git a/src/web/templates/index.html b/src/web/templates/index.html index 1725d0fd..b9189fa4 100644 --- a/src/web/templates/index.html +++ b/src/web/templates/index.html @@ -639,7 +639,8 @@

{{ formatDate(chat.last_message_date) }}

- {{ chat.type }} + @{{ chat.username }} + {{ chat.type }} • Forum • {{ chat.participants_count }} participants @@ -1311,9 +1312,15 @@

Create Share Token

Token created! Copy it now — it won't be shown again.
-
+
+ Token {{ adminNewToken }} - + +
+
+ Link + {{ adminNewTokenUrl }} +
@@ -1931,6 +1938,34 @@

Create Share Token

noDownload.value = !!data.no_download console.log('[DEBUG] authRequired:', authRequired.value, 'isAuthenticated:', isAuthenticated.value) + // Auto-login via ?token= URL parameter + if (!isAuthenticated.value && authRequired.value) { + const urlParams = new URLSearchParams(window.location.search) + const urlToken = urlParams.get('token') + if (urlToken) { + try { + const tokenRes = await fetch('/auth/token', { + method: 'POST', credentials: 'include', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ token: urlToken }), + }) + if (tokenRes.ok) { + const tokenData = await tokenRes.json() + if (tokenData.success) { + isAuthenticated.value = true + userRole.value = tokenData.role || 'token' + currentUsername.value = tokenData.username || '' + noDownload.value = !!tokenData.no_download + } + } + } catch (e) { console.error('Token auto-login failed:', e) } + // Remove token from URL to prevent leaking in browser history + const cleanUrl = new URL(window.location) + cleanUrl.searchParams.delete('token') + window.history.replaceState({}, '', cleanUrl) + } + } + if (isAuthenticated.value) { await loadChats() await loadStats() @@ -3419,6 +3454,10 @@

Create Share Token

const adminTokenForm = ref({ label: '', allowed_chat_ids: [], no_download: false, expires_at: '' }) const adminTokenError = ref('') const adminNewToken = ref('') + const adminNewTokenUrl = computed(() => { + if (!adminNewToken.value) return '' + return `${window.location.origin}/?token=${adminNewToken.value}` + }) const loadAdminViewers = async () => { try { @@ -3671,6 +3710,7 @@

Create Share Token

adminTokenForm, adminTokenError, adminNewToken, + adminNewTokenUrl, loadAdminTokens, createToken, revokeToken, diff --git a/tests/test_gap_fill.py b/tests/test_gap_fill.py new file mode 100644 index 00000000..69f74313 --- /dev/null +++ b/tests/test_gap_fill.py @@ -0,0 +1,642 @@ +"""Functional tests for the gap-fill feature (v7.3.0). + +Tests cover: +- detect_message_gaps: real SQL queries against an in-memory SQLite database +- _fill_gaps / _fill_gap_range: Telegram client mocks exercising actual control flow +- Config: env-var parsing for FILL_GAPS and GAP_THRESHOLD +""" + +import os +import shutil +import tempfile +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine +from sqlalchemy.pool import StaticPool + +from src.config import Config +from src.db.adapter import DatabaseAdapter +from src.db.base import DatabaseManager +from src.telegram_backup import TelegramBackup + + +# --------------------------------------------------------------------------- +# Helpers — lightweight in-memory async SQLite setup +# --------------------------------------------------------------------------- + +async def _create_in_memory_adapter(): + """Create a DatabaseAdapter backed by an in-memory SQLite database. + + Returns (adapter, engine) so the caller can dispose the engine after use. + """ + # StaticPool + check_same_thread=False keeps a single shared in-memory DB + # across all connections, which is required for aiosqlite in-memory testing. 
+ engine = create_async_engine( + "sqlite+aiosqlite://", + poolclass=StaticPool, + connect_args={"check_same_thread": False}, + ) + + # Create the minimal schema needed for gap detection + async with engine.begin() as conn: + await conn.execute(text( + "CREATE TABLE IF NOT EXISTS chats (" + " id INTEGER PRIMARY KEY," + " type TEXT NOT NULL DEFAULT 'channel'," + " title TEXT," + " username TEXT," + " first_name TEXT," + " last_name TEXT," + " phone TEXT," + " description TEXT," + " participants_count INTEGER," + " is_forum INTEGER DEFAULT 0," + " is_archived INTEGER DEFAULT 0," + " last_synced_message_id INTEGER DEFAULT 0," + " created_at TEXT DEFAULT CURRENT_TIMESTAMP," + " updated_at TEXT DEFAULT CURRENT_TIMESTAMP" + ")" + )) + await conn.execute(text( + "CREATE TABLE IF NOT EXISTS messages (" + " id INTEGER NOT NULL," + " chat_id INTEGER NOT NULL," + " sender_id INTEGER," + " date TEXT NOT NULL DEFAULT '2025-01-01 00:00:00'," + " text TEXT," + " reply_to_msg_id INTEGER," + " reply_to_top_id INTEGER," + " reply_to_text TEXT," + " forward_from_id INTEGER," + " edit_date TEXT," + " raw_data TEXT," + " created_at TEXT DEFAULT CURRENT_TIMESTAMP," + " is_outgoing INTEGER DEFAULT 0," + " is_pinned INTEGER DEFAULT 0," + " PRIMARY KEY (id, chat_id)" + ")" + )) + + # Wire up a real DatabaseManager (skip its init() — we supply our own engine) + db_manager = DatabaseManager.__new__(DatabaseManager) + db_manager.engine = engine + db_manager.database_url = "sqlite+aiosqlite://" + db_manager._is_sqlite = True + db_manager.async_session_factory = async_sessionmaker( + engine, class_=AsyncSession, expire_on_commit=False, + ) + + adapter = DatabaseAdapter(db_manager) + return adapter, engine + + +async def _insert_messages(adapter: DatabaseAdapter, chat_id: int, msg_ids: list[int]): + """Insert message rows with the given IDs into the test database.""" + async with adapter.db_manager.async_session_factory() as session: + for mid in msg_ids: + await session.execute(text( + "INSERT 
INTO messages (id, chat_id, date) VALUES (:id, :cid, '2025-01-01 00:00:00')" + ), {"id": mid, "cid": chat_id}) + await session.commit() + + +async def _insert_chat(adapter: DatabaseAdapter, chat_id: int, title: str = "Test Chat"): + """Insert a chat row into the test database.""" + async with adapter.db_manager.async_session_factory() as session: + await session.execute(text( + "INSERT INTO chats (id, title, type) VALUES (:id, :title, 'channel')" + ), {"id": chat_id, "title": title}) + await session.commit() + + +# =========================================================================== +# 1. TestDetectMessageGaps — real SQL against in-memory SQLite +# =========================================================================== + +class TestDetectMessageGaps: + """Exercise detect_message_gaps with a real async SQLite database.""" + + async def test_no_gaps_consecutive_ids(self): + """Consecutive message IDs should produce zero gaps.""" + adapter, engine = await _create_in_memory_adapter() + try: + await _insert_messages(adapter, chat_id=100, msg_ids=list(range(1, 51))) + gaps = await adapter.detect_message_gaps(chat_id=100, threshold=50) + assert gaps == [] + finally: + await engine.dispose() + + async def test_single_large_gap(self): + """IDs 1-50 then 100-150 should return one gap (50, 100, 50).""" + adapter, engine = await _create_in_memory_adapter() + try: + ids = list(range(1, 51)) + list(range(100, 151)) + await _insert_messages(adapter, chat_id=200, msg_ids=ids) + gaps = await adapter.detect_message_gaps(chat_id=200, threshold=49) + + assert len(gaps) == 1 + gap_start, gap_end, gap_size = gaps[0] + assert gap_start == 50 + assert gap_end == 100 + assert gap_size == 50 + finally: + await engine.dispose() + + async def test_multiple_gaps_sorted(self): + """Multiple gaps should all be returned, sorted by gap_start.""" + adapter, engine = await _create_in_memory_adapter() + try: + # Gap 1: between 10 and 100 (size 90) + # Gap 2: between 110 and 300 (size 190) 
+ ids = list(range(1, 11)) + list(range(100, 111)) + list(range(300, 311)) + await _insert_messages(adapter, chat_id=300, msg_ids=ids) + gaps = await adapter.detect_message_gaps(chat_id=300, threshold=50) + + assert len(gaps) == 2 + assert gaps[0] == (10, 100, 90) + assert gaps[1] == (110, 300, 190) + # Verify sorted by gap_start + assert gaps[0][0] < gaps[1][0] + finally: + await engine.dispose() + + async def test_gap_below_threshold_not_returned(self): + """A gap smaller than or equal to the threshold should not appear.""" + adapter, engine = await _create_in_memory_adapter() + try: + # IDs 1-10 then 60-70 → gap_size = 60 - 10 = 50, exactly equal to threshold=50 + # The query keeps only gap_size > threshold, and 50 is NOT > 50, so nothing is returned + ids = list(range(1, 11)) + list(range(60, 71)) + await _insert_messages(adapter, chat_id=400, msg_ids=ids) + gaps = await adapter.detect_message_gaps(chat_id=400, threshold=50) + + assert gaps == [], f"Gap of exactly threshold should not be returned, got {gaps}" + finally: + await engine.dispose() + + async def test_gap_just_above_threshold_returned(self): + """A gap just above the threshold (here threshold+2) should be returned.""" + adapter, engine = await _create_in_memory_adapter() + try: + # IDs 1-10 then 62-70 → gap_size = 62 - 10 = 52 > 50 + ids = list(range(1, 11)) + list(range(62, 71)) + await _insert_messages(adapter, chat_id=401, msg_ids=ids) + gaps = await adapter.detect_message_gaps(chat_id=401, threshold=50) + + assert len(gaps) == 1 + assert gaps[0] == (10, 62, 52) + finally: + await engine.dispose() + + async def test_single_message_no_gaps(self): + """A single message in the chat should produce no gaps.""" + adapter, engine = await _create_in_memory_adapter() + try: + await _insert_messages(adapter, chat_id=500, msg_ids=[42]) + gaps = await adapter.detect_message_gaps(chat_id=500, threshold=50) + assert gaps == [] + finally: + await engine.dispose() + + async def test_empty_chat_no_gaps(self): + """A chat with zero messages should produce
no gaps.""" + adapter, engine = await _create_in_memory_adapter() + try: + gaps = await adapter.detect_message_gaps(chat_id=999, threshold=50) + assert gaps == [] + finally: + await engine.dispose() + + async def test_different_chats_isolated(self): + """Gaps in one chat should not appear in another chat's results.""" + adapter, engine = await _create_in_memory_adapter() + try: + # Chat 1: has a gap + await _insert_messages(adapter, chat_id=10, msg_ids=[1, 2, 3, 100]) + # Chat 2: no gap + await _insert_messages(adapter, chat_id=20, msg_ids=[1, 2, 3, 4, 5]) + + gaps_chat1 = await adapter.detect_message_gaps(chat_id=10, threshold=50) + gaps_chat2 = await adapter.detect_message_gaps(chat_id=20, threshold=50) + + assert len(gaps_chat1) == 1 + assert gaps_chat1[0] == (3, 100, 97) + assert gaps_chat2 == [] + finally: + await engine.dispose() + + +# =========================================================================== +# 2. TestGetChatsWithMessages — real SQL +# =========================================================================== + +class TestGetChatsWithMessages: + """Exercise get_chats_with_messages with a real async SQLite database.""" + + async def test_returns_all_chat_ids(self): + """Should return all chat IDs from the chats table.""" + adapter, engine = await _create_in_memory_adapter() + try: + await _insert_chat(adapter, chat_id=-1001, title="Chat A") + await _insert_chat(adapter, chat_id=-1002, title="Chat B") + await _insert_chat(adapter, chat_id=-1003, title="Chat C") + + result = await adapter.get_chats_with_messages() + assert sorted(result) == [-1003, -1002, -1001] + finally: + await engine.dispose() + + async def test_returns_empty_when_no_chats(self): + """Should return empty list when no chats exist.""" + adapter, engine = await _create_in_memory_adapter() + try: + result = await adapter.get_chats_with_messages() + assert result == [] + finally: + await engine.dispose() + + +# 
=========================================================================== +# 3. TestFillGaps — mocked Telegram client, exercises _fill_gaps control flow +# =========================================================================== + +def _make_backup_instance(db_mock=None, client_mock=None, config_mock=None): + """Create a TelegramBackup instance with mocked dependencies.""" + backup = TelegramBackup.__new__(TelegramBackup) + backup.db = db_mock or AsyncMock() + backup.client = client_mock or AsyncMock() + backup.config = config_mock or MagicMock() + backup.config.gap_threshold = 50 + backup.config.batch_size = 100 + return backup + + +class TestFillGaps: + """Exercise _fill_gaps logic with mocked DB and Telegram client.""" + + async def test_fill_gaps_no_chat_id_scans_all_chats(self): + """When chat_id=None, _fill_gaps should query all chats from DB.""" + db = AsyncMock() + db.get_chats_with_messages = AsyncMock(return_value=[-1001, -1002]) + db.detect_message_gaps = AsyncMock(return_value=[]) + + client = AsyncMock() + entity = MagicMock() + entity.title = "Test Channel" + entity.id = 1001 + client.get_entity = AsyncMock(return_value=entity) + + backup = _make_backup_instance(db_mock=db, client_mock=client) + + result = await backup._fill_gaps(chat_id=None) + + db.get_chats_with_messages.assert_awaited_once() + assert result["chats_scanned"] == 2 + + async def test_fill_gaps_with_specific_chat_id(self): + """When chat_id is provided, only that chat should be scanned.""" + db = AsyncMock() + db.detect_message_gaps = AsyncMock(return_value=[]) + + client = AsyncMock() + entity = MagicMock() + entity.title = "Specific Chat" + entity.id = 5555 + client.get_entity = AsyncMock(return_value=entity) + + backup = _make_backup_instance(db_mock=db, client_mock=client) + + result = await backup._fill_gaps(chat_id=-1005555) + + # Should NOT have called get_chats_with_messages + db.get_chats_with_messages.assert_not_awaited() + assert result["chats_scanned"] == 1 + 
client.get_entity.assert_awaited_once_with(-1005555) + + async def test_fill_gaps_chat_id_zero_is_not_none(self): + """chat_id=0 is falsy but valid — must scan only chat 0, not all chats. + + This tests the critical `if chat_id is not None` fix (vs `if chat_id`). + """ + db = AsyncMock() + db.detect_message_gaps = AsyncMock(return_value=[]) + + client = AsyncMock() + entity = MagicMock() + entity.title = "Chat Zero" + entity.id = 0 + client.get_entity = AsyncMock(return_value=entity) + + backup = _make_backup_instance(db_mock=db, client_mock=client) + + result = await backup._fill_gaps(chat_id=0) + + # The key assertion: get_chats_with_messages must NOT be called + db.get_chats_with_messages.assert_not_awaited() + assert result["chats_scanned"] == 1 + client.get_entity.assert_awaited_once_with(0) + + async def test_fill_gaps_skips_inaccessible_chats(self): + """Chats raising ChannelPrivateError should be skipped, not crash.""" + from telethon.errors import ChannelPrivateError + + db = AsyncMock() + db.get_chats_with_messages = AsyncMock(return_value=[-1001, -1002, -1003]) + + accessible_entity = MagicMock() + accessible_entity.title = "Accessible" + accessible_entity.id = 1003 + + client = AsyncMock() + + async def fake_get_entity(cid): + if cid == -1001: + raise ChannelPrivateError(request=None) + if cid == -1002: + raise ChannelPrivateError(request=None) + return accessible_entity + + client.get_entity = AsyncMock(side_effect=fake_get_entity) + db.detect_message_gaps = AsyncMock(return_value=[]) + + backup = _make_backup_instance(db_mock=db, client_mock=client) + + result = await backup._fill_gaps(chat_id=None) + + # All 3 scanned, but only 1 was accessible + assert result["chats_scanned"] == 3 + # The 2 inaccessible chats had no gaps detected (skipped before gap query) + assert result["total_gaps"] == 0 + + async def test_fill_gaps_processes_detected_gaps(self): + """When gaps are found, _fill_gap_range should be called for each.""" + db = AsyncMock() + 
db.get_chats_with_messages = AsyncMock(return_value=[-1001]) + db.detect_message_gaps = AsyncMock(return_value=[ + (50, 100, 50), + (200, 300, 100), + ]) + + client = AsyncMock() + entity = MagicMock() + entity.title = "Gapped Chat" + entity.id = 1001 + client.get_entity = AsyncMock(return_value=entity) + + backup = _make_backup_instance(db_mock=db, client_mock=client) + + # Mock _fill_gap_range to return counts + backup._fill_gap_range = AsyncMock(side_effect=[10, 25]) + + result = await backup._fill_gaps(chat_id=None) + + assert result["chats_scanned"] == 1 + assert result["chats_with_gaps"] == 1 + assert result["total_gaps"] == 2 + assert result["total_recovered"] == 35 # 10 + 25 + assert len(result["details"]) == 1 + assert result["details"][0]["chat_id"] == -1001 + assert result["details"][0]["gaps"] == 2 + assert result["details"][0]["recovered"] == 35 + + async def test_fill_gaps_chat_without_gaps_not_in_details(self): + """Chats with no gaps should not appear in the details list.""" + db = AsyncMock() + db.get_chats_with_messages = AsyncMock(return_value=[-1001, -1002]) + db.detect_message_gaps = AsyncMock(side_effect=[ + [], # chat -1001: no gaps + [(10, 100, 90)], # chat -1002: one gap + ]) + + client = AsyncMock() + entity1 = MagicMock() + entity1.title = "No Gaps" + entity1.id = 1001 + entity2 = MagicMock() + entity2.title = "Has Gaps" + entity2.id = 1002 + + client.get_entity = AsyncMock(side_effect=[entity1, entity2]) + + backup = _make_backup_instance(db_mock=db, client_mock=client) + backup._fill_gap_range = AsyncMock(return_value=15) + + result = await backup._fill_gaps(chat_id=None) + + assert result["chats_scanned"] == 2 + assert result["chats_with_gaps"] == 1 + assert len(result["details"]) == 1 + assert result["details"][0]["chat_id"] == -1002 + + async def test_fill_gaps_uses_config_threshold(self): + """The threshold passed to detect_message_gaps should come from config.""" + db = AsyncMock() + db.get_chats_with_messages = 
AsyncMock(return_value=[-1001]) + db.detect_message_gaps = AsyncMock(return_value=[]) + + client = AsyncMock() + entity = MagicMock() + entity.title = "Test" + entity.id = 1001 + client.get_entity = AsyncMock(return_value=entity) + + backup = _make_backup_instance(db_mock=db, client_mock=client) + backup.config.gap_threshold = 123 + + await backup._fill_gaps(chat_id=None) + + db.detect_message_gaps.assert_awaited_once_with(-1001, 123) + + +class TestFillGapRange: + """Exercise _fill_gap_range with a mocked Telegram client.""" + + async def test_fill_gap_range_returns_count(self): + """_fill_gap_range should return the total recovered message count.""" + db = AsyncMock() + client = AsyncMock() + + backup = _make_backup_instance(db_mock=db, client_mock=client) + + # Simulate 5 messages returned from iter_messages + messages = [] + for i in range(51, 56): + msg = MagicMock() + msg.id = i + messages.append(msg) + + async def fake_iter_messages(entity, min_id=None, max_id=None, reverse=None): + for m in messages: + yield m + + client.iter_messages = fake_iter_messages + backup._process_message = AsyncMock(side_effect=lambda m, c: {"id": m.id, "chat_id": c}) + backup._commit_batch = AsyncMock() + + entity = MagicMock() + count = await backup._fill_gap_range(entity, chat_id=-1001, gap_start=50, gap_end=100) + + assert count == 5 + backup._commit_batch.assert_awaited_once() + + async def test_fill_gap_range_batches_commits(self): + """Large gaps should be committed in batches according to config.batch_size.""" + db = AsyncMock() + client = AsyncMock() + + backup = _make_backup_instance(db_mock=db, client_mock=client) + backup.config.batch_size = 3 + + messages = [] + for i in range(51, 59): # 8 messages + msg = MagicMock() + msg.id = i + messages.append(msg) + + async def fake_iter_messages(entity, min_id=None, max_id=None, reverse=None): + for m in messages: + yield m + + client.iter_messages = fake_iter_messages + backup._process_message = AsyncMock(side_effect=lambda m, 
c: {"id": m.id, "chat_id": c}) + backup._commit_batch = AsyncMock() + + entity = MagicMock() + count = await backup._fill_gap_range(entity, chat_id=-1001, gap_start=50, gap_end=100) + + assert count == 8 + # 8 messages / batch_size 3 = 2 full batches (3+3) + 1 flush (2) = 3 calls + assert backup._commit_batch.await_count == 3 + + async def test_fill_gap_range_empty_gap(self): + """When no messages exist in the gap range, should return 0.""" + db = AsyncMock() + client = AsyncMock() + + backup = _make_backup_instance(db_mock=db, client_mock=client) + + async def fake_iter_messages(entity, min_id=None, max_id=None, reverse=None): + return + yield # noqa: unreachable - makes this an async generator + + client.iter_messages = fake_iter_messages + backup._process_message = AsyncMock() + backup._commit_batch = AsyncMock() + + entity = MagicMock() + count = await backup._fill_gap_range(entity, chat_id=-1001, gap_start=50, gap_end=100) + + assert count == 0 + backup._commit_batch.assert_not_awaited() + + async def test_fill_gap_range_passes_correct_ids_to_client(self): + """iter_messages should be called with min_id=gap_start, max_id=gap_end, reverse=True.""" + db = AsyncMock() + client = AsyncMock() + + backup = _make_backup_instance(db_mock=db, client_mock=client) + + call_kwargs = {} + + async def fake_iter_messages(entity, min_id=None, max_id=None, reverse=None): + call_kwargs["min_id"] = min_id + call_kwargs["max_id"] = max_id + call_kwargs["reverse"] = reverse + return + yield # noqa: unreachable + + client.iter_messages = fake_iter_messages + backup._process_message = AsyncMock() + backup._commit_batch = AsyncMock() + + entity = MagicMock() + await backup._fill_gap_range(entity, chat_id=-1001, gap_start=50, gap_end=100) + + assert call_kwargs["min_id"] == 50 + assert call_kwargs["max_id"] == 100 + assert call_kwargs["reverse"] is True + + +# =========================================================================== +# 4. 
TestConfig — env-var parsing for gap-fill settings +# =========================================================================== + +class TestGapFillConfig: + """Test FILL_GAPS and GAP_THRESHOLD configuration.""" + + def setup_method(self): + self.temp_dir = tempfile.mkdtemp() + + def teardown_method(self): + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def _base_env(self, **extra): + env = { + "CHAT_TYPES": "private", + "BACKUP_PATH": self.temp_dir, + } + env.update(extra) + return env + + def test_fill_gaps_default_false(self): + """FILL_GAPS should default to False when not set.""" + with patch.dict(os.environ, self._base_env(), clear=True): + config = Config() + assert config.fill_gaps is False + + def test_fill_gaps_true(self): + """FILL_GAPS=true should set fill_gaps=True.""" + with patch.dict(os.environ, self._base_env(FILL_GAPS="true"), clear=True): + config = Config() + assert config.fill_gaps is True + + def test_fill_gaps_True_uppercase(self): + """FILL_GAPS=True (capitalized) should also work.""" + with patch.dict(os.environ, self._base_env(FILL_GAPS="True"), clear=True): + config = Config() + assert config.fill_gaps is True + + def test_fill_gaps_false_explicit(self): + """FILL_GAPS=false should set fill_gaps=False.""" + with patch.dict(os.environ, self._base_env(FILL_GAPS="false"), clear=True): + config = Config() + assert config.fill_gaps is False + + def test_fill_gaps_nonsense_is_false(self): + """FILL_GAPS=banana should evaluate to False (only 'true' is truthy).""" + with patch.dict(os.environ, self._base_env(FILL_GAPS="banana"), clear=True): + config = Config() + assert config.fill_gaps is False + + def test_gap_threshold_default(self): + """GAP_THRESHOLD should default to 50.""" + with patch.dict(os.environ, self._base_env(), clear=True): + config = Config() + assert config.gap_threshold == 50 + + def test_gap_threshold_custom(self): + """GAP_THRESHOLD=100 should set gap_threshold=100.""" + with patch.dict(os.environ, 
self._base_env(GAP_THRESHOLD="100"), clear=True): + config = Config() + assert config.gap_threshold == 100 + + def test_gap_threshold_small(self): + """GAP_THRESHOLD=1 should be accepted.""" + with patch.dict(os.environ, self._base_env(GAP_THRESHOLD="1"), clear=True): + config = Config() + assert config.gap_threshold == 1 + + def test_gap_threshold_large(self): + """GAP_THRESHOLD=10000 should be accepted.""" + with patch.dict(os.environ, self._base_env(GAP_THRESHOLD="10000"), clear=True): + config = Config() + assert config.gap_threshold == 10000 + + def test_both_settings_together(self): + """FILL_GAPS and GAP_THRESHOLD can be set simultaneously.""" + with patch.dict(os.environ, self._base_env(FILL_GAPS="true", GAP_THRESHOLD="200"), clear=True): + config = Config() + assert config.fill_gaps is True + assert config.gap_threshold == 200 From 8fe726eb101f1e3bb80d3d719294e6b13ba2b525 Mon Sep 17 00:00:00 2001 From: GeiserX <9169332+GeiserX@users.noreply.github.com> Date: Sat, 14 Mar 2026 20:23:38 +0100 Subject: [PATCH 2/5] fix: add pytest-asyncio to CI, fix lint issues in gap-fill tests --- .github/workflows/tests.yml | 2 +- tests/test_gap_fill.py | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0a2ae1f6..c94353a6 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -23,7 +23,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements.txt - pip install pytest pytest-cov beautifulsoup4 Pillow + pip install pytest pytest-cov pytest-asyncio beautifulsoup4 Pillow - name: Run tests run: | diff --git a/tests/test_gap_fill.py b/tests/test_gap_fill.py index 69f74313..679b9310 100644 --- a/tests/test_gap_fill.py +++ b/tests/test_gap_fill.py @@ -11,7 +11,6 @@ import tempfile from unittest.mock import AsyncMock, MagicMock, patch -import pytest from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, 
create_async_engine from sqlalchemy.pool import StaticPool @@ -21,7 +20,6 @@ from src.db.base import DatabaseManager from src.telegram_backup import TelegramBackup - # --------------------------------------------------------------------------- # Helpers — lightweight in-memory async SQLite setup # --------------------------------------------------------------------------- @@ -519,7 +517,7 @@ async def test_fill_gap_range_empty_gap(self): async def fake_iter_messages(entity, min_id=None, max_id=None, reverse=None): return - yield # noqa: unreachable - makes this an async generator + yield # noqa: F811 - unreachable yield makes this an async generator client.iter_messages = fake_iter_messages backup._process_message = AsyncMock() @@ -545,7 +543,7 @@ async def fake_iter_messages(entity, min_id=None, max_id=None, reverse=None): call_kwargs["max_id"] = max_id call_kwargs["reverse"] = reverse return - yield # noqa: unreachable + yield # noqa: F811 - unreachable yield makes this an async generator client.iter_messages = fake_iter_messages backup._process_message = AsyncMock() From 3d26f4390da2a37d4992557df11c4918287667c0 Mon Sep 17 00:00:00 2001 From: GeiserX <9169332+GeiserX@users.noreply.github.com> Date: Sat, 14 Mar 2026 20:26:23 +0100 Subject: [PATCH 3/5] style: fix ruff formatting in gap-fill code --- src/__main__.py | 9 ++-- src/telegram_backup.py | 14 ++--- tests/test_gap_fill.py | 120 +++++++++++++++++++++++------------------ 3 files changed, 81 insertions(+), 62 deletions(-) diff --git a/src/__main__.py b/src/__main__.py index 6b84fbaf..af94f892 100644 --- a/src/__main__.py +++ b/src/__main__.py @@ -155,11 +155,12 @@ def create_parser() -> argparse.ArgumentParser: "during previous backup runs." 
), ) + fill_gaps_parser.add_argument("-c", "--chat-id", type=int, help="Fill gaps only for this specific chat ID") fill_gaps_parser.add_argument( - "-c", "--chat-id", type=int, help="Fill gaps only for this specific chat ID" - ) - fill_gaps_parser.add_argument( - "-t", "--threshold", type=int, default=None, + "-t", + "--threshold", + type=int, + default=None, help="Minimum gap size to investigate (overrides GAP_THRESHOLD env var)", ) diff --git a/src/telegram_backup.py b/src/telegram_backup.py index aa3cb8e2..49edc9bc 100644 --- a/src/telegram_backup.py +++ b/src/telegram_backup.py @@ -781,12 +781,14 @@ async def _fill_gaps(self, chat_id: int | None = None) -> dict: summary["total_gaps"] += len(gaps) summary["total_recovered"] += chat_recovered - summary["details"].append({ - "chat_id": cid, - "chat_name": chat_name, - "gaps": len(gaps), - "recovered": chat_recovered, - }) + summary["details"].append( + { + "chat_id": cid, + "chat_name": chat_name, + "gaps": len(gaps), + "recovered": chat_recovered, + } + ) logger.info( f"Gap-fill complete: {summary['chats_scanned']} chats scanned, " diff --git a/tests/test_gap_fill.py b/tests/test_gap_fill.py index 679b9310..4ad4cb7e 100644 --- a/tests/test_gap_fill.py +++ b/tests/test_gap_fill.py @@ -24,6 +24,7 @@ # Helpers — lightweight in-memory async SQLite setup # --------------------------------------------------------------------------- + async def _create_in_memory_adapter(): """Create a DatabaseAdapter backed by an in-memory SQLite database. 
@@ -39,43 +40,47 @@ async def _create_in_memory_adapter(): # Create the minimal schema needed for gap detection async with engine.begin() as conn: - await conn.execute(text( - "CREATE TABLE IF NOT EXISTS chats (" - " id INTEGER PRIMARY KEY," - " type TEXT NOT NULL DEFAULT 'channel'," - " title TEXT," - " username TEXT," - " first_name TEXT," - " last_name TEXT," - " phone TEXT," - " description TEXT," - " participants_count INTEGER," - " is_forum INTEGER DEFAULT 0," - " is_archived INTEGER DEFAULT 0," - " last_synced_message_id INTEGER DEFAULT 0," - " created_at TEXT DEFAULT CURRENT_TIMESTAMP," - " updated_at TEXT DEFAULT CURRENT_TIMESTAMP" - ")" - )) - await conn.execute(text( - "CREATE TABLE IF NOT EXISTS messages (" - " id INTEGER NOT NULL," - " chat_id INTEGER NOT NULL," - " sender_id INTEGER," - " date TEXT NOT NULL DEFAULT '2025-01-01 00:00:00'," - " text TEXT," - " reply_to_msg_id INTEGER," - " reply_to_top_id INTEGER," - " reply_to_text TEXT," - " forward_from_id INTEGER," - " edit_date TEXT," - " raw_data TEXT," - " created_at TEXT DEFAULT CURRENT_TIMESTAMP," - " is_outgoing INTEGER DEFAULT 0," - " is_pinned INTEGER DEFAULT 0," - " PRIMARY KEY (id, chat_id)" - ")" - )) + await conn.execute( + text( + "CREATE TABLE IF NOT EXISTS chats (" + " id INTEGER PRIMARY KEY," + " type TEXT NOT NULL DEFAULT 'channel'," + " title TEXT," + " username TEXT," + " first_name TEXT," + " last_name TEXT," + " phone TEXT," + " description TEXT," + " participants_count INTEGER," + " is_forum INTEGER DEFAULT 0," + " is_archived INTEGER DEFAULT 0," + " last_synced_message_id INTEGER DEFAULT 0," + " created_at TEXT DEFAULT CURRENT_TIMESTAMP," + " updated_at TEXT DEFAULT CURRENT_TIMESTAMP" + ")" + ) + ) + await conn.execute( + text( + "CREATE TABLE IF NOT EXISTS messages (" + " id INTEGER NOT NULL," + " chat_id INTEGER NOT NULL," + " sender_id INTEGER," + " date TEXT NOT NULL DEFAULT '2025-01-01 00:00:00'," + " text TEXT," + " reply_to_msg_id INTEGER," + " reply_to_top_id INTEGER," 
+ " reply_to_text TEXT," + " forward_from_id INTEGER," + " edit_date TEXT," + " raw_data TEXT," + " created_at TEXT DEFAULT CURRENT_TIMESTAMP," + " is_outgoing INTEGER DEFAULT 0," + " is_pinned INTEGER DEFAULT 0," + " PRIMARY KEY (id, chat_id)" + ")" + ) + ) # Wire up a real DatabaseManager (skip its init() — we supply our own engine) db_manager = DatabaseManager.__new__(DatabaseManager) @@ -83,7 +88,9 @@ async def _create_in_memory_adapter(): db_manager.database_url = "sqlite+aiosqlite://" db_manager._is_sqlite = True db_manager.async_session_factory = async_sessionmaker( - engine, class_=AsyncSession, expire_on_commit=False, + engine, + class_=AsyncSession, + expire_on_commit=False, ) adapter = DatabaseAdapter(db_manager) @@ -94,18 +101,19 @@ async def _insert_messages(adapter: DatabaseAdapter, chat_id: int, msg_ids: list """Insert message rows with the given IDs into the test database.""" async with adapter.db_manager.async_session_factory() as session: for mid in msg_ids: - await session.execute(text( - "INSERT INTO messages (id, chat_id, date) VALUES (:id, :cid, '2025-01-01 00:00:00')" - ), {"id": mid, "cid": chat_id}) + await session.execute( + text("INSERT INTO messages (id, chat_id, date) VALUES (:id, :cid, '2025-01-01 00:00:00')"), + {"id": mid, "cid": chat_id}, + ) await session.commit() async def _insert_chat(adapter: DatabaseAdapter, chat_id: int, title: str = "Test Chat"): """Insert a chat row into the test database.""" async with adapter.db_manager.async_session_factory() as session: - await session.execute(text( - "INSERT INTO chats (id, title, type) VALUES (:id, :title, 'channel')" - ), {"id": chat_id, "title": title}) + await session.execute( + text("INSERT INTO chats (id, title, type) VALUES (:id, :title, 'channel')"), {"id": chat_id, "title": title} + ) await session.commit() @@ -113,6 +121,7 @@ async def _insert_chat(adapter: DatabaseAdapter, chat_id: int, title: str = "Tes # 1. 
TestDetectMessageGaps — real SQL against in-memory SQLite # =========================================================================== + class TestDetectMessageGaps: """Exercise detect_message_gaps with a real async SQLite database.""" @@ -230,6 +239,7 @@ async def test_different_chats_isolated(self): # 2. TestGetChatsWithMessages — real SQL # =========================================================================== + class TestGetChatsWithMessages: """Exercise get_chats_with_messages with a real async SQLite database.""" @@ -260,6 +270,7 @@ async def test_returns_empty_when_no_chats(self): # 3. TestFillGaps — mocked Telegram client, exercises _fill_gaps control flow # =========================================================================== + def _make_backup_instance(db_mock=None, client_mock=None, config_mock=None): """Create a TelegramBackup instance with mocked dependencies.""" backup = TelegramBackup.__new__(TelegramBackup) @@ -372,10 +383,12 @@ async def test_fill_gaps_processes_detected_gaps(self): """When gaps are found, _fill_gap_range should be called for each.""" db = AsyncMock() db.get_chats_with_messages = AsyncMock(return_value=[-1001]) - db.detect_message_gaps = AsyncMock(return_value=[ - (50, 100, 50), - (200, 300, 100), - ]) + db.detect_message_gaps = AsyncMock( + return_value=[ + (50, 100, 50), + (200, 300, 100), + ] + ) client = AsyncMock() entity = MagicMock() @@ -403,10 +416,12 @@ async def test_fill_gaps_chat_without_gaps_not_in_details(self): """Chats with no gaps should not appear in the details list.""" db = AsyncMock() db.get_chats_with_messages = AsyncMock(return_value=[-1001, -1002]) - db.detect_message_gaps = AsyncMock(side_effect=[ - [], # chat -1001: no gaps - [(10, 100, 90)], # chat -1002: one gap - ]) + db.detect_message_gaps = AsyncMock( + side_effect=[ + [], # chat -1001: no gaps + [(10, 100, 90)], # chat -1002: one gap + ] + ) client = AsyncMock() entity1 = MagicMock() @@ -561,6 +576,7 @@ async def 
fake_iter_messages(entity, min_id=None, max_id=None, reverse=None): # 4. TestConfig — env-var parsing for gap-fill settings # =========================================================================== + class TestGapFillConfig: """Test FILL_GAPS and GAP_THRESHOLD configuration.""" From 18f4edba905f3073c3919a4ce9e9fa5a917f8951 Mon Sep 17 00:00:00 2001 From: GeiserX <9169332+GeiserX@users.noreply.github.com> Date: Sun, 15 Mar 2026 12:48:56 +0100 Subject: [PATCH 4/5] fix: address security review findings for gap-fill and token URLs - Gap-fill now continues on per-chat/per-gap errors instead of aborting - Scheduler accurately reports gap-fill errors in completion message - Gap-fill respects backup config (CHAT_IDS, CHAT_TYPES, excludes) - Token URL share dialog warns about server access log exposure - Stats recalculated after gap-fill recovers messages --- src/scheduler.py | 25 ++++++++++++---- src/telegram_backup.py | 56 +++++++++++++++++++++++++++++++----- src/web/templates/index.html | 1 + 3 files changed, 70 insertions(+), 12 deletions(-) diff --git a/src/scheduler.py b/src/scheduler.py index 8dba6623..d9008dd9 100644 --- a/src/scheduler.py +++ b/src/scheduler.py @@ -77,13 +77,21 @@ async def _run_backup_job(self): await run_backup(self.config, client=client) # Run gap-fill if enabled + gap_fill_ok = True if self.config.fill_gaps: try: from .telegram_backup import run_fill_gaps logger.info("Running post-backup gap-fill...") - await run_fill_gaps(self.config, client=client) + result = await run_fill_gaps(self.config, client=client) + if result.get("errors", 0) > 0: + gap_fill_ok = False + logger.warning( + f"Gap-fill completed with {result['errors']} error(s) " + f"({result['total_recovered']} messages recovered)" + ) except Exception as e: + gap_fill_ok = False logger.error(f"Gap-fill failed: {e}", exc_info=True) # Reload tracked chats in listener after backup @@ -91,7 +99,10 @@ async def _run_backup_job(self): if self._listener: await 
self._listener._load_tracked_chats() - logger.info("Scheduled backup completed successfully") + if gap_fill_ok: + logger.info("Scheduled backup completed successfully") + else: + logger.warning("Scheduled backup completed, but gap-fill had errors") except Exception as e: logger.error(f"Scheduled backup failed: {e}", exc_info=True) @@ -229,10 +240,14 @@ async def run_forever(self): try: from .telegram_backup import run_fill_gaps - logger.info("Running post-backup gap-fill...") - await run_fill_gaps(self.config, client=self._connection.client) + logger.info("Running initial gap-fill...") + result = await run_fill_gaps(self.config, client=self._connection.client) + if result.get("errors", 0) > 0: + logger.warning( + f"Initial gap-fill completed with {result['errors']} error(s)" + ) except Exception as e: - logger.error(f"Gap-fill failed: {e}", exc_info=True) + logger.error(f"Initial gap-fill failed: {e}", exc_info=True) # Reload tracked chats in listener after initial backup if self._listener: diff --git a/src/telegram_backup.py b/src/telegram_backup.py index 49edc9bc..646870d6 100644 --- a/src/telegram_backup.py +++ b/src/telegram_backup.py @@ -743,13 +743,28 @@ async def _fill_gaps(self, chat_id: int | None = None) -> dict: "chats_with_gaps": 0, "total_gaps": 0, "total_recovered": 0, + "errors": 0, "details": [], } if chat_id is not None: chat_ids = [chat_id] else: - chat_ids = await self.db.get_chats_with_messages() + # Only scan chats that current config would back up (respects + # CHAT_IDS whitelist, CHAT_TYPES, and all exclude lists) + all_chat_ids = await self.db.get_chats_with_messages() + chat_ids = [] + for cid in all_chat_ids: + chat_info = await self.db.get_chat_by_id(cid) + if not chat_info: + continue + ctype = chat_info.get("type", "") + is_user = ctype == "private" + is_group = ctype in ("group", "supergroup") + is_channel = ctype == "channel" + is_bot = ctype == "bot" + if self.config.should_backup_chat(cid, is_user, is_group, is_channel, is_bot): + 
chat_ids.append(cid) logger.info(f"Gap-fill: scanning {len(chat_ids)} chat(s) with threshold={threshold}") @@ -761,9 +776,19 @@ async def _fill_gaps(self, chat_id: int | None = None) -> dict: except (ChannelPrivateError, ChatForbiddenError, UserBannedInChannelError) as e: logger.warning(f"Gap-fill: skipping chat {cid} (no access): {e.__class__.__name__}") continue + except Exception as e: + logger.error(f"Gap-fill: failed to get entity for chat {cid}: {e}") + summary["errors"] += 1 + continue chat_name = self._get_chat_name(entity) - gaps = await self.db.detect_message_gaps(cid, threshold) + + try: + gaps = await self.db.detect_message_gaps(cid, threshold) + except Exception as e: + logger.error(f"Gap-fill: failed to detect gaps for {chat_name} ({cid}): {e}") + summary["errors"] += 1 + continue if not gaps: continue @@ -775,9 +800,13 @@ async def _fill_gaps(self, chat_id: int | None = None) -> dict: for gap_start, gap_end, gap_size in gaps: logger.info(f" → Filling gap: {gap_start}..{gap_end} (size {gap_size})") - recovered = await self._fill_gap_range(entity, cid, gap_start, gap_end) - chat_recovered += recovered - logger.info(f" Recovered {recovered} messages") + try: + recovered = await self._fill_gap_range(entity, cid, gap_start, gap_end) + chat_recovered += recovered + logger.info(f" Recovered {recovered} messages") + except Exception as e: + logger.error(f" Error filling gap {gap_start}..{gap_end}: {e}") + summary["errors"] += 1 summary["total_gaps"] += len(gaps) summary["total_recovered"] += chat_recovered @@ -790,9 +819,11 @@ async def _fill_gaps(self, chat_id: int | None = None) -> dict: } ) + status = "complete" if summary["errors"] == 0 else "complete with errors" logger.info( - f"Gap-fill complete: {summary['chats_scanned']} chats scanned, " + f"Gap-fill {status}: {summary['chats_scanned']} chats scanned, " f"{summary['total_gaps']} gaps found, {summary['total_recovered']} messages recovered" + + (f", {summary['errors']} error(s)" if summary["errors"] 
else "") ) return summary @@ -1762,7 +1793,18 @@ async def run_fill_gaps(config: Config, client: TelegramClient | None = None, ch backup = await TelegramBackup.create(config, client=client) try: await backup.connect() - return await backup._fill_gaps(chat_id=chat_id) + summary = await backup._fill_gaps(chat_id=chat_id) + + # Refresh cached stats if messages were recovered so the viewer + # doesn't show stale totals until the next scheduled recalculation + if summary["total_recovered"] > 0: + try: + await backup.db.calculate_and_store_statistics() + logger.info("Stats recalculated after gap-fill recovery") + except Exception as e: + logger.warning(f"Failed to recalculate stats after gap-fill: {e}") + + return summary finally: await backup.disconnect() await backup.db.close() diff --git a/src/web/templates/index.html b/src/web/templates/index.html index b9189fa4..369f138d 100644 --- a/src/web/templates/index.html +++ b/src/web/templates/index.html @@ -1322,6 +1322,7 @@

Create Share Token

{{ adminNewTokenUrl }}
+
Note: token may appear in server access logs when the link is opened.
From 17af9c615732ed7994948fb0add2fcc924181886 Mon Sep 17 00:00:00 2001 From: GeiserX <9169332+GeiserX@users.noreply.github.com> Date: Sun, 15 Mar 2026 12:50:52 +0100 Subject: [PATCH 5/5] style: fix ruff formatting in scheduler --- src/scheduler.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/scheduler.py b/src/scheduler.py index d9008dd9..25bb5145 100644 --- a/src/scheduler.py +++ b/src/scheduler.py @@ -243,9 +243,7 @@ async def run_forever(self): logger.info("Running initial gap-fill...") result = await run_fill_gaps(self.config, client=self._connection.client) if result.get("errors", 0) > 0: - logger.warning( - f"Initial gap-fill completed with {result['errors']} error(s)" - ) + logger.warning(f"Initial gap-fill completed with {result['errors']} error(s)") except Exception as e: logger.error(f"Initial gap-fill failed: {e}", exc_info=True)