From 072d202415730ec9957d5202a9b503af54285f59 Mon Sep 17 00:00:00 2001 From: GeiserX <9169332+GeiserX@users.noreply.github.com> Date: Sat, 14 Mar 2026 20:19:54 +0100 Subject: [PATCH 1/5] =?UTF-8?q?feat:=20v7.3.0=20=E2=80=94=20gap-fill=20rec?= =?UTF-8?q?overy,=20token=20URL=20auto-login,=20UX=20improvements?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gap-fill: detect and recover skipped messages from interrupted backups - SQL LAG() window function detects gaps in message ID sequences - iter_messages(min_id, max_id) recovers missing messages from Telegram - FILL_GAPS=true / GAP_THRESHOLD=50 env vars for automated post-backup runs - CLI subcommand: fill-gaps --chat-id --threshold - Scheduler integration: runs automatically after each backup when enabled - 31 functional tests (real SQLite + mocked Telegram client) Frontend UX improvements: - Token URL auto-login (?token=XXX) for shareable one-click access - Shareable link UI in admin panel (copy token or full URL) - @username display in chat list --- src/__init__.py | 2 +- src/__main__.py | 50 +++ src/config.py | 6 + src/db/adapter.py | 43 +++ src/scheduler.py | 20 ++ src/telegram_backup.py | 126 +++++++ src/web/templates/index.html | 46 ++- tests/test_gap_fill.py | 642 +++++++++++++++++++++++++++++++++++ 8 files changed, 931 insertions(+), 4 deletions(-) create mode 100644 tests/test_gap_fill.py diff --git a/src/__init__.py b/src/__init__.py index 5621f19b..4ec82691 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -2,4 +2,4 @@ Telegram Backup Automation - Main Package """ -__version__ = "7.2.1" +__version__ = "7.3.0" diff --git a/src/__main__.py b/src/__main__.py index 0daca9c1..6b84fbaf 100644 --- a/src/__main__.py +++ b/src/__main__.py @@ -144,6 +144,25 @@ def create_parser() -> argparse.ArgumentParser: "--merge", action="store_true", help="Allow importing into a chat that already has messages" ) + # Fill gaps command + fill_gaps_parser = subparsers.add_parser( + 
"fill-gaps", + help="Detect and fill message gaps from failed backups", + description=( + "Scans backed-up chats for gaps in message ID sequences " + "and recovers skipped messages from Telegram. " + "Gaps are caused by API errors, rate limits, or interruptions " + "during previous backup runs." + ), + ) + fill_gaps_parser.add_argument( + "-c", "--chat-id", type=int, help="Fill gaps only for this specific chat ID" + ) + fill_gaps_parser.add_argument( + "-t", "--threshold", type=int, default=None, + help="Minimum gap size to investigate (overrides GAP_THRESHOLD env var)", + ) + return parser @@ -207,6 +226,35 @@ async def run_list_chats(args) -> int: return 1 +async def run_fill_gaps_cmd(args) -> int: + """Run fill-gaps command.""" + from .config import Config, setup_logging + from .telegram_backup import run_fill_gaps + + try: + config = Config() + if args.threshold is not None: + config.gap_threshold = args.threshold + setup_logging(config) + + summary = await run_fill_gaps(config, chat_id=args.chat_id) + print("\nGap-fill complete:") + print(f" Chats scanned: {summary['chats_scanned']}") + print(f" Chats with gaps: {summary['chats_with_gaps']}") + print(f" Total gaps found: {summary['total_gaps']}") + print(f" Messages recovered: {summary['total_recovered']}") + if summary["details"]: + for detail in summary["details"]: + print( + f" - {detail['chat_name']} (ID {detail['chat_id']}): " + f"{detail['gaps']} gaps, {detail['recovered']} recovered" + ) + return 0 + except Exception as e: + print(f"Gap-fill failed: {e}", file=sys.stderr) + return 1 + + async def run_import(args) -> int: """Run import command.""" from .config import Config, setup_logging @@ -304,6 +352,8 @@ def main() -> int: return asyncio.run(run_list_chats(args)) elif args.command == "import": return asyncio.run(run_import(args)) + elif args.command == "fill-gaps": + return asyncio.run(run_fill_gaps_cmd(args)) else: parser.print_help() return 0 diff --git a/src/config.py b/src/config.py index 
28ffab7f..de0bbb22 100644 --- a/src/config.py +++ b/src/config.py @@ -147,6 +147,12 @@ def __init__(self): # Useful for recovering from interrupted backups or deleted media files self.verify_media = os.getenv("VERIFY_MEDIA", "false").lower() == "true" + # Gap-fill mode: detect and recover skipped messages + # When enabled, runs after each scheduled backup to find and fill gaps + # in message ID sequences caused by API errors or interruptions + self.fill_gaps = os.getenv("FILL_GAPS", "false").lower() == "true" + self.gap_threshold = int(os.getenv("GAP_THRESHOLD", "50")) + # Real-time listener mode # When enabled, runs a background listener that catches message edits and deletions # in real-time instead of batch-checking on each backup run diff --git a/src/db/adapter.py b/src/db/adapter.py index 0d5308b5..c27f2631 100644 --- a/src/db/adapter.py +++ b/src/db/adapter.py @@ -890,6 +890,49 @@ async def update_sync_status(self, chat_id: int, last_message_id: int, message_c await session.execute(stmt) await session.commit() + # ========== Gap Detection ========== + + async def detect_message_gaps(self, chat_id: int, threshold: int = 50) -> list[tuple[int, int, int]]: + """Detect gaps in message ID sequences for a chat. + + Uses a SQL LAG() window function to find gaps larger than threshold. + + Returns: + List of (gap_start_id, gap_end_id, gap_size) tuples where + gap_start is the last message ID before the gap and + gap_end is the first message ID after the gap. 
+ """ + async with self.db_manager.async_session_factory() as session: + result = await session.execute( + text( + """ + SELECT gap_start, gap_end, gap_size FROM ( + SELECT + LAG(id) OVER (ORDER BY id) AS gap_start, + id AS gap_end, + id - LAG(id) OVER (ORDER BY id) AS gap_size + FROM messages + WHERE chat_id = :chat_id + ) gaps + WHERE gap_size > :threshold + ORDER BY gap_start + """ + ), + {"chat_id": chat_id, "threshold": threshold}, + ) + return [(row[0], row[1], row[2]) for row in result.fetchall()] + + async def get_chats_with_messages(self) -> list[int]: + """Get all chat IDs that exist in the chats table. + + Queries the chats table directly instead of scanning the messages table, + which would be extremely slow on large databases. + """ + async with self.db_manager.async_session_factory() as session: + stmt = select(Chat.id) + result = await session.execute(stmt) + return [row[0] for row in result.fetchall()] + # ========== Statistics ========== async def get_statistics(self) -> dict[str, Any]: diff --git a/src/scheduler.py b/src/scheduler.py index 6fc469bc..8dba6623 100644 --- a/src/scheduler.py +++ b/src/scheduler.py @@ -76,6 +76,16 @@ async def _run_backup_job(self): # Run backup using shared client await run_backup(self.config, client=client) + # Run gap-fill if enabled + if self.config.fill_gaps: + try: + from .telegram_backup import run_fill_gaps + + logger.info("Running post-backup gap-fill...") + await run_fill_gaps(self.config, client=client) + except Exception as e: + logger.error(f"Gap-fill failed: {e}", exc_info=True) + # Reload tracked chats in listener after backup # (new chats may have been added) if self._listener: @@ -214,6 +224,16 @@ async def run_forever(self): await run_backup(self.config, client=self._connection.client) logger.info("Initial backup completed") + # Run gap-fill if enabled + if self.config.fill_gaps: + try: + from .telegram_backup import run_fill_gaps + + logger.info("Running post-backup gap-fill...") + await 
run_fill_gaps(self.config, client=self._connection.client) + except Exception as e: + logger.error(f"Gap-fill failed: {e}", exc_info=True) + # Reload tracked chats in listener after initial backup if self._listener: await self._listener._load_tracked_chats() diff --git a/src/telegram_backup.py b/src/telegram_backup.py index 4df6c578..aa3cb8e2 100644 --- a/src/telegram_backup.py +++ b/src/telegram_backup.py @@ -692,6 +692,109 @@ async def _commit_batch(self, batch_data: list[dict], chat_id: int) -> None: if reactions_list: await self.db.insert_reactions(msg["id"], chat_id, reactions_list) + async def _fill_gap_range(self, entity, chat_id: int, gap_start: int, gap_end: int) -> int: + """ + Fetch and store messages for a single gap range. + + Args: + entity: Telegram entity for the chat + chat_id: Chat identifier + gap_start: Last message ID before the gap + gap_end: First message ID after the gap + + Returns: + Number of recovered messages + """ + batch_data: list[dict] = [] + batch_size = self.config.batch_size + recovered = 0 + + async for message in self.client.iter_messages(entity, min_id=gap_start, max_id=gap_end, reverse=True): + msg_data = await self._process_message(message, chat_id) + batch_data.append(msg_data) + + if len(batch_data) >= batch_size: + await self._commit_batch(batch_data, chat_id) + recovered += len(batch_data) + batch_data = [] + + # Flush remaining messages + if batch_data: + await self._commit_batch(batch_data, chat_id) + recovered += len(batch_data) + + return recovered + + async def _fill_gaps(self, chat_id: int | None = None) -> dict: + """ + Detect and fill gaps in message ID sequences. + + Scans chats for missing message ID ranges and fetches them from Telegram. + + Args: + chat_id: If provided, scan only this chat. Otherwise scan all chats. + + Returns: + Summary dict with gap-fill statistics. 
+ """ + threshold = self.config.gap_threshold + summary = { + "chats_scanned": 0, + "chats_with_gaps": 0, + "total_gaps": 0, + "total_recovered": 0, + "details": [], + } + + if chat_id is not None: + chat_ids = [chat_id] + else: + chat_ids = await self.db.get_chats_with_messages() + + logger.info(f"Gap-fill: scanning {len(chat_ids)} chat(s) with threshold={threshold}") + + for cid in chat_ids: + summary["chats_scanned"] += 1 + + try: + entity = await self.client.get_entity(cid) + except (ChannelPrivateError, ChatForbiddenError, UserBannedInChannelError) as e: + logger.warning(f"Gap-fill: skipping chat {cid} (no access): {e.__class__.__name__}") + continue + + chat_name = self._get_chat_name(entity) + gaps = await self.db.detect_message_gaps(cid, threshold) + + if not gaps: + continue + + summary["chats_with_gaps"] += 1 + chat_recovered = 0 + + logger.info(f"Gap-fill: {chat_name} (ID: {cid}) has {len(gaps)} gap(s)") + + for gap_start, gap_end, gap_size in gaps: + logger.info(f" → Filling gap: {gap_start}..{gap_end} (size {gap_size})") + recovered = await self._fill_gap_range(entity, cid, gap_start, gap_end) + chat_recovered += recovered + logger.info(f" Recovered {recovered} messages") + + summary["total_gaps"] += len(gaps) + summary["total_recovered"] += chat_recovered + summary["details"].append({ + "chat_id": cid, + "chat_name": chat_name, + "gaps": len(gaps), + "recovered": chat_recovered, + }) + + logger.info( + f"Gap-fill complete: {summary['chats_scanned']} chats scanned, " + f"{summary['total_gaps']} gaps found, {summary['total_recovered']} messages recovered" + ) + + return summary + async def _sync_deletions_and_edits(self, chat_id: int, entity): """ Sync deletions and edits for existing messages in the database. 
@@ -1640,6 +1743,29 @@ async def run_backup(config: Config, client: TelegramClient | None = None): await backup.db.close() +async def run_fill_gaps(config: Config, client: TelegramClient | None = None, chat_id: int | None = None) -> dict: + """ + Run gap-fill to recover missing messages in backed-up chats. + + Args: + config: Configuration object + client: Optional existing TelegramClient to use (for shared connection). + If provided, the operation will use this client instead of creating + its own, avoiding session file lock conflicts. + chat_id: If provided, scan only this chat. Otherwise scan all chats. + + Returns: + Summary dict with gap-fill statistics. + """ + backup = await TelegramBackup.create(config, client=client) + try: + await backup.connect() + return await backup._fill_gaps(chat_id=chat_id) + finally: + await backup.disconnect() + await backup.db.close() + + def main(): """Main entry point for CLI.""" import asyncio diff --git a/src/web/templates/index.html b/src/web/templates/index.html index 1725d0fd..b9189fa4 100644 --- a/src/web/templates/index.html +++ b/src/web/templates/index.html @@ -639,7 +639,8 @@
- {{ chat.type }}
+ @{{ chat.username }}
+ • {{ chat.type }}
• Forum
• {{ chat.participants_count }} participants
@@ -1311,9 +1312,15 @@ Create Share Token
{{ adminNewToken }}
-
+
+ {{ adminNewTokenUrl }}
+
Create Share Token
noDownload.value = !!data.no_download
console.log('[DEBUG] authRequired:', authRequired.value, 'isAuthenticated:', isAuthenticated.value)
+ // Auto-login via ?token= URL parameter
+ if (new URLSearchParams(window.location.search).has('token')) {
+     const urlToken = new URLSearchParams(window.location.search).get('token')
+     if (urlToken && !isAuthenticated.value && authRequired.value) {
+         try {
+             const tokenRes = await fetch('/auth/token', {
+                 method: 'POST', credentials: 'include',
+                 headers: { 'Content-Type': 'application/json' },
+                 body: JSON.stringify({ token: urlToken }),
+             })
+             if (tokenRes.ok) {
+                 const tokenData = await tokenRes.json()
+                 if (tokenData.success) {
+                     isAuthenticated.value = true
+                     userRole.value = tokenData.role || 'token'
+                     currentUsername.value = tokenData.username || ''
+                     noDownload.value = !!tokenData.no_download
+                 }
+             }
+         } catch (e) { console.error('Token auto-login failed:', e) }
+     }
+     // Always remove the token from the URL (even when already signed in or when
+     // auth is disabled) to prevent leaking it in browser history
+     const cleanUrl = new URL(window.location)
+     cleanUrl.searchParams.delete('token')
+     window.history.replaceState({}, '', cleanUrl)
+ }
+
if (isAuthenticated.value) {
await loadChats()
await loadStats()
@@ -3419,6 +3454,10 @@ Create Share Token
const adminTokenForm = ref({ label: '', allowed_chat_ids: [], no_download: false, expires_at: '' })
const adminTokenError = ref('')
const adminNewToken = ref('')
+ const adminNewTokenUrl = computed(() => {
+     if (!adminNewToken.value) return ''
+     return `${window.location.origin}/?token=${encodeURIComponent(adminNewToken.value)}`
+ })
const loadAdminViewers = async () => {
try {
@@ -3671,6 +3710,7 @@ Create Share Token
adminTokenForm,
adminTokenError,
adminNewToken,
+ adminNewTokenUrl,
loadAdminTokens,
createToken,
revokeToken,
diff --git a/tests/test_gap_fill.py b/tests/test_gap_fill.py
new file mode 100644
index 00000000..69f74313
--- /dev/null
+++ b/tests/test_gap_fill.py
@@ -0,0 +1,642 @@
+"""Functional tests for the gap-fill feature (v7.3.0).
+
+Tests cover:
+- detect_message_gaps: real SQL queries against an in-memory SQLite database
+- _fill_gaps / _fill_gap_range: Telegram client mocks exercising actual control flow
+- Config: env-var parsing for FILL_GAPS and GAP_THRESHOLD
+"""
+
+import os
+import shutil
+import tempfile
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
+from sqlalchemy.pool import StaticPool
+
+from src.config import Config
+from src.db.adapter import DatabaseAdapter
+from src.db.base import DatabaseManager
+from src.telegram_backup import TelegramBackup
+
+
+# ---------------------------------------------------------------------------
+# Helpers — lightweight in-memory async SQLite setup
+# ---------------------------------------------------------------------------
+
+async def _create_in_memory_adapter():
+ """Create a DatabaseAdapter backed by an in-memory SQLite database.
+
+ Returns (adapter, engine) so the caller can dispose the engine after use.
+ """
+ # StaticPool + check_same_thread=False keeps a single shared in-memory DB
+ # across all connections, which is required for aiosqlite in-memory testing.
+ engine = create_async_engine(
+ "sqlite+aiosqlite://",
+ poolclass=StaticPool,
+ connect_args={"check_same_thread": False},
+ )
+
+ # Create the minimal schema needed for gap detection
+ async with engine.begin() as conn:
+ await conn.execute(text(
+ "CREATE TABLE IF NOT EXISTS chats ("
+ " id INTEGER PRIMARY KEY,"
+ " type TEXT NOT NULL DEFAULT 'channel',"
+ " title TEXT,"
+ " username TEXT,"
+ " first_name TEXT,"
+ " last_name TEXT,"
+ " phone TEXT,"
+ " description TEXT,"
+ " participants_count INTEGER,"
+ " is_forum INTEGER DEFAULT 0,"
+ " is_archived INTEGER DEFAULT 0,"
+ " last_synced_message_id INTEGER DEFAULT 0,"
+ " created_at TEXT DEFAULT CURRENT_TIMESTAMP,"
+ " updated_at TEXT DEFAULT CURRENT_TIMESTAMP"
+ ")"
+ ))
+ await conn.execute(text(
+ "CREATE TABLE IF NOT EXISTS messages ("
+ " id INTEGER NOT NULL,"
+ " chat_id INTEGER NOT NULL,"
+ " sender_id INTEGER,"
+ " date TEXT NOT NULL DEFAULT '2025-01-01 00:00:00',"
+ " text TEXT,"
+ " reply_to_msg_id INTEGER,"
+ " reply_to_top_id INTEGER,"
+ " reply_to_text TEXT,"
+ " forward_from_id INTEGER,"
+ " edit_date TEXT,"
+ " raw_data TEXT,"
+ " created_at TEXT DEFAULT CURRENT_TIMESTAMP,"
+ " is_outgoing INTEGER DEFAULT 0,"
+ " is_pinned INTEGER DEFAULT 0,"
+ " PRIMARY KEY (id, chat_id)"
+ ")"
+ ))
+
+ # Wire up a real DatabaseManager (skip its init() — we supply our own engine)
+ db_manager = DatabaseManager.__new__(DatabaseManager)
+ db_manager.engine = engine
+ db_manager.database_url = "sqlite+aiosqlite://"
+ db_manager._is_sqlite = True
+ db_manager.async_session_factory = async_sessionmaker(
+ engine, class_=AsyncSession, expire_on_commit=False,
+ )
+
+ adapter = DatabaseAdapter(db_manager)
+ return adapter, engine
+
+
+async def _insert_messages(adapter: DatabaseAdapter, chat_id: int, msg_ids: list[int]):
+ """Insert message rows with the given IDs into the test database."""
+ async with adapter.db_manager.async_session_factory() as session:
+ for mid in msg_ids:
+ await session.execute(text(
+ "INSERT INTO messages (id, chat_id, date) VALUES (:id, :cid, '2025-01-01 00:00:00')"
+ ), {"id": mid, "cid": chat_id})
+ await session.commit()
+
+
+async def _insert_chat(adapter: DatabaseAdapter, chat_id: int, title: str = "Test Chat"):
+ """Insert a chat row into the test database."""
+ async with adapter.db_manager.async_session_factory() as session:
+ await session.execute(text(
+ "INSERT INTO chats (id, title, type) VALUES (:id, :title, 'channel')"
+ ), {"id": chat_id, "title": title})
+ await session.commit()
+
+
+# ===========================================================================
+# 1. TestDetectMessageGaps — real SQL against in-memory SQLite
+# ===========================================================================
+
+class TestDetectMessageGaps:
+    """Exercise detect_message_gaps (gap_size = gap_end - gap_start) on a real async SQLite DB."""
+
+    async def test_no_gaps_consecutive_ids(self):
+        """Consecutive message IDs should produce zero gaps."""
+        adapter, engine = await _create_in_memory_adapter()
+        try:
+            await _insert_messages(adapter, chat_id=100, msg_ids=list(range(1, 51)))
+            gaps = await adapter.detect_message_gaps(chat_id=100, threshold=50)
+            assert gaps == []
+        finally:
+            await engine.dispose()
+
+    async def test_single_large_gap(self):
+        """IDs 1-50 then 100-150 should return one gap (50, 100, 50)."""
+        adapter, engine = await _create_in_memory_adapter()
+        try:
+            ids = list(range(1, 51)) + list(range(100, 151))
+            await _insert_messages(adapter, chat_id=200, msg_ids=ids)
+            gaps = await adapter.detect_message_gaps(chat_id=200, threshold=49)
+
+            assert len(gaps) == 1
+            gap_start, gap_end, gap_size = gaps[0]
+            assert gap_start == 50
+            assert gap_end == 100
+            assert gap_size == 50
+        finally:
+            await engine.dispose()
+
+    async def test_multiple_gaps_sorted(self):
+        """Multiple gaps should all be returned, sorted by gap_start."""
+        adapter, engine = await _create_in_memory_adapter()
+        try:
+            # Gap 1: between 10 and 100 (size 90)
+            # Gap 2: between 110 and 300 (size 190)
+            ids = list(range(1, 11)) + list(range(100, 111)) + list(range(300, 311))
+            await _insert_messages(adapter, chat_id=300, msg_ids=ids)
+            gaps = await adapter.detect_message_gaps(chat_id=300, threshold=50)
+
+            assert len(gaps) == 2
+            assert gaps[0] == (10, 100, 90)
+            assert gaps[1] == (110, 300, 190)
+            # Verify sorted by gap_start
+            assert gaps[0][0] < gaps[1][0]
+        finally:
+            await engine.dispose()
+
+    async def test_gap_below_threshold_not_returned(self):
+        """A gap smaller than or equal to the threshold should not appear."""
+        adapter, engine = await _create_in_memory_adapter()
+        try:
+            # IDs 1-10 then 60-70 → gap_size = 60 - 10 = 50, exactly the threshold.
+            # The query filters with gap_size > threshold (strict), so 50 is NOT returned.
+            ids = list(range(1, 11)) + list(range(60, 71))
+            await _insert_messages(adapter, chat_id=400, msg_ids=ids)
+            gaps = await adapter.detect_message_gaps(chat_id=400, threshold=50)
+
+            assert gaps == [], f"Gap of exactly threshold should not be returned, got {gaps}"
+        finally:
+            await engine.dispose()
+
+    async def test_gap_just_above_threshold_returned(self):
+        """A gap of threshold+1 should be returned."""
+        adapter, engine = await _create_in_memory_adapter()
+        try:
+            # IDs 1-10 then 61-70 → gap_size = 61 - 10 = 51 = threshold + 1 > 50
+            ids = list(range(1, 11)) + list(range(61, 71))
+            await _insert_messages(adapter, chat_id=401, msg_ids=ids)
+            gaps = await adapter.detect_message_gaps(chat_id=401, threshold=50)
+
+            assert len(gaps) == 1
+            assert gaps[0] == (10, 61, 51)
+        finally:
+            await engine.dispose()
+
+    async def test_single_message_no_gaps(self):
+        """A single message in the chat should produce no gaps."""
+        adapter, engine = await _create_in_memory_adapter()
+        try:
+            await _insert_messages(adapter, chat_id=500, msg_ids=[42])
+            gaps = await adapter.detect_message_gaps(chat_id=500, threshold=50)
+            assert gaps == []
+        finally:
+            await engine.dispose()
+
+    async def test_empty_chat_no_gaps(self):
+        """A chat with zero messages should produce no gaps."""
+        adapter, engine = await _create_in_memory_adapter()
+        try:
+            gaps = await adapter.detect_message_gaps(chat_id=999, threshold=50)
+            assert gaps == []
+        finally:
+            await engine.dispose()
+
+    async def test_different_chats_isolated(self):
+        """Gaps in one chat should not appear in another chat's results."""
+        adapter, engine = await _create_in_memory_adapter()
+        try:
+            # Chat 1: has a gap
+            await _insert_messages(adapter, chat_id=10, msg_ids=[1, 2, 3, 100])
+            # Chat 2: no gap
+            await _insert_messages(adapter, chat_id=20, msg_ids=[1, 2, 3, 4, 5])
+
+            gaps_chat1 = await adapter.detect_message_gaps(chat_id=10, threshold=50)
+            gaps_chat2 = await adapter.detect_message_gaps(chat_id=20, threshold=50)
+
+            assert len(gaps_chat1) == 1
+            assert gaps_chat1[0] == (3, 100, 97)
+            assert gaps_chat2 == []
+        finally:
+            await engine.dispose()
+
+
+# ===========================================================================
+# 2. TestGetChatsWithMessages — real SQL
+# ===========================================================================
+
+class TestGetChatsWithMessages:
+ """Exercise get_chats_with_messages with a real async SQLite database."""
+
+ async def test_returns_all_chat_ids(self):
+ """Should return all chat IDs from the chats table."""
+ adapter, engine = await _create_in_memory_adapter()
+ try:
+ await _insert_chat(adapter, chat_id=-1001, title="Chat A")
+ await _insert_chat(adapter, chat_id=-1002, title="Chat B")
+ await _insert_chat(adapter, chat_id=-1003, title="Chat C")
+
+ result = await adapter.get_chats_with_messages()
+ assert sorted(result) == [-1003, -1002, -1001]
+ finally:
+ await engine.dispose()
+
+ async def test_returns_empty_when_no_chats(self):
+ """Should return empty list when no chats exist."""
+ adapter, engine = await _create_in_memory_adapter()
+ try:
+ result = await adapter.get_chats_with_messages()
+ assert result == []
+ finally:
+ await engine.dispose()
+
+
+# ===========================================================================
+# 3. TestFillGaps — mocked Telegram client, exercises _fill_gaps control flow
+# ===========================================================================
+
+def _make_backup_instance(db_mock=None, client_mock=None, config_mock=None):
+    """Build a TelegramBackup without __init__; gap_threshold=50 and batch_size=100 are always set."""
+    instance = TelegramBackup.__new__(TelegramBackup)
+    settings = config_mock or MagicMock()
+    settings.gap_threshold, settings.batch_size = 50, 100
+    instance.db = db_mock or AsyncMock()
+    instance.client = client_mock or AsyncMock()
+    instance.config = settings
+    return instance
+
+
+class TestFillGaps:
+ """Exercise _fill_gaps logic with mocked DB and Telegram client."""
+
+ async def test_fill_gaps_no_chat_id_scans_all_chats(self):
+ """When chat_id=None, _fill_gaps should query all chats from DB."""
+ db = AsyncMock()
+ db.get_chats_with_messages = AsyncMock(return_value=[-1001, -1002])
+ db.detect_message_gaps = AsyncMock(return_value=[])
+
+ client = AsyncMock()
+ entity = MagicMock()
+ entity.title = "Test Channel"
+ entity.id = 1001
+ client.get_entity = AsyncMock(return_value=entity)
+
+ backup = _make_backup_instance(db_mock=db, client_mock=client)
+
+ result = await backup._fill_gaps(chat_id=None)
+
+ db.get_chats_with_messages.assert_awaited_once()
+ assert result["chats_scanned"] == 2
+
+ async def test_fill_gaps_with_specific_chat_id(self):
+ """When chat_id is provided, only that chat should be scanned."""
+ db = AsyncMock()
+ db.detect_message_gaps = AsyncMock(return_value=[])
+
+ client = AsyncMock()
+ entity = MagicMock()
+ entity.title = "Specific Chat"
+ entity.id = 5555
+ client.get_entity = AsyncMock(return_value=entity)
+
+ backup = _make_backup_instance(db_mock=db, client_mock=client)
+
+ result = await backup._fill_gaps(chat_id=-1005555)
+
+ # Should NOT have called get_chats_with_messages
+ db.get_chats_with_messages.assert_not_awaited()
+ assert result["chats_scanned"] == 1
+ client.get_entity.assert_awaited_once_with(-1005555)
+
+ async def test_fill_gaps_chat_id_zero_is_not_none(self):
+ """chat_id=0 is falsy but valid — must scan only chat 0, not all chats.
+
+ This tests the critical `if chat_id is not None` fix (vs `if chat_id`).
+ """
+ db = AsyncMock()
+ db.detect_message_gaps = AsyncMock(return_value=[])
+
+ client = AsyncMock()
+ entity = MagicMock()
+ entity.title = "Chat Zero"
+ entity.id = 0
+ client.get_entity = AsyncMock(return_value=entity)
+
+ backup = _make_backup_instance(db_mock=db, client_mock=client)
+
+ result = await backup._fill_gaps(chat_id=0)
+
+ # The key assertion: get_chats_with_messages must NOT be called
+ db.get_chats_with_messages.assert_not_awaited()
+ assert result["chats_scanned"] == 1
+ client.get_entity.assert_awaited_once_with(0)
+
+ async def test_fill_gaps_skips_inaccessible_chats(self):
+ """Chats raising ChannelPrivateError should be skipped, not crash."""
+ from telethon.errors import ChannelPrivateError
+
+ db = AsyncMock()
+ db.get_chats_with_messages = AsyncMock(return_value=[-1001, -1002, -1003])
+
+ accessible_entity = MagicMock()
+ accessible_entity.title = "Accessible"
+ accessible_entity.id = 1003
+
+ client = AsyncMock()
+
+ async def fake_get_entity(cid):
+ if cid == -1001:
+ raise ChannelPrivateError(request=None)
+ if cid == -1002:
+ raise ChannelPrivateError(request=None)
+ return accessible_entity
+
+ client.get_entity = AsyncMock(side_effect=fake_get_entity)
+ db.detect_message_gaps = AsyncMock(return_value=[])
+
+ backup = _make_backup_instance(db_mock=db, client_mock=client)
+
+ result = await backup._fill_gaps(chat_id=None)
+
+ # All 3 scanned, but only 1 was accessible
+ assert result["chats_scanned"] == 3
+ # The 2 inaccessible chats had no gaps detected (skipped before gap query)
+ assert result["total_gaps"] == 0
+
+ async def test_fill_gaps_processes_detected_gaps(self):
+ """When gaps are found, _fill_gap_range should be called for each."""
+ db = AsyncMock()
+ db.get_chats_with_messages = AsyncMock(return_value=[-1001])
+ db.detect_message_gaps = AsyncMock(return_value=[
+ (50, 100, 50),
+ (200, 300, 100),
+ ])
+
+ client = AsyncMock()
+ entity = MagicMock()
+ entity.title = "Gapped Chat"
+ entity.id = 1001
+ client.get_entity = AsyncMock(return_value=entity)
+
+ backup = _make_backup_instance(db_mock=db, client_mock=client)
+
+ # Mock _fill_gap_range to return counts
+ backup._fill_gap_range = AsyncMock(side_effect=[10, 25])
+
+ result = await backup._fill_gaps(chat_id=None)
+
+ assert result["chats_scanned"] == 1
+ assert result["chats_with_gaps"] == 1
+ assert result["total_gaps"] == 2
+ assert result["total_recovered"] == 35 # 10 + 25
+ assert len(result["details"]) == 1
+ assert result["details"][0]["chat_id"] == -1001
+ assert result["details"][0]["gaps"] == 2
+ assert result["details"][0]["recovered"] == 35
+
+ async def test_fill_gaps_chat_without_gaps_not_in_details(self):
+ """Chats with no gaps should not appear in the details list."""
+ db = AsyncMock()
+ db.get_chats_with_messages = AsyncMock(return_value=[-1001, -1002])
+ db.detect_message_gaps = AsyncMock(side_effect=[
+ [], # chat -1001: no gaps
+ [(10, 100, 90)], # chat -1002: one gap
+ ])
+
+ client = AsyncMock()
+ entity1 = MagicMock()
+ entity1.title = "No Gaps"
+ entity1.id = 1001
+ entity2 = MagicMock()
+ entity2.title = "Has Gaps"
+ entity2.id = 1002
+
+ client.get_entity = AsyncMock(side_effect=[entity1, entity2])
+
+ backup = _make_backup_instance(db_mock=db, client_mock=client)
+ backup._fill_gap_range = AsyncMock(return_value=15)
+
+ result = await backup._fill_gaps(chat_id=None)
+
+ assert result["chats_scanned"] == 2
+ assert result["chats_with_gaps"] == 1
+ assert len(result["details"]) == 1
+ assert result["details"][0]["chat_id"] == -1002
+
+ async def test_fill_gaps_uses_config_threshold(self):
+ """The threshold passed to detect_message_gaps should come from config."""
+ db = AsyncMock()
+ db.get_chats_with_messages = AsyncMock(return_value=[-1001])
+ db.detect_message_gaps = AsyncMock(return_value=[])
+
+ client = AsyncMock()
+ entity = MagicMock()
+ entity.title = "Test"
+ entity.id = 1001
+ client.get_entity = AsyncMock(return_value=entity)
+
+ backup = _make_backup_instance(db_mock=db, client_mock=client)
+ backup.config.gap_threshold = 123
+
+ await backup._fill_gaps(chat_id=None)
+
+ db.detect_message_gaps.assert_awaited_once_with(-1001, 123)
+
+
+class TestFillGapRange:
+ """Exercise _fill_gap_range with a mocked Telegram client."""
+
+ async def test_fill_gap_range_returns_count(self):
+ """_fill_gap_range should return the total recovered message count."""
+ db = AsyncMock()
+ client = AsyncMock()
+
+ backup = _make_backup_instance(db_mock=db, client_mock=client)
+
+ # Simulate 5 messages returned from iter_messages
+ messages = []
+ for i in range(51, 56):
+ msg = MagicMock()
+ msg.id = i
+ messages.append(msg)
+
+ async def fake_iter_messages(entity, min_id=None, max_id=None, reverse=None):
+ for m in messages:
+ yield m
+
+ client.iter_messages = fake_iter_messages
+ backup._process_message = AsyncMock(side_effect=lambda m, c: {"id": m.id, "chat_id": c})
+ backup._commit_batch = AsyncMock()
+
+ entity = MagicMock()
+ count = await backup._fill_gap_range(entity, chat_id=-1001, gap_start=50, gap_end=100)
+
+ assert count == 5
+ backup._commit_batch.assert_awaited_once()
+
+ async def test_fill_gap_range_batches_commits(self):
+ """Large gaps should be committed in batches according to config.batch_size."""
+ db = AsyncMock()
+ client = AsyncMock()
+
+ backup = _make_backup_instance(db_mock=db, client_mock=client)
+ backup.config.batch_size = 3
+
+ messages = []
+ for i in range(51, 59): # 8 messages
+ msg = MagicMock()
+ msg.id = i
+ messages.append(msg)
+
+ async def fake_iter_messages(entity, min_id=None, max_id=None, reverse=None):
+ for m in messages:
+ yield m
+
+ client.iter_messages = fake_iter_messages
+ backup._process_message = AsyncMock(side_effect=lambda m, c: {"id": m.id, "chat_id": c})
+ backup._commit_batch = AsyncMock()
+
+ entity = MagicMock()
+ count = await backup._fill_gap_range(entity, chat_id=-1001, gap_start=50, gap_end=100)
+
+ assert count == 8
+ # 8 messages / batch_size 3 = 2 full batches (3+3) + 1 flush (2) = 3 calls
+ assert backup._commit_batch.await_count == 3
+
+ async def test_fill_gap_range_empty_gap(self):
+ """When no messages exist in the gap range, should return 0."""
+ db = AsyncMock()
+ client = AsyncMock()
+
+ backup = _make_backup_instance(db_mock=db, client_mock=client)
+
+ async def fake_iter_messages(entity, min_id=None, max_id=None, reverse=None):
+ return
+ yield # noqa: unreachable - makes this an async generator
+
+ client.iter_messages = fake_iter_messages
+ backup._process_message = AsyncMock()
+ backup._commit_batch = AsyncMock()
+
+ entity = MagicMock()
+ count = await backup._fill_gap_range(entity, chat_id=-1001, gap_start=50, gap_end=100)
+
+ assert count == 0
+ backup._commit_batch.assert_not_awaited()
+
+ async def test_fill_gap_range_passes_correct_ids_to_client(self):
+ """iter_messages should be called with min_id=gap_start, max_id=gap_end, reverse=True."""
+ db = AsyncMock()
+ client = AsyncMock()
+
+ backup = _make_backup_instance(db_mock=db, client_mock=client)
+
+ call_kwargs = {}
+
+ async def fake_iter_messages(entity, min_id=None, max_id=None, reverse=None):
+ call_kwargs["min_id"] = min_id
+ call_kwargs["max_id"] = max_id
+ call_kwargs["reverse"] = reverse
+ return
+ yield # noqa: unreachable
+
+ client.iter_messages = fake_iter_messages
+ backup._process_message = AsyncMock()
+ backup._commit_batch = AsyncMock()
+
+ entity = MagicMock()
+ await backup._fill_gap_range(entity, chat_id=-1001, gap_start=50, gap_end=100)
+
+ assert call_kwargs["min_id"] == 50
+ assert call_kwargs["max_id"] == 100
+ assert call_kwargs["reverse"] is True
+
+
+# ===========================================================================
+# 4. TestConfig — env-var parsing for gap-fill settings
+# ===========================================================================
+
+class TestGapFillConfig:
+ """Test FILL_GAPS and GAP_THRESHOLD configuration."""
+
+ def setup_method(self):
+ self.temp_dir = tempfile.mkdtemp()
+
+ def teardown_method(self):
+ shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+ def _base_env(self, **extra):
+ env = {
+ "CHAT_TYPES": "private",
+ "BACKUP_PATH": self.temp_dir,
+ }
+ env.update(extra)
+ return env
+
+ def test_fill_gaps_default_false(self):
+ """FILL_GAPS should default to False when not set."""
+ with patch.dict(os.environ, self._base_env(), clear=True):
+ config = Config()
+ assert config.fill_gaps is False
+
+ def test_fill_gaps_true(self):
+ """FILL_GAPS=true should set fill_gaps=True."""
+ with patch.dict(os.environ, self._base_env(FILL_GAPS="true"), clear=True):
+ config = Config()
+ assert config.fill_gaps is True
+
+ def test_fill_gaps_True_uppercase(self):
+ """FILL_GAPS=True (capitalized) should also work."""
+ with patch.dict(os.environ, self._base_env(FILL_GAPS="True"), clear=True):
+ config = Config()
+ assert config.fill_gaps is True
+
+ def test_fill_gaps_false_explicit(self):
+ """FILL_GAPS=false should set fill_gaps=False."""
+ with patch.dict(os.environ, self._base_env(FILL_GAPS="false"), clear=True):
+ config = Config()
+ assert config.fill_gaps is False
+
+ def test_fill_gaps_nonsense_is_false(self):
+ """FILL_GAPS=banana should evaluate to False (only 'true' is truthy)."""
+ with patch.dict(os.environ, self._base_env(FILL_GAPS="banana"), clear=True):
+ config = Config()
+ assert config.fill_gaps is False
+
+ def test_gap_threshold_default(self):
+ """GAP_THRESHOLD should default to 50."""
+ with patch.dict(os.environ, self._base_env(), clear=True):
+ config = Config()
+ assert config.gap_threshold == 50
+
+ def test_gap_threshold_custom(self):
+ """GAP_THRESHOLD=100 should set gap_threshold=100."""
+ with patch.dict(os.environ, self._base_env(GAP_THRESHOLD="100"), clear=True):
+ config = Config()
+ assert config.gap_threshold == 100
+
+ def test_gap_threshold_small(self):
+ """GAP_THRESHOLD=1 should be accepted."""
+ with patch.dict(os.environ, self._base_env(GAP_THRESHOLD="1"), clear=True):
+ config = Config()
+ assert config.gap_threshold == 1
+
+ def test_gap_threshold_large(self):
+ """GAP_THRESHOLD=10000 should be accepted."""
+ with patch.dict(os.environ, self._base_env(GAP_THRESHOLD="10000"), clear=True):
+ config = Config()
+ assert config.gap_threshold == 10000
+
+ def test_both_settings_together(self):
+ """FILL_GAPS and GAP_THRESHOLD can be set simultaneously."""
+ with patch.dict(os.environ, self._base_env(FILL_GAPS="true", GAP_THRESHOLD="200"), clear=True):
+ config = Config()
+ assert config.fill_gaps is True
+ assert config.gap_threshold == 200
From 8fe726eb101f1e3bb80d3d719294e6b13ba2b525 Mon Sep 17 00:00:00 2001
From: GeiserX <9169332+GeiserX@users.noreply.github.com>
Date: Sat, 14 Mar 2026 20:23:38 +0100
Subject: [PATCH 2/5] fix: add pytest-asyncio to CI, fix lint issues in
gap-fill tests
---
.github/workflows/tests.yml | 2 +-
tests/test_gap_fill.py | 6 ++----
2 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 0a2ae1f6..c94353a6 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -23,7 +23,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- pip install pytest pytest-cov beautifulsoup4 Pillow
+ pip install pytest pytest-cov pytest-asyncio beautifulsoup4 Pillow
- name: Run tests
run: |
diff --git a/tests/test_gap_fill.py b/tests/test_gap_fill.py
index 69f74313..679b9310 100644
--- a/tests/test_gap_fill.py
+++ b/tests/test_gap_fill.py
@@ -11,7 +11,6 @@
import tempfile
from unittest.mock import AsyncMock, MagicMock, patch
-import pytest
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.pool import StaticPool
@@ -21,7 +20,6 @@
from src.db.base import DatabaseManager
from src.telegram_backup import TelegramBackup
-
# ---------------------------------------------------------------------------
# Helpers — lightweight in-memory async SQLite setup
# ---------------------------------------------------------------------------
@@ -519,7 +517,7 @@ async def test_fill_gap_range_empty_gap(self):
async def fake_iter_messages(entity, min_id=None, max_id=None, reverse=None):
return
- yield # noqa: unreachable - makes this an async generator
+ yield # noqa: F811 - unreachable yield makes this an async generator
client.iter_messages = fake_iter_messages
backup._process_message = AsyncMock()
@@ -545,7 +543,7 @@ async def fake_iter_messages(entity, min_id=None, max_id=None, reverse=None):
call_kwargs["max_id"] = max_id
call_kwargs["reverse"] = reverse
return
- yield # noqa: unreachable
+ yield # noqa: F811 - unreachable yield makes this an async generator
client.iter_messages = fake_iter_messages
backup._process_message = AsyncMock()
From 3d26f4390da2a37d4992557df11c4918287667c0 Mon Sep 17 00:00:00 2001
From: GeiserX <9169332+GeiserX@users.noreply.github.com>
Date: Sat, 14 Mar 2026 20:26:23 +0100
Subject: [PATCH 3/5] style: fix ruff formatting in gap-fill code
---
src/__main__.py | 9 ++--
src/telegram_backup.py | 14 ++---
tests/test_gap_fill.py | 120 +++++++++++++++++++++++------------------
3 files changed, 81 insertions(+), 62 deletions(-)
diff --git a/src/__main__.py b/src/__main__.py
index 6b84fbaf..af94f892 100644
--- a/src/__main__.py
+++ b/src/__main__.py
@@ -155,11 +155,12 @@ def create_parser() -> argparse.ArgumentParser:
"during previous backup runs."
),
)
+ fill_gaps_parser.add_argument("-c", "--chat-id", type=int, help="Fill gaps only for this specific chat ID")
fill_gaps_parser.add_argument(
- "-c", "--chat-id", type=int, help="Fill gaps only for this specific chat ID"
- )
- fill_gaps_parser.add_argument(
- "-t", "--threshold", type=int, default=None,
+ "-t",
+ "--threshold",
+ type=int,
+ default=None,
help="Minimum gap size to investigate (overrides GAP_THRESHOLD env var)",
)
diff --git a/src/telegram_backup.py b/src/telegram_backup.py
index aa3cb8e2..49edc9bc 100644
--- a/src/telegram_backup.py
+++ b/src/telegram_backup.py
@@ -781,12 +781,14 @@ async def _fill_gaps(self, chat_id: int | None = None) -> dict:
summary["total_gaps"] += len(gaps)
summary["total_recovered"] += chat_recovered
- summary["details"].append({
- "chat_id": cid,
- "chat_name": chat_name,
- "gaps": len(gaps),
- "recovered": chat_recovered,
- })
+ summary["details"].append(
+ {
+ "chat_id": cid,
+ "chat_name": chat_name,
+ "gaps": len(gaps),
+ "recovered": chat_recovered,
+ }
+ )
logger.info(
f"Gap-fill complete: {summary['chats_scanned']} chats scanned, "
diff --git a/tests/test_gap_fill.py b/tests/test_gap_fill.py
index 679b9310..4ad4cb7e 100644
--- a/tests/test_gap_fill.py
+++ b/tests/test_gap_fill.py
@@ -24,6 +24,7 @@
# Helpers — lightweight in-memory async SQLite setup
# ---------------------------------------------------------------------------
+
async def _create_in_memory_adapter():
"""Create a DatabaseAdapter backed by an in-memory SQLite database.
@@ -39,43 +40,47 @@ async def _create_in_memory_adapter():
# Create the minimal schema needed for gap detection
async with engine.begin() as conn:
- await conn.execute(text(
- "CREATE TABLE IF NOT EXISTS chats ("
- " id INTEGER PRIMARY KEY,"
- " type TEXT NOT NULL DEFAULT 'channel',"
- " title TEXT,"
- " username TEXT,"
- " first_name TEXT,"
- " last_name TEXT,"
- " phone TEXT,"
- " description TEXT,"
- " participants_count INTEGER,"
- " is_forum INTEGER DEFAULT 0,"
- " is_archived INTEGER DEFAULT 0,"
- " last_synced_message_id INTEGER DEFAULT 0,"
- " created_at TEXT DEFAULT CURRENT_TIMESTAMP,"
- " updated_at TEXT DEFAULT CURRENT_TIMESTAMP"
- ")"
- ))
- await conn.execute(text(
- "CREATE TABLE IF NOT EXISTS messages ("
- " id INTEGER NOT NULL,"
- " chat_id INTEGER NOT NULL,"
- " sender_id INTEGER,"
- " date TEXT NOT NULL DEFAULT '2025-01-01 00:00:00',"
- " text TEXT,"
- " reply_to_msg_id INTEGER,"
- " reply_to_top_id INTEGER,"
- " reply_to_text TEXT,"
- " forward_from_id INTEGER,"
- " edit_date TEXT,"
- " raw_data TEXT,"
- " created_at TEXT DEFAULT CURRENT_TIMESTAMP,"
- " is_outgoing INTEGER DEFAULT 0,"
- " is_pinned INTEGER DEFAULT 0,"
- " PRIMARY KEY (id, chat_id)"
- ")"
- ))
+ await conn.execute(
+ text(
+ "CREATE TABLE IF NOT EXISTS chats ("
+ " id INTEGER PRIMARY KEY,"
+ " type TEXT NOT NULL DEFAULT 'channel',"
+ " title TEXT,"
+ " username TEXT,"
+ " first_name TEXT,"
+ " last_name TEXT,"
+ " phone TEXT,"
+ " description TEXT,"
+ " participants_count INTEGER,"
+ " is_forum INTEGER DEFAULT 0,"
+ " is_archived INTEGER DEFAULT 0,"
+ " last_synced_message_id INTEGER DEFAULT 0,"
+ " created_at TEXT DEFAULT CURRENT_TIMESTAMP,"
+ " updated_at TEXT DEFAULT CURRENT_TIMESTAMP"
+ ")"
+ )
+ )
+ await conn.execute(
+ text(
+ "CREATE TABLE IF NOT EXISTS messages ("
+ " id INTEGER NOT NULL,"
+ " chat_id INTEGER NOT NULL,"
+ " sender_id INTEGER,"
+ " date TEXT NOT NULL DEFAULT '2025-01-01 00:00:00',"
+ " text TEXT,"
+ " reply_to_msg_id INTEGER,"
+ " reply_to_top_id INTEGER,"
+ " reply_to_text TEXT,"
+ " forward_from_id INTEGER,"
+ " edit_date TEXT,"
+ " raw_data TEXT,"
+ " created_at TEXT DEFAULT CURRENT_TIMESTAMP,"
+ " is_outgoing INTEGER DEFAULT 0,"
+ " is_pinned INTEGER DEFAULT 0,"
+ " PRIMARY KEY (id, chat_id)"
+ ")"
+ )
+ )
# Wire up a real DatabaseManager (skip its init() — we supply our own engine)
db_manager = DatabaseManager.__new__(DatabaseManager)
@@ -83,7 +88,9 @@ async def _create_in_memory_adapter():
db_manager.database_url = "sqlite+aiosqlite://"
db_manager._is_sqlite = True
db_manager.async_session_factory = async_sessionmaker(
- engine, class_=AsyncSession, expire_on_commit=False,
+ engine,
+ class_=AsyncSession,
+ expire_on_commit=False,
)
adapter = DatabaseAdapter(db_manager)
@@ -94,18 +101,19 @@ async def _insert_messages(adapter: DatabaseAdapter, chat_id: int, msg_ids: list
"""Insert message rows with the given IDs into the test database."""
async with adapter.db_manager.async_session_factory() as session:
for mid in msg_ids:
- await session.execute(text(
- "INSERT INTO messages (id, chat_id, date) VALUES (:id, :cid, '2025-01-01 00:00:00')"
- ), {"id": mid, "cid": chat_id})
+ await session.execute(
+ text("INSERT INTO messages (id, chat_id, date) VALUES (:id, :cid, '2025-01-01 00:00:00')"),
+ {"id": mid, "cid": chat_id},
+ )
await session.commit()
async def _insert_chat(adapter: DatabaseAdapter, chat_id: int, title: str = "Test Chat"):
"""Insert a chat row into the test database."""
async with adapter.db_manager.async_session_factory() as session:
- await session.execute(text(
- "INSERT INTO chats (id, title, type) VALUES (:id, :title, 'channel')"
- ), {"id": chat_id, "title": title})
+ await session.execute(
+ text("INSERT INTO chats (id, title, type) VALUES (:id, :title, 'channel')"), {"id": chat_id, "title": title}
+ )
await session.commit()
@@ -113,6 +121,7 @@ async def _insert_chat(adapter: DatabaseAdapter, chat_id: int, title: str = "Tes
# 1. TestDetectMessageGaps — real SQL against in-memory SQLite
# ===========================================================================
+
class TestDetectMessageGaps:
"""Exercise detect_message_gaps with a real async SQLite database."""
@@ -230,6 +239,7 @@ async def test_different_chats_isolated(self):
# 2. TestGetChatsWithMessages — real SQL
# ===========================================================================
+
class TestGetChatsWithMessages:
"""Exercise get_chats_with_messages with a real async SQLite database."""
@@ -260,6 +270,7 @@ async def test_returns_empty_when_no_chats(self):
# 3. TestFillGaps — mocked Telegram client, exercises _fill_gaps control flow
# ===========================================================================
+
def _make_backup_instance(db_mock=None, client_mock=None, config_mock=None):
"""Create a TelegramBackup instance with mocked dependencies."""
backup = TelegramBackup.__new__(TelegramBackup)
@@ -372,10 +383,12 @@ async def test_fill_gaps_processes_detected_gaps(self):
"""When gaps are found, _fill_gap_range should be called for each."""
db = AsyncMock()
db.get_chats_with_messages = AsyncMock(return_value=[-1001])
- db.detect_message_gaps = AsyncMock(return_value=[
- (50, 100, 50),
- (200, 300, 100),
- ])
+ db.detect_message_gaps = AsyncMock(
+ return_value=[
+ (50, 100, 50),
+ (200, 300, 100),
+ ]
+ )
client = AsyncMock()
entity = MagicMock()
@@ -403,10 +416,12 @@ async def test_fill_gaps_chat_without_gaps_not_in_details(self):
"""Chats with no gaps should not appear in the details list."""
db = AsyncMock()
db.get_chats_with_messages = AsyncMock(return_value=[-1001, -1002])
- db.detect_message_gaps = AsyncMock(side_effect=[
- [], # chat -1001: no gaps
- [(10, 100, 90)], # chat -1002: one gap
- ])
+ db.detect_message_gaps = AsyncMock(
+ side_effect=[
+ [], # chat -1001: no gaps
+ [(10, 100, 90)], # chat -1002: one gap
+ ]
+ )
client = AsyncMock()
entity1 = MagicMock()
@@ -561,6 +576,7 @@ async def fake_iter_messages(entity, min_id=None, max_id=None, reverse=None):
# 4. TestConfig — env-var parsing for gap-fill settings
# ===========================================================================
+
class TestGapFillConfig:
"""Test FILL_GAPS and GAP_THRESHOLD configuration."""
From 18f4edba905f3073c3919a4ce9e9fa5a917f8951 Mon Sep 17 00:00:00 2001
From: GeiserX <9169332+GeiserX@users.noreply.github.com>
Date: Sun, 15 Mar 2026 12:48:56 +0100
Subject: [PATCH 4/5] fix: address security review findings for gap-fill and
token URLs
- Gap-fill now continues on per-chat/per-gap errors instead of aborting
- Scheduler accurately reports gap-fill errors in completion message
- Gap-fill respects backup config (CHAT_IDS, CHAT_TYPES, excludes)
- Token URL share dialog warns about server access log exposure
- Stats recalculated after gap-fill recovers messages
---
src/scheduler.py | 25 ++++++++++++----
src/telegram_backup.py | 56 +++++++++++++++++++++++++++++++-----
src/web/templates/index.html | 1 +
3 files changed, 70 insertions(+), 12 deletions(-)
diff --git a/src/scheduler.py b/src/scheduler.py
index 8dba6623..d9008dd9 100644
--- a/src/scheduler.py
+++ b/src/scheduler.py
@@ -77,13 +77,21 @@ async def _run_backup_job(self):
await run_backup(self.config, client=client)
# Run gap-fill if enabled
+ gap_fill_ok = True
if self.config.fill_gaps:
try:
from .telegram_backup import run_fill_gaps
logger.info("Running post-backup gap-fill...")
- await run_fill_gaps(self.config, client=client)
+ result = await run_fill_gaps(self.config, client=client)
+ if result.get("errors", 0) > 0:
+ gap_fill_ok = False
+ logger.warning(
+ f"Gap-fill completed with {result['errors']} error(s) "
+ f"({result['total_recovered']} messages recovered)"
+ )
except Exception as e:
+ gap_fill_ok = False
logger.error(f"Gap-fill failed: {e}", exc_info=True)
# Reload tracked chats in listener after backup
@@ -91,7 +99,10 @@ async def _run_backup_job(self):
if self._listener:
await self._listener._load_tracked_chats()
- logger.info("Scheduled backup completed successfully")
+ if gap_fill_ok:
+ logger.info("Scheduled backup completed successfully")
+ else:
+ logger.warning("Scheduled backup completed, but gap-fill had errors")
except Exception as e:
logger.error(f"Scheduled backup failed: {e}", exc_info=True)
@@ -229,10 +240,14 @@ async def run_forever(self):
try:
from .telegram_backup import run_fill_gaps
- logger.info("Running post-backup gap-fill...")
- await run_fill_gaps(self.config, client=self._connection.client)
+ logger.info("Running initial gap-fill...")
+ result = await run_fill_gaps(self.config, client=self._connection.client)
+ if result.get("errors", 0) > 0:
+ logger.warning(
+ f"Initial gap-fill completed with {result['errors']} error(s)"
+ )
except Exception as e:
- logger.error(f"Gap-fill failed: {e}", exc_info=True)
+ logger.error(f"Initial gap-fill failed: {e}", exc_info=True)
# Reload tracked chats in listener after initial backup
if self._listener:
diff --git a/src/telegram_backup.py b/src/telegram_backup.py
index 49edc9bc..646870d6 100644
--- a/src/telegram_backup.py
+++ b/src/telegram_backup.py
@@ -743,13 +743,28 @@ async def _fill_gaps(self, chat_id: int | None = None) -> dict:
"chats_with_gaps": 0,
"total_gaps": 0,
"total_recovered": 0,
+ "errors": 0,
"details": [],
}
if chat_id is not None:
chat_ids = [chat_id]
else:
- chat_ids = await self.db.get_chats_with_messages()
+ # Only scan chats that current config would back up (respects
+ # CHAT_IDS whitelist, CHAT_TYPES, and all exclude lists)
+ all_chat_ids = await self.db.get_chats_with_messages()
+ chat_ids = []
+ for cid in all_chat_ids:
+ chat_info = await self.db.get_chat_by_id(cid)
+ if not chat_info:
+ continue
+ ctype = chat_info.get("type", "")
+ is_user = ctype == "private"
+ is_group = ctype in ("group", "supergroup")
+ is_channel = ctype == "channel"
+ is_bot = ctype == "bot"
+ if self.config.should_backup_chat(cid, is_user, is_group, is_channel, is_bot):
+ chat_ids.append(cid)
logger.info(f"Gap-fill: scanning {len(chat_ids)} chat(s) with threshold={threshold}")
@@ -761,9 +776,19 @@ async def _fill_gaps(self, chat_id: int | None = None) -> dict:
except (ChannelPrivateError, ChatForbiddenError, UserBannedInChannelError) as e:
logger.warning(f"Gap-fill: skipping chat {cid} (no access): {e.__class__.__name__}")
continue
+ except Exception as e:
+ logger.error(f"Gap-fill: failed to get entity for chat {cid}: {e}")
+ summary["errors"] += 1
+ continue
chat_name = self._get_chat_name(entity)
- gaps = await self.db.detect_message_gaps(cid, threshold)
+
+ try:
+ gaps = await self.db.detect_message_gaps(cid, threshold)
+ except Exception as e:
+ logger.error(f"Gap-fill: failed to detect gaps for {chat_name} ({cid}): {e}")
+ summary["errors"] += 1
+ continue
if not gaps:
continue
@@ -775,9 +800,13 @@ async def _fill_gaps(self, chat_id: int | None = None) -> dict:
for gap_start, gap_end, gap_size in gaps:
logger.info(f" → Filling gap: {gap_start}..{gap_end} (size {gap_size})")
- recovered = await self._fill_gap_range(entity, cid, gap_start, gap_end)
- chat_recovered += recovered
- logger.info(f" Recovered {recovered} messages")
+ try:
+ recovered = await self._fill_gap_range(entity, cid, gap_start, gap_end)
+ chat_recovered += recovered
+ logger.info(f" Recovered {recovered} messages")
+ except Exception as e:
+ logger.error(f" Error filling gap {gap_start}..{gap_end}: {e}")
+ summary["errors"] += 1
summary["total_gaps"] += len(gaps)
summary["total_recovered"] += chat_recovered
@@ -790,9 +819,11 @@ async def _fill_gaps(self, chat_id: int | None = None) -> dict:
}
)
+ status = "complete" if summary["errors"] == 0 else "complete with errors"
logger.info(
- f"Gap-fill complete: {summary['chats_scanned']} chats scanned, "
+ f"Gap-fill {status}: {summary['chats_scanned']} chats scanned, "
f"{summary['total_gaps']} gaps found, {summary['total_recovered']} messages recovered"
+ + (f", {summary['errors']} error(s)" if summary["errors"] else "")
)
return summary
@@ -1762,7 +1793,18 @@ async def run_fill_gaps(config: Config, client: TelegramClient | None = None, ch
backup = await TelegramBackup.create(config, client=client)
try:
await backup.connect()
- return await backup._fill_gaps(chat_id=chat_id)
+ summary = await backup._fill_gaps(chat_id=chat_id)
+
+ # Refresh cached stats if messages were recovered so the viewer
+ # doesn't show stale totals until the next scheduled recalculation
+ if summary["total_recovered"] > 0:
+ try:
+ await backup.db.calculate_and_store_statistics()
+ logger.info("Stats recalculated after gap-fill recovery")
+ except Exception as e:
+ logger.warning(f"Failed to recalculate stats after gap-fill: {e}")
+
+ return summary
finally:
await backup.disconnect()
await backup.db.close()
diff --git a/src/web/templates/index.html b/src/web/templates/index.html
index b9189fa4..369f138d 100644
--- a/src/web/templates/index.html
+++ b/src/web/templates/index.html
@@ -1322,6 +1322,7 @@ Create Share Token
{{ adminNewTokenUrl }}