Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 157 additions & 30 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
```
# Python
# Database files
*.db

# Python specific
__pycache__/
*.pyc
*.pyo
Expand All @@ -20,42 +22,23 @@ nosetests.xml
coverage.xml
*.cover
*.log
.git/modules/
*.sublime-project
*.sublime-workspace
*.pot
*.po
*~
.pytest_cache/
.mypy_cache/
.hypothesis/

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
.shared_clones/

# IDEs
# IDE and editor files
.vscode/
.idea/

# Environment
.env
.env.local
*.env
*.env.*
.env.*
*.swp
*.swo
*.tmp

# OS generated files
.DS_Store
Expand All @@ -65,4 +48,148 @@ MANIFEST
.Trashes
ehthumbs.db
Thumbs.db
```
desktop.ini

# Build and distribution artifacts
dist/
build/
*.egg
*.pyc
*.pyo
*.pyd
*.so
*.dylib
*.dll
*.exe
*.out
*.o
*.obj
target/
.gradle/
node_modules/
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Environment variables
.env
.env.local
*.env
.env.*
!.env.example

# Testing
.coverage
htmlcov/
.tox/
.nox/
coverage/
*.coverage
.coverage.*

# Security
secret.key
secrets.json
config/secrets.yml

# Sync state and temporary data
data/db/sync_state.db
*.tmp
*.temp
*.bak
*.backup
*.swp
*.swo
*~
.DS_Store
Thumbs.db
*.lock
*.log
*.out
*.pid
*.seed
*.id
*.idx
*.dat
*.bin
*.cache
*.cached
*.session
*.sqlite
*.db
*.db-shm
*.db-wal
*.fdb
*.fdb-shm
*.fdb-wal
*.mdb
*.ndb
*.sdb
*.sdb-shm
*.sdb-wal
*.ldb
*.idb
*.pdb
*.gdb
*.gdb-index
*.core
*.stackdump
*.dmp
*.crash
*.crashpad
*.minidump
*.dSYM/
*.sym
*.map
*.lst
*.asm
*.o
*.obj
*.lib
*.a
*.so
*.dylib
*.dll
*.exe
*.out
*.jar
*.war
*.ear
*.zip
*.tar
*.gz
*.tgz
*.bz2
*.xz
*.7z
*.rar
*.zst
*.lz4
*.lzh
*.cab
*.arj
*.rpm
*.deb
*.Z
*.lz
*.lzo
*.tar.gz
*.tar.bz2
*.tar.xz
*.tar.zst
*.tmp
*.temp
*.tmp.*
*.temp.*
*.tmp-*
*.temp-*
*.tmp_*
*.temp_*
*.tmp/*
*.temp/*
*.tmp.*/**
*.temp.*/**
*.tmp-*/**
*.temp-*/**
*.tmp_*/**
*.temp_*/**
Binary file modified data/db/sync_state.db
Binary file not shown.
103 changes: 103 additions & 0 deletions src/core/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,106 @@ def is_db_empty(conn: sqlite3.Connection) -> bool:
cursor = conn.cursor()
cursor.execute("SELECT COUNT(*) FROM icd_nodes_state")
return cursor.fetchone()[0] == 0


def has_pending_nodes(conn: sqlite3.Connection) -> bool:
    """Report whether the queue still holds at least one PENDING node.

    Probes ``icd_nodes_state`` for a single row whose ``status`` is
    ``'PENDING'``; ``LIMIT 1`` lets the query stop at the first match.

    Returns:
        True if such a row exists, False otherwise.
    """
    probe = conn.cursor()
    probe.execute("SELECT 1 FROM icd_nodes_state WHERE status = 'PENDING' LIMIT 1")
    first_match = probe.fetchone()
    if first_match is None:
        return False
    return True


def detect_stuck_state(conn: sqlite3.Connection) -> bool:
    """Detect a 'stuck' or 'dead' database state.

    A stuck state occurs when:
    - ``icd_nodes_state`` is NOT empty (has at least one record)
    - but has NO rows with ``status = 'PENDING'`` left to process

    This indicates the sync process completed without actually syncing data,
    or the queue initialization was skipped due to the non-empty DB check.
    An empty table is never reported as stuck: it needs seeding instead.

    Args:
        conn: Open SQLite connection to the database holding
            ``icd_nodes_state``.

    Returns:
        True if a stuck state is detected, False otherwise.
    """
    cursor = conn.cursor()

    # Only existence matters here, so probe with LIMIT 1 instead of counting
    # every row in the table.
    cursor.execute("SELECT 1 FROM icd_nodes_state LIMIT 1")
    if cursor.fetchone() is None:
        return False  # Empty DB is not stuck, it needs seeding

    # Non-empty table: stuck exactly when no PENDING row can be found.
    cursor.execute("SELECT 1 FROM icd_nodes_state WHERE status = 'PENDING' LIMIT 1")
    return cursor.fetchone() is None


def _collect_release_uris(raw_data: object) -> set:
    """Return the https-normalised release URIs found in a decoded node payload.

    Reads the ``release`` list (entries may be plain strings or dicts carrying
    an ``@id``) and the ``latestRelease`` value (string or dict with ``@id``).
    Anything that is not a non-empty string after unwrapping is ignored, and a
    payload that is not a dict yields an empty set.
    """
    if not isinstance(raw_data, dict):
        return set()

    candidates = []
    releases = raw_data.get("release", [])
    if isinstance(releases, list):
        candidates.extend(releases)
    candidates.append(raw_data.get("latestRelease", ""))

    uris = set()
    for entry in candidates:
        if isinstance(entry, dict):
            entry = entry.get("@id")
        # Skip empty strings and non-string values (e.g. a numeric "@id").
        if isinstance(entry, str) and entry:
            uris.add(entry.replace("http://", "https://"))
    return uris


def recover_from_stuck_state(conn: sqlite3.Connection) -> int:
    """Recover from a stuck state by re-seeding the queue.

    Looks for one ``BASE_DONE`` row whose ``raw_data`` mentions "release". If
    its JSON payload yields release URIs (from ``release`` and/or
    ``latestRelease``), each one is inserted as a ``PENDING`` row; URIs that
    already exist in the table are left untouched. Otherwise (no such row,
    an unparsable payload, or no usable URIs) one existing row is flipped
    back to ``PENDING`` as a fallback.

    The change is committed on ``conn`` before returning.

    Args:
        conn: Open SQLite connection to the database holding
            ``icd_nodes_state``.

    Returns:
        The number of rows newly inserted as PENDING, or, on the fallback
        path, the number of rows updated to PENDING.
    """
    import json

    cursor = conn.cursor()

    # Find a node with BASE_DONE status that might carry release info.
    cursor.execute("""
        SELECT uri, raw_data FROM icd_nodes_state
        WHERE status = 'BASE_DONE' AND raw_data LIKE '%release%'
        LIMIT 1
    """)
    row = cursor.fetchone()

    release_uris = set()
    if row is not None:
        try:
            release_uris = _collect_release_uris(json.loads(row[1]))
        except (TypeError, ValueError):
            # Malformed payload: a recovery routine should not crash on it,
            # so fall through to the fallback below instead.
            release_uris = set()

    if release_uris:
        inserted_count = 0
        # Sorted so the insertion order does not depend on set iteration.
        for uri in sorted(release_uris):
            cursor.execute("""
                INSERT OR IGNORE INTO icd_nodes_state (uri, status)
                VALUES (?, 'PENDING')
            """, (uri,))
            # rowcount is 0 when the URI already existed and was ignored.
            if cursor.rowcount > 0:
                inserted_count += 1

        conn.commit()
        # NOTE(review): if every release URI already exists, this returns 0
        # and nothing is re-queued -- confirm that is the intended outcome.
        return inserted_count

    # Fallback: flip one existing row back to PENDING.
    # NOTE(review): the subquery has no ORDER BY, so which row is chosen is
    # left to SQLite; confirm it is reliably the root node in practice.
    cursor.execute("""
        UPDATE OR IGNORE icd_nodes_state SET status = 'PENDING'
        WHERE uri = (SELECT uri FROM icd_nodes_state LIMIT 1)
    """)
    updated_count = cursor.rowcount  # read before commit for clarity
    conn.commit()

    return updated_count
Loading
Loading