From 5e625dff133b176c290501c804065aec3fb5bc73 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Thu, 21 May 2026 21:52:04 +0000 Subject: [PATCH 01/25] Absorbing the old files into new cli_ files, updated tests and docs --- docs/getting-started/maintenance.md | 410 +++-- omop_alchemy/maintenance/__init__.py | 43 +- omop_alchemy/maintenance/_cli_utils.py | 86 + omop_alchemy/maintenance/analyze_tables.py | 101 -- omop_alchemy/maintenance/cli.py | 1285 +------------- .../maintenance/{backup.py => cli_backup.py} | 140 +- .../{defaults.py => cli_config.py} | 74 + .../{foreign_keys.py => cli_foreign_keys.py} | 231 ++- omop_alchemy/maintenance/cli_fulltext.py | 138 ++ .../{indexes.py => cli_indexes.py} | 99 +- omop_alchemy/maintenance/cli_schema.py | 1536 +++++++++++++++++ omop_alchemy/maintenance/cli_tables.py | 564 ++++++ .../{load_vocab.py => cli_vocab.py} | 180 +- omop_alchemy/maintenance/create_tables.py | 129 -- omop_alchemy/maintenance/data_summary.py | 58 - omop_alchemy/maintenance/doctor.py | 346 ---- omop_alchemy/maintenance/info.py | 408 ----- omop_alchemy/maintenance/reconcile.py | 432 ----- omop_alchemy/maintenance/reset_sequences.py | 139 -- omop_alchemy/maintenance/truncate_tables.py | 154 -- omop_alchemy/maintenance/ui.py | 54 +- tests/conftest.py | 2 +- tests/test_analyze_tables.py | 4 +- tests/test_cli_config.py | 12 +- tests/test_create_tables.py | 2 +- tests/test_data_summary.py | 4 +- tests/test_foreign_keys.py | 46 +- tests/test_fulltext.py | 4 +- tests/test_indexes.py | 12 +- tests/test_load_vocab_postgres.py | 2 +- tests/test_load_vocab_source.py | 43 +- tests/test_truncate_tables.py | 14 +- 32 files changed, 3420 insertions(+), 3332 deletions(-) create mode 100644 omop_alchemy/maintenance/_cli_utils.py delete mode 100644 omop_alchemy/maintenance/analyze_tables.py rename omop_alchemy/maintenance/{backup.py => cli_backup.py} (64%) rename omop_alchemy/maintenance/{defaults.py => cli_config.py} (69%) rename omop_alchemy/maintenance/{foreign_keys.py => 
cli_foreign_keys.py} (64%) create mode 100644 omop_alchemy/maintenance/cli_fulltext.py rename omop_alchemy/maintenance/{indexes.py => cli_indexes.py} (72%) create mode 100644 omop_alchemy/maintenance/cli_schema.py create mode 100644 omop_alchemy/maintenance/cli_tables.py rename omop_alchemy/maintenance/{load_vocab.py => cli_vocab.py} (71%) delete mode 100644 omop_alchemy/maintenance/create_tables.py delete mode 100644 omop_alchemy/maintenance/data_summary.py delete mode 100644 omop_alchemy/maintenance/doctor.py delete mode 100644 omop_alchemy/maintenance/info.py delete mode 100644 omop_alchemy/maintenance/reconcile.py delete mode 100644 omop_alchemy/maintenance/reset_sequences.py delete mode 100644 omop_alchemy/maintenance/truncate_tables.py diff --git a/docs/getting-started/maintenance.md b/docs/getting-started/maintenance.md index 4f8f09a..86d3e5d 100644 --- a/docs/getting-started/maintenance.md +++ b/docs/getting-started/maintenance.md @@ -1,183 +1,389 @@ # Maintenance CLI -OMOP Alchemy includes a maintenance CLI for common operational tasks on an OMOP CDM -database. +The `omop-alchemy` maintenance CLI handles everything you need to operate an OMOP CDM +database: creating tables, loading Athena vocabularies, managing indexes and foreign key +enforcement, running health checks, and taking backups. It talks directly to a SQLAlchemy +engine, so all connection details are controlled by the same engine URL configuration you +use for the ORM. > **Alpha status** > Treat this CLI as alpha operational tooling. Interfaces and behavior may still change. --- -## Entrypoint +## Connection setup + +Every command accepts three connection flags: + +| Flag | Purpose | +| --- | --- | +| `--dotenv ` | Load a `.env` file before building the engine | +| `--engine-schema ` | Select the engine by name (see below) | +| `--db-schema ` | Override the target schema inside the database | + +**Engine schema selection.** OMOP Alchemy supports multiple named engine configurations. 
+The `--engine-schema` value maps to an environment variable `ENGINE_<NAME>`. +For example, `--engine-schema cdm` looks for `ENGINE_CDM`. With no `--engine-schema`, +it falls back to the bare `ENGINE` variable. + +**Database schema (`--db-schema`).** On PostgreSQL this sets the `search_path` for ORM +CSV loading and qualifies table references for schema-aware operations. On SQLite it +is ignored by most commands. + +### Saving defaults + +Instead of typing the same flags on every command, save your defaults once: ```bash -omop-alchemy --help -python -m omop_alchemy.maintenance.cli --help +omop-alchemy config set-overrides \ + --dotenv .env \ + --engine-schema cdm \ + --db-schema public \ + --athena-source ./athena_files ``` -If you need PostgreSQL driver support: +This writes `.omop-alchemy.toml` into your project root (the nearest ancestor directory +containing `pyproject.toml`). If no project root is found, it writes to the current +directory. You can override the location with `OMOP_MAINT_DEFAULTS_FILE`. + +Inspect or clear saved defaults: ```bash -uv sync --extra postgres +omop-alchemy config show +omop-alchemy config clear-overrides # clears everything +omop-alchemy config clear-overrides --db-schema # clears one field ``` +**Resolution order for each flag:** + +1. Explicit CLI flag (highest priority) +2. Saved `.omop-alchemy.toml` default +3. Command-level fallback (lowest priority) + --- -## Connection and defaults +## Backend support + +Some commands depend on PostgreSQL-specific features and will return a clear error +if you run them against SQLite. 
-Common flags used by many commands: +| Command group | Requires PostgreSQL | Why | +| --- | --- | --- | +| `load-vocab-source` | No (PostgreSQL + SQLite) | Uses ORM CSV loader; `--bulk-mode` and `--db-schema` are PostgreSQL-only | +| `indexes` | No (cluster apply is PostgreSQL-only) | Index DDL is standard SQL; `CLUSTER` is PostgreSQL | +| `create-missing-tables`, `reconcile-schema`, `data-summary`, `info`, `doctor` | No | Pure SQLAlchemy metadata operations | +| `reset-sequences` | Yes | PostgreSQL sequences (`SETVAL`) | +| `truncate-tables` | Yes | PostgreSQL `TRUNCATE` with `RESTART IDENTITY` and `CASCADE` | +| `foreign-keys` | Yes | PostgreSQL internal RI trigger `ALTER TABLE ... DISABLE/ENABLE TRIGGER ALL` | +| `analyze-tables` | No (`--vacuum` is PostgreSQL-only) | `ANALYZE` is standard; `VACUUM ANALYZE` is PostgreSQL | +| `fulltext` | Yes | PostgreSQL `tsvector`, `tsquery`, and `GIN` indexes | +| `backup-database`, `restore-database` | Yes | `pg_dump` / `pg_restore` / `psql` | -- `--dotenv` -- `--engine-schema` -- `--db-schema` +--- + +## Workflow guides -!!! info "Defaults file discovery" +### Fresh database setup - Project-local defaults are stored in `.omop-alchemy.toml`. - - - the CLI looks for the nearest ancestor directory containing `pyproject.toml` - and uses `/.omop-alchemy.toml` - - if no ancestor project marker is found, it falls back to `./.omop-alchemy.toml` - in the current working directory - - to force a fixed path, set `OMOP_MAINT_DEFAULTS_FILE` - - running `omop-alchemy` from outside your intended project tree may use a different - defaults file than expected +Use this when you are starting with an empty database and want to get an OMOP schema +populated from scratch. ```bash -omop-alchemy config show -omop-alchemy config set-overrides --dotenv .env --engine-schema cdm --db-schema public --athena-source ./athena_source -omop-alchemy config clear-overrides -omop-alchemy config clear-overrides --db-schema -``` +# 1. 
Create any OMOP tables that don't exist yet (safe to run on an existing DB) +omop-alchemy create-missing-tables --dry-run # preview first +omop-alchemy create-missing-tables + +# 2. Load Athena vocabulary files +omop-alchemy load-vocab-source --athena-source ./athena_files -Resolution order: +# 3. Reset sequences so new clinical inserts start above vocabulary IDs (PostgreSQL) +omop-alchemy reset-sequences +``` -1. explicit CLI flag -2. saved `.omop-alchemy.toml` default -3. command fallback +The `create-missing-tables` command compares ORM metadata against the live schema and +creates only what is missing. It is idempotent — running it again on a populated database +does nothing. -`engine_schema` selects the configured engine URL (`ENGINE_` or `ENGINE`). -`db_schema` selects the schema inside that database. +`load-vocab-source` automatically creates any missing vocabulary tables before loading, +so you can run it immediately after step 1 or even skip step 1 for vocabulary-only setups. --- -## Backend support at a glance +### Full vocabulary reload -| Area | Commands | -| --- | --- | -| Backend-agnostic | `info`, `doctor`, `data-summary`, `reconcile-schema`, `create-missing-tables`, `indexes`, `load-vocab-source`, `analyze-tables` (PostgreSQL/SQLite) | -| PostgreSQL-only | `backup-database`, `restore-database`, `fulltext`, `reset-sequences`, `truncate-tables`, `foreign-keys` | +Run this when you download a new Athena export and want to replace the existing vocabulary. -If a PostgreSQL-only command runs on an unsupported backend, the CLI returns a short -user-facing error. 
+```bash +# Suspend FK enforcement and drop indexes so loading is fast +omop-alchemy foreign-keys disable +omop-alchemy indexes disable --vocab ---- +# Clear existing vocabulary data +omop-alchemy truncate-tables --scope vocabulary --restart-identities --yes -## Command quick reference +# Load new vocabulary (bulk-mode is on by default: does not re-toggle indexes per table) +omop-alchemy load-vocab-source --athena-source ./athena_files --merge-strategy replace -| Command | Purpose | Key options | Backend | -| --- | --- | --- | --- | -| `info` | Show readiness and compatibility | `--engine-schema` | All | -| `doctor` | Read-only operational health check | `--deep`, `--vocab` | All (`--deep` is PostgreSQL-focused) | -| `data-summary` | Show managed tables and row counts | `--vocab`, `--include-missing` | All | -| `reconcile-schema` | Inspect drift between ORM and live schema | `--vocab` | All | -| `create-missing-tables` | Create absent ORM-managed tables | `--dry-run`, `--no-vocab` | All | -| `load-vocab-source` | Load Athena vocab CSVs | `--athena-source`, `--dry-run`, `--merge-strategy` | PostgreSQL, SQLite | -| `truncate-tables` | Truncate selected tables | `--scope` or `--table`, `--yes`, `--cascade`, `--restart-identities` | PostgreSQL | -| `reset-sequences` | Reset owned PK sequences to `MAX(pk) + 1` | `--dry-run`, `--vocab` | PostgreSQL | -| `foreign-keys` | Disable/enable/validate FK trigger enforcement | subcommands: `disable`, `enable`, `validate`, `status`; `--strict` | PostgreSQL | -| `analyze-tables` | Refresh planner statistics | `--scope`, `--table`, `--vacuum` | PostgreSQL, SQLite (`--vacuum` PostgreSQL-only) | -| `indexes` | Disable/enable ORM-defined secondary indexes | subcommands: `disable`, `enable`; `--dry-run`, `--vocab` | All (cluster apply on PostgreSQL) | -| `fulltext` | Manage sidecar `tsvector` columns and indexes | subcommands: `install`, `populate`, `drop`; `--regconfig`, `--no-create-indexes` | PostgreSQL | -| `backup-database` | 
Create PostgreSQL backup artifact | `--output-path`, `--format` | PostgreSQL | -| `restore-database` | Restore PostgreSQL backup artifact | backup path, `--format`, `--dry-run` | PostgreSQL | +# Rebuild indexes and re-enable FK enforcement +omop-alchemy indexes enable --vocab +omop-alchemy foreign-keys enable --strict + +# Refresh full-text sidecar vectors (if installed) +omop-alchemy fulltext populate +``` + +**About `--bulk-mode` (default on PostgreSQL):** +`load-vocab-source` disables FK triggers and drops vocabulary indexes once before the +load loop, then rebuilds them once at the end. This is much faster than the alternative +of toggling per table — for a full Athena export the difference can be 10–20×. SQLite +ignores this flag. Pass `--no-bulk-mode` if you need per-table rollback safety. + +**About `--merge-strategy replace`:** +`replace` truncates each target table and reloads from the CSV. Use `upsert` for +incremental vocabulary patches where you do not want to lose custom extensions. +Use `insert_if_empty` as the fastest path when the target tables are guaranteed empty. + +**About `--strict` on `foreign-keys enable`:** +`--strict` validates all FK relationships before re-enabling RI triggers. If violations +are found, no triggers are re-enabled and you get a report of the problematic rows. +Omit `--strict` to re-enable unconditionally. --- -## Minimal examples by area +### ETL bulk load cycle -### Inspect +Use this before and after a large clinical data load to avoid the overhead of FK and +index maintenance during insertion. 
```bash -omop-alchemy info -omop-alchemy doctor -omop-alchemy doctor --deep +# Before your ETL runs: suspend enforcement and remove indexes +omop-alchemy foreign-keys disable +omop-alchemy indexes disable + +# --- your ETL process runs here --- + +# After ETL: restore state +omop-alchemy reset-sequences +omop-alchemy indexes enable +omop-alchemy foreign-keys enable --strict +omop-alchemy analyze-tables --scope clinical ``` -### Schema +`analyze-tables` refreshes planner statistics after a large load so query plans don't +degrade. `--scope clinical` targets only clinical tables; omit `--scope` to analyze +everything. + +`reset-sequences` ensures that any auto-increment columns are positioned above the +maximum key value present in the table. This matters when your ETL inserts explicit IDs +(common in OMOP) — without a reset, the next ORM insert would try to reuse an ID that +already exists. + +--- + +### Health checks + +**Quick read-only check:** ```bash -omop-alchemy reconcile-schema -omop-alchemy create-missing-tables --dry-run -omop-alchemy create-missing-tables +omop-alchemy doctor ``` -### Vocabulary +Runs a fast, non-destructive pass over connection readiness, schema drift, and FK +trigger status. The output tells you what is wrong and what to do about it. + +**Deep FK validation (PostgreSQL):** ```bash -omop-alchemy load-vocab-source -omop-alchemy load-vocab-source --athena-source ./athena_source --dry-run +omop-alchemy doctor --deep ``` -### Bulk reload helpers +Adds a full FK constraint scan — it actually queries the data to find rows that violate +declared FK relationships. On large databases this can be slow; use it when you suspect +data integrity issues after an ETL or vocabulary patch. 
+ +**Full environment introspection:** ```bash -omop-alchemy foreign-keys disable -omop-alchemy indexes disable -omop-alchemy truncate-tables --scope clinical --restart-identities --yes +omop-alchemy info ``` -After ETL: +Shows the active engine URL, installed backend driver, OMOP Alchemy version, optional +dependency state (orm-loader, psycopg2/psycopg, etc.), and which maintenance commands +are available given the current backend. Run this first when diagnosing "why doesn't +this command work". + +**When doctor reports a problem:** + +| Doctor output | What it means | Fix | +| --- | --- | --- | +| Schema drift: missing tables | ORM has tables not in DB | `create-missing-tables` | +| Schema drift: extra tables | DB has tables ORM doesn't know | Review manually; may be custom extensions | +| FK triggers disabled | RI enforcement was suspended | `foreign-keys enable` or `foreign-keys enable --strict` | +| FK violations found | Data fails FK constraints | Investigate data, then `foreign-keys enable --strict` | + +--- + +### Schema drift + +The `reconcile-schema` command compares your ORM metadata against the live database and +reports what it finds: ```bash -omop-alchemy reset-sequences -omop-alchemy indexes enable -omop-alchemy foreign-keys enable --strict -omop-alchemy analyze-tables --scope clinical +omop-alchemy reconcile-schema +omop-alchemy reconcile-schema --dry-run # same output, no changes ``` -### Full-text sidecars +Output categories: + +- **missing** — table is in ORM metadata but not in the database. Fix: `create-missing-tables`. +- **extra** — table is in the database but not in ORM metadata. This can mean custom tables, staging tables, or leftover artifacts. The CLI does not touch these. +- **matched** — table exists in both and metadata is consistent. +- **drifted** — table exists in both but column definitions differ (types, nullability, defaults). The CLI does not auto-migrate; you need to handle schema migrations manually. 
+ +For safe deployment: run `reconcile-schema` first, then `create-missing-tables --dry-run`, +then `create-missing-tables`. + +--- + +### Full-text search sidecars + +Full-text search support adds `tsvector` sidecar columns (and `GIN` indexes) to the +`concept` and `concept_synonym` tables, enabling fast text search over vocabulary. ```bash +# Install the sidecar columns and indexes (once, after vocabulary tables exist) omop-alchemy fulltext install + +# Populate sidecar vectors from current vocabulary data omop-alchemy fulltext populate +``` + +**You must rerun `fulltext populate` after every vocabulary reload.** Sidecar vectors +do not auto-refresh when the underlying concept data changes. + +To remove the sidecars: + +```bash omop-alchemy fulltext drop ``` +The `--regconfig` option controls the PostgreSQL text search configuration +(default `english`). For multilingual vocabularies, use a suitable config such as +`simple`. + For query-side usage and optional ORM metadata registration, see [PostgreSQL Full-Text Search](../advanced/fulltext.md). +--- + ### Backup and restore +These commands wrap `pg_dump` and `pg_restore` / `psql`. PostgreSQL client tools must +be installed and on `PATH`. 
+ ```bash -omop-alchemy backup-database --engine-schema source --output-path ./cdm.dump -omop-alchemy restore-database ./cdm.dump --format custom --engine-schema target +# Create a backup (custom format is recommended — smaller and restorable in parallel) +omop-alchemy backup-database \ + --engine-schema source \ + --output-path ./cdm-backup.dump \ + --format custom + +# Restore into a target database (the DB must already exist and be empty) +omop-alchemy restore-database ./cdm-backup.dump \ + --format custom \ + --engine-schema target ``` ---- +**Format comparison:** + +| Format | File extension | Restore tool | Advantages | +| --- | --- | --- | --- | +| `custom` (default) | `.dump` | `pg_restore` | Compressed; supports parallel restore (`-j`) and selective restore | +| `plain` | `.sql` | `psql` | Human-readable SQL; editable but much larger | -## High-impact gotchas - -- Run destructive commands with `--dry-run` first. -- `truncate-tables`, `foreign-keys`, `fulltext`, `backup-database`, and - `restore-database` are PostgreSQL-only. -- `foreign-keys disable` and `enable` toggle PostgreSQL RI triggers; they do not drop - FK definitions. -- `fulltext populate` must be rerun after bulk vocabulary changes because sidecar - vectors do not auto-refresh. -- `indexes enable` may also apply PostgreSQL clustering, which can be heavy. -- `restore-database` restores into the configured target DB; it does not create or - clean that DB for you. -- `restore-database` now requires an explicit `--format` (`custom` or `plain`); - there is no automatic format detection. +**Restore caveats:** +- The target database must already exist. The CLI does not create or drop databases. +- For `plain` format, `--db-schema` has no effect; the schema is embedded in the SQL. +- For `custom` format, `--db-schema` restricts the restore to the named schema only. + +Use `--dry-run` on `backup-database` to see the `pg_dump` command that would be run +without executing it. 
--- -## Help +## Recovery: when things go wrong + +### Bulk load or vocabulary reload fails mid-way + +If `load-vocab-source` (with `--bulk-mode`) or your ETL process fails after FK triggers +and indexes have been disabled, they stay disabled. The database continues to accept +writes but does not enforce FK constraints, and queries may use slow sequential scans. + +To recover: + +```bash +omop-alchemy indexes enable --vocab # or without --vocab if you disabled all indexes +omop-alchemy foreign-keys enable +``` + +If you used `--strict` originally and now have data violations: + +```bash +omop-alchemy foreign-keys validate # see what's broken +# fix the data +omop-alchemy foreign-keys enable --strict +``` + +### FK validation fails after `enable --strict` + +```bash +omop-alchemy foreign-keys validate +``` + +This reports exactly which tables have violations, which constraints are affected, and +how many rows fail. Fix the data, then retry `foreign-keys enable --strict`. + +If you need to re-enable FK triggers despite the violations (for example, to allow the +application to run while you investigate), use `foreign-keys enable` without `--strict`. + +### Sequences are out of sync after a bulk insert + +After any load that inserts explicit primary key values: ```bash -omop-alchemy --help -omop-alchemy doctor --help -omop-alchemy fulltext --help -omop-alchemy config --help +omop-alchemy reset-sequences # all managed tables +omop-alchemy reset-sequences --vocab # vocabulary tables only ``` + +`reset-sequences` sets each owned sequence to `MAX(pk) + 1`. It reports every table +it touches and the old/new sequence positions. 
+ +--- + +## Command reference + +| Command | Purpose | Key options | Backend | +| --- | --- | --- | --- | +| `info` | Inspect CLI readiness, backend, and dependency state | `--engine-schema` | All | +| `doctor` | Read-only health check: connection, schema, FK state | `--deep`, `--vocab` | All (`--deep` PostgreSQL-focused) | +| `data-summary` | Show managed tables and row counts | `--vocab`, `--include-missing` | All | +| `reconcile-schema` | Compare ORM metadata vs live schema | `--vocab`, `--dry-run` | All | +| `create-missing-tables` | Create OMOP tables absent from DB | `--dry-run`, `--no-vocab` | All | +| `load-vocab-source` | Load Athena vocabulary CSVs | `--athena-source`, `--merge-strategy`, `--bulk-mode/--no-bulk-mode`, `--dry-run` | PostgreSQL, SQLite | +| `truncate-tables` | Truncate selected tables | `--scope`, `--table`, `--yes`, `--cascade`, `--restart-identities` | PostgreSQL | +| `reset-sequences` | Reset owned PK sequences to `MAX(pk) + 1` | `--dry-run`, `--vocab` | PostgreSQL | +| `foreign-keys disable` | Suspend FK RI trigger enforcement | `--vocab`, `--dry-run` | PostgreSQL | +| `foreign-keys enable` | Re-enable FK RI trigger enforcement | `--strict`, `--vocab`, `--dry-run` | PostgreSQL | +| `foreign-keys validate` | Report FK constraint violations | `--vocab` | PostgreSQL | +| `foreign-keys status` | Show current trigger enable/disable state | `--vocab` | PostgreSQL | +| `analyze-tables` | Refresh planner statistics | `--scope`, `--table`, `--vacuum` | PostgreSQL, SQLite (`--vacuum` PostgreSQL-only) | +| `indexes disable` | Drop ORM-defined secondary indexes | `--vocab`, `--dry-run` | All | +| `indexes enable` | Recreate ORM-defined secondary indexes | `--vocab`, `--dry-run` | All (cluster on PostgreSQL) | +| `fulltext install` | Add tsvector sidecar columns to vocabulary tables | `--regconfig`, `--no-create-indexes` | PostgreSQL | +| `fulltext populate` | Populate sidecar tsvector vectors | `--regconfig` | PostgreSQL | +| `fulltext drop` | 
Remove tsvector sidecar columns and indexes | | PostgreSQL | +| `backup-database` | Create a `pg_dump` backup artifact | `--output-path`, `--format`, `--db-schema`, `--dry-run` | PostgreSQL | +| `restore-database` | Restore a backup artifact into the target DB | `--format` (required), `--db-schema`, `--dry-run` | PostgreSQL | +| `config show` | Print current saved defaults | | All | +| `config set-overrides` | Save connection defaults | `--dotenv`, `--engine-schema`, `--db-schema`, `--athena-source` | All | +| `config clear-overrides` | Remove saved defaults | per-field flags | All | diff --git a/omop_alchemy/maintenance/__init__.py b/omop_alchemy/maintenance/__init__.py index e68ba08..87be939 100644 --- a/omop_alchemy/maintenance/__init__.py +++ b/omop_alchemy/maintenance/__init__.py @@ -1,16 +1,18 @@ -from .backup import ( +from .cli_backup import ( BackupFormat, DatabaseBackupResult, DatabaseRestoreResult, create_database_backup, restore_database_backup, ) -from .analyze_tables import AnalyzeTableResult, analyze_tables -from .create_tables import TableCreationResult, collect_missing_tables, create_missing_tables -from .data_summary import TableSummaryResult, collect_data_summary -from .defaults import ConnectionDefaults, clear_connection_defaults, defaults_path, load_connection_defaults, save_connection_defaults -from .doctor import DoctorCheck, DoctorRecommendation, DoctorReport, collect_doctor_report -from .foreign_keys import ( +from .cli_config import ( + ConnectionDefaults, + clear_connection_defaults, + defaults_path, + load_connection_defaults, + save_connection_defaults, +) +from .cli_foreign_keys import ( ForeignKeyAction, ForeignKeyConstraintViolation, ForeignKeyManagementResult, @@ -23,29 +25,44 @@ manage_foreign_key_triggers, validate_foreign_key_constraints, ) -from .info import CommandSupport, DependencyStatus, MaintenanceInfo, collect_maintenance_info -from .indexes import ( +from .cli_indexes import ( IndexAction, IndexManagementResult, 
IndexTarget, collect_index_targets, manage_indexes, ) -from .load_vocab import VocabularyLoadReport, VocabularyLoadResult, load_vocab_source -from .reconcile import ( +from .cli_schema import ( + CommandSupport, + DependencyStatus, + DoctorCheck, + DoctorRecommendation, + DoctorReport, + MaintenanceInfo, ReconciliationIssue, SchemaReconciliationReport, + TableCreationResult, TableReconciliationResult, + TableSummaryResult, + collect_data_summary, + collect_doctor_report, + collect_maintenance_info, + collect_missing_tables, + create_missing_tables, reconcile_schema, ) -from .reset_sequences import ( +from .cli_tables import ( + AnalyzeTableResult, SequenceResetResult, SequenceTarget, + TruncateTableResult, + analyze_tables, collect_sequence_targets, reset_model_sequences, + truncate_tables, ) +from .cli_vocab import VocabularyLoadReport, VocabularyLoadResult, load_vocab_source from .tables import MaintenanceTable, TableCategory, collect_maintenance_tables, select_maintenance_tables -from .truncate_tables import TruncateTableResult, truncate_tables __all__ = [ "analyze_tables", diff --git a/omop_alchemy/maintenance/_cli_utils.py b/omop_alchemy/maintenance/_cli_utils.py new file mode 100644 index 0000000..bc6d29c --- /dev/null +++ b/omop_alchemy/maintenance/_cli_utils.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +import functools +import logging + +import typer +from sqlalchemy.engine import Engine +from sqlalchemy.exc import SQLAlchemyError + +from omop_alchemy import create_engine_with_dependencies, get_engine_name, load_environment + +from .cli_config import ConnectionDefaults, defaults_path, load_connection_defaults +from .tables import TableScope +from .ui import console, render_error + + +def resolve_connection( + *, + dotenv: str | None, + engine_schema: str | None, + db_schema: str | None, + athena_source: str | None = None, +) -> ConnectionDefaults: + saved = load_connection_defaults() + return ConnectionDefaults( + dotenv=dotenv if dotenv is 
not None else saved.dotenv, + engine_schema=engine_schema if engine_schema is not None else saved.engine_schema, + db_schema=db_schema if db_schema is not None else saved.db_schema, + athena_source=athena_source if athena_source is not None else saved.athena_source, + ) + + +def build_engine(*, dotenv: str | None, engine_schema: str | None) -> Engine: + load_environment(dotenv or "") + return create_engine_with_dependencies(get_engine_name(engine_schema), future=True) + + +def handle_error(exc: Exception) -> None: + if isinstance(exc, RuntimeError): + console.print(render_error(str(exc))) + raise typer.Exit(code=1) from exc + if isinstance(exc, SQLAlchemyError): + detail = str(exc).strip() + message = f"Database operation failed: {exc.__class__.__name__}." + if detail: + message = f"{message} Detail: {detail}" + console.print(render_error(message)) + raise typer.Exit(code=1) from exc + raise exc + + +def resolve_selection( + *, + scope: TableScope | None, + tables: list[str] | None, + default_scope: TableScope | None = None, +) -> tuple[TableScope | None, tuple[str, ...] 
| None]: + if scope is not None and tables: + raise RuntimeError("Use either `--scope` or `--table`, not both.") + selected = tuple(tables) if tables else None + if selected is not None: + return None, selected + return scope or default_scope, None + + +@functools.lru_cache(maxsize=None) +def configure_logging() -> None: + mode = (load_connection_defaults().logging or "file").strip().lower() + if mode not in {"file", "console", "off"}: + mode = "file" + if mode == "off": + return + + formatter = logging.Formatter("%(asctime)s | %(levelname)-8s | %(name)s | %(message)s") + if mode == "file": + log_path = defaults_path().parent / "logging" / "omop-alchemy.log" + log_path.parent.mkdir(parents=True, exist_ok=True) + handler: logging.Handler = logging.FileHandler(log_path, encoding="utf-8") + else: + handler = logging.StreamHandler() + handler.setFormatter(formatter) + + root_logger = logging.getLogger() + root_logger.addHandler(handler) + if root_logger.level in {logging.NOTSET, logging.WARNING, logging.ERROR, logging.CRITICAL}: + root_logger.setLevel(logging.INFO) diff --git a/omop_alchemy/maintenance/analyze_tables.py b/omop_alchemy/maintenance/analyze_tables.py deleted file mode 100644 index 1f82413..0000000 --- a/omop_alchemy/maintenance/analyze_tables.py +++ /dev/null @@ -1,101 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass - -import sqlalchemy as sa - -from ..backend_support import Dialect, require_backend -from .tables import ( - TableCategory, - TableScope, - qualified_table_name, - resolve_maintenance_tables, -) - - -@dataclass(frozen=True) -class AnalyzeTableResult: - table_name: str - category: TableCategory - model_name: str - model_module: str - operation: str - status: str - detail: str - - -def analyze_tables( - engine: sa.Engine, - *, - db_schema: str | None = None, - scope: TableScope | None = None, - table_names: tuple[str, ...] 
| None = None, - vacuum: bool = False, - dry_run: bool = False, -) -> list[AnalyzeTableResult]: - if scope is not None and table_names is not None: - raise RuntimeError("Use either `scope` or `table_names`, not both.") - - require_backend( - engine, - feature="Table analysis", - supported_dialects=(Dialect.POSTGRESQL, Dialect.SQLITE), - ) - - if vacuum and engine.dialect.name != Dialect.POSTGRESQL: - raise RuntimeError( - "VACUUM ANALYZE is only supported for PostgreSQL engines. " - f"Current dialect: '{engine.dialect.name}'." - ) - - selected_tables = resolve_maintenance_tables( - scope=scope, - table_names=table_names, - ) - inspector = sa.inspect(engine) - operation = "VACUUM ANALYZE" if vacuum else "ANALYZE" - results: list[AnalyzeTableResult] = [] - - connection_factory = ( - engine.connect().execution_options(isolation_level="AUTOCOMMIT") - if vacuum - else engine.connect() - ) - - with connection_factory as connection: - for maintenance_table in selected_tables: - if not inspector.has_table(maintenance_table.table_name, schema=db_schema): - results.append( - AnalyzeTableResult( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - model_name=maintenance_table.model_name, - model_module=maintenance_table.model_module, - operation=operation, - status="skipped", - detail="table not present in target database", - ) - ) - continue - - qualified_name = qualified_table_name(maintenance_table.table_name, db_schema) - if not dry_run: - connection.exec_driver_sql(f"{operation} {qualified_name}") - - results.append( - AnalyzeTableResult( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - model_name=maintenance_table.model_name, - model_module=maintenance_table.model_module, - operation=operation, - status="planned" if dry_run else "applied", - detail=( - f"{operation.lower()} would run" - if dry_run - else f"{operation.lower()} completed" - ), - ) - ) - - return results diff --git 
a/omop_alchemy/maintenance/cli.py b/omop_alchemy/maintenance/cli.py index de2c23f..a403a51 100644 --- a/omop_alchemy/maintenance/cli.py +++ b/omop_alchemy/maintenance/cli.py @@ -1,93 +1,20 @@ from __future__ import annotations -import logging import typer -from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn, TimeElapsedColumn -from sqlalchemy.engine import Engine -from sqlalchemy.exc import SQLAlchemyError -from omop_alchemy import create_engine_with_dependencies, get_engine_name, load_environment -from omop_alchemy.cdm.handlers.fulltext import ( - drop_fulltext_columns, - install_fulltext_columns, - populate_fulltext_columns, -) - -from .analyze_tables import analyze_tables -from .backup import BackupFormat, create_database_backup, restore_database_backup -from ..backend_support import POSTGRESQL_ONLY_HELP -from .create_tables import create_missing_tables -from .data_summary import collect_data_summary -from .defaults import ( - ConnectionDefaults, - clear_connection_defaults, - defaults_path, - load_connection_defaults, - save_connection_defaults, -) -from .foreign_keys import ( - ForeignKeyAction, - collect_foreign_key_trigger_status, - manage_foreign_key_triggers, - validate_foreign_key_constraints, -) -from .doctor import collect_doctor_report +from . 
import ( + cli_backup as backup, + cli_config as config, + cli_foreign_keys as foreign_keys, + cli_fulltext as fulltext, + cli_indexes as indexes, + cli_schema as schema, + cli_tables as tables, + cli_vocab as vocab, +) +from ._cli_utils import configure_logging from .help import install_help_customizations -from .info import collect_maintenance_info -from .indexes import IndexAction, manage_indexes -from .load_vocab import MergeStrategy, VocabularyLoadProgress, load_vocab_source -from .reconcile import reconcile_schema -from .reset_sequences import reset_model_sequences -from .tables import TableScope -from .truncate_tables import truncate_tables -from .ui import ( - render_analyze_note, - render_analyze_results, - render_analyze_summary, - render_backup_result, - render_backup_summary, - console, - render_command_header, - render_connection_defaults, - render_data_summary_results, - render_data_summary_summary, - render_doctor_checks, - render_doctor_recommendations, - render_doctor_summary, - render_error, - render_foreign_key_note, - render_foreign_key_results, - render_foreign_key_status_results, - render_foreign_key_status_summary, - render_foreign_key_summary, - render_foreign_key_validation_issues, - render_foreign_key_validation_results, - render_foreign_key_validation_summary, - render_fulltext_results, - render_fulltext_summary, - render_info_command_support, - render_info_database, - render_info_dependencies, - render_info_environment, - render_info_summary, - render_index_note, - render_index_results, - render_index_summary, - render_vocab_load_results, - render_vocab_load_summary, - render_reconciliation_issues, - render_reconciliation_results, - render_reconciliation_summary, - render_restore_result, - render_restore_summary, - render_sequence_reset_results, - render_sequence_reset_summary, - render_table_creation_results, - render_table_creation_summary, - render_truncate_note, - render_truncate_results, - render_truncate_summary, -) + 
install_help_customizations() app = typer.Typer( @@ -98,1195 +25,27 @@ ), rich_markup_mode="rich", ) -config_app = typer.Typer( - help="Manage persisted maintenance CLI connection overrides.", - rich_markup_mode="rich", -) -foreign_keys_app = typer.Typer( - help=f"Manage PostgreSQL RI trigger enforcement for OMOP tables. {POSTGRESQL_ONLY_HELP}", - rich_markup_mode="rich", -) -indexes_app = typer.Typer( - help="Manage ORM-defined secondary indexes.", - rich_markup_mode="rich", -) -fulltext_app = typer.Typer( - help=f"Manage PostgreSQL full-text sidecar tsvector columns for OMOP vocabulary tables. {POSTGRESQL_ONLY_HELP}", - rich_markup_mode="rich", -) -app.add_typer(config_app, name="config") -app.add_typer(foreign_keys_app, name="foreign-keys") -app.add_typer(indexes_app, name="indexes") -app.add_typer(fulltext_app, name="fulltext") - -_CLI_LOGGING_CONFIGURED = False +# Subgroups +app.add_typer(config.app, name="config") +app.add_typer(foreign_keys.app, name="foreign-keys") +app.add_typer(indexes.app, name="indexes") +app.add_typer(fulltext.app, name="fulltext") -def _configure_cli_logging() -> None: - global _CLI_LOGGING_CONFIGURED - if _CLI_LOGGING_CONFIGURED: - return - - mode = (load_connection_defaults().logging or "file").strip().lower() - if mode not in {"file", "console", "off"}: - mode = "file" - - if mode == "off": - _CLI_LOGGING_CONFIGURED = True - return - - formatter = logging.Formatter( - "%(asctime)s | %(levelname)-8s | %(name)s | %(message)s" - ) - - if mode == "file": - log_path = defaults_path().parent / "logging" / "omop-alchemy.log" - log_path.parent.mkdir(parents=True, exist_ok=True) - handler: logging.Handler = logging.FileHandler(log_path, encoding="utf-8") - else: - handler = logging.StreamHandler() - - handler.setFormatter(formatter) - - root_logger = logging.getLogger() - root_logger.addHandler(handler) - if root_logger.level in {logging.NOTSET, logging.WARNING, logging.ERROR, logging.CRITICAL}: - root_logger.setLevel(logging.INFO) - - 
_CLI_LOGGING_CONFIGURED = True +# Flat root-level commands lifted from each domain module +for _sub in (schema.app, vocab.app, tables.app, backup.app): + for _cmd in _sub.registered_commands: + app.registered_commands.append(_cmd) @app.callback() def app_callback() -> None: - _configure_cli_logging() + configure_logging() def main() -> None: app() -def _resolve_connection_context( - *, - dotenv: str | None, - engine_schema: str | None, - db_schema: str | None, - athena_source: str | None = None, -) -> ConnectionDefaults: - saved_defaults = load_connection_defaults() - return ConnectionDefaults( - dotenv=dotenv if dotenv is not None else saved_defaults.dotenv, - engine_schema=( - engine_schema if engine_schema is not None else saved_defaults.engine_schema - ), - db_schema=db_schema if db_schema is not None else saved_defaults.db_schema, - athena_source=( - athena_source if athena_source is not None else saved_defaults.athena_source - ), - ) - - -def _build_engine(*, dotenv: str | None, engine_schema: str | None) -> Engine: - load_environment(dotenv or "") - return create_engine_with_dependencies( - get_engine_name(engine_schema), - future=True, - ) - - -def _handle_cli_error(exc: Exception) -> None: - if isinstance(exc, RuntimeError): - console.print(render_error(str(exc))) - raise typer.Exit(code=1) from exc - - if isinstance(exc, SQLAlchemyError): - detail = str(exc).strip() - message = f"Database operation failed: {exc.__class__.__name__}." - if detail: - message = f"{message} Detail: {detail}" - console.print( - render_error(message) - ) - raise typer.Exit(code=1) from exc - - raise exc - - -def _resolve_selection( - *, - scope: TableScope | None, - tables: list[str] | None, - default_scope: TableScope | None = None, -) -> tuple[TableScope | None, tuple[str, ...] 
| None]: - if scope is not None and tables: - raise RuntimeError("Use either `--scope` or `--table`, not both.") - - selected_tables = tuple(tables) if tables else None - if selected_tables is not None: - return None, selected_tables - - return scope or default_scope, None - - -@config_app.command("show") -def config_show_command() -> None: - defaults = load_connection_defaults() - console.print( - render_connection_defaults( - defaults, - path=str(defaults_path()), - ) - ) - - -@config_app.command("set-overrides") -def config_set_overrides_command( - dotenv: str | None = typer.Option(None, help="Override dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Override engine schema selector."), - db_schema: str | None = typer.Option(None, help="Override database schema."), - athena_source: str | None = typer.Option(None, help="Override path to unzipped Athena vocabulary files."), -) -> None: - current = load_connection_defaults() - updated = current.with_updates( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - athena_source=athena_source, - ) - path = save_connection_defaults(updated) - console.print( - render_connection_defaults( - updated, - path=str(path), - title="Saved Overrides", - ) - ) - - -@config_app.command("clear-overrides") -def config_clear_overrides_command( - dotenv: bool = typer.Option(False, "--dotenv", help="Clear overridden dotenv."), - engine_schema: bool = typer.Option(False, "--engine-schema", help="Clear overridden engine schema."), - db_schema: bool = typer.Option(False, "--db-schema", help="Clear overridden database schema."), - athena_source: bool = typer.Option(False, "--athena-source", help="Clear overridden Athena source path."), -) -> None: - path = clear_connection_defaults( - clear_dotenv=dotenv, - clear_engine_schema=engine_schema, - clear_db_schema=db_schema, - clear_athena_source=athena_source, - ) - - if path is None: - console.print( - render_connection_defaults( - 
ConnectionDefaults(), - path=str(defaults_path()), - title="Overrides Already Clear", - ) - ) - return - - console.print( - render_connection_defaults( - load_connection_defaults(), - path=str(path), - title="Overrides Cleared", - ) - ) - - -@app.command( - "info", - help="Inspect maintenance CLI readiness, backend compatibility, and current installation state.", -) -def info_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(True, "--vocab/--no-vocab"), -) -> None: - connection_defaults = _resolve_connection_context( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - ) - console.print( - render_command_header( - command_name="info", - engine_schema=connection_defaults.engine_schema, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - mode_label="inspect", - ) - ) - try: - load_environment(connection_defaults.dotenv or "") - with console.status("Inspecting maintenance environment..."): - info = collect_maintenance_info( - dotenv=connection_defaults.dotenv, - engine_schema=connection_defaults.engine_schema, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - ) - console.print(render_info_environment(info)) - console.print(render_info_database(info)) - console.print(render_info_dependencies(info)) - console.print(render_info_command_support(info.command_support)) - console.print(render_info_summary(info)) - except Exception as exc: - _handle_cli_error(exc) - - -@app.command( - "doctor", - help="Run a read-only maintenance health check across connection readiness, schema drift, and FK state.", -) -def doctor_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = 
typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), - deep: bool = typer.Option(False, "--deep", help="Include heavier checks such as PostgreSQL foreign key validation."), -) -> None: - connection_defaults = _resolve_connection_context( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - ) - console.print( - render_command_header( - command_name="doctor", - engine_schema=connection_defaults.engine_schema, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - mode_label="inspect", - ) - ) - try: - load_environment(connection_defaults.dotenv or "") - with console.status("Running maintenance doctor checks..."): - report = collect_doctor_report( - dotenv=connection_defaults.dotenv, - engine_schema=connection_defaults.engine_schema, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - deep=deep, - ) - console.print(render_info_environment(report.info)) - console.print(render_info_database(report.info)) - console.print(render_doctor_checks(report.checks)) - if deep and report.foreign_key_validation is not None: - console.print( - render_foreign_key_validation_issues( - report.foreign_key_validation.violations - ) - ) - console.print(render_doctor_recommendations(report.recommendations)) - console.print(render_doctor_summary(report, deep=deep)) - except Exception as exc: - _handle_cli_error(exc) - - -@app.command( - "backup-database", - help=f"Create a PostgreSQL dump artifact that can be restored into another environment. 
{POSTGRESQL_ONLY_HELP}", -) -def backup_database_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Optional schema-limited backup."), - output_path: str | None = typer.Option(None, help="Backup artifact path. Defaults to a timestamped file in the current directory."), - format: BackupFormat = typer.Option(BackupFormat.CUSTOM, help="Backup format."), - dry_run: bool = typer.Option(False, "--dry-run"), -) -> None: - connection_defaults = _resolve_connection_context( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - ) - console.print( - render_command_header( - command_name="backup-database", - engine_schema=connection_defaults.engine_schema, - db_schema=connection_defaults.db_schema, - vocabulary_included=None, - mode_label="dry-run" if dry_run else "apply", - ) - ) - try: - engine = _build_engine( - dotenv=connection_defaults.dotenv, - engine_schema=connection_defaults.engine_schema, - ) - with console.status("Creating restore-ready PostgreSQL backup..."): - result = create_database_backup( - engine, - output_path=output_path, - format=format, - db_schema=connection_defaults.db_schema, - dry_run=dry_run, - ) - console.print(render_backup_result(result)) - console.print(render_backup_summary(result, dry_run=dry_run)) - except Exception as exc: - _handle_cli_error(exc) - - -@app.command( - "restore-database", - help=f"Restore a PostgreSQL backup artifact into the configured target database. 
{POSTGRESQL_ONLY_HELP}", -) -def restore_database_command( - input_path: str = typer.Argument(..., help="Backup artifact path to restore."), - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Optional schema-limited restore for custom-format dumps."), - format: BackupFormat = typer.Option(..., help="Restore format. Required: choose `custom` or `plain`."), - dry_run: bool = typer.Option(False, "--dry-run"), -) -> None: - connection_defaults = _resolve_connection_context( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - ) - console.print( - render_command_header( - command_name="restore-database", - engine_schema=connection_defaults.engine_schema, - db_schema=connection_defaults.db_schema, - vocabulary_included=None, - mode_label="dry-run" if dry_run else "apply", - ) - ) - try: - engine = _build_engine( - dotenv=connection_defaults.dotenv, - engine_schema=connection_defaults.engine_schema, - ) - with console.status("Restoring PostgreSQL backup artifact..."): - result = restore_database_backup( - engine, - input_path=input_path, - format=format, - db_schema=connection_defaults.db_schema, - dry_run=dry_run, - ) - console.print(render_restore_result(result)) - console.print(render_restore_summary(result, dry_run=dry_run)) - except Exception as exc: - _handle_cli_error(exc) - - -@app.command( - "reconcile-schema", - help="Compare ORM-managed SQLAlchemy metadata against the current target database schema.", -) -def reconcile_schema_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), -) -> None: - connection_defaults = 
_resolve_connection_context( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - ) - console.print( - render_command_header( - command_name="reconcile-schema", - engine_schema=connection_defaults.engine_schema, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - mode_label="inspect", - ) - ) - try: - engine = _build_engine( - dotenv=connection_defaults.dotenv, - engine_schema=connection_defaults.engine_schema, - ) - with console.status("Reconciling ORM metadata against target database schema..."): - report = reconcile_schema( - engine, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - ) - console.print(render_reconciliation_results(report.table_results)) - console.print(render_reconciliation_issues(report.issues)) - console.print(render_reconciliation_summary(report)) - except Exception as exc: - _handle_cli_error(exc) - - -@app.command( - "reset-sequences", - help=f"Reset owned sequences from table max + 1. 
{POSTGRESQL_ONLY_HELP}", -) -def reset_sequences_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), - dry_run: bool = typer.Option(False, "--dry-run"), -) -> None: - connection_defaults = _resolve_connection_context( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - ) - console.print( - render_command_header( - command_name="reset-sequences", - engine_schema=connection_defaults.engine_schema, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - mode_label="dry-run" if dry_run else "apply", - ) - ) - try: - engine = _build_engine( - dotenv=connection_defaults.dotenv, - engine_schema=connection_defaults.engine_schema, - ) - with console.status("Resetting PostgreSQL sequences..."): - results = reset_model_sequences( - engine, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - dry_run=dry_run, - ) - console.print(render_sequence_reset_results(results)) - console.print(render_sequence_reset_summary(results, dry_run=dry_run)) - except Exception as exc: - _handle_cli_error(exc) - - -@app.command( - "data-summary", - help="Summarise ORM-managed OMOP tables present in the target database.", -) -def data_summary_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), - include_missing: bool = typer.Option(False, "--include-missing"), -) -> None: - connection_defaults = _resolve_connection_context( - dotenv=dotenv, - engine_schema=engine_schema, - 
db_schema=db_schema, - ) - console.print( - render_command_header( - command_name="data-summary", - engine_schema=connection_defaults.engine_schema, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - mode_label="inspect", - ) - ) - try: - engine = _build_engine( - dotenv=connection_defaults.dotenv, - engine_schema=connection_defaults.engine_schema, - ) - with console.status("Collecting table summary..."): - results = collect_data_summary( - engine, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - existing_only=not include_missing, - ) - console.print(render_data_summary_results(results)) - console.print(render_data_summary_summary(results)) - except Exception as exc: - _handle_cli_error(exc) - - -@app.command( - "analyze-tables", - help="Refresh planner statistics for selected ORM-managed tables.", -) -def analyze_tables_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - scope: TableScope | None = typer.Option( - None, - "--scope", - help="Category scope to analyze. Defaults to all ORM-managed tables when omitted.", - case_sensitive=False, - ), - table: list[str] | None = typer.Option( - None, - "--table", - help="Specific ORM-managed table name to analyze. Repeat for multiple tables.", - ), - vacuum: bool = typer.Option( - False, - "--vacuum", - help="Use VACUUM ANALYZE instead of ANALYZE. 
PostgreSQL only.", - ), - dry_run: bool = typer.Option(False, "--dry-run"), -) -> None: - resolved_scope, resolved_tables = _resolve_selection( - scope=scope, - tables=table, - default_scope=TableScope.ALL, - ) - connection_defaults = _resolve_connection_context( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - ) - console.print( - render_command_header( - command_name="analyze-tables", - engine_schema=connection_defaults.engine_schema, - db_schema=connection_defaults.db_schema, - vocabulary_included=None, - mode_label="dry-run" if dry_run else "apply", - ) - ) - try: - engine = _build_engine( - dotenv=connection_defaults.dotenv, - engine_schema=connection_defaults.engine_schema, - ) - with console.status("Refreshing planner statistics for selected tables..."): - results = analyze_tables( - engine, - db_schema=connection_defaults.db_schema, - scope=resolved_scope, - table_names=resolved_tables, - vacuum=vacuum, - dry_run=dry_run, - ) - console.print(render_analyze_results(results)) - console.print(render_analyze_summary(results, dry_run=dry_run)) - console.print(render_analyze_note()) - except Exception as exc: - _handle_cli_error(exc) - - -@app.command( - "create-missing-tables", - help="Create missing ORM-managed OMOP tables from metadata.", -) -def create_missing_tables_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(True, "--vocab/--no-vocab"), - dry_run: bool = typer.Option(False, "--dry-run"), -) -> None: - connection_defaults = _resolve_connection_context( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - ) - console.print( - render_command_header( - command_name="create-missing-tables", - engine_schema=connection_defaults.engine_schema, - 
db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - mode_label="dry-run" if dry_run else "apply", - ) - ) - try: - engine = _build_engine( - dotenv=connection_defaults.dotenv, - engine_schema=connection_defaults.engine_schema, - ) - with console.status("Creating missing tables..."): - results = create_missing_tables( - engine, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - dry_run=dry_run, - ) - console.print(render_table_creation_results(results)) - console.print(render_table_creation_summary(results, dry_run=dry_run)) - except Exception as exc: - _handle_cli_error(exc) - - -@app.command( - "truncate-tables", - help=f"Truncate selected ORM-managed tables. {POSTGRESQL_ONLY_HELP}", -) -def truncate_tables_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - scope: TableScope | None = typer.Option( - None, - "--scope", - help="Category scope to truncate.", - case_sensitive=False, - ), - table: list[str] | None = typer.Option( - None, - "--table", - help="Specific ORM-managed table name to truncate. 
Repeat for multiple tables.", - ), - restart_identities: bool = typer.Option( - False, - "--restart-identities", - help="Restart owned identities during truncation.", - ), - cascade: bool = typer.Option( - False, - "--cascade", - help="Include dependent tables via PostgreSQL CASCADE.", - ), - yes: bool = typer.Option( - False, - "--yes", - help="Confirm that you want to apply this destructive operation.", - ), - dry_run: bool = typer.Option(False, "--dry-run"), -) -> None: - resolved_scope, resolved_tables = _resolve_selection( - scope=scope, - tables=table, - ) - if resolved_scope is None and resolved_tables is None: - console.print( - render_error( - "Select tables to truncate with `--scope` or one or more `--table` values." - ) - ) - raise typer.Exit(code=1) - if not dry_run and not yes: - console.print( - render_error( - "Truncation is destructive. Re-run with `--yes`, or use `--dry-run` first." - ) - ) - raise typer.Exit(code=1) - - connection_defaults = _resolve_connection_context( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - ) - console.print( - render_command_header( - command_name="truncate-tables", - engine_schema=connection_defaults.engine_schema, - db_schema=connection_defaults.db_schema, - vocabulary_included=None, - mode_label="dry-run" if dry_run else "apply", - ) - ) - try: - engine = _build_engine( - dotenv=connection_defaults.dotenv, - engine_schema=connection_defaults.engine_schema, - ) - with console.status("Truncating selected tables..."): - results = truncate_tables( - engine, - db_schema=connection_defaults.db_schema, - scope=resolved_scope, - table_names=resolved_tables, - restart_identities=restart_identities, - cascade=cascade, - dry_run=dry_run, - ) - console.print(render_truncate_results(results)) - console.print( - render_truncate_summary( - results, - dry_run=dry_run, - restart_identities=restart_identities, - cascade=cascade, - ) - ) - console.print(render_truncate_note()) - except Exception as exc: - 
_handle_cli_error(exc) - - -@app.command( - "load-vocab-source", - help="Load Athena vocabulary CSV files from a configured source path using the ORM staged CSV loader.", -) -def load_vocab_source_command( - athena_source: str | None = typer.Option(None, help="Path to unzipped Athena vocabulary CSV files."), - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override. PostgreSQL only; uses search_path for ORM CSV loading."), - merge_strategy: MergeStrategy = typer.Option( - "replace", - help="CSV merge strategy. One of `replace` (default, keeps DB in sync), `upsert` (incremental, non-destructive), or `insert_if_empty` (fast path for a fresh empty target).", - ), - chunksize: int | None = typer.Option( - 100_000, - help="Chunk size for fallback ORM CSV loading. Defaults to 100 000 rows; pass 0 to disable chunking.", - ), - dry_run: bool = typer.Option(False, "--dry-run"), -) -> None: - connection_defaults = _resolve_connection_context( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - athena_source=athena_source, - ) - console.print( - render_command_header( - command_name="load-vocab-source", - engine_schema=connection_defaults.engine_schema, - db_schema=connection_defaults.db_schema, - vocabulary_included=True, - mode_label="dry-run" if dry_run else "apply", - ) - ) - - if connection_defaults.athena_source is None: - console.print( - render_error( - "No Athena vocabulary source path is configured. " - "Set it with `omop-alchemy config set-overrides --athena-source ` " - "or pass `--athena-source`." 
- ) - ) - raise typer.Exit(code=1) - - try: - engine = _build_engine( - dotenv=connection_defaults.dotenv, - engine_schema=connection_defaults.engine_schema, - ) - - with Progress( - SpinnerColumn(), - TextColumn("[bold cyan]{task.description}"), - BarColumn(bar_width=None), - TaskProgressColumn(), - TimeElapsedColumn(), - console=console, - transient=False, - ) as progress: - task_id = progress.add_task( - "Preparing Athena vocabulary load...", - total=100.0, - completed=0, - ) - completed_tables: list[str] = [] - - def _update_progress(event: VocabularyLoadProgress) -> None: - progress.update( - task_id, - completed=event.percent, - description=event.detail, - ) - if event.phase == "commit-complete" and event.table_name is not None: - completed_tables.append(event.table_name) - progress.console.print( - ( - f"[green]loaded[/green] " - f"[bold]{event.table_name}[/bold] " - f"({len(completed_tables)}/{event.table_count})" - ) - ) - - report = load_vocab_source( - engine, - source_path=connection_defaults.athena_source, - db_schema=connection_defaults.db_schema, - dry_run=dry_run, - merge_strategy=merge_strategy, - chunksize=None if chunksize == 0 else chunksize, - progress_callback=_update_progress, - ) - progress.update( - task_id, - completed=100.0, - description="Athena vocabulary load complete", - ) - console.print(render_vocab_load_results(report.results)) - console.print(render_vocab_load_summary(report, dry_run=dry_run)) - except Exception as exc: - _handle_cli_error(exc) - - -def _foreign_key_command( - *, - action: ForeignKeyAction, - strict: bool, - dotenv: str | None, - engine_schema: str | None, - db_schema: str | None, - vocabulary_included: bool, - dry_run: bool, -) -> None: - connection_defaults = _resolve_connection_context( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - ) - console.print( - render_command_header( - command_name=( - f"foreign-keys {action.value} --strict" - if action is ForeignKeyAction.ENABLE and strict - 
else f"foreign-keys {action.value}" - ), - engine_schema=connection_defaults.engine_schema, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - mode_label="dry-run" if dry_run else "apply", - ) - ) - try: - engine = _build_engine( - dotenv=connection_defaults.dotenv, - engine_schema=connection_defaults.engine_schema, - ) - with console.status( - "Validating and enabling PostgreSQL foreign key trigger enforcement..." - if action is ForeignKeyAction.ENABLE and strict - else "Managing PostgreSQL foreign key trigger enforcement..." - ): - results = manage_foreign_key_triggers( - engine, - action=action, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - dry_run=dry_run, - strict=strict, - ) - console.print(render_foreign_key_results(results)) - console.print(render_foreign_key_summary(results, dry_run=dry_run)) - console.print(render_foreign_key_note(action, strict=strict)) - except Exception as exc: - _handle_cli_error(exc) - - -@foreign_keys_app.command("disable") -def disable_foreign_keys_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), - dry_run: bool = typer.Option(False, "--dry-run"), -) -> None: - _foreign_key_command( - action=ForeignKeyAction.DISABLE, - strict=False, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - dry_run=dry_run, - ) - - -@foreign_keys_app.command("enable") -def enable_foreign_keys_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - 
vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), - strict: bool = typer.Option(False, "--strict", help="Validate all selected foreign key relationships before enabling trigger enforcement."), - dry_run: bool = typer.Option(False, "--dry-run"), -) -> None: - _foreign_key_command( - action=ForeignKeyAction.ENABLE, - strict=strict, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - dry_run=dry_run, - ) - - -@foreign_keys_app.command("status") -def foreign_key_status_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), -) -> None: - connection_defaults = _resolve_connection_context( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - ) - console.print( - render_command_header( - command_name="foreign-keys status", - engine_schema=connection_defaults.engine_schema, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - mode_label="inspect", - ) - ) - try: - engine = _build_engine( - dotenv=connection_defaults.dotenv, - engine_schema=connection_defaults.engine_schema, - ) - with console.status("Inspecting foreign key trigger status..."): - results = collect_foreign_key_trigger_status( - engine, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - ) - console.print(render_foreign_key_status_results(results)) - console.print(render_foreign_key_status_summary(results)) - except Exception as exc: - _handle_cli_error(exc) - - -@foreign_keys_app.command( - "validate", - help="Validate selected foreign key relationships and report violating constraints.", -) -def foreign_key_validate_command( - dotenv: str | None = typer.Option(None, 
help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), -) -> None: - connection_defaults = _resolve_connection_context( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - ) - console.print( - render_command_header( - command_name="foreign-keys validate", - engine_schema=connection_defaults.engine_schema, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - mode_label="inspect", - ) - ) - try: - engine = _build_engine( - dotenv=connection_defaults.dotenv, - engine_schema=connection_defaults.engine_schema, - ) - with console.status("Validating selected foreign key relationships..."): - report = validate_foreign_key_constraints( - engine, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - ) - console.print(render_foreign_key_validation_results(report.results)) - console.print(render_foreign_key_validation_issues(report.violations)) - console.print(render_foreign_key_validation_summary(report)) - except Exception as exc: - _handle_cli_error(exc) - - -def _index_command( - *, - action: IndexAction, - dotenv: str | None, - engine_schema: str | None, - db_schema: str | None, - vocabulary_included: bool, - dry_run: bool, -) -> None: - connection_defaults = _resolve_connection_context( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - ) - console.print( - render_command_header( - command_name=f"indexes {action.value}", - engine_schema=connection_defaults.engine_schema, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - mode_label="dry-run" if dry_run else "apply", - ) - ) - try: - engine = _build_engine( - dotenv=connection_defaults.dotenv, - engine_schema=connection_defaults.engine_schema, - ) - with 
console.status("Managing metadata-defined indexes..."): - results = manage_indexes( - engine, - action=action, - db_schema=connection_defaults.db_schema, - vocabulary_included=vocabulary_included, - dry_run=dry_run, - ) - console.print(render_index_results(results)) - console.print(render_index_summary(results, dry_run=dry_run)) - console.print(render_index_note(action)) - except Exception as exc: - _handle_cli_error(exc) - - -@indexes_app.command("disable") -def disable_indexes_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), - dry_run: bool = typer.Option(False, "--dry-run"), -) -> None: - _index_command( - action=IndexAction.DISABLE, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - dry_run=dry_run, - ) - - -@indexes_app.command("enable") -def enable_indexes_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), - dry_run: bool = typer.Option(False, "--dry-run"), -) -> None: - _index_command( - action=IndexAction.ENABLE, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - dry_run=dry_run, - ) - - -def _fulltext_command( - *, - action: str, - dotenv: str | None, - engine_schema: str | None, - db_schema: str | None, - dry_run: bool, - regconfig: str | None = None, - create_indexes: bool | None = None, - fastupdate: bool | None = None, - drop_indexes: bool | None = None, -) -> None: - 
connection_defaults = _resolve_connection_context( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - ) - console.print( - render_command_header( - command_name=f"fulltext {action}", - engine_schema=connection_defaults.engine_schema, - db_schema=connection_defaults.db_schema, - vocabulary_included=True, - mode_label="dry-run" if dry_run else "apply", - ) - ) - try: - engine = _build_engine( - dotenv=connection_defaults.dotenv, - engine_schema=connection_defaults.engine_schema, - ) - with console.status("Managing PostgreSQL full-text sidecar columns..."): - if action == "install": - results = install_fulltext_columns( - engine, - db_schema=connection_defaults.db_schema, - create_indexes=True if create_indexes is None else create_indexes, - fastupdate=False if fastupdate is None else fastupdate, - dry_run=dry_run, - ) - elif action == "populate": - results = populate_fulltext_columns( - engine, - db_schema=connection_defaults.db_schema, - regconfig="english" if regconfig is None else regconfig, - dry_run=dry_run, - ) - else: - results = drop_fulltext_columns( - engine, - db_schema=connection_defaults.db_schema, - drop_indexes=True if drop_indexes is None else drop_indexes, - dry_run=dry_run, - ) - console.print(render_fulltext_results(results)) - console.print( - render_fulltext_summary( - results, - action=action, - dry_run=dry_run, - ) - ) - except Exception as exc: - _handle_cli_error(exc) - - -@fulltext_app.command("install") -def install_fulltext_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - create_indexes: bool = typer.Option(True, "--create-indexes/--no-create-indexes", help="Create GIN indexes alongside the tsvector columns."), - fastupdate: bool = typer.Option(False, "--fastupdate/--no-fastupdate", help="Set PostgreSQL GIN 
fastupdate on created indexes."), - dry_run: bool = typer.Option(False, "--dry-run"), -) -> None: - _fulltext_command( - action="install", - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - dry_run=dry_run, - create_indexes=create_indexes, - fastupdate=fastupdate, - ) - - -@fulltext_app.command("populate") -def populate_fulltext_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - regconfig: str = typer.Option("english", help="PostgreSQL text search configuration to use for vector population."), - dry_run: bool = typer.Option(False, "--dry-run"), -) -> None: - _fulltext_command( - action="populate", - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - dry_run=dry_run, - regconfig=regconfig, - ) - - -@fulltext_app.command("drop") -def drop_fulltext_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - drop_indexes: bool = typer.Option(True, "--drop-indexes/--no-drop-indexes", help="Drop managed GIN indexes before dropping the tsvector columns."), - dry_run: bool = typer.Option(False, "--dry-run"), -) -> None: - _fulltext_command( - action="drop", - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - dry_run=dry_run, - drop_indexes=drop_indexes, - ) - - if __name__ == "__main__": main() diff --git a/omop_alchemy/maintenance/backup.py b/omop_alchemy/maintenance/cli_backup.py similarity index 64% rename from omop_alchemy/maintenance/backup.py rename to omop_alchemy/maintenance/cli_backup.py index a277e78..57fea20 100644 --- a/omop_alchemy/maintenance/backup.py +++ b/omop_alchemy/maintenance/cli_backup.py @@ -9,8 
+9,18 @@ import subprocess import sqlalchemy as sa +import typer -from ..backend_support import Dialect, require_backend +from ..backend_support import Dialect, POSTGRESQL_ONLY_HELP, require_backend +from ._cli_utils import build_engine, handle_error, resolve_connection +from .ui import ( + console, + render_backup_result, + render_backup_summary, + render_command_header, + render_restore_result, + render_restore_summary, +) class BackupFormat(StrEnum): @@ -50,22 +60,6 @@ class DatabaseRestoreResult: tool_path: str -def _ensure_backup_supported(engine: sa.Engine) -> None: - require_backend( - engine, - feature="Database backup", - supported_dialects=(Dialect.POSTGRESQL,), - ) - - -def _ensure_restore_supported(engine: sa.Engine) -> None: - require_backend( - engine, - feature="Database restore", - supported_dialects=(Dialect.POSTGRESQL,), - ) - - def _pg_dump_path() -> str: tool_path = shutil.which("pg_dump") if tool_path is None: @@ -216,7 +210,7 @@ def create_database_backup( db_schema: str | None = None, dry_run: bool = False, ) -> DatabaseBackupResult: - _ensure_backup_supported(engine) + require_backend(engine, feature="Database backup", supported_dialects=(Dialect.POSTGRESQL,)) tool_path = _pg_dump_path() resolved_output_path = Path(output_path) if output_path is not None else _default_output_path(format) resolved_output_path = resolved_output_path.expanduser().resolve() @@ -232,18 +226,11 @@ def create_database_backup( if not dry_run: resolved_output_path.parent.mkdir(parents=True, exist_ok=True) try: - subprocess.run( - command, - env=env, - check=True, - capture_output=True, - text=True, - ) + subprocess.run(command, env=env, check=True, capture_output=True, text=True) except subprocess.CalledProcessError as exc: stderr = (exc.stderr or "").strip() raise RuntimeError( - "Database backup failed via `pg_dump`." - + (f" {stderr}" if stderr else "") + "Database backup failed via `pg_dump`." 
+ (f" {stderr}" if stderr else "") ) from exc return DatabaseBackupResult( @@ -271,7 +258,7 @@ def restore_database_backup( db_schema: str | None = None, dry_run: bool = False, ) -> DatabaseRestoreResult: - _ensure_restore_supported(engine) + require_backend(engine, feature="Database restore", supported_dialects=(Dialect.POSTGRESQL,)) resolved_input_path = Path(input_path).expanduser().resolve() if not resolved_input_path.exists(): raise RuntimeError(f"Backup artifact not found: {resolved_input_path}") @@ -287,18 +274,11 @@ def restore_database_backup( if not dry_run: try: - subprocess.run( - command, - env=env, - check=True, - capture_output=True, - text=True, - ) + subprocess.run(command, env=env, check=True, capture_output=True, text=True) except subprocess.CalledProcessError as exc: stderr = (exc.stderr or "").strip() raise RuntimeError( - "Database restore failed." - + (f" {stderr}" if stderr else "") + "Database restore failed." + (f" {stderr}" if stderr else "") ) from exc return DatabaseRestoreResult( @@ -316,3 +296,89 @@ def restore_database_backup( command=tuple(command), tool_path=tool_path, ) + + +app = typer.Typer(rich_markup_mode="rich") + + +@app.command( + "backup-database", + help=f"Create a PostgreSQL dump artifact that can be restored into another environment. {POSTGRESQL_ONLY_HELP}", +) +def backup_database_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Optional schema-limited backup."), + output_path: str | None = typer.Option( + None, + help="Backup artifact path. 
Defaults to a timestamped file in the current directory.", + ), + format: BackupFormat = typer.Option(BackupFormat.CUSTOM, help="Backup format."), + dry_run: bool = typer.Option(False, "--dry-run"), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="backup-database", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=None, + mode_label="dry-run" if dry_run else "apply", + ) + ) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + with console.status("Creating restore-ready PostgreSQL backup..."): + result = create_database_backup( + engine, + output_path=output_path, + format=format, + db_schema=conn.db_schema, + dry_run=dry_run, + ) + console.print(render_backup_result(result)) + console.print(render_backup_summary(result, dry_run=dry_run)) + except Exception as exc: + handle_error(exc) + + +@app.command( + "restore-database", + help=f"Restore a PostgreSQL backup artifact into the configured target database. {POSTGRESQL_ONLY_HELP}", +) +def restore_database_command( + input_path: str = typer.Argument(..., help="Backup artifact path to restore."), + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option( + None, help="Optional schema-limited restore for custom-format dumps." + ), + format: BackupFormat = typer.Option( + ..., help="Restore format. Required: choose `custom` or `plain`." 
+ ), + dry_run: bool = typer.Option(False, "--dry-run"), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="restore-database", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=None, + mode_label="dry-run" if dry_run else "apply", + ) + ) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + with console.status("Restoring PostgreSQL backup artifact..."): + result = restore_database_backup( + engine, + input_path=input_path, + format=format, + db_schema=conn.db_schema, + dry_run=dry_run, + ) + console.print(render_restore_result(result)) + console.print(render_restore_summary(result, dry_run=dry_run)) + except Exception as exc: + handle_error(exc) diff --git a/omop_alchemy/maintenance/defaults.py b/omop_alchemy/maintenance/cli_config.py similarity index 69% rename from omop_alchemy/maintenance/defaults.py rename to omop_alchemy/maintenance/cli_config.py index 46e44a5..595f570 100644 --- a/omop_alchemy/maintenance/defaults.py +++ b/omop_alchemy/maintenance/cli_config.py @@ -6,6 +6,10 @@ from pathlib import Path import tomllib +import typer + +from .ui import console, render_connection_defaults + DEFAULTS_FILENAME = ".omop-maint.toml" DEFAULTS_ENV_VAR = "OMOP_MAINT_DEFAULTS_FILE" @@ -176,3 +180,73 @@ def clear_connection_defaults( save_connection_defaults(updated) return path + + +app = typer.Typer( + help="Manage persisted maintenance CLI connection overrides.", + rich_markup_mode="rich", +) + + +@app.command("show") +def config_show_command() -> None: + """Display current saved connection defaults.""" + defaults = load_connection_defaults() + console.print(render_connection_defaults(defaults, path=str(defaults_path()))) + + +@app.command("set-overrides") +def config_set_overrides_command( + dotenv: str | None = typer.Option(None, help="Override dotenv file to load."), + engine_schema: str 
| None = typer.Option(None, help="Override engine schema selector."), + db_schema: str | None = typer.Option(None, help="Override database schema."), + athena_source: str | None = typer.Option( + None, help="Override path to unzipped Athena vocabulary files." + ), +) -> None: + """Save one or more connection defaults to the project config file.""" + current = load_connection_defaults() + updated = current.with_updates( + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, + athena_source=athena_source, + ) + path = save_connection_defaults(updated) + console.print(render_connection_defaults(updated, path=str(path), title="Saved Overrides")) + + +@app.command("clear-overrides") +def config_clear_overrides_command( + dotenv: bool = typer.Option(False, "--dotenv", help="Clear overridden dotenv."), + engine_schema: bool = typer.Option( + False, "--engine-schema", help="Clear overridden engine schema." + ), + db_schema: bool = typer.Option(False, "--db-schema", help="Clear overridden database schema."), + athena_source: bool = typer.Option( + False, "--athena-source", help="Clear overridden Athena source path." 
+ ), +) -> None: + """Clear one or more saved connection overrides.""" + path = clear_connection_defaults( + clear_dotenv=dotenv, + clear_engine_schema=engine_schema, + clear_db_schema=db_schema, + clear_athena_source=athena_source, + ) + if path is None: + console.print( + render_connection_defaults( + ConnectionDefaults(), + path=str(defaults_path()), + title="Overrides Already Clear", + ) + ) + return + console.print( + render_connection_defaults( + load_connection_defaults(), + path=str(path), + title="Overrides Cleared", + ) + ) diff --git a/omop_alchemy/maintenance/foreign_keys.py b/omop_alchemy/maintenance/cli_foreign_keys.py similarity index 64% rename from omop_alchemy/maintenance/foreign_keys.py rename to omop_alchemy/maintenance/cli_foreign_keys.py index b7b9750..1d8f6fd 100644 --- a/omop_alchemy/maintenance/foreign_keys.py +++ b/omop_alchemy/maintenance/cli_foreign_keys.py @@ -4,14 +4,28 @@ from enum import StrEnum import sqlalchemy as sa +import typer -from ..backend_support import Dialect, require_backend +from ..backend_support import Dialect, POSTGRESQL_ONLY_HELP, require_backend +from ._cli_utils import build_engine, handle_error, resolve_connection from .tables import ( MaintenanceTable, TableCategory, existing_maintenance_tables, qualified_table_name, ) +from .ui import ( + console, + render_command_header, + render_foreign_key_note, + render_foreign_key_results, + render_foreign_key_status_results, + render_foreign_key_status_summary, + render_foreign_key_summary, + render_foreign_key_validation_issues, + render_foreign_key_validation_results, + render_foreign_key_validation_summary, +) class ForeignKeyAction(StrEnum): @@ -80,16 +94,10 @@ class ForeignKeyValidationResult: class ForeignKeyValidationReport: results: tuple[ForeignKeyValidationResult, ...] violations: tuple[ForeignKeyConstraintViolation, ...] 
-def _ensure_postgresql_supported( - engine: sa.Engine, - *, - feature: str, -) -> None: - require_backend( - engine, - feature=feature, - supported_dialects=(Dialect.POSTGRESQL,), - ) + + +def _ensure_postgresql_supported(engine: sa.Engine, *, feature: str) -> None: + require_backend(engine, feature=feature, supported_dialects=(Dialect.POSTGRESQL,)) def _selected_existing_tables( @@ -117,10 +125,7 @@ def collect_foreign_key_targets( db_schema=db_schema, vocabulary_included=vocabulary_included, ) - selected_names = { - table.table_name - for table in selected_tables - } + selected_names = {table.table_name for table in selected_tables} incoming_counts = {name: 0 for name in selected_names} outgoing_counts = {name: 0 for name in selected_names} @@ -240,9 +245,7 @@ def _collect_strict_validation_failures( } -def _strict_failure_detail( - violations: list[ForeignKeyConstraintViolation], -) -> str: +def _strict_failure_detail(violations: list[ForeignKeyConstraintViolation]) -> str: constraint_summary = ", ".join( f"{violation.constraint_name} ({violation.violation_count})" for violation in violations[:3] @@ -258,9 +261,7 @@ def _strict_failure_detail( ) -def _validation_failure_detail( - violations: list[ForeignKeyConstraintViolation], -) -> str: +def _validation_failure_detail(violations: list[ForeignKeyConstraintViolation]) -> str: constraint_summary = ", ".join( f"{violation.constraint_name} ({violation.violation_count})" for violation in violations[:3] @@ -281,10 +282,7 @@ def validate_foreign_key_constraints( db_schema: str | None = None, vocabulary_included: bool = False, ) -> ForeignKeyValidationReport: - _ensure_postgresql_supported( - engine, - feature="Foreign key constraint validation", - ) + _ensure_postgresql_supported(engine, feature="Foreign key constraint validation") targets = collect_foreign_key_targets( engine, @@ -305,10 +303,7 @@ def validate_foreign_key_constraints( for target in targets: violations = validation_failures.get(target.table_name, []) 
violating_constraint_count = len(violations) - violating_row_count = sum( - violation.violation_count - for violation in violations - ) + violating_row_count = sum(violation.violation_count for violation in violations) results.append( ForeignKeyValidationResult( table_name=target.table_name, @@ -330,10 +325,7 @@ def validate_foreign_key_constraints( all_violations.extend(violations) all_violations.sort( - key=lambda violation: ( - violation.source_table_name, - violation.constraint_name, - ) + key=lambda violation: (violation.source_table_name, violation.constraint_name) ) return ForeignKeyValidationReport( results=tuple(results), @@ -350,10 +342,7 @@ def manage_foreign_key_triggers( dry_run: bool = False, strict: bool = False, ) -> list[ForeignKeyManagementResult]: - _ensure_postgresql_supported( - engine, - feature="Foreign key trigger management", - ) + _ensure_postgresql_supported(engine, feature="Foreign key trigger management") targets = collect_foreign_key_targets( engine, @@ -435,10 +424,7 @@ def collect_foreign_key_trigger_status( db_schema: str | None = None, vocabulary_included: bool = False, ) -> list[ForeignKeyStatusResult]: - _ensure_postgresql_supported( - engine, - feature="Foreign key trigger status inspection", - ) + _ensure_postgresql_supported(engine, feature="Foreign key trigger status inspection") targets = collect_foreign_key_targets( engine, @@ -466,10 +452,7 @@ def collect_foreign_key_trigger_status( for target in targets: disabled_count, enabled_count = connection.execute( query, - { - "table_name": target.table_name, - "db_schema": db_schema, - }, + {"table_name": target.table_name, "db_schema": db_schema}, ).one() results.append( @@ -486,3 +469,161 @@ def collect_foreign_key_trigger_status( ) return results + + +# --------------------------------------------------------------------------- +# CLI commands +# --------------------------------------------------------------------------- + +app = typer.Typer( + help=f"Manage PostgreSQL RI 
trigger enforcement for OMOP tables. {POSTGRESQL_ONLY_HELP}", + rich_markup_mode="rich", +) + + +@app.command("disable") +def disable_foreign_keys_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Database schema override."), + vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), + dry_run: bool = typer.Option(False, "--dry-run"), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="foreign-keys disable", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + mode_label="dry-run" if dry_run else "apply", + ) + ) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + with console.status("Managing PostgreSQL foreign key trigger enforcement..."): + results = manage_foreign_key_triggers( + engine, + action=ForeignKeyAction.DISABLE, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + dry_run=dry_run, + strict=False, + ) + console.print(render_foreign_key_results(results)) + console.print(render_foreign_key_summary(results, dry_run=dry_run)) + console.print(render_foreign_key_note(ForeignKeyAction.DISABLE, strict=False)) + except Exception as exc: + handle_error(exc) + + +@app.command("enable") +def enable_foreign_keys_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Database schema override."), + vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), + strict: bool = typer.Option( + False, + "--strict", + help="Validate all selected foreign key relationships before enabling trigger 
enforcement.", + ), + dry_run: bool = typer.Option(False, "--dry-run"), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="foreign-keys enable --strict" if strict else "foreign-keys enable", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + mode_label="dry-run" if dry_run else "apply", + ) + ) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + status_msg = ( + "Validating and enabling PostgreSQL foreign key trigger enforcement..." + if strict + else "Managing PostgreSQL foreign key trigger enforcement..." + ) + with console.status(status_msg): + results = manage_foreign_key_triggers( + engine, + action=ForeignKeyAction.ENABLE, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + dry_run=dry_run, + strict=strict, + ) + console.print(render_foreign_key_results(results)) + console.print(render_foreign_key_summary(results, dry_run=dry_run)) + console.print(render_foreign_key_note(ForeignKeyAction.ENABLE, strict=strict)) + except Exception as exc: + handle_error(exc) + + +@app.command("status") +def foreign_key_status_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Database schema override."), + vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="foreign-keys status", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + mode_label="inspect", + ) + ) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + with 
console.status("Inspecting foreign key trigger status..."): + results = collect_foreign_key_trigger_status( + engine, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + ) + console.print(render_foreign_key_status_results(results)) + console.print(render_foreign_key_status_summary(results)) + except Exception as exc: + handle_error(exc) + + +@app.command( + "validate", + help="Validate selected foreign key relationships and report violating constraints.", +) +def foreign_key_validate_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Database schema override."), + vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="foreign-keys validate", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + mode_label="inspect", + ) + ) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + with console.status("Validating selected foreign key relationships..."): + report = validate_foreign_key_constraints( + engine, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + ) + console.print(render_foreign_key_validation_results(report.results)) + console.print(render_foreign_key_validation_issues(report.violations)) + console.print(render_foreign_key_validation_summary(report)) + except Exception as exc: + handle_error(exc) diff --git a/omop_alchemy/maintenance/cli_fulltext.py b/omop_alchemy/maintenance/cli_fulltext.py new file mode 100644 index 0000000..c87ad53 --- /dev/null +++ b/omop_alchemy/maintenance/cli_fulltext.py @@ -0,0 +1,138 @@ +from __future__ import annotations + +import typer + +from 
..backend_support import POSTGRESQL_ONLY_HELP +from ._cli_utils import build_engine, handle_error, resolve_connection +from ..cdm.handlers.fulltext import ( + drop_fulltext_columns, + install_fulltext_columns, + populate_fulltext_columns, +) +from .ui import ( + console, + render_command_header, + render_fulltext_results, + render_fulltext_summary, +) + +app = typer.Typer( + help=f"Manage PostgreSQL full-text sidecar tsvector columns for OMOP vocabulary tables. {POSTGRESQL_ONLY_HELP}", + rich_markup_mode="rich", +) + + +@app.command("install") +def install_fulltext_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Database schema override."), + create_indexes: bool = typer.Option( + True, + "--create-indexes/--no-create-indexes", + help="Create GIN indexes alongside the tsvector columns.", + ), + fastupdate: bool = typer.Option( + False, + "--fastupdate/--no-fastupdate", + help="Set PostgreSQL GIN fastupdate on created indexes.", + ), + dry_run: bool = typer.Option(False, "--dry-run"), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="fulltext install", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=True, + mode_label="dry-run" if dry_run else "apply", + ) + ) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + with console.status("Managing PostgreSQL full-text sidecar columns..."): + results = install_fulltext_columns( + engine, + db_schema=conn.db_schema, + create_indexes=create_indexes, + fastupdate=fastupdate, + dry_run=dry_run, + ) + console.print(render_fulltext_results(results)) + console.print(render_fulltext_summary(results, action="install", dry_run=dry_run)) + except Exception as exc: + 
handle_error(exc) + + +@app.command("populate") +def populate_fulltext_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Database schema override."), + regconfig: str = typer.Option( + "english", + help="PostgreSQL text search configuration to use for vector population.", + ), + dry_run: bool = typer.Option(False, "--dry-run"), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="fulltext populate", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=True, + mode_label="dry-run" if dry_run else "apply", + ) + ) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + with console.status("Managing PostgreSQL full-text sidecar columns..."): + results = populate_fulltext_columns( + engine, + db_schema=conn.db_schema, + regconfig=regconfig, + dry_run=dry_run, + ) + console.print(render_fulltext_results(results)) + console.print(render_fulltext_summary(results, action="populate", dry_run=dry_run)) + except Exception as exc: + handle_error(exc) + + +@app.command("drop") +def drop_fulltext_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Database schema override."), + drop_indexes: bool = typer.Option( + True, + "--drop-indexes/--no-drop-indexes", + help="Drop managed GIN indexes before dropping the tsvector columns.", + ), + dry_run: bool = typer.Option(False, "--dry-run"), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="fulltext drop", + 
engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=True, + mode_label="dry-run" if dry_run else "apply", + ) + ) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + with console.status("Managing PostgreSQL full-text sidecar columns..."): + results = drop_fulltext_columns( + engine, + db_schema=conn.db_schema, + drop_indexes=drop_indexes, + dry_run=dry_run, + ) + console.print(render_fulltext_results(results)) + console.print(render_fulltext_summary(results, action="drop", dry_run=dry_run)) + except Exception as exc: + handle_error(exc) diff --git a/omop_alchemy/maintenance/indexes.py b/omop_alchemy/maintenance/cli_indexes.py similarity index 72% rename from omop_alchemy/maintenance/indexes.py rename to omop_alchemy/maintenance/cli_indexes.py index 3c7e815..6e8ac81 100644 --- a/omop_alchemy/maintenance/indexes.py +++ b/omop_alchemy/maintenance/cli_indexes.py @@ -4,10 +4,12 @@ from enum import StrEnum import sqlalchemy as sa +import typer from omop_alchemy.cdm.base.indexing import OMOP_CLUSTER_INDEX_INFO_KEY from ..backend_support import Dialect, backend_label, supports_backend +from ._cli_utils import build_engine, handle_error, resolve_connection from .tables import ( MaintenanceTable, TableCategory, @@ -15,6 +17,13 @@ schema_adjusted_metadata, select_omop_tables, ) +from .ui import ( + console, + render_command_header, + render_index_note, + render_index_results, + render_index_summary, +) class IndexAction(StrEnum): @@ -48,6 +57,8 @@ class IndexManagementResult: action: IndexAction status: str detail: str + + def _schema_metadata_indexes( tables: list[MaintenanceTable], db_schema: str | None, @@ -60,10 +71,7 @@ def _schema_metadata_indexes( indexes[(table.table_name, str(index.name))] = index return indexes - _, copied_tables = schema_adjusted_metadata( - tables, - db_schema=db_schema, - ) + _, copied_tables = schema_adjusted_metadata(tables, db_schema=db_schema) for table_name, table in 
copied_tables.items(): for index in table.indexes: indexes[(table_name, str(index.name))] = index @@ -95,6 +103,8 @@ def _cluster_column_names( if str(index.name) == cluster_index_name: return tuple(column.name for column in index.columns) return table.primary_key_names + + def collect_index_targets( engine: sa.Engine, *, @@ -145,10 +155,7 @@ def manage_indexes( inspector = sa.inspect(engine) selected_tables = select_omop_tables(vocabulary_included=vocabulary_included) metadata_indexes = _schema_metadata_indexes(selected_tables, db_schema) - clustering_supported = supports_backend( - engine, - supported_dialects=(Dialect.POSTGRESQL,), - ) + clustering_supported = supports_backend(engine, supported_dialects=(Dialect.POSTGRESQL,)) results: list[IndexManagementResult] = [] @@ -262,3 +269,79 @@ def manage_indexes( ) return results + + +app = typer.Typer( + help="Manage ORM-defined secondary indexes.", + rich_markup_mode="rich", +) + + +@app.command("disable") +def disable_indexes_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Database schema override."), + vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), + dry_run: bool = typer.Option(False, "--dry-run"), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="indexes disable", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + mode_label="dry-run" if dry_run else "apply", + ) + ) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + with console.status("Managing metadata-defined indexes..."): + results = manage_indexes( + engine, + action=IndexAction.DISABLE, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, 
+ dry_run=dry_run, + ) + console.print(render_index_results(results)) + console.print(render_index_summary(results, dry_run=dry_run)) + console.print(render_index_note(IndexAction.DISABLE)) + except Exception as exc: + handle_error(exc) + + +@app.command("enable") +def enable_indexes_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Database schema override."), + vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), + dry_run: bool = typer.Option(False, "--dry-run"), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="indexes enable", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + mode_label="dry-run" if dry_run else "apply", + ) + ) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + with console.status("Managing metadata-defined indexes..."): + results = manage_indexes( + engine, + action=IndexAction.ENABLE, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + dry_run=dry_run, + ) + console.print(render_index_results(results)) + console.print(render_index_summary(results, dry_run=dry_run)) + console.print(render_index_note(IndexAction.ENABLE)) + except Exception as exc: + handle_error(exc) diff --git a/omop_alchemy/maintenance/cli_schema.py b/omop_alchemy/maintenance/cli_schema.py new file mode 100644 index 0000000..63da74b --- /dev/null +++ b/omop_alchemy/maintenance/cli_schema.py @@ -0,0 +1,1536 @@ +from __future__ import annotations + +from dataclasses import dataclass +import importlib.metadata +import importlib.util +import os +import shutil + +import sqlalchemy as sa +from sqlalchemy.exc import SQLAlchemyError +import typer + +from omop_alchemy 
import create_engine_with_dependencies, get_engine_name, load_environment + +from ..backend_support import Dialect, backend_label +from ._cli_utils import build_engine, handle_error, resolve_connection +from .cli_config import defaults_path +from .cli_foreign_keys import ( + ForeignKeyStatusResult, + ForeignKeyValidationReport, + collect_foreign_key_trigger_status, + validate_foreign_key_constraints, +) +from .cli_indexes import _cluster_target_name +from .tables import ( + MaintenanceTable, + TableCategory, + TableScope, + collect_maintenance_tables, + missing_maintenance_tables, + qualified_table_name, + schema_adjusted_metadata, + select_maintenance_tables, + select_omop_tables, +) +from .ui import ( + console, + render_command_header, + render_data_summary_results, + render_data_summary_summary, + render_doctor_checks, + render_doctor_recommendations, + render_doctor_summary, + render_foreign_key_validation_issues, + render_info_command_support, + render_info_database, + render_info_dependencies, + render_info_environment, + render_info_summary, + render_reconciliation_issues, + render_reconciliation_results, + render_reconciliation_summary, + render_table_creation_results, + render_table_creation_summary, +) + + +# --------------------------------------------------------------------------- +# info +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class DependencyStatus: + name: str + installed: bool + version: str | None + + +@dataclass(frozen=True) +class CommandSupport: + command_name: str + requirement: str + status: str + detail: str + + +@dataclass(frozen=True) +class MaintenanceInfo: + package_version: str + cli_path: str | None + pg_dump_path: str | None + pg_restore_path: str | None + psql_path: str | None + defaults_file: str + defaults_exists: bool + dotenv_path: str | None + dotenv_exists: bool | None + engine_schema: str | None + db_schema: str | None + engine_url: str | None + backend: str | 
None + engine_created: bool + engine_error: str | None + connection_ready: bool + connection_error: str | None + managed_table_count: int + existing_table_count: int | None + missing_table_count: int | None + vocabulary_included: bool + dependencies: tuple[DependencyStatus, ...] + command_support: tuple[CommandSupport, ...] + + +def _package_version() -> str: + return importlib.metadata.version("omop-alchemy") + + +def _dependency_status(distribution_name: str, module_name: str) -> DependencyStatus: + installed = importlib.util.find_spec(module_name) is not None + version: str | None = None + if installed: + try: + version = importlib.metadata.version(distribution_name) + except importlib.metadata.PackageNotFoundError: + version = None + return DependencyStatus(name=distribution_name, installed=installed, version=version) + + +def _external_dependency_status(name: str, executable_name: str) -> DependencyStatus: + return DependencyStatus( + name=name, + installed=shutil.which(executable_name) is not None, + version=None, + ) + + +def _command_support_for_unavailable_engine(detail: str) -> tuple[CommandSupport, ...]: + blocked = "blocked" + return ( + CommandSupport("doctor", "Any SQLAlchemy backend", blocked, detail), + CommandSupport("data-summary", "Any SQLAlchemy backend", blocked, detail), + CommandSupport("analyze-tables", "PostgreSQL/SQLite", blocked, detail), + CommandSupport("create-missing-tables", "Any SQLAlchemy backend", blocked, detail), + CommandSupport("indexes disable", "Any SQLAlchemy backend", blocked, detail), + CommandSupport("indexes enable", "Any SQLAlchemy backend", blocked, detail), + CommandSupport("reconcile-schema", "Any SQLAlchemy backend", blocked, detail), + CommandSupport("load-vocab-source", "SQLite/PostgreSQL + Athena CSV source", blocked, detail), + CommandSupport("backup-database", "PostgreSQL + pg_dump", blocked, detail), + CommandSupport("restore-database", "PostgreSQL + pg_restore/psql", blocked, detail), + 
CommandSupport("fulltext install", "PostgreSQL", blocked, detail), + CommandSupport("fulltext populate", "PostgreSQL", blocked, detail), + CommandSupport("fulltext drop", "PostgreSQL", blocked, detail), + CommandSupport("reset-sequences", "PostgreSQL", blocked, detail), + CommandSupport("truncate-tables", "PostgreSQL", blocked, detail), + CommandSupport("foreign-keys disable", "PostgreSQL", blocked, detail), + CommandSupport("foreign-keys enable", "PostgreSQL", blocked, detail), + CommandSupport("foreign-keys enable --strict", "PostgreSQL", blocked, detail), + CommandSupport("foreign-keys status", "PostgreSQL", blocked, detail), + CommandSupport("foreign-keys validate", "PostgreSQL", blocked, detail), + ) + + +def _command_support_for_backend( + *, + backend: str, + engine_created: bool, + engine_error: str | None, + connection_ready: bool, + connection_error: str | None, + pg_dump_path: str | None, + pg_restore_path: str | None, + psql_path: str | None, +) -> tuple[CommandSupport, ...]: + current_backend = backend_label(backend) + if not engine_created: + blocked_detail = ( + f"Backend resolved to {current_backend}, but the engine could not be created: {engine_error}" + if engine_error + else f"Backend resolved to {current_backend}, but the engine could not be created." + ) + else: + blocked_detail = ( + f"Backend resolved to {current_backend}, but the connection test failed: {connection_error}" + if connection_error + else f"Backend resolved to {current_backend}, but the connection test failed." + ) + portable_status = "ready" if connection_ready else "blocked" + portable_detail = ( + f"Ready on {current_backend}." if connection_ready else blocked_detail + ) + + if backend == Dialect.POSTGRESQL: + analyze_status = portable_status + analyze_detail = ( + "Ready on PostgreSQL; ANALYZE and VACUUM ANALYZE are both supported." 
+ if connection_ready + else blocked_detail + ) + enable_indexes_status = portable_status + enable_indexes_detail = ( + "Ready on PostgreSQL; index DDL and clustering metadata are both supported." + if connection_ready + else blocked_detail + ) + postgresql_status = portable_status + postgresql_detail = "Ready on PostgreSQL." if connection_ready else blocked_detail + vocab_load_status = portable_status + vocab_load_detail = ( + "Ready on PostgreSQL when an Athena source path is configured." + if connection_ready + else blocked_detail + ) + elif backend == "sqlite": + analyze_status = "limited" if connection_ready else "blocked" + analyze_detail = ( + "Ready on SQLite; ANALYZE is supported, but `--vacuum` is unavailable." + if connection_ready + else blocked_detail + ) + enable_indexes_status = "limited" if connection_ready else "blocked" + enable_indexes_detail = ( + "Ready on SQLite; index DDL is supported, but clustering metadata will be skipped." + if connection_ready + else blocked_detail + ) + postgresql_status = "unsupported" if connection_ready else "blocked" + postgresql_detail = ( + f"Requires PostgreSQL. Current backend: {current_backend}." + if connection_ready + else blocked_detail + ) + vocab_load_status = portable_status + vocab_load_detail = ( + "Ready on SQLite when an Athena source path is configured." + if connection_ready + else blocked_detail + ) + else: + analyze_status = "unsupported" if connection_ready else "blocked" + analyze_detail = ( + f"Requires PostgreSQL or SQLite. Current backend: {current_backend}." + if connection_ready + else blocked_detail + ) + enable_indexes_status = "limited" if connection_ready else "blocked" + enable_indexes_detail = ( + f"Ready on {current_backend}; index DDL is supported, but clustering metadata will be skipped." + if connection_ready + else blocked_detail + ) + postgresql_status = "unsupported" if connection_ready else "blocked" + postgresql_detail = ( + f"Requires PostgreSQL. 
Current backend: {current_backend}." + if connection_ready + else blocked_detail + ) + vocab_load_status = "unsupported" if connection_ready else "blocked" + vocab_load_detail = ( + f"Requires SQLite or PostgreSQL plus a configured Athena source path. Current backend: {current_backend}." + if connection_ready + else blocked_detail + ) + + return ( + CommandSupport("doctor", "Any SQLAlchemy backend", portable_status, portable_detail), + CommandSupport("data-summary", "Any SQLAlchemy backend", portable_status, portable_detail), + CommandSupport("analyze-tables", "PostgreSQL/SQLite", analyze_status, analyze_detail), + CommandSupport("create-missing-tables", "Any SQLAlchemy backend", portable_status, portable_detail), + CommandSupport("indexes disable", "Any SQLAlchemy backend", portable_status, portable_detail), + CommandSupport("indexes enable", "Any SQLAlchemy backend", enable_indexes_status, enable_indexes_detail), + CommandSupport("reconcile-schema", "Any SQLAlchemy backend", portable_status, portable_detail), + CommandSupport("load-vocab-source", "SQLite/PostgreSQL + Athena CSV source", vocab_load_status, vocab_load_detail), + CommandSupport( + "backup-database", + "PostgreSQL + pg_dump", + ( + "ready" + if connection_ready and backend == Dialect.POSTGRESQL and pg_dump_path is not None + else "blocked" + if backend == Dialect.POSTGRESQL + else "unsupported" + if connection_ready + else "blocked" + ), + ( + "Ready on PostgreSQL; `pg_dump` is available." + if connection_ready and backend == Dialect.POSTGRESQL and pg_dump_path is not None + else "PostgreSQL is configured, but `pg_dump` is not on PATH." + if connection_ready and backend == Dialect.POSTGRESQL + else f"Requires PostgreSQL. Current backend: {current_backend}." 
+ if connection_ready + else blocked_detail + ), + ), + CommandSupport( + "restore-database", + "PostgreSQL + pg_restore/psql", + ( + "ready" + if connection_ready and backend == Dialect.POSTGRESQL and (pg_restore_path is not None or psql_path is not None) + else "blocked" + if backend == Dialect.POSTGRESQL + else "unsupported" + if connection_ready + else "blocked" + ), + ( + "Ready on PostgreSQL; restore client tooling is available." + if connection_ready and backend == Dialect.POSTGRESQL and (pg_restore_path is not None or psql_path is not None) + else "PostgreSQL is configured, but neither `pg_restore` nor `psql` is on PATH." + if connection_ready and backend == Dialect.POSTGRESQL + else f"Requires PostgreSQL. Current backend: {current_backend}." + if connection_ready + else blocked_detail + ), + ), + CommandSupport("fulltext install", "PostgreSQL", postgresql_status, postgresql_detail), + CommandSupport("fulltext populate", "PostgreSQL", postgresql_status, postgresql_detail), + CommandSupport("fulltext drop", "PostgreSQL", postgresql_status, postgresql_detail), + CommandSupport("reset-sequences", "PostgreSQL", postgresql_status, postgresql_detail), + CommandSupport("truncate-tables", "PostgreSQL", postgresql_status, postgresql_detail), + CommandSupport("foreign-keys disable", "PostgreSQL", postgresql_status, postgresql_detail), + CommandSupport("foreign-keys enable", "PostgreSQL", postgresql_status, postgresql_detail), + CommandSupport("foreign-keys enable --strict", "PostgreSQL", postgresql_status, postgresql_detail), + CommandSupport("foreign-keys status", "PostgreSQL", postgresql_status, postgresql_detail), + CommandSupport("foreign-keys validate", "PostgreSQL", postgresql_status, postgresql_detail), + ) + + +def collect_maintenance_info( + *, + engine_schema: str | None = None, + db_schema: str | None = None, + dotenv: str | None = None, + vocabulary_included: bool = True, +) -> MaintenanceInfo: + load_environment(dotenv or "") + pg_dump_path = 
shutil.which("pg_dump") + pg_restore_path = shutil.which("pg_restore") + psql_path = shutil.which("psql") + defaults_file = defaults_path() + dependencies = ( + _dependency_status("sqlalchemy", "sqlalchemy"), + _dependency_status("typer", "typer"), + _dependency_status("rich", "rich"), + _dependency_status("psycopg", "psycopg"), + _dependency_status("psycopg2-binary", "psycopg2"), + _external_dependency_status("pg_dump", "pg_dump"), + _external_dependency_status("pg_restore", "pg_restore"), + _external_dependency_status("psql", "psql"), + ) + managed_tables = select_maintenance_tables( + exclude_categories=(() if vocabulary_included else (TableCategory.VOCABULARY,)) + ) + cli_path = shutil.which("omop-alchemy") + dotenv_exists = None if dotenv is None else os.path.exists(dotenv) + + engine_name: str | None = None + engine_url: str | None = None + backend: str | None = None + engine_created = False + engine_error: str | None = None + connection_ready = False + connection_error: str | None = None + existing_table_count: int | None = None + missing_table_count: int | None = None + + try: + engine_name = get_engine_name(engine_schema) + url = sa.engine.make_url(engine_name) + engine_url = url.render_as_string(hide_password=True) + backend = url.get_backend_name() + except RuntimeError as exc: + engine_error = str(exc) + except Exception as exc: + engine_error = f"Could not resolve engine configuration: {exc}" + + if engine_name is not None: + try: + engine = create_engine_with_dependencies(engine_name, future=True) + engine_created = True + except RuntimeError as exc: + engine_error = str(exc) + except Exception as exc: + engine_error = f"Could not create engine: {exc}" + else: + try: + with engine.connect() as connection: + connection.exec_driver_sql("SELECT 1") + connection_ready = True + missing_tables = collect_missing_tables( + engine, + db_schema=db_schema, + vocabulary_included=vocabulary_included, + ) + missing_table_count = len(missing_tables) + 
existing_table_count = len(managed_tables) - missing_table_count + except SQLAlchemyError as exc: + connection_error = f"{exc.__class__.__name__}: {exc}" + except Exception as exc: + connection_error = str(exc) + finally: + engine.dispose() + + if backend is None: + command_support = _command_support_for_unavailable_engine( + engine_error or "No engine configuration could be resolved." + ) + else: + command_support = _command_support_for_backend( + backend=backend, + engine_created=engine_created, + engine_error=engine_error, + connection_ready=connection_ready, + connection_error=connection_error, + pg_dump_path=pg_dump_path, + pg_restore_path=pg_restore_path, + psql_path=psql_path, + ) + + return MaintenanceInfo( + package_version=_package_version(), + cli_path=cli_path, + pg_dump_path=pg_dump_path, + pg_restore_path=pg_restore_path, + psql_path=psql_path, + defaults_file=str(defaults_file), + defaults_exists=defaults_file.exists(), + dotenv_path=dotenv, + dotenv_exists=dotenv_exists, + engine_schema=engine_schema, + db_schema=db_schema, + engine_url=engine_url, + backend=backend, + engine_created=engine_created, + engine_error=engine_error, + connection_ready=connection_ready, + connection_error=connection_error, + managed_table_count=len(managed_tables), + existing_table_count=existing_table_count, + missing_table_count=missing_table_count, + vocabulary_included=vocabulary_included, + dependencies=dependencies, + command_support=command_support, + ) + + +# --------------------------------------------------------------------------- +# doctor +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class DoctorCheck: + name: str + status: str + detail: str + + +@dataclass(frozen=True) +class DoctorRecommendation: + status: str + summary: str + action: str | None + + +@dataclass(frozen=True) +class DoctorReport: + info: MaintenanceInfo + checks: tuple[DoctorCheck, ...] 
+ recommendations: tuple[DoctorRecommendation, ...] + reconciliation: SchemaReconciliationReport | None + foreign_key_status: tuple[ForeignKeyStatusResult, ...] | None + foreign_key_validation: ForeignKeyValidationReport | None + + +def _build_recommendations( + *, + info: MaintenanceInfo, + reconciliation: SchemaReconciliationReport | None, + foreign_key_status: tuple[ForeignKeyStatusResult, ...] | None, + foreign_key_validation: ForeignKeyValidationReport | None, +) -> tuple[DoctorRecommendation, ...]: + recommendations: list[DoctorRecommendation] = [] + + if not info.connection_ready: + recommendations.append( + DoctorRecommendation( + status="failed", + summary="Database connection is not ready for maintenance operations.", + action="Check the engine configuration, backend driver, and target database reachability.", + ) + ) + return tuple(recommendations) + + if info.missing_table_count: + recommendations.append( + DoctorRecommendation( + status="warning", + summary=f"{info.missing_table_count} ORM-managed table(s) are missing from the target database.", + action="Run `omop-alchemy create-missing-tables` before attempting bulk operations.", + ) + ) + + if reconciliation is not None and reconciliation.issues: + recommendations.append( + DoctorRecommendation( + status="warning", + summary=f"Schema reconciliation found {len(reconciliation.issues)} difference(s) against ORM metadata.", + action="Review `omop-alchemy reconcile-schema` output before continuing with ETL or maintenance work.", + ) + ) + + if foreign_key_status is not None and any( + item.disabled_trigger_count > 0 for item in foreign_key_status + ): + recommendations.append( + DoctorRecommendation( + status="warning", + summary="Some PostgreSQL RI triggers are currently disabled.", + action="If loading is complete, run `omop-alchemy foreign-keys validate` and then `omop-alchemy foreign-keys enable --strict`.", + ) + ) + + if ( + foreign_key_validation is not None + and any(result.status == "failed" for 
result in foreign_key_validation.results) + ): + recommendations.append( + DoctorRecommendation( + status="failed", + summary="Foreign key validation found violating rows.", + action="Fix the reported rows, then rerun `omop-alchemy foreign-keys enable --strict`.", + ) + ) + + if info.backend == Dialect.POSTGRESQL and info.pg_dump_path is None: + recommendations.append( + DoctorRecommendation( + status="warning", + summary="`pg_dump` is not on PATH, so backup-database is unavailable from this machine.", + action="Install PostgreSQL client tools on the machine running `omop-alchemy`.", + ) + ) + + if ( + info.backend == Dialect.POSTGRESQL + and info.pg_restore_path is None + and info.psql_path is None + ): + recommendations.append( + DoctorRecommendation( + status="warning", + summary="Neither `pg_restore` nor `psql` is on PATH, so restore-database is unavailable from this machine.", + action="Install PostgreSQL client tools on the machine running `omop-alchemy`.", + ) + ) + + if not recommendations: + recommendations.append( + DoctorRecommendation( + status="passed", + summary="No obvious maintenance blockers were detected.", + action=None, + ) + ) + + return tuple(recommendations) + + +def collect_doctor_report( + *, + engine_schema: str | None = None, + db_schema: str | None = None, + dotenv: str | None = None, + vocabulary_included: bool = True, + deep: bool = False, +) -> DoctorReport: + load_environment(dotenv or "") + info = collect_maintenance_info( + engine_schema=engine_schema, + db_schema=db_schema, + dotenv=dotenv, + vocabulary_included=vocabulary_included, + ) + + checks = [ + DoctorCheck( + name="connection", + status="passed" if info.connection_ready else "failed", + detail=( + "Target database connection succeeded." + if info.connection_ready + else info.connection_error or info.engine_error or "Connection could not be established." 
+ ), + ) + ] + + reconciliation: SchemaReconciliationReport | None = None + foreign_key_status: tuple[ForeignKeyStatusResult, ...] | None = None + foreign_key_validation: ForeignKeyValidationReport | None = None + + if info.connection_ready: + engine = create_engine_with_dependencies(get_engine_name(engine_schema), future=True) + try: + missing_table_count = info.missing_table_count or 0 + checks.append( + DoctorCheck( + name="managed tables", + status="passed" if missing_table_count == 0 else "warning", + detail=( + "All selected ORM-managed tables exist." + if missing_table_count == 0 + else f"{missing_table_count} selected table(s) are missing." + ), + ) + ) + + if deep: + reconciliation = reconcile_schema( + engine, + db_schema=db_schema, + vocabulary_included=vocabulary_included, + ) + checks.append( + DoctorCheck( + name="schema drift", + status="passed" if not reconciliation.issues else "warning", + detail=( + "ORM metadata matches the target database." + if not reconciliation.issues + else f"{len(reconciliation.issues)} difference(s) detected." + ), + ) + ) + else: + checks.append( + DoctorCheck( + name="schema drift", + status="skipped", + detail="Run `omop-alchemy doctor --deep` to reconcile ORM metadata against the target database.", + ) + ) + + if info.backend == Dialect.POSTGRESQL: + foreign_key_status = tuple( + collect_foreign_key_trigger_status( + engine, + db_schema=db_schema, + vocabulary_included=vocabulary_included, + ) + ) + disabled_tables = sum( + item.disabled_trigger_count > 0 for item in foreign_key_status + ) + checks.append( + DoctorCheck( + name="foreign keys", + status="passed" if disabled_tables == 0 else "warning", + detail=( + "All inspected RI triggers are enabled." + if disabled_tables == 0 + else f"{disabled_tables} table(s) still have disabled RI triggers." 
+ ), + ) + ) + + if deep: + foreign_key_validation = validate_foreign_key_constraints( + engine, + db_schema=db_schema, + vocabulary_included=vocabulary_included, + ) + violating_tables = sum( + result.status == "failed" for result in foreign_key_validation.results + ) + checks.append( + DoctorCheck( + name="foreign key validation", + status="passed" if violating_tables == 0 else "failed", + detail=( + "All selected foreign key relationships passed validation." + if violating_tables == 0 + else f"{violating_tables} table(s) have violating foreign key rows." + ), + ) + ) + else: + checks.append( + DoctorCheck( + name="foreign key validation", + status="skipped", + detail="Run `omop-alchemy doctor --deep` to validate selected foreign key relationships.", + ) + ) + else: + checks.append( + DoctorCheck( + name="foreign keys", + status="skipped", + detail="Foreign key trigger inspection is only available on PostgreSQL.", + ) + ) + checks.append( + DoctorCheck( + name="foreign key validation", + status="skipped", + detail="Foreign key validation is only available on PostgreSQL.", + ) + ) + finally: + engine.dispose() + else: + checks.extend( + ( + DoctorCheck( + name="managed tables", + status="skipped", + detail="Skipped because the database connection is not ready.", + ), + DoctorCheck( + name="foreign keys", + status="skipped", + detail="Skipped because the database connection is not ready.", + ), + DoctorCheck( + name="schema drift", + status="skipped", + detail="Skipped because the database connection is not ready.", + ), + DoctorCheck( + name="foreign key validation", + status="skipped", + detail="Skipped because the database connection is not ready.", + ), + ) + ) + + if info.backend == Dialect.POSTGRESQL: + backup_tools_ready = info.pg_dump_path is not None and ( + info.pg_restore_path is not None or info.psql_path is not None + ) + checks.append( + DoctorCheck( + name="backup tooling", + status="passed" if backup_tools_ready else "warning", + detail=( + 
"PostgreSQL backup and restore client tools are available." + if backup_tools_ready + else "PostgreSQL client tools are incomplete on this machine." + ), + ) + ) + else: + checks.append( + DoctorCheck( + name="backup tooling", + status="skipped", + detail="Backup and restore tooling checks are only relevant for PostgreSQL targets.", + ) + ) + + return DoctorReport( + info=info, + checks=tuple(checks), + recommendations=_build_recommendations( + info=info, + reconciliation=reconciliation, + foreign_key_status=foreign_key_status, + foreign_key_validation=foreign_key_validation, + ), + reconciliation=reconciliation, + foreign_key_status=foreign_key_status, + foreign_key_validation=foreign_key_validation, + ) + + +# --------------------------------------------------------------------------- +# reconcile_schema +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class ReconciliationIssue: + table_name: str + category: TableCategory + component: str + object_name: str + status: str + expected: str | None + actual: str | None + detail: str + + +@dataclass(frozen=True) +class TableReconciliationResult: + table_name: str + category: TableCategory + model_name: str + model_module: str + status: str + issue_count: int + detail: str + + +@dataclass(frozen=True) +class SchemaReconciliationReport: + backend: str + table_results: tuple[TableReconciliationResult, ...] + issues: tuple[ReconciliationIssue, ...] 
+ + +def _schema_table(table: sa.Table, db_schema: str | None) -> sa.Table: + if db_schema is None: + return table + + metadata = sa.MetaData() + return table.to_metadata( + metadata, + schema=db_schema, + referred_schema_fn=( + lambda _table, to_schema, _constraint, _referred_schema: to_schema + ), + ) + + +def _normalized_type(type_: sa.types.TypeEngine[object], dialect: sa.engine.Dialect) -> str: + return type_.compile(dialect=dialect).lower().replace(" ", "") + + +def _expected_foreign_keys( + table: sa.Table, +) -> dict[tuple[tuple[str, ...], str, tuple[str, ...]], sa.ForeignKeyConstraint]: + expected: dict[tuple[tuple[str, ...], str, tuple[str, ...]], sa.ForeignKeyConstraint] = {} + for constraint in table.foreign_key_constraints: + constrained_columns = tuple(element.parent.name for element in constraint.elements) + referred_columns = tuple(element.column.name for element in constraint.elements) + referred_table = constraint.referred_table.name + expected[(constrained_columns, referred_table, referred_columns)] = constraint + return expected + + +def _actual_foreign_keys( + inspector: sa.Inspector, + table_name: str, + db_schema: str | None, +) -> dict[tuple[tuple[str, ...], str, tuple[str, ...]], dict[str, object]]: + actual: dict[tuple[tuple[str, ...], str, tuple[str, ...]], dict[str, object]] = {} + for foreign_key in inspector.get_foreign_keys(table_name, schema=db_schema): + constrained_columns = tuple(foreign_key.get("constrained_columns") or []) + referred_columns = tuple(foreign_key.get("referred_columns") or []) + referred_table = str(foreign_key.get("referred_table")) + actual[(constrained_columns, referred_table, referred_columns)] = foreign_key + return actual + + +def _expected_indexes(table: sa.Table) -> dict[str, sa.Index]: + return { + str(index.name): index + for index in table.indexes + if index.name is not None + } + + +def _actual_indexes( + inspector: sa.Inspector, + table_name: str, + db_schema: str | None, +) -> dict[str, dict[str, 
object]]: + return { + str(index["name"]): index + for index in inspector.get_indexes(table_name, schema=db_schema) + if index.get("name") is not None + } + + +def _actual_cluster_index_name( + connection: sa.Connection, + *, + table_name: str, + db_schema: str | None, +) -> str | None: + result = connection.execute( + sa.text( + """ + SELECT i.relname + FROM pg_index ix + JOIN pg_class t ON t.oid = ix.indrelid + JOIN pg_class i ON i.oid = ix.indexrelid + JOIN pg_namespace n ON n.oid = t.relnamespace + WHERE ix.indisclustered + AND t.relname = :table_name + AND (:db_schema IS NULL OR n.nspname = :db_schema) + """ + ), + {"table_name": table_name, "db_schema": db_schema}, + ).scalar_one_or_none() + return str(result) if result is not None else None + + +def reconcile_schema( + engine: sa.Engine, + *, + db_schema: str | None = None, + vocabulary_included: bool = False, +) -> SchemaReconciliationReport: + excluded_categories: tuple[TableCategory, ...] = ( + () if vocabulary_included else (TableCategory.VOCABULARY,) + ) + selected_tables = select_maintenance_tables(exclude_categories=excluded_categories) + inspector = sa.inspect(engine) + all_issues: list[ReconciliationIssue] = [] + table_results: list[TableReconciliationResult] = [] + + with engine.connect() as connection: + for maintenance_table in selected_tables: + table_issues: list[ReconciliationIssue] = [] + exists = inspector.has_table(maintenance_table.table_name, schema=db_schema) + if not exists: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="table", + object_name=maintenance_table.table_name, + status="missing", + expected="present", + actual="absent", + detail="ORM-managed table is missing from the target database.", + ) + ) + table_results.append( + TableReconciliationResult( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + model_name=maintenance_table.model_name, + 
model_module=maintenance_table.model_module, + status="missing", + issue_count=1, + detail="Table is missing from the target database.", + ) + ) + all_issues.extend(table_issues) + continue + + expected_table = _schema_table(maintenance_table.table, db_schema) + expected_columns = {column.name: column for column in expected_table.columns} + actual_columns = { + str(column["name"]): column + for column in inspector.get_columns(maintenance_table.table_name, schema=db_schema) + } + actual_pk_names = tuple( + inspector.get_pk_constraint(maintenance_table.table_name, schema=db_schema).get("constrained_columns") or [] + ) + expected_pk_names = tuple(column.name for column in expected_table.primary_key.columns) + + for column_name, column in expected_columns.items(): + if column_name not in actual_columns: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="column", + object_name=column_name, + status="missing", + expected=_normalized_type(column.type, engine.dialect), + actual=None, + detail="Column is defined in ORM metadata but missing from the database.", + ) + ) + + for column_name, column in actual_columns.items(): + if column_name not in expected_columns: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="column", + object_name=column_name, + status="unexpected", + expected=None, + actual=_normalized_type(column["type"], engine.dialect), + detail="Column exists in the database but is not defined in ORM metadata.", + ) + ) + + for column_name in sorted(set(expected_columns).intersection(actual_columns)): + expected_column = expected_columns[column_name] + actual_column = actual_columns[column_name] + expected_type = _normalized_type(expected_column.type, engine.dialect) + actual_type = _normalized_type(actual_column["type"], engine.dialect) + if expected_type != actual_type: + 
table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="column", + object_name=column_name, + status="mismatch", + expected=expected_type, + actual=actual_type, + detail="Column type differs from ORM metadata.", + ) + ) + + expected_nullable = False if column_name in expected_pk_names else bool(expected_column.nullable) + actual_nullable = False if column_name in actual_pk_names else bool(actual_column["nullable"]) + if expected_nullable != actual_nullable: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="column", + object_name=column_name, + status="mismatch", + expected="nullable" if expected_nullable else "not nullable", + actual="nullable" if actual_nullable else "not nullable", + detail="Column nullability differs from ORM metadata.", + ) + ) + + if expected_pk_names != actual_pk_names: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="primary_key", + object_name=maintenance_table.table_name, + status="mismatch", + expected=", ".join(expected_pk_names), + actual=", ".join(actual_pk_names) if actual_pk_names else None, + detail="Primary key columns differ from ORM metadata.", + ) + ) + + expected_fks = _expected_foreign_keys(expected_table) + actual_fks = _actual_foreign_keys(inspector, maintenance_table.table_name, db_schema) + + for signature, constraint in expected_fks.items(): + if signature not in actual_fks: + constrained_columns, referred_table, referred_columns = signature + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="foreign_key", + object_name=constraint.name or ",".join(constrained_columns), + status="missing", + expected=f"{','.join(constrained_columns)} -> 
{referred_table}({','.join(referred_columns)})", + actual=None, + detail="Foreign key is defined in ORM metadata but missing from the database.", + ) + ) + + for signature, foreign_key in actual_fks.items(): + if signature not in expected_fks: + constrained_columns, referred_table, referred_columns = signature + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="foreign_key", + object_name=str(foreign_key.get("name") or ",".join(constrained_columns)), + status="unexpected", + expected=None, + actual=f"{','.join(constrained_columns)} -> {referred_table}({','.join(referred_columns)})", + detail="Foreign key exists in the database but is not defined in ORM metadata.", + ) + ) + + expected_idxs = _expected_indexes(expected_table) + actual_idxs = _actual_indexes(inspector, maintenance_table.table_name, db_schema) + + for index_name, index in expected_idxs.items(): + if index_name not in actual_idxs: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="index", + object_name=index_name, + status="missing", + expected=", ".join(column.name for column in index.columns), + actual=None, + detail="Index is defined in ORM metadata but missing from the database.", + ) + ) + continue + + actual_index = actual_idxs[index_name] + expected_columns_for_index = tuple(column.name for column in index.columns) + actual_columns_for_index = tuple(actual_index.get("column_names") or []) + if expected_columns_for_index != actual_columns_for_index: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="index", + object_name=index_name, + status="mismatch", + expected=", ".join(expected_columns_for_index), + actual=", ".join(actual_columns_for_index) if actual_columns_for_index else None, + detail="Index columns differ from ORM 
metadata.", + ) + ) + if bool(index.unique) != bool(actual_index.get("unique")): + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="index", + object_name=index_name, + status="mismatch", + expected="unique" if index.unique else "non-unique", + actual="unique" if actual_index.get("unique") else "non-unique", + detail="Index uniqueness differs from ORM metadata.", + ) + ) + + for index_name, index in actual_idxs.items(): + if index_name not in expected_idxs: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="index", + object_name=index_name, + status="unexpected", + expected=None, + actual=", ".join(index.get("column_names") or []), + detail="Index exists in the database but is not defined in ORM metadata.", + ) + ) + + if engine.dialect.name == Dialect.POSTGRESQL: + expected_cluster = _cluster_target_name(maintenance_table) + actual_cluster = _actual_cluster_index_name( + connection, + table_name=maintenance_table.table_name, + db_schema=db_schema, + ) + if expected_cluster != actual_cluster: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="cluster", + object_name=maintenance_table.table_name, + status=( + "missing" + if expected_cluster and not actual_cluster + else "unexpected" + if actual_cluster and not expected_cluster + else "mismatch" + ), + expected=expected_cluster, + actual=actual_cluster, + detail="Table clustering differs from ORM metadata.", + ) + ) + + table_status = "matched" if not table_issues else "drifted" + table_results.append( + TableReconciliationResult( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + model_name=maintenance_table.model_name, + model_module=maintenance_table.model_module, + status=table_status, + 
issue_count=len(table_issues), + detail=( + "No differences detected." + if not table_issues + else f"{len(table_issues)} difference(s) detected." + ), + ) + ) + all_issues.extend(table_issues) + + return SchemaReconciliationReport( + backend=engine.dialect.name, + table_results=tuple(table_results), + issues=tuple(all_issues), + ) + + +# --------------------------------------------------------------------------- +# create_missing_tables +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class TableCreationResult: + table_name: str + category: TableCategory + model_name: str + model_module: str + status: str + detail: str + + +def _table_dependencies(table: MaintenanceTable) -> tuple[str, ...]: + return tuple( + sorted( + { + constraint.referred_table.name + for constraint in table.table.foreign_key_constraints + } + ) + ) + + +def collect_missing_tables( + engine: sa.Engine, + *, + db_schema: str | None = None, + vocabulary_included: bool = True, +) -> list[MaintenanceTable]: + inspector = sa.inspect(engine) + return missing_maintenance_tables( + inspector, + db_schema=db_schema, + vocabulary_included=vocabulary_included, + ) + + +def create_missing_tables( + engine: sa.Engine, + *, + db_schema: str | None = None, + vocabulary_included: bool = True, + dry_run: bool = False, +) -> list[TableCreationResult]: + inspector = sa.inspect(engine) + missing_tables = collect_missing_tables( + engine, + db_schema=db_schema, + vocabulary_included=vocabulary_included, + ) + existing_table_names = set(inspector.get_table_names(schema=db_schema)) + missing_table_names = {table.table_name for table in missing_tables} + + blocked_dependencies: dict[str, tuple[str, ...]] = {} + for maintenance_table in missing_tables: + unresolved_dependencies = tuple( + dependency_name + for dependency_name in _table_dependencies(maintenance_table) + if dependency_name not in existing_table_names + and dependency_name not in 
missing_table_names + ) + if unresolved_dependencies: + blocked_dependencies[maintenance_table.table_name] = unresolved_dependencies + + creatable_tables = [ + table + for table in missing_tables + if table.table_name not in blocked_dependencies + ] + + results: list[TableCreationResult] = [] + with engine.begin() as connection: + if creatable_tables and not dry_run: + metadata, adjusted_tables = schema_adjusted_metadata( + collect_maintenance_tables(), + db_schema=db_schema, + ) + metadata.create_all( + bind=connection, + tables=[adjusted_tables[table.table_name] for table in creatable_tables], + checkfirst=True, + ) + + for maintenance_table in missing_tables: + blocked = blocked_dependencies.get(maintenance_table.table_name) + results.append( + TableCreationResult( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + model_name=maintenance_table.model_name, + model_module=maintenance_table.model_module, + status=( + "blocked" + if blocked is not None + else "planned" + if dry_run + else "created" + ), + detail=( + "table blocked by unresolved dependencies: " + ", ".join(blocked) + if blocked is not None + else "table would be created from ORM metadata" + if dry_run + else "table created from ORM metadata" + ), + ) + ) + + return results + + +# --------------------------------------------------------------------------- +# data_summary +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class TableSummaryResult: + table_name: str + category: TableCategory + model_name: str + model_module: str + primary_key_columns: tuple[str, ...] 
+ exists: bool + row_count: int | None + + +def collect_data_summary( + engine: sa.Engine, + *, + db_schema: str | None = None, + vocabulary_included: bool = False, + existing_only: bool = True, +) -> list[TableSummaryResult]: + inspector = sa.inspect(engine) + tables = select_omop_tables(vocabulary_included=vocabulary_included) + + results: list[TableSummaryResult] = [] + with engine.connect() as connection: + for table in tables: + exists = inspector.has_table(table.table_name, schema=db_schema) + if not exists and existing_only: + continue + + row_count: int | None = None + if exists: + row_count = int( + connection.execute( + sa.text( + f"SELECT COUNT(*) FROM {qualified_table_name(table.table_name, db_schema)}" + ) + ).scalar_one() + ) + + results.append( + TableSummaryResult( + table_name=table.table_name, + category=table.category, + model_name=table.model_name, + model_module=table.model_module, + primary_key_columns=table.primary_key_names, + exists=exists, + row_count=row_count, + ) + ) + + return results + + +# --------------------------------------------------------------------------- +# CLI commands +# --------------------------------------------------------------------------- + +app = typer.Typer(rich_markup_mode="rich") + + +@app.command( + "info", + help="Inspect maintenance CLI readiness, backend compatibility, and current installation state.", +) +def info_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Database schema override."), + vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="info", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + 
mode_label="inspect", + ) + ) + try: + load_environment(conn.dotenv or "") + with console.status("Inspecting maintenance environment..."): + info = collect_maintenance_info( + dotenv=conn.dotenv, + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + ) + console.print(render_info_environment(info)) + console.print(render_info_database(info)) + console.print(render_info_dependencies(info)) + console.print(render_info_command_support(info.command_support)) + console.print(render_info_summary(info)) + except Exception as exc: + handle_error(exc) + + +@app.command( + "doctor", + help="Run a read-only maintenance health check across connection readiness, schema drift, and FK state.", +) +def doctor_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Database schema override."), + vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), + deep: bool = typer.Option( + False, + "--deep", + help="Include heavier checks such as PostgreSQL foreign key validation.", + ), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="doctor", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + mode_label="inspect", + ) + ) + try: + load_environment(conn.dotenv or "") + with console.status("Running maintenance doctor checks..."): + report = collect_doctor_report( + dotenv=conn.dotenv, + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + deep=deep, + ) + console.print(render_info_environment(report.info)) + console.print(render_info_database(report.info)) + console.print(render_doctor_checks(report.checks)) + if deep and 
report.foreign_key_validation is not None: + console.print(render_foreign_key_validation_issues(report.foreign_key_validation.violations)) + console.print(render_doctor_recommendations(report.recommendations)) + console.print(render_doctor_summary(report, deep=deep)) + except Exception as exc: + handle_error(exc) + + +@app.command( + "reconcile-schema", + help="Compare ORM-managed SQLAlchemy metadata against the current target database schema.", +) +def reconcile_schema_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Database schema override."), + vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="reconcile-schema", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + mode_label="inspect", + ) + ) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + with console.status("Reconciling ORM metadata against target database schema..."): + report = reconcile_schema(engine, db_schema=conn.db_schema, vocabulary_included=vocabulary_included) + console.print(render_reconciliation_results(report.table_results)) + console.print(render_reconciliation_issues(report.issues)) + console.print(render_reconciliation_summary(report)) + except Exception as exc: + handle_error(exc) + + +@app.command( + "create-missing-tables", + help="Create missing ORM-managed OMOP tables from metadata.", +) +def create_missing_tables_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Database 
schema override."), + vocabulary_included: bool = typer.Option(True, "--vocab/--no-vocab"), + dry_run: bool = typer.Option(False, "--dry-run"), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="create-missing-tables", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + mode_label="dry-run" if dry_run else "apply", + ) + ) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + with console.status("Creating missing tables..."): + results = create_missing_tables( + engine, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + dry_run=dry_run, + ) + console.print(render_table_creation_results(results)) + console.print(render_table_creation_summary(results, dry_run=dry_run)) + except Exception as exc: + handle_error(exc) + + +@app.command( + "data-summary", + help="Summarise ORM-managed OMOP tables present in the target database.", +) +def data_summary_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Database schema override."), + vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), + include_missing: bool = typer.Option(False, "--include-missing"), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="data-summary", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + mode_label="inspect", + ) + ) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + with console.status("Collecting table summary..."): + results = collect_data_summary( + engine, + 
db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + existing_only=not include_missing, + ) + console.print(render_data_summary_results(results)) + console.print(render_data_summary_summary(results)) + except Exception as exc: + handle_error(exc) diff --git a/omop_alchemy/maintenance/cli_tables.py b/omop_alchemy/maintenance/cli_tables.py new file mode 100644 index 0000000..8e3b689 --- /dev/null +++ b/omop_alchemy/maintenance/cli_tables.py @@ -0,0 +1,564 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import sqlalchemy as sa +import typer + +from ..backend_support import Dialect, POSTGRESQL_ONLY_HELP, require_backend +from ._cli_utils import build_engine, handle_error, resolve_connection, resolve_selection +from .tables import ( + TableCategory, + TableScope, + qualified_table_name, + resolve_maintenance_tables, + select_omop_tables, +) +from .ui import ( + console, + render_analyze_note, + render_analyze_results, + render_analyze_summary, + render_command_header, + render_error, + render_sequence_reset_results, + render_sequence_reset_summary, + render_truncate_note, + render_truncate_results, + render_truncate_summary, +) + + +# --------------------------------------------------------------------------- +# analyze_tables +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class AnalyzeTableResult: + table_name: str + category: TableCategory + model_name: str + model_module: str + operation: str + status: str + detail: str + + +def analyze_tables( + engine: sa.Engine, + *, + db_schema: str | None = None, + scope: TableScope | None = None, + table_names: tuple[str, ...] 
| None = None, + vacuum: bool = False, + dry_run: bool = False, +) -> list[AnalyzeTableResult]: + if scope is not None and table_names is not None: + raise RuntimeError("Use either `scope` or `table_names`, not both.") + + require_backend( + engine, + feature="Table analysis", + supported_dialects=(Dialect.POSTGRESQL, Dialect.SQLITE), + ) + + if vacuum and engine.dialect.name != Dialect.POSTGRESQL: + raise RuntimeError( + "VACUUM ANALYZE is only supported for PostgreSQL engines. " + f"Current dialect: '{engine.dialect.name}'." + ) + + selected_tables = resolve_maintenance_tables(scope=scope, table_names=table_names) + inspector = sa.inspect(engine) + operation = "VACUUM ANALYZE" if vacuum else "ANALYZE" + results: list[AnalyzeTableResult] = [] + + connection_factory = ( + engine.connect().execution_options(isolation_level="AUTOCOMMIT") + if vacuum + else engine.connect() + ) + + with connection_factory as connection: + for maintenance_table in selected_tables: + if not inspector.has_table(maintenance_table.table_name, schema=db_schema): + results.append( + AnalyzeTableResult( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + model_name=maintenance_table.model_name, + model_module=maintenance_table.model_module, + operation=operation, + status="skipped", + detail="table not present in target database", + ) + ) + continue + + qualified_name = qualified_table_name(maintenance_table.table_name, db_schema) + if not dry_run: + connection.exec_driver_sql(f"{operation} {qualified_name}") + + results.append( + AnalyzeTableResult( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + model_name=maintenance_table.model_name, + model_module=maintenance_table.model_module, + operation=operation, + status="planned" if dry_run else "applied", + detail=( + f"{operation.lower()} would run" + if dry_run + else f"{operation.lower()} completed" + ), + ) + ) + + return results + + +# 
--------------------------------------------------------------------------- +# truncate_tables +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class TruncateTableResult: + table_name: str + category: TableCategory + model_name: str + model_module: str + row_count: int | None + status: str + detail: str + + +def _blocking_foreign_key_references( + inspector: sa.Inspector, + *, + db_schema: str | None, + selected_table_names: set[str], +) -> dict[str, set[str]]: + blockers: dict[str, set[str]] = {} + + for table_name in inspector.get_table_names(schema=db_schema): + if table_name in selected_table_names: + continue + + for foreign_key in inspector.get_foreign_keys(table_name, schema=db_schema): + referred_table = foreign_key.get("referred_table") + if referred_table not in selected_table_names: + continue + blockers.setdefault(str(referred_table), set()).add(table_name) + + return blockers + + +def _format_blocking_reference_error(blockers: dict[str, set[str]]) -> str: + blocker_parts = [ + f"{table_name} <- {', '.join(sorted(referencing_tables))}" + for table_name, referencing_tables in sorted(blockers.items()) + ] + preview = "; ".join(blocker_parts[:5]) + if len(blocker_parts) > 5: + preview = f"{preview}; +{len(blocker_parts) - 5} more" + + return ( + "Truncation would be blocked by foreign key references from tables outside the current selection. " + f"Blocking references: {preview}. " + "Use `--cascade`, expand the table selection, or disable foreign key trigger enforcement first." + ) + + +def truncate_tables( + engine: sa.Engine, + *, + db_schema: str | None = None, + scope: TableScope | None = None, + table_names: tuple[str, ...] 
| None = None, + restart_identities: bool = False, + cascade: bool = False, + dry_run: bool = False, +) -> list[TruncateTableResult]: + if scope is not None and table_names is not None: + raise RuntimeError("Use either `scope` or `table_names`, not both.") + if scope is None and table_names is None: + raise RuntimeError("Select tables to truncate with `scope` or `table_names`.") + + require_backend(engine, feature="Table truncation", supported_dialects=(Dialect.POSTGRESQL,)) + + selected_tables = resolve_maintenance_tables(scope=scope, table_names=table_names) + inspector = sa.inspect(engine) + results: list[TruncateTableResult] = [] + existing_tables: list[str] = [] + + with engine.begin() as connection: + for maintenance_table in selected_tables: + if not inspector.has_table(maintenance_table.table_name, schema=db_schema): + results.append( + TruncateTableResult( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + model_name=maintenance_table.model_name, + model_module=maintenance_table.model_module, + row_count=None, + status="skipped", + detail="table not present in target database", + ) + ) + continue + + row_count = int( + connection.exec_driver_sql( + f"SELECT COUNT(*) FROM {qualified_table_name(maintenance_table.table_name, db_schema)}" + ).scalar_one() + ) + existing_tables.append(maintenance_table.table_name) + results.append( + TruncateTableResult( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + model_name=maintenance_table.model_name, + model_module=maintenance_table.model_module, + row_count=row_count, + status="planned" if dry_run else "applied", + detail="table would be truncated" if dry_run else "table truncated", + ) + ) + + if existing_tables and not dry_run and not cascade: + blockers = _blocking_foreign_key_references( + inspector, + db_schema=db_schema, + selected_table_names=set(existing_tables), + ) + if blockers: + raise 
RuntimeError(_format_blocking_reference_error(blockers)) + + if existing_tables and not dry_run: + truncate_sql = ( + "TRUNCATE TABLE " + + ", ".join( + qualified_table_name(table_name, db_schema) + for table_name in existing_tables + ) + ) + if restart_identities: + truncate_sql += " RESTART IDENTITY" + if cascade: + truncate_sql += " CASCADE" + connection.exec_driver_sql(truncate_sql) + + return results + + +# --------------------------------------------------------------------------- +# reset_sequences +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class SequenceTarget: + table_name: str + category: TableCategory + model_name: str + model_module: str + pk_column_name: str + + +@dataclass(frozen=True) +class SequenceResetResult: + table_name: str + category: TableCategory + model_name: str + model_module: str + pk_column_name: str + sequence_name: str | None + next_value: int | None + status: str + detail: str + + +def collect_sequence_targets( + *, + vocabulary_included: bool = False, +) -> list[SequenceTarget]: + targets: list[SequenceTarget] = [] + for table in select_omop_tables( + vocabulary_included=vocabulary_included, + require_single_integer_primary_key=True, + ): + pk_column_name = table.single_primary_key_name + if pk_column_name is None: + continue + targets.append( + SequenceTarget( + table_name=table.table_name, + category=table.category, + model_name=table.model_name, + model_module=table.model_module, + pk_column_name=pk_column_name, + ) + ) + return targets + + +def reset_model_sequences( + engine: sa.Engine, + *, + db_schema: str | None = None, + vocabulary_included: bool = False, + dry_run: bool = False, +) -> list[SequenceResetResult]: + require_backend(engine, feature="Sequence reset", supported_dialects=(Dialect.POSTGRESQL,)) + + inspector = sa.inspect(engine) + targets = collect_sequence_targets(vocabulary_included=vocabulary_included) + results: list[SequenceResetResult] = [] + + 
with engine.begin() as connection: + for target in targets: + if not inspector.has_table(target.table_name, schema=db_schema): + continue + + fully_qualified_table_name = qualified_table_name(target.table_name, db_schema) + sequence_name = connection.execute( + sa.text("SELECT pg_get_serial_sequence(:table_name, :column_name)"), + { + "table_name": fully_qualified_table_name, + "column_name": target.pk_column_name, + }, + ).scalar_one_or_none() + + if sequence_name is None: + results.append( + SequenceResetResult( + table_name=target.table_name, + category=target.category, + model_name=target.model_name, + model_module=target.model_module, + pk_column_name=target.pk_column_name, + sequence_name=None, + next_value=None, + status="skipped", + detail="no owned PostgreSQL sequence found", + ) + ) + continue + + current_max = connection.execute( + sa.text( + f"SELECT COALESCE(MAX({target.pk_column_name}), 0) " + f"FROM {fully_qualified_table_name}" + ) + ).scalar_one() + next_value = int(current_max) + 1 + + if not dry_run: + connection.execute( + sa.text("SELECT setval(:sequence_name, :next_value, false)"), + {"sequence_name": sequence_name, "next_value": next_value}, + ) + + results.append( + SequenceResetResult( + table_name=target.table_name, + category=target.category, + model_name=target.model_name, + model_module=target.model_module, + pk_column_name=target.pk_column_name, + sequence_name=sequence_name, + next_value=next_value, + status="planned" if dry_run else "reset", + detail=( + "sequence would be reset from table max + 1" + if dry_run + else "sequence reset from table max + 1" + ), + ) + ) + + return results + + +# --------------------------------------------------------------------------- +# CLI commands +# --------------------------------------------------------------------------- + +app = typer.Typer(rich_markup_mode="rich") + + +@app.command( + "analyze-tables", + help="Refresh planner statistics for selected ORM-managed tables.", +) +def 
analyze_tables_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Database schema override."), + scope: TableScope | None = typer.Option( + None, + "--scope", + help="Category scope to analyze. Defaults to all ORM-managed tables when omitted.", + case_sensitive=False, + ), + table: list[str] | None = typer.Option( + None, + "--table", + help="Specific ORM-managed table name to analyze. Repeat for multiple tables.", + ), + vacuum: bool = typer.Option( + False, + "--vacuum", + help="Use VACUUM ANALYZE instead of ANALYZE. PostgreSQL only.", + ), + dry_run: bool = typer.Option(False, "--dry-run"), +) -> None: + resolved_scope, resolved_tables = resolve_selection( + scope=scope, tables=table, default_scope=TableScope.ALL + ) + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="analyze-tables", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=None, + mode_label="dry-run" if dry_run else "apply", + ) + ) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + with console.status("Refreshing planner statistics for selected tables..."): + results = analyze_tables( + engine, + db_schema=conn.db_schema, + scope=resolved_scope, + table_names=resolved_tables, + vacuum=vacuum, + dry_run=dry_run, + ) + console.print(render_analyze_results(results)) + console.print(render_analyze_summary(results, dry_run=dry_run)) + console.print(render_analyze_note()) + except Exception as exc: + handle_error(exc) + + +@app.command( + "reset-sequences", + help=f"Reset owned sequences from table max + 1. 
{POSTGRESQL_ONLY_HELP}", +) +def reset_sequences_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Database schema override."), + vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), + dry_run: bool = typer.Option(False, "--dry-run"), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="reset-sequences", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + mode_label="dry-run" if dry_run else "apply", + ) + ) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + with console.status("Resetting PostgreSQL sequences..."): + results = reset_model_sequences( + engine, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + dry_run=dry_run, + ) + console.print(render_sequence_reset_results(results)) + console.print(render_sequence_reset_summary(results, dry_run=dry_run)) + except Exception as exc: + handle_error(exc) + + +@app.command( + "truncate-tables", + help=f"Truncate selected ORM-managed tables. {POSTGRESQL_ONLY_HELP}", +) +def truncate_tables_command( + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option(None, help="Database schema override."), + scope: TableScope | None = typer.Option( + None, + "--scope", + help="Category scope to truncate.", + case_sensitive=False, + ), + table: list[str] | None = typer.Option( + None, + "--table", + help="Specific ORM-managed table name to truncate. 
Repeat for multiple tables.", + ), + restart_identities: bool = typer.Option( + False, + "--restart-identities", + help="Restart owned identities during truncation.", + ), + cascade: bool = typer.Option( + False, + "--cascade", + help="Include dependent tables via PostgreSQL CASCADE.", + ), + yes: bool = typer.Option( + False, + "--yes", + help="Confirm that you want to apply this destructive operation.", + ), + dry_run: bool = typer.Option(False, "--dry-run"), +) -> None: + resolved_scope, resolved_tables = resolve_selection(scope=scope, tables=table) + if resolved_scope is None and resolved_tables is None: + console.print( + render_error("Select tables to truncate with `--scope` or one or more `--table` values.") + ) + raise typer.Exit(code=1) + if not dry_run and not yes: + console.print( + render_error("Truncation is destructive. Re-run with `--yes`, or use `--dry-run` first.") + ) + raise typer.Exit(code=1) + + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print( + render_command_header( + command_name="truncate-tables", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=None, + mode_label="dry-run" if dry_run else "apply", + ) + ) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + with console.status("Truncating selected tables..."): + results = truncate_tables( + engine, + db_schema=conn.db_schema, + scope=resolved_scope, + table_names=resolved_tables, + restart_identities=restart_identities, + cascade=cascade, + dry_run=dry_run, + ) + console.print(render_truncate_results(results)) + console.print( + render_truncate_summary( + results, + dry_run=dry_run, + restart_identities=restart_identities, + cascade=cascade, + ) + ) + console.print(render_truncate_note()) + except Exception as exc: + handle_error(exc) diff --git a/omop_alchemy/maintenance/load_vocab.py b/omop_alchemy/maintenance/cli_vocab.py similarity index 71% rename from 
omop_alchemy/maintenance/load_vocab.py rename to omop_alchemy/maintenance/cli_vocab.py index 029a86d..6d7deae 100644 --- a/omop_alchemy/maintenance/load_vocab.py +++ b/omop_alchemy/maintenance/cli_vocab.py @@ -7,7 +7,9 @@ import sqlalchemy as sa import sqlalchemy.orm as so +import typer from orm_loader.tables.typing import CSVTableProtocol +from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn, TimeElapsedColumn from omop_alchemy.cdm.model.vocabulary import ( Concept, @@ -23,8 +25,18 @@ ) from ..backend_support import Dialect, require_backend -from .reset_sequences import reset_model_sequences +from ._cli_utils import build_engine, handle_error, resolve_connection +from .cli_foreign_keys import ForeignKeyAction, manage_foreign_key_triggers +from .cli_indexes import IndexAction, manage_indexes +from .cli_tables import reset_model_sequences from .tables import TableCategory, schema_adjusted_metadata, select_maintenance_tables +from .ui import ( + console, + render_command_header, + render_error, + render_vocab_load_results, + render_vocab_load_summary, +) MergeStrategy: TypeAlias = Literal["replace", "upsert", "insert_if_empty"] @@ -98,6 +110,7 @@ class _VocabularyLoadItem: ), ) + class VocabularyLoadError(RuntimeError): """Raised when a single Athena vocabulary table load fails.""" @@ -154,10 +167,12 @@ def _load_vocab_model_csv( merge_strategy: MergeStrategy, quote_mode: str = "auto", chunksize: int | None = None, + index_strategy: str = "auto", ) -> int: load_kwargs: dict[str, object] = { "merge_strategy": merge_strategy, "quote_mode": quote_mode, + "index_strategy": index_strategy, } if chunksize is not None: load_kwargs["chunksize"] = chunksize @@ -167,7 +182,7 @@ def _load_vocab_model_csv( model.load_csv( session, csv_path, - **load_kwargs, # type: ignore[arg-type] + **load_kwargs, # type: ignore[arg-type] ) ) except Exception as exc: @@ -180,7 +195,7 @@ def _load_vocab_model_csv( model.load_csv( session, csv_path, - 
**load_kwargs, # type: ignore[arg-type] + **load_kwargs, # type: ignore[arg-type] ) ) @@ -267,6 +282,7 @@ def _configure_loader_connection( quoted_schema = '"' + db_schema.replace('"', '""') + '"' connection.exec_driver_sql(f"SET search_path TO {quoted_schema}") + def load_vocab_source( engine: sa.Engine, *, @@ -275,6 +291,7 @@ def load_vocab_source( dry_run: bool = False, merge_strategy: MergeStrategy = "replace", chunksize: int | None = 100_000, + bulk_mode: bool = True, progress_callback: VocabularyLoadProgressCallback | None = None, ) -> VocabularyLoadReport: _ensure_supported_backend(engine) @@ -344,6 +361,27 @@ def load_vocab_source( detail=f"Preparing Athena vocabulary load for {table_count} CSV file(s)", ) + _use_bulk_mode = ( + bulk_mode + and not dry_run + and engine.dialect.name == Dialect.POSTGRESQL + ) + if _use_bulk_mode: + manage_foreign_key_triggers( + engine, + action=ForeignKeyAction.DISABLE, + vocabulary_included=True, + db_schema=db_schema, + dry_run=False, + ) + manage_indexes( + engine, + action=IndexAction.DISABLE, + vocabulary_included=True, + db_schema=db_schema, + dry_run=False, + ) + with engine.connect() as connection: _configure_loader_connection( connection, @@ -410,6 +448,7 @@ def load_vocab_source( "csv_path": csv_path, "merge_strategy": merge_strategy, "quote_mode": "auto", + "index_strategy": "keep" if _use_bulk_mode else "auto", } if chunksize is not None: loader_kwargs["chunksize"] = chunksize @@ -429,7 +468,7 @@ def load_vocab_source( row_count = _load_vocab_model_csv( session, - **loader_kwargs, # type: ignore[arg-type] + **loader_kwargs, # type: ignore[arg-type] ) completed_units += item.size_bytes * LOAD_PROGRESS_FRACTION @@ -488,15 +527,37 @@ def load_vocab_source( session.rollback() if not dry_run: connection.rollback() + recovery = ( + " Indexes and FK triggers may still be disabled; run " + "'omop-alchemy indexes enable --vocab' and 'omop-alchemy foreign-keys enable' to recover." 
+ if _use_bulk_mode else "" + ) raise VocabularyLoadError( "Athena vocabulary load failed for " f"table `{current_model_name or 'unknown'}` from `{current_csv_path or '-'}` " f"using merge strategy `{merge_strategy}` on backend `{engine.dialect.name}`. " f"Underlying error: {exc.__class__.__name__}: {exc}" + + recovery ) from exc finally: session.close() + if _use_bulk_mode: + manage_indexes( + engine, + action=IndexAction.ENABLE, + vocabulary_included=True, + db_schema=db_schema, + dry_run=False, + ) + manage_foreign_key_triggers( + engine, + action=ForeignKeyAction.ENABLE, + vocabulary_included=True, + db_schema=db_schema, + dry_run=False, + ) + results.extend(missing_optional_results) if not dry_run and engine.dialect.name == Dialect.POSTGRESQL: @@ -520,3 +581,114 @@ def load_vocab_source( sequence_reset_count=sequence_reset_count, results=tuple(results), ) + + +app = typer.Typer(rich_markup_mode="rich") + + +@app.command( + "load-vocab-source", + help="Load Athena vocabulary CSV files from a configured source path using the ORM staged CSV loader.", +) +def load_vocab_source_command( + athena_source: str | None = typer.Option( + None, help="Path to unzipped Athena vocabulary CSV files." + ), + dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), + engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + db_schema: str | None = typer.Option( + None, + help="Database schema override. PostgreSQL only; uses search_path for ORM CSV loading.", + ), + merge_strategy: MergeStrategy = typer.Option( + "replace", + help=( + "CSV merge strategy. `replace` (default) keeps the DB in sync with the source. " + "`upsert` is incremental and non-destructive. " + "`insert_if_empty` is the fast path for a fresh empty target." + ), + ), + chunksize: int | None = typer.Option( + 100_000, + help="Chunk size for fallback ORM CSV loading. 
Defaults to 100 000 rows; pass 0 to disable chunking.", + ), + bulk_mode: bool = typer.Option( + True, + "--bulk-mode/--no-bulk-mode", + help=( + "Disable FK triggers and drop indexes globally before loading, then rebuild after. " + "Much faster than per-table management for a full vocabulary reload. " + "PostgreSQL only; ignored on SQLite. " + "If the load fails mid-way, run `indexes enable --vocab` and `foreign-keys enable` to recover." + ), + ), + dry_run: bool = typer.Option(False, "--dry-run"), +) -> None: + conn = resolve_connection( + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, + athena_source=athena_source, + ) + console.print( + render_command_header( + command_name="load-vocab-source", + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=True, + mode_label="dry-run" if dry_run else "apply", + ) + ) + + if conn.athena_source is None: + console.print( + render_error( + "No Athena vocabulary source path is configured. " + "Set it with `omop-alchemy config set-overrides --athena-source ` " + "or pass `--athena-source`." 
+ ) + ) + raise typer.Exit(code=1) + + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + + with Progress( + SpinnerColumn(), + TextColumn("[bold cyan]{task.description}"), + BarColumn(bar_width=None), + TaskProgressColumn(), + TimeElapsedColumn(), + console=console, + transient=False, + ) as progress: + task_id = progress.add_task( + "Preparing Athena vocabulary load...", total=100.0, completed=0 + ) + completed_tables: list[str] = [] + + def _update_progress(event: VocabularyLoadProgress) -> None: + progress.update(task_id, completed=event.percent, description=event.detail) + if event.phase == "commit-complete" and event.table_name is not None: + completed_tables.append(event.table_name) + progress.console.print( + f"[green]loaded[/green] [bold]{event.table_name}[/bold] " + f"({len(completed_tables)}/{event.table_count})" + ) + + report = load_vocab_source( + engine, + source_path=conn.athena_source, + db_schema=conn.db_schema, + dry_run=dry_run, + merge_strategy=merge_strategy, + chunksize=None if chunksize == 0 else chunksize, + bulk_mode=bulk_mode, + progress_callback=_update_progress, + ) + progress.update(task_id, completed=100.0, description="Athena vocabulary load complete") + + console.print(render_vocab_load_results(report.results)) + console.print(render_vocab_load_summary(report, dry_run=dry_run)) + except Exception as exc: + handle_error(exc) diff --git a/omop_alchemy/maintenance/create_tables.py b/omop_alchemy/maintenance/create_tables.py deleted file mode 100644 index 8a41659..0000000 --- a/omop_alchemy/maintenance/create_tables.py +++ /dev/null @@ -1,129 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass - -import sqlalchemy as sa - -from .tables import ( - MaintenanceTable, - TableCategory, - collect_maintenance_tables, - missing_maintenance_tables, - schema_adjusted_metadata, -) - - -@dataclass(frozen=True) -class TableCreationResult: - table_name: str - category: TableCategory - 
model_name: str - model_module: str - status: str - detail: str - - -def _table_dependencies(table: MaintenanceTable) -> tuple[str, ...]: - return tuple( - sorted( - { - constraint.referred_table.name - for constraint in table.table.foreign_key_constraints - } - ) - ) - - -def collect_missing_tables( - engine: sa.Engine, - *, - db_schema: str | None = None, - vocabulary_included: bool = True, -) -> list[MaintenanceTable]: - inspector = sa.inspect(engine) - return missing_maintenance_tables( - inspector, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - ) - - -def create_missing_tables( - engine: sa.Engine, - *, - db_schema: str | None = None, - vocabulary_included: bool = True, - dry_run: bool = False, -) -> list[TableCreationResult]: - inspector = sa.inspect(engine) - missing_tables = collect_missing_tables( - engine, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - ) - existing_table_names = set(inspector.get_table_names(schema=db_schema)) - missing_table_names = { - table.table_name - for table in missing_tables - } - - blocked_dependencies: dict[str, tuple[str, ...]] = {} - for maintenance_table in missing_tables: - unresolved_dependencies = tuple( - dependency_name - for dependency_name in _table_dependencies(maintenance_table) - if dependency_name not in existing_table_names - and dependency_name not in missing_table_names - ) - if unresolved_dependencies: - blocked_dependencies[maintenance_table.table_name] = unresolved_dependencies - - creatable_tables = [ - table - for table in missing_tables - if table.table_name not in blocked_dependencies - ] - - results: list[TableCreationResult] = [] - with engine.begin() as connection: - if creatable_tables and not dry_run: - metadata, adjusted_tables = schema_adjusted_metadata( - collect_maintenance_tables(), - db_schema=db_schema, - ) - metadata.create_all( - bind=connection, - tables=[ - adjusted_tables[table.table_name] - for table in creatable_tables - ], - checkfirst=True, - 
) - - for maintenance_table in missing_tables: - blocked = blocked_dependencies.get(maintenance_table.table_name) - results.append( - TableCreationResult( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - model_name=maintenance_table.model_name, - model_module=maintenance_table.model_module, - status=( - "blocked" - if blocked is not None - else "planned" - if dry_run - else "created" - ), - detail=( - "table blocked by unresolved dependencies: " - + ", ".join(blocked) - if blocked is not None - else "table would be created from ORM metadata" - if dry_run - else "table created from ORM metadata" - ), - ) - ) - - return results diff --git a/omop_alchemy/maintenance/data_summary.py b/omop_alchemy/maintenance/data_summary.py deleted file mode 100644 index 272a09f..0000000 --- a/omop_alchemy/maintenance/data_summary.py +++ /dev/null @@ -1,58 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass - -import sqlalchemy as sa - -from .tables import TableCategory, qualified_table_name, select_omop_tables - - -@dataclass(frozen=True) -class TableSummaryResult: - table_name: str - category: TableCategory - model_name: str - model_module: str - primary_key_columns: tuple[str, ...] 
- exists: bool - row_count: int | None -def collect_data_summary( - engine: sa.Engine, - *, - db_schema: str | None = None, - vocabulary_included: bool = False, - existing_only: bool = True, -) -> list[TableSummaryResult]: - inspector = sa.inspect(engine) - tables = select_omop_tables(vocabulary_included=vocabulary_included) - - results: list[TableSummaryResult] = [] - with engine.connect() as connection: - for table in tables: - exists = inspector.has_table(table.table_name, schema=db_schema) - if not exists and existing_only: - continue - - row_count: int | None = None - if exists: - row_count = int( - connection.execute( - sa.text( - f"SELECT COUNT(*) FROM {qualified_table_name(table.table_name, db_schema)}" - ) - ).scalar_one() - ) - - results.append( - TableSummaryResult( - table_name=table.table_name, - category=table.category, - model_name=table.model_name, - model_module=table.model_module, - primary_key_columns=table.primary_key_names, - exists=exists, - row_count=row_count, - ) - ) - - return results diff --git a/omop_alchemy/maintenance/doctor.py b/omop_alchemy/maintenance/doctor.py deleted file mode 100644 index 91b1cbd..0000000 --- a/omop_alchemy/maintenance/doctor.py +++ /dev/null @@ -1,346 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass - -from omop_alchemy import create_engine_with_dependencies, get_engine_name, load_environment - -from ..backend_support import Dialect -from .foreign_keys import ( - ForeignKeyStatusResult, - ForeignKeyValidationReport, - collect_foreign_key_trigger_status, - validate_foreign_key_constraints, -) -from .info import MaintenanceInfo, collect_maintenance_info -from .reconcile import SchemaReconciliationReport, reconcile_schema - - -@dataclass(frozen=True) -class DoctorCheck: - name: str - status: str - detail: str - - -@dataclass(frozen=True) -class DoctorRecommendation: - status: str - summary: str - action: str | None - - -@dataclass(frozen=True) -class DoctorReport: - info: 
MaintenanceInfo - checks: tuple[DoctorCheck, ...] - recommendations: tuple[DoctorRecommendation, ...] - reconciliation: SchemaReconciliationReport | None - foreign_key_status: tuple[ForeignKeyStatusResult, ...] | None - foreign_key_validation: ForeignKeyValidationReport | None - - -def _build_recommendations( - *, - info: MaintenanceInfo, - reconciliation: SchemaReconciliationReport | None, - foreign_key_status: tuple[ForeignKeyStatusResult, ...] | None, - foreign_key_validation: ForeignKeyValidationReport | None, -) -> tuple[DoctorRecommendation, ...]: - recommendations: list[DoctorRecommendation] = [] - - if not info.connection_ready: - recommendations.append( - DoctorRecommendation( - status="failed", - summary="Database connection is not ready for maintenance operations.", - action="Check the engine configuration, backend driver, and target database reachability.", - ) - ) - return tuple(recommendations) - - if info.missing_table_count: - recommendations.append( - DoctorRecommendation( - status="warning", - summary=f"{info.missing_table_count} ORM-managed table(s) are missing from the target database.", - action="Run `omop-alchemy create-missing-tables` before attempting bulk operations.", - ) - ) - - if reconciliation is not None and reconciliation.issues: - recommendations.append( - DoctorRecommendation( - status="warning", - summary=f"Schema reconciliation found {len(reconciliation.issues)} difference(s) against ORM metadata.", - action="Review `omop-alchemy reconcile-schema` output before continuing with ETL or maintenance work.", - ) - ) - - if foreign_key_status is not None and any( - item.disabled_trigger_count > 0 - for item in foreign_key_status - ): - recommendations.append( - DoctorRecommendation( - status="warning", - summary="Some PostgreSQL RI triggers are currently disabled.", - action="If loading is complete, run `omop-alchemy foreign-keys validate` and then `omop-alchemy foreign-keys enable --strict`.", - ) - ) - - if ( - foreign_key_validation 
is not None - and any(result.status == "failed" for result in foreign_key_validation.results) - ): - recommendations.append( - DoctorRecommendation( - status="failed", - summary="Foreign key validation found violating rows.", - action="Fix the reported rows, then rerun `omop-alchemy foreign-keys enable --strict`.", - ) - ) - - if info.backend == Dialect.POSTGRESQL and info.pg_dump_path is None: - recommendations.append( - DoctorRecommendation( - status="warning", - summary="`pg_dump` is not on PATH, so backup-database is unavailable from this machine.", - action="Install PostgreSQL client tools on the machine running `omop-alchemy`.", - ) - ) - - if ( - info.backend == Dialect.POSTGRESQL - and info.pg_restore_path is None - and info.psql_path is None - ): - recommendations.append( - DoctorRecommendation( - status="warning", - summary="Neither `pg_restore` nor `psql` is on PATH, so restore-database is unavailable from this machine.", - action="Install PostgreSQL client tools on the machine running `omop-alchemy`.", - ) - ) - - if not recommendations: - recommendations.append( - DoctorRecommendation( - status="passed", - summary="No obvious maintenance blockers were detected.", - action=None, - ) - ) - - return tuple(recommendations) - - -def collect_doctor_report( - *, - engine_schema: str | None = None, - db_schema: str | None = None, - dotenv: str | None = None, - vocabulary_included: bool = True, - deep: bool = False, -) -> DoctorReport: - load_environment(dotenv or "") - info = collect_maintenance_info( - engine_schema=engine_schema, - db_schema=db_schema, - dotenv=dotenv, - vocabulary_included=vocabulary_included, - ) - - checks = [ - DoctorCheck( - name="connection", - status="passed" if info.connection_ready else "failed", - detail=( - "Target database connection succeeded." - if info.connection_ready - else info.connection_error or info.engine_error or "Connection could not be established." 
- ), - ) - ] - - reconciliation: SchemaReconciliationReport | None = None - foreign_key_status: tuple[ForeignKeyStatusResult, ...] | None = None - foreign_key_validation: ForeignKeyValidationReport | None = None - - if info.connection_ready: - engine = create_engine_with_dependencies( - get_engine_name(engine_schema), - future=True, - ) - try: - missing_table_count = info.missing_table_count or 0 - checks.append( - DoctorCheck( - name="managed tables", - status="passed" if missing_table_count == 0 else "warning", - detail=( - "All selected ORM-managed tables exist." - if missing_table_count == 0 - else f"{missing_table_count} selected table(s) are missing." - ), - ) - ) - - if deep: - reconciliation = reconcile_schema( - engine, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - ) - checks.append( - DoctorCheck( - name="schema drift", - status="passed" if not reconciliation.issues else "warning", - detail=( - "ORM metadata matches the target database." - if not reconciliation.issues - else f"{len(reconciliation.issues)} difference(s) detected." - ), - ) - ) - else: - checks.append( - DoctorCheck( - name="schema drift", - status="skipped", - detail="Run `omop-alchemy doctor --deep` to reconcile ORM metadata against the target database.", - ) - ) - - if info.backend == Dialect.POSTGRESQL: - foreign_key_status = tuple( - collect_foreign_key_trigger_status( - engine, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - ) - ) - disabled_tables = sum( - item.disabled_trigger_count > 0 - for item in foreign_key_status - ) - checks.append( - DoctorCheck( - name="foreign keys", - status="passed" if disabled_tables == 0 else "warning", - detail=( - "All inspected RI triggers are enabled." - if disabled_tables == 0 - else f"{disabled_tables} table(s) still have disabled RI triggers." 
- ), - ) - ) - - if deep: - foreign_key_validation = validate_foreign_key_constraints( - engine, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - ) - violating_tables = sum( - result.status == "failed" - for result in foreign_key_validation.results - ) - checks.append( - DoctorCheck( - name="foreign key validation", - status="passed" if violating_tables == 0 else "failed", - detail=( - "All selected foreign key relationships passed validation." - if violating_tables == 0 - else f"{violating_tables} table(s) have violating foreign key rows." - ), - ) - ) - else: - checks.append( - DoctorCheck( - name="foreign key validation", - status="skipped", - detail="Run `omop-alchemy doctor --deep` to validate selected foreign key relationships.", - ) - ) - else: - checks.append( - DoctorCheck( - name="foreign keys", - status="skipped", - detail="Foreign key trigger inspection is only available on PostgreSQL.", - ) - ) - checks.append( - DoctorCheck( - name="foreign key validation", - status="skipped", - detail="Foreign key validation is only available on PostgreSQL.", - ) - ) - finally: - engine.dispose() - else: - checks.extend( - ( - DoctorCheck( - name="managed tables", - status="skipped", - detail="Skipped because the database connection is not ready.", - ), - DoctorCheck( - name="foreign keys", - status="skipped", - detail="Skipped because the database connection is not ready.", - ), - DoctorCheck( - name="schema drift", - status="skipped", - detail="Skipped because the database connection is not ready.", - ), - DoctorCheck( - name="foreign key validation", - status="skipped", - detail="Skipped because the database connection is not ready.", - ), - ) - ) - - if info.backend == Dialect.POSTGRESQL: - backup_tools_ready = info.pg_dump_path is not None and ( - info.pg_restore_path is not None or info.psql_path is not None - ) - checks.append( - DoctorCheck( - name="backup tooling", - status="passed" if backup_tools_ready else "warning", - detail=( - 
"PostgreSQL backup and restore client tools are available." - if backup_tools_ready - else "PostgreSQL client tools are incomplete on this machine." - ), - ) - ) - else: - checks.append( - DoctorCheck( - name="backup tooling", - status="skipped", - detail="Backup and restore tooling checks are only relevant for PostgreSQL targets.", - ) - ) - - return DoctorReport( - info=info, - checks=tuple(checks), - recommendations=_build_recommendations( - info=info, - reconciliation=reconciliation, - foreign_key_status=foreign_key_status, - foreign_key_validation=foreign_key_validation, - ), - reconciliation=reconciliation, - foreign_key_status=foreign_key_status, - foreign_key_validation=foreign_key_validation, - ) diff --git a/omop_alchemy/maintenance/info.py b/omop_alchemy/maintenance/info.py deleted file mode 100644 index aabd11c..0000000 --- a/omop_alchemy/maintenance/info.py +++ /dev/null @@ -1,408 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -import importlib.metadata -import importlib.util -import os -import shutil - -import sqlalchemy as sa -from sqlalchemy.exc import SQLAlchemyError - -from omop_alchemy import create_engine_with_dependencies, get_engine_name, load_environment - -from ..backend_support import Dialect, backend_label -from .create_tables import collect_missing_tables -from .defaults import defaults_path -from .tables import TableCategory, select_maintenance_tables - - -@dataclass(frozen=True) -class DependencyStatus: - name: str - installed: bool - version: str | None - - -@dataclass(frozen=True) -class CommandSupport: - command_name: str - requirement: str - status: str - detail: str - - -@dataclass(frozen=True) -class MaintenanceInfo: - package_version: str - cli_path: str | None - pg_dump_path: str | None - pg_restore_path: str | None - psql_path: str | None - defaults_file: str - defaults_exists: bool - dotenv_path: str | None - dotenv_exists: bool | None - engine_schema: str | None - db_schema: str | None - 
engine_url: str | None - backend: str | None - engine_created: bool - engine_error: str | None - connection_ready: bool - connection_error: str | None - managed_table_count: int - existing_table_count: int | None - missing_table_count: int | None - vocabulary_included: bool - dependencies: tuple[DependencyStatus, ...] - command_support: tuple[CommandSupport, ...] - - -def _package_version() -> str: - return importlib.metadata.version("omop-alchemy") - - -def _dependency_status(distribution_name: str, module_name: str) -> DependencyStatus: - installed = importlib.util.find_spec(module_name) is not None - version: str | None = None - if installed: - try: - version = importlib.metadata.version(distribution_name) - except importlib.metadata.PackageNotFoundError: - version = None - return DependencyStatus( - name=distribution_name, - installed=installed, - version=version, - ) - - -def _external_dependency_status(name: str, executable_name: str) -> DependencyStatus: - return DependencyStatus( - name=name, - installed=shutil.which(executable_name) is not None, - version=None, - ) - - -def _command_support_for_unavailable_engine(detail: str) -> tuple[CommandSupport, ...]: - blocked = "blocked" - return ( - CommandSupport("doctor", "Any SQLAlchemy backend", blocked, detail), - CommandSupport("data-summary", "Any SQLAlchemy backend", blocked, detail), - CommandSupport("analyze-tables", "PostgreSQL/SQLite", blocked, detail), - CommandSupport("create-missing-tables", "Any SQLAlchemy backend", blocked, detail), - CommandSupport("indexes disable", "Any SQLAlchemy backend", blocked, detail), - CommandSupport( - "indexes enable", - "Any SQLAlchemy backend", - blocked, - detail, - ), - CommandSupport("reconcile-schema", "Any SQLAlchemy backend", blocked, detail), - CommandSupport("load-vocab-source", "SQLite/PostgreSQL + Athena CSV source", blocked, detail), - CommandSupport("backup-database", "PostgreSQL + pg_dump", blocked, detail), - CommandSupport("restore-database", 
"PostgreSQL + pg_restore/psql", blocked, detail), - CommandSupport("fulltext install", "PostgreSQL", blocked, detail), - CommandSupport("fulltext populate", "PostgreSQL", blocked, detail), - CommandSupport("fulltext drop", "PostgreSQL", blocked, detail), - CommandSupport("reset-sequences", "PostgreSQL", blocked, detail), - CommandSupport("truncate-tables", "PostgreSQL", blocked, detail), - CommandSupport("foreign-keys disable", "PostgreSQL", blocked, detail), - CommandSupport("foreign-keys enable", "PostgreSQL", blocked, detail), - CommandSupport("foreign-keys enable --strict", "PostgreSQL", blocked, detail), - CommandSupport("foreign-keys status", "PostgreSQL", blocked, detail), - CommandSupport("foreign-keys validate", "PostgreSQL", blocked, detail), - ) - - -def _command_support_for_backend( - *, - backend: str, - engine_created: bool, - engine_error: str | None, - connection_ready: bool, - connection_error: str | None, - pg_dump_path: str | None, - pg_restore_path: str | None, - psql_path: str | None, -) -> tuple[CommandSupport, ...]: - current_backend = backend_label(backend) - if not engine_created: - blocked_detail = ( - f"Backend resolved to {current_backend}, but the engine could not be created: {engine_error}" - if engine_error - else f"Backend resolved to {current_backend}, but the engine could not be created." - ) - else: - blocked_detail = ( - f"Backend resolved to {current_backend}, but the connection test failed: {connection_error}" - if connection_error - else f"Backend resolved to {current_backend}, but the connection test failed." - ) - portable_status = "ready" if connection_ready else "blocked" - portable_detail = ( - f"Ready on {current_backend}." - if connection_ready - else blocked_detail - ) - - if backend == Dialect.POSTGRESQL: - analyze_status = portable_status - analyze_detail = ( - "Ready on PostgreSQL; ANALYZE and VACUUM ANALYZE are both supported." 
- if connection_ready - else blocked_detail - ) - enable_indexes_status = portable_status - enable_indexes_detail = ( - "Ready on PostgreSQL; index DDL and clustering metadata are both supported." - if connection_ready - else blocked_detail - ) - postgresql_status = portable_status - postgresql_detail = ( - "Ready on PostgreSQL." - if connection_ready - else blocked_detail - ) - vocab_load_status = portable_status - vocab_load_detail = ( - "Ready on PostgreSQL when an Athena source path is configured." - if connection_ready - else blocked_detail - ) - elif backend == "sqlite": - analyze_status = "limited" if connection_ready else "blocked" - analyze_detail = ( - "Ready on SQLite; ANALYZE is supported, but `--vacuum` is unavailable." - if connection_ready - else blocked_detail - ) - enable_indexes_status = "limited" if connection_ready else "blocked" - enable_indexes_detail = ( - "Ready on SQLite; index DDL is supported, but clustering metadata will be skipped." - if connection_ready - else blocked_detail - ) - postgresql_status = "unsupported" if connection_ready else "blocked" - postgresql_detail = ( - f"Requires PostgreSQL. Current backend: {current_backend}." - if connection_ready - else blocked_detail - ) - vocab_load_status = portable_status - vocab_load_detail = ( - "Ready on SQLite when an Athena source path is configured." - if connection_ready - else blocked_detail - ) - else: - analyze_status = "unsupported" if connection_ready else "blocked" - analyze_detail = ( - f"Requires PostgreSQL or SQLite. Current backend: {current_backend}." - if connection_ready - else blocked_detail - ) - enable_indexes_status = "limited" if connection_ready else "blocked" - enable_indexes_detail = ( - f"Ready on {current_backend}; index DDL is supported, but clustering metadata will be skipped." - if connection_ready - else blocked_detail - ) - postgresql_status = "unsupported" if connection_ready else "blocked" - postgresql_detail = ( - f"Requires PostgreSQL. 
Current backend: {current_backend}." - if connection_ready - else blocked_detail - ) - vocab_load_status = "unsupported" if connection_ready else "blocked" - vocab_load_detail = ( - f"Requires SQLite or PostgreSQL plus a configured Athena source path. Current backend: {current_backend}." - if connection_ready - else blocked_detail - ) - - return ( - CommandSupport("doctor", "Any SQLAlchemy backend", portable_status, portable_detail), - CommandSupport("data-summary", "Any SQLAlchemy backend", portable_status, portable_detail), - CommandSupport("analyze-tables", "PostgreSQL/SQLite", analyze_status, analyze_detail), - CommandSupport("create-missing-tables", "Any SQLAlchemy backend", portable_status, portable_detail), - CommandSupport("indexes disable", "Any SQLAlchemy backend", portable_status, portable_detail), - CommandSupport("indexes enable", "Any SQLAlchemy backend", enable_indexes_status, enable_indexes_detail), - CommandSupport("reconcile-schema", "Any SQLAlchemy backend", portable_status, portable_detail), - CommandSupport("load-vocab-source", "SQLite/PostgreSQL + Athena CSV source", vocab_load_status, vocab_load_detail), - CommandSupport( - "backup-database", - "PostgreSQL + pg_dump", - ( - "ready" - if connection_ready and backend == Dialect.POSTGRESQL and pg_dump_path is not None - else "blocked" - if backend == Dialect.POSTGRESQL - else "unsupported" - if connection_ready - else "blocked" - ), - ( - "Ready on PostgreSQL; `pg_dump` is available." - if connection_ready and backend == Dialect.POSTGRESQL and pg_dump_path is not None - else "PostgreSQL is configured, but `pg_dump` is not on PATH." - if connection_ready and backend == Dialect.POSTGRESQL - else f"Requires PostgreSQL. Current backend: {current_backend}." 
- if connection_ready - else blocked_detail - ), - ), - CommandSupport( - "restore-database", - "PostgreSQL + pg_restore/psql", - ( - "ready" - if connection_ready and backend == Dialect.POSTGRESQL and (pg_restore_path is not None or psql_path is not None) - else "blocked" - if backend == Dialect.POSTGRESQL - else "unsupported" - if connection_ready - else "blocked" - ), - ( - "Ready on PostgreSQL; restore client tooling is available." - if connection_ready and backend == Dialect.POSTGRESQL and (pg_restore_path is not None or psql_path is not None) - else "PostgreSQL is configured, but neither `pg_restore` nor `psql` is on PATH." - if connection_ready and backend == Dialect.POSTGRESQL - else f"Requires PostgreSQL. Current backend: {current_backend}." - if connection_ready - else blocked_detail - ), - ), - CommandSupport("fulltext install", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("fulltext populate", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("fulltext drop", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("reset-sequences", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("truncate-tables", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("foreign-keys disable", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("foreign-keys enable", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("foreign-keys enable --strict", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("foreign-keys status", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("foreign-keys validate", "PostgreSQL", postgresql_status, postgresql_detail), - ) - - -def collect_maintenance_info( - *, - engine_schema: str | None = None, - db_schema: str | None = None, - dotenv: str | None = None, - vocabulary_included: bool = True, -) -> MaintenanceInfo: - load_environment(dotenv or "") - pg_dump_path = 
shutil.which("pg_dump") - pg_restore_path = shutil.which("pg_restore") - psql_path = shutil.which("psql") - defaults_file = defaults_path() - dependencies = ( - _dependency_status("sqlalchemy", "sqlalchemy"), - _dependency_status("typer", "typer"), - _dependency_status("rich", "rich"), - _dependency_status("psycopg", "psycopg"), - _dependency_status("psycopg2-binary", "psycopg2"), - _external_dependency_status("pg_dump", "pg_dump"), - _external_dependency_status("pg_restore", "pg_restore"), - _external_dependency_status("psql", "psql"), - ) - managed_tables = select_maintenance_tables( - exclude_categories=(() if vocabulary_included else (TableCategory.VOCABULARY,)) - ) - cli_path = shutil.which("omop-alchemy") - dotenv_exists = None if dotenv is None else os.path.exists(dotenv) - - engine_name: str | None = None - engine_url: str | None = None - backend: str | None = None - engine_created = False - engine_error: str | None = None - connection_ready = False - connection_error: str | None = None - existing_table_count: int | None = None - missing_table_count: int | None = None - - try: - engine_name = get_engine_name(engine_schema) - url = sa.engine.make_url(engine_name) - engine_url = url.render_as_string(hide_password=True) - backend = url.get_backend_name() - except RuntimeError as exc: - engine_error = str(exc) - except Exception as exc: - engine_error = f"Could not resolve engine configuration: {exc}" - - if engine_name is not None: - try: - engine = create_engine_with_dependencies(engine_name, future=True) - engine_created = True - except RuntimeError as exc: - engine_error = str(exc) - except Exception as exc: - engine_error = f"Could not create engine: {exc}" - else: - try: - with engine.connect() as connection: - connection.exec_driver_sql("SELECT 1") - connection_ready = True - missing_tables = collect_missing_tables( - engine, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - ) - missing_table_count = len(missing_tables) - 
existing_table_count = len(managed_tables) - missing_table_count - except SQLAlchemyError as exc: - connection_error = f"{exc.__class__.__name__}: {exc}" - except Exception as exc: - connection_error = str(exc) - finally: - engine.dispose() - - if backend is None: - command_support = _command_support_for_unavailable_engine( - engine_error or "No engine configuration could be resolved." - ) - else: - command_support = _command_support_for_backend( - backend=backend, - engine_created=engine_created, - engine_error=engine_error, - connection_ready=connection_ready, - connection_error=connection_error, - pg_dump_path=pg_dump_path, - pg_restore_path=pg_restore_path, - psql_path=psql_path, - ) - - return MaintenanceInfo( - package_version=_package_version(), - cli_path=cli_path, - pg_dump_path=pg_dump_path, - pg_restore_path=pg_restore_path, - psql_path=psql_path, - defaults_file=str(defaults_file), - defaults_exists=defaults_file.exists(), - dotenv_path=dotenv, - dotenv_exists=dotenv_exists, - engine_schema=engine_schema, - db_schema=db_schema, - engine_url=engine_url, - backend=backend, - engine_created=engine_created, - engine_error=engine_error, - connection_ready=connection_ready, - connection_error=connection_error, - managed_table_count=len(managed_tables), - existing_table_count=existing_table_count, - missing_table_count=missing_table_count, - vocabulary_included=vocabulary_included, - dependencies=dependencies, - command_support=command_support, - ) diff --git a/omop_alchemy/maintenance/reconcile.py b/omop_alchemy/maintenance/reconcile.py deleted file mode 100644 index ec3c514..0000000 --- a/omop_alchemy/maintenance/reconcile.py +++ /dev/null @@ -1,432 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass - -import sqlalchemy as sa - -from ..backend_support import Dialect -from .indexes import _cluster_target_name -from .tables import MaintenanceTable, TableCategory, select_maintenance_tables - - -@dataclass(frozen=True) -class 
ReconciliationIssue: - table_name: str - category: TableCategory - component: str - object_name: str - status: str - expected: str | None - actual: str | None - detail: str - - -@dataclass(frozen=True) -class TableReconciliationResult: - table_name: str - category: TableCategory - model_name: str - model_module: str - status: str - issue_count: int - detail: str - - -@dataclass(frozen=True) -class SchemaReconciliationReport: - backend: str - table_results: tuple[TableReconciliationResult, ...] - issues: tuple[ReconciliationIssue, ...] - - -def _selected_tables( - *, - vocabulary_included: bool, -) -> list[MaintenanceTable]: - excluded_categories: tuple[TableCategory, ...] = () - if not vocabulary_included: - excluded_categories = (TableCategory.VOCABULARY,) - return select_maintenance_tables(exclude_categories=excluded_categories) - - -def _schema_table(table: sa.Table, db_schema: str | None) -> sa.Table: - if db_schema is None: - return table - - metadata = sa.MetaData() - return table.to_metadata( - metadata, - schema=db_schema, - referred_schema_fn=( - lambda _table, to_schema, _constraint, _referred_schema: to_schema - ), - ) - - -def _normalized_type(type_: sa.types.TypeEngine[object], dialect: sa.engine.Dialect) -> str: - return type_.compile(dialect=dialect).lower().replace(" ", "") - - -def _expected_foreign_keys(table: sa.Table) -> dict[tuple[tuple[str, ...], str, tuple[str, ...]], sa.ForeignKeyConstraint]: - expected: dict[tuple[tuple[str, ...], str, tuple[str, ...]], sa.ForeignKeyConstraint] = {} - for constraint in table.foreign_key_constraints: - constrained_columns = tuple(element.parent.name for element in constraint.elements) - referred_columns = tuple(element.column.name for element in constraint.elements) - referred_table = constraint.referred_table.name - expected[(constrained_columns, referred_table, referred_columns)] = constraint - return expected - - -def _actual_foreign_keys( - inspector: sa.Inspector, - table_name: str, - db_schema: str | 
None, -) -> dict[tuple[tuple[str, ...], str, tuple[str, ...]], dict[str, object]]: - actual: dict[tuple[tuple[str, ...], str, tuple[str, ...]], dict[str, object]] = {} - for foreign_key in inspector.get_foreign_keys(table_name, schema=db_schema): - constrained_columns = tuple(foreign_key.get("constrained_columns") or []) - referred_columns = tuple(foreign_key.get("referred_columns") or []) - referred_table = str(foreign_key.get("referred_table")) - actual[(constrained_columns, referred_table, referred_columns)] = foreign_key - return actual - - -def _expected_indexes(table: sa.Table) -> dict[str, sa.Index]: - return { - str(index.name): index - for index in table.indexes - if index.name is not None - } - - -def _actual_indexes( - inspector: sa.Inspector, - table_name: str, - db_schema: str | None, -) -> dict[str, dict[str, object]]: - return { - str(index["name"]): index - for index in inspector.get_indexes(table_name, schema=db_schema) - if index.get("name") is not None - } - - -def _actual_cluster_index_name( - connection: sa.Connection, - *, - table_name: str, - db_schema: str | None, -) -> str | None: - result = connection.execute( - sa.text( - """ - SELECT i.relname - FROM pg_index ix - JOIN pg_class t ON t.oid = ix.indrelid - JOIN pg_class i ON i.oid = ix.indexrelid - JOIN pg_namespace n ON n.oid = t.relnamespace - WHERE ix.indisclustered - AND t.relname = :table_name - AND (:db_schema IS NULL OR n.nspname = :db_schema) - """ - ), - { - "table_name": table_name, - "db_schema": db_schema, - }, - ).scalar_one_or_none() - return str(result) if result is not None else None - - -def reconcile_schema( - engine: sa.Engine, - *, - db_schema: str | None = None, - vocabulary_included: bool = False, -) -> SchemaReconciliationReport: - inspector = sa.inspect(engine) - selected_tables = _selected_tables(vocabulary_included=vocabulary_included) - all_issues: list[ReconciliationIssue] = [] - table_results: list[TableReconciliationResult] = [] - - with engine.connect() as 
connection: - for maintenance_table in selected_tables: - table_issues: list[ReconciliationIssue] = [] - exists = inspector.has_table(maintenance_table.table_name, schema=db_schema) - if not exists: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="table", - object_name=maintenance_table.table_name, - status="missing", - expected="present", - actual="absent", - detail="ORM-managed table is missing from the target database.", - ) - ) - table_results.append( - TableReconciliationResult( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - model_name=maintenance_table.model_name, - model_module=maintenance_table.model_module, - status="missing", - issue_count=1, - detail="Table is missing from the target database.", - ) - ) - all_issues.extend(table_issues) - continue - - expected_table = _schema_table(maintenance_table.table, db_schema) - expected_columns = { - column.name: column - for column in expected_table.columns - } - actual_columns = { - str(column["name"]): column - for column in inspector.get_columns(maintenance_table.table_name, schema=db_schema) - } - actual_pk_names = tuple( - inspector.get_pk_constraint(maintenance_table.table_name, schema=db_schema).get("constrained_columns") or [] - ) - expected_pk_names = tuple(column.name for column in expected_table.primary_key.columns) - - for column_name, column in expected_columns.items(): - if column_name not in actual_columns: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="column", - object_name=column_name, - status="missing", - expected=_normalized_type(column.type, engine.dialect), - actual=None, - detail="Column is defined in ORM metadata but missing from the database.", - ) - ) - - for column_name, column in actual_columns.items(): - if column_name not in expected_columns: - 
table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="column", - object_name=column_name, - status="unexpected", - expected=None, - actual=_normalized_type(column["type"], engine.dialect), - detail="Column exists in the database but is not defined in ORM metadata.", - ) - ) - - for column_name in sorted(set(expected_columns).intersection(actual_columns)): - expected_column = expected_columns[column_name] - actual_column = actual_columns[column_name] - expected_type = _normalized_type(expected_column.type, engine.dialect) - actual_type = _normalized_type(actual_column["type"], engine.dialect) - if expected_type != actual_type: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="column", - object_name=column_name, - status="mismatch", - expected=expected_type, - actual=actual_type, - detail="Column type differs from ORM metadata.", - ) - ) - - expected_nullable = False if column_name in expected_pk_names else bool(expected_column.nullable) - actual_nullable = False if column_name in actual_pk_names else bool(actual_column["nullable"]) - if expected_nullable != actual_nullable: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="column", - object_name=column_name, - status="mismatch", - expected="nullable" if expected_nullable else "not nullable", - actual="nullable" if actual_nullable else "not nullable", - detail="Column nullability differs from ORM metadata.", - ) - ) - - if expected_pk_names != actual_pk_names: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="primary_key", - object_name=maintenance_table.table_name, - status="mismatch", - expected=", ".join(expected_pk_names), - actual=", ".join(actual_pk_names) if 
actual_pk_names else None, - detail="Primary key columns differ from ORM metadata.", - ) - ) - - expected_foreign_keys = _expected_foreign_keys(expected_table) - actual_foreign_keys = _actual_foreign_keys( - inspector, - maintenance_table.table_name, - db_schema, - ) - - for signature, constraint in expected_foreign_keys.items(): - if signature not in actual_foreign_keys: - constrained_columns, referred_table, referred_columns = signature - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="foreign_key", - object_name=constraint.name or ",".join(constrained_columns), - status="missing", - expected=f"{','.join(constrained_columns)} -> {referred_table}({','.join(referred_columns)})", - actual=None, - detail="Foreign key is defined in ORM metadata but missing from the database.", - ) - ) - - for signature, foreign_key in actual_foreign_keys.items(): - if signature not in expected_foreign_keys: - constrained_columns, referred_table, referred_columns = signature - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="foreign_key", - object_name=str(foreign_key.get("name") or ",".join(constrained_columns)), - status="unexpected", - expected=None, - actual=f"{','.join(constrained_columns)} -> {referred_table}({','.join(referred_columns)})", - detail="Foreign key exists in the database but is not defined in ORM metadata.", - ) - ) - - expected_indexes = _expected_indexes(expected_table) - actual_indexes = _actual_indexes( - inspector, - maintenance_table.table_name, - db_schema, - ) - - for index_name, index in expected_indexes.items(): - if index_name not in actual_indexes: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="index", - object_name=index_name, - status="missing", - expected=", ".join(column.name for 
column in index.columns), - actual=None, - detail="Index is defined in ORM metadata but missing from the database.", - ) - ) - continue - - actual_index = actual_indexes[index_name] - expected_columns_for_index = tuple(column.name for column in index.columns) - actual_columns_for_index = tuple(actual_index.get("column_names") or []) - if expected_columns_for_index != actual_columns_for_index: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="index", - object_name=index_name, - status="mismatch", - expected=", ".join(expected_columns_for_index), - actual=", ".join(actual_columns_for_index) if actual_columns_for_index else None, - detail="Index columns differ from ORM metadata.", - ) - ) - if bool(index.unique) != bool(actual_index.get("unique")): - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="index", - object_name=index_name, - status="mismatch", - expected="unique" if index.unique else "non-unique", - actual="unique" if actual_index.get("unique") else "non-unique", - detail="Index uniqueness differs from ORM metadata.", - ) - ) - - for index_name, index in actual_indexes.items(): - if index_name not in expected_indexes: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="index", - object_name=index_name, - status="unexpected", - expected=None, - actual=", ".join(index.get("column_names") or []), - detail="Index exists in the database but is not defined in ORM metadata.", - ) - ) - - if engine.dialect.name == Dialect.POSTGRESQL: - expected_cluster = _cluster_target_name(maintenance_table) - actual_cluster = _actual_cluster_index_name( - connection, - table_name=maintenance_table.table_name, - db_schema=db_schema, - ) - if expected_cluster != actual_cluster: - table_issues.append( - 
ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="cluster", - object_name=maintenance_table.table_name, - status=( - "missing" - if expected_cluster and not actual_cluster - else "unexpected" - if actual_cluster and not expected_cluster - else "mismatch" - ), - expected=expected_cluster, - actual=actual_cluster, - detail="Table clustering differs from ORM metadata.", - ) - ) - - table_status = "matched" if not table_issues else "drifted" - table_results.append( - TableReconciliationResult( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - model_name=maintenance_table.model_name, - model_module=maintenance_table.model_module, - status=table_status, - issue_count=len(table_issues), - detail=( - "No differences detected." - if not table_issues - else f"{len(table_issues)} difference(s) detected." - ), - ) - ) - all_issues.extend(table_issues) - - return SchemaReconciliationReport( - backend=engine.dialect.name, - table_results=tuple(table_results), - issues=tuple(all_issues), - ) diff --git a/omop_alchemy/maintenance/reset_sequences.py b/omop_alchemy/maintenance/reset_sequences.py deleted file mode 100644 index aa92b3d..0000000 --- a/omop_alchemy/maintenance/reset_sequences.py +++ /dev/null @@ -1,139 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass - -import sqlalchemy as sa - -from ..backend_support import Dialect, require_backend -from .tables import TableCategory, qualified_table_name, select_omop_tables - - -@dataclass(frozen=True) -class SequenceTarget: - table_name: str - category: TableCategory - model_name: str - model_module: str - pk_column_name: str - - -@dataclass(frozen=True) -class SequenceResetResult: - table_name: str - category: TableCategory - model_name: str - model_module: str - pk_column_name: str - sequence_name: str | None - next_value: int | None - status: str - detail: str -def collect_sequence_targets( - *, - 
vocabulary_included: bool = False, -) -> list[SequenceTarget]: - targets: list[SequenceTarget] = [] - for table in select_omop_tables( - vocabulary_included=vocabulary_included, - require_single_integer_primary_key=True, - ): - pk_column_name = table.single_primary_key_name - if pk_column_name is None: - continue - targets.append( - SequenceTarget( - table_name=table.table_name, - category=table.category, - model_name=table.model_name, - model_module=table.model_module, - pk_column_name=pk_column_name, - ) - ) - return targets -def _ensure_postgresql_supported(engine: sa.Engine) -> None: - require_backend( - engine, - feature="Sequence reset", - supported_dialects=(Dialect.POSTGRESQL,), - ) - - -def reset_model_sequences( - engine: sa.Engine, - *, - db_schema: str | None = None, - vocabulary_included: bool = False, - dry_run: bool = False, -) -> list[SequenceResetResult]: - _ensure_postgresql_supported(engine) - - inspector = sa.inspect(engine) - targets = collect_sequence_targets(vocabulary_included=vocabulary_included) - results: list[SequenceResetResult] = [] - - with engine.begin() as connection: - for target in targets: - if not inspector.has_table(target.table_name, schema=db_schema): - continue - - fully_qualified_table_name = qualified_table_name(target.table_name, db_schema) - sequence_name = connection.execute( - sa.text("SELECT pg_get_serial_sequence(:table_name, :column_name)"), - { - "table_name": fully_qualified_table_name, - "column_name": target.pk_column_name, - }, - ).scalar_one_or_none() - - if sequence_name is None: - results.append( - SequenceResetResult( - table_name=target.table_name, - category=target.category, - model_name=target.model_name, - model_module=target.model_module, - pk_column_name=target.pk_column_name, - sequence_name=None, - next_value=None, - status="skipped", - detail="no owned PostgreSQL sequence found", - ) - ) - continue - - current_max = connection.execute( - sa.text( - f"SELECT COALESCE(MAX({target.pk_column_name}), 0) 
" - f"FROM {fully_qualified_table_name}" - ) - ).scalar_one() - next_value = int(current_max) + 1 - - if not dry_run: - connection.execute( - sa.text("SELECT setval(:sequence_name, :next_value, false)"), - { - "sequence_name": sequence_name, - "next_value": next_value, - }, - ) - - results.append( - SequenceResetResult( - table_name=target.table_name, - category=target.category, - model_name=target.model_name, - model_module=target.model_module, - pk_column_name=target.pk_column_name, - sequence_name=sequence_name, - next_value=next_value, - status="planned" if dry_run else "reset", - detail=( - "sequence would be reset from table max + 1" - if dry_run - else "sequence reset from table max + 1" - ), - ) - ) - - return results diff --git a/omop_alchemy/maintenance/truncate_tables.py b/omop_alchemy/maintenance/truncate_tables.py deleted file mode 100644 index 9cd4853..0000000 --- a/omop_alchemy/maintenance/truncate_tables.py +++ /dev/null @@ -1,154 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass - -import sqlalchemy as sa - -from ..backend_support import Dialect, require_backend -from .tables import ( - TableCategory, - TableScope, - qualified_table_name, - resolve_maintenance_tables, -) - - -@dataclass(frozen=True) -class TruncateTableResult: - table_name: str - category: TableCategory - model_name: str - model_module: str - row_count: int | None - status: str - detail: str - - -def _blocking_foreign_key_references( - inspector: sa.Inspector, - *, - db_schema: str | None, - selected_table_names: set[str], -) -> dict[str, set[str]]: - blockers: dict[str, set[str]] = {} - - for table_name in inspector.get_table_names(schema=db_schema): - if table_name in selected_table_names: - continue - - for foreign_key in inspector.get_foreign_keys(table_name, schema=db_schema): - referred_table = foreign_key.get("referred_table") - if referred_table not in selected_table_names: - continue - blockers.setdefault(str(referred_table), 
set()).add(table_name) - - return blockers - - -def _format_blocking_reference_error(blockers: dict[str, set[str]]) -> str: - blocker_parts = [ - f"{table_name} <- {', '.join(sorted(referencing_tables))}" - for table_name, referencing_tables in sorted(blockers.items()) - ] - preview = "; ".join(blocker_parts[:5]) - if len(blocker_parts) > 5: - preview = f"{preview}; +{len(blocker_parts) - 5} more" - - return ( - "Truncation would be blocked by foreign key references from tables outside the current selection. " - f"Blocking references: {preview}. " - "Use `--cascade`, expand the table selection, or disable foreign key trigger enforcement first." - ) - - -def truncate_tables( - engine: sa.Engine, - *, - db_schema: str | None = None, - scope: TableScope | None = None, - table_names: tuple[str, ...] | None = None, - restart_identities: bool = False, - cascade: bool = False, - dry_run: bool = False, -) -> list[TruncateTableResult]: - if scope is not None and table_names is not None: - raise RuntimeError("Use either `scope` or `table_names`, not both.") - if scope is None and table_names is None: - raise RuntimeError("Select tables to truncate with `scope` or `table_names`.") - - require_backend( - engine, - feature="Table truncation", - supported_dialects=(Dialect.POSTGRESQL,), - ) - - selected_tables = resolve_maintenance_tables( - scope=scope, - table_names=table_names, - ) - inspector = sa.inspect(engine) - results: list[TruncateTableResult] = [] - existing_tables: list[str] = [] - - with engine.begin() as connection: - for maintenance_table in selected_tables: - if not inspector.has_table(maintenance_table.table_name, schema=db_schema): - results.append( - TruncateTableResult( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - model_name=maintenance_table.model_name, - model_module=maintenance_table.model_module, - row_count=None, - status="skipped", - detail="table not present in target database", - ) - ) - continue - - row_count = 
int( - connection.exec_driver_sql( - f"SELECT COUNT(*) FROM {qualified_table_name(maintenance_table.table_name, db_schema)}" - ).scalar_one() - ) - existing_tables.append(maintenance_table.table_name) - results.append( - TruncateTableResult( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - model_name=maintenance_table.model_name, - model_module=maintenance_table.model_module, - row_count=row_count, - status="planned" if dry_run else "applied", - detail=( - "table would be truncated" - if dry_run - else "table truncated" - ), - ) - ) - - if existing_tables and not dry_run and not cascade: - blockers = _blocking_foreign_key_references( - inspector, - db_schema=db_schema, - selected_table_names=set(existing_tables), - ) - if blockers: - raise RuntimeError(_format_blocking_reference_error(blockers)) - - if existing_tables and not dry_run: - truncate_sql = ( - "TRUNCATE TABLE " - + ", ".join( - qualified_table_name(table_name, db_schema) - for table_name in existing_tables - ) - ) - if restart_identities: - truncate_sql += " RESTART IDENTITY" - if cascade: - truncate_sql += " CASCADE" - connection.exec_driver_sql(truncate_sql) - - return results diff --git a/omop_alchemy/maintenance/ui.py b/omop_alchemy/maintenance/ui.py index 3bcd4e9..0d2202b 100644 --- a/omop_alchemy/maintenance/ui.py +++ b/omop_alchemy/maintenance/ui.py @@ -1,6 +1,7 @@ from __future__ import annotations from collections.abc import Iterable +from typing import TYPE_CHECKING from rich import box from rich.console import Console, Group, RenderableType @@ -10,29 +11,36 @@ from omop_alchemy.cdm.handlers.fulltext import FullTextResult -from .analyze_tables import AnalyzeTableResult -from .ascii import render_banner -from .backup import DatabaseBackupResult, DatabaseRestoreResult from ..backend_support import backend_label -from .create_tables import TableCreationResult -from .data_summary import TableSummaryResult -from .defaults import ConnectionDefaults -from .doctor 
import DoctorCheck, DoctorRecommendation, DoctorReport -from .foreign_keys import ( - ForeignKeyAction, - ForeignKeyManagementResult, - ForeignKeyStatusResult, - ForeignKeyConstraintViolation, - ForeignKeyValidationReport, - ForeignKeyValidationResult, -) -from .info import CommandSupport, MaintenanceInfo -from .indexes import IndexAction, IndexManagementResult -from .load_vocab import VocabularyLoadReport, VocabularyLoadResult -from .reconcile import ReconciliationIssue, SchemaReconciliationReport, TableReconciliationResult -from .reset_sequences import SequenceResetResult +from .ascii import render_banner from .tables import TableCategory -from .truncate_tables import TruncateTableResult + +if TYPE_CHECKING: + from .cli_backup import DatabaseBackupResult, DatabaseRestoreResult + from .cli_config import ConnectionDefaults + from .cli_foreign_keys import ( + ForeignKeyAction, + ForeignKeyConstraintViolation, + ForeignKeyManagementResult, + ForeignKeyStatusResult, + ForeignKeyValidationReport, + ForeignKeyValidationResult, + ) + from .cli_indexes import IndexAction, IndexManagementResult + from .cli_schema import ( + CommandSupport, + DoctorCheck, + DoctorRecommendation, + DoctorReport, + MaintenanceInfo, + ReconciliationIssue, + SchemaReconciliationReport, + TableCreationResult, + TableReconciliationResult, + TableSummaryResult, + ) + from .cli_tables import AnalyzeTableResult, SequenceResetResult, TruncateTableResult + from .cli_vocab import VocabularyLoadReport, VocabularyLoadResult console = Console() @@ -687,7 +695,7 @@ def render_foreign_key_summary(results: Iterable[ForeignKeyManagementResult], *, def render_foreign_key_note(action: ForeignKeyAction, *, strict: bool = False) -> Panel: - if action is ForeignKeyAction.DISABLE: + if action == "disable": body = ( "PostgreSQL keeps the foreign key constraints defined in metadata. " "This command disables the internal RI triggers that enforce them." 
@@ -898,7 +906,7 @@ def render_index_results(results: Iterable[IndexManagementResult]) -> Renderable def render_index_note(action: IndexAction) -> Panel: body = ( "This command drops SQLAlchemy metadata-defined secondary indexes that currently exist in the database. Primary keys and constraints are not removed." - if action is IndexAction.DISABLE + if action == "disable" else "This command recreates SQLAlchemy metadata-defined secondary indexes that are currently missing from the database and applies PostgreSQL clustering declared in ORM metadata when the backend supports it." ) return Panel.fit(body, title="[bold]Note[/bold]", border_style="yellow") diff --git a/tests/conftest.py b/tests/conftest.py index 6c3422c..26427f7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,7 +11,7 @@ from typing import Any, Dict, Tuple -from omop_alchemy.maintenance.load_vocab import _load_vocab_model_csv +from omop_alchemy.maintenance.cli_vocab import _load_vocab_model_csv from omop_alchemy.cdm.model.clinical import Condition_Occurrence, Person from omop_alchemy.cdm.model.derived import Observation_Period from omop_alchemy.cdm.model.structural import Episode, Episode_Event diff --git a/tests/test_analyze_tables.py b/tests/test_analyze_tables.py index b15ccd8..f8bc7a3 100644 --- a/tests/test_analyze_tables.py +++ b/tests/test_analyze_tables.py @@ -2,9 +2,9 @@ import pytest from typer.testing import CliRunner -from omop_alchemy.maintenance.analyze_tables import AnalyzeTableResult, analyze_tables +from omop_alchemy.maintenance.cli_tables import AnalyzeTableResult, analyze_tables from omop_alchemy.maintenance.cli import app -from omop_alchemy.maintenance.create_tables import create_missing_tables +from omop_alchemy.maintenance.cli_schema import create_missing_tables from omop_alchemy.maintenance.tables import TableCategory, TableScope runner = CliRunner() diff --git a/tests/test_cli_config.py b/tests/test_cli_config.py index c997656..a90ef97 100644 --- 
a/tests/test_cli_config.py +++ b/tests/test_cli_config.py @@ -1,8 +1,8 @@ from typer.testing import CliRunner from omop_alchemy.maintenance.cli import app -from omop_alchemy.maintenance.defaults import defaults_path, load_connection_defaults -from omop_alchemy.maintenance.indexes import IndexAction, IndexManagementResult +from omop_alchemy.maintenance.cli_config import defaults_path, load_connection_defaults +from omop_alchemy.maintenance.cli_indexes import IndexAction, IndexManagementResult from omop_alchemy.maintenance.tables import TableCategory @@ -75,19 +75,19 @@ def fake_manage_indexes( return [] monkeypatch.setattr( - "omop_alchemy.maintenance.cli.load_environment", + "omop_alchemy.maintenance._cli_utils.load_environment", fake_load_environment, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.get_engine_name", + "omop_alchemy.maintenance._cli_utils.get_engine_name", fake_get_engine_name, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.create_engine_with_dependencies", + "omop_alchemy.maintenance._cli_utils.create_engine_with_dependencies", fake_create_engine, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.manage_indexes", + "omop_alchemy.maintenance.cli_indexes.manage_indexes", fake_manage_indexes, ) diff --git a/tests/test_create_tables.py b/tests/test_create_tables.py index 149dd35..4dd3484 100644 --- a/tests/test_create_tables.py +++ b/tests/test_create_tables.py @@ -1,6 +1,6 @@ import sqlalchemy as sa -from omop_alchemy.maintenance.create_tables import collect_missing_tables, create_missing_tables +from omop_alchemy.maintenance.cli_schema import collect_missing_tables, create_missing_tables def _engine(tmp_path): diff --git a/tests/test_data_summary.py b/tests/test_data_summary.py index 66a7bac..0b4605d 100644 --- a/tests/test_data_summary.py +++ b/tests/test_data_summary.py @@ -1,7 +1,7 @@ import sqlalchemy as sa -from omop_alchemy.maintenance.create_tables import create_missing_tables -from omop_alchemy.maintenance.data_summary 
import collect_data_summary +from omop_alchemy.maintenance.cli_schema import create_missing_tables +from omop_alchemy.maintenance.cli_schema import collect_data_summary def _engine(tmp_path): diff --git a/tests/test_foreign_keys.py b/tests/test_foreign_keys.py index 3596d5a..61a0609 100644 --- a/tests/test_foreign_keys.py +++ b/tests/test_foreign_keys.py @@ -3,8 +3,8 @@ from typer.testing import CliRunner from omop_alchemy.maintenance.cli import app -from omop_alchemy.maintenance.create_tables import create_missing_tables -from omop_alchemy.maintenance.foreign_keys import ( +from omop_alchemy.maintenance.cli_schema import create_missing_tables +from omop_alchemy.maintenance.cli_foreign_keys import ( ForeignKeyAction, ForeignKeyConstraintViolation, validate_foreign_key_constraints, @@ -83,15 +83,15 @@ def fake_create_engine(url: str, *, future: bool) -> sa.Engine: return sa.create_engine(url, future=future) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.load_environment", + "omop_alchemy.maintenance._cli_utils.load_environment", fake_load_environment, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.get_engine_name", + "omop_alchemy.maintenance._cli_utils.get_engine_name", fake_get_engine_name, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.create_engine_with_dependencies", + "omop_alchemy.maintenance._cli_utils.create_engine_with_dependencies", fake_create_engine, ) @@ -124,11 +124,11 @@ def begin(self): return _FakeConnection() monkeypatch.setattr( - "omop_alchemy.maintenance.foreign_keys._ensure_postgresql_supported", + "omop_alchemy.maintenance.cli_foreign_keys._ensure_postgresql_supported", lambda engine, *, feature: None, ) monkeypatch.setattr( - "omop_alchemy.maintenance.foreign_keys.collect_foreign_key_targets", + "omop_alchemy.maintenance.cli_foreign_keys.collect_foreign_key_targets", lambda engine, *, db_schema=None, vocabulary_included=False: [ type("Target", (), { "table_name": "person", @@ -149,7 +149,7 @@ def begin(self): ], ) 
monkeypatch.setattr( - "omop_alchemy.maintenance.foreign_keys._collect_strict_validation_failures", + "omop_alchemy.maintenance.cli_foreign_keys._collect_strict_validation_failures", lambda connection, *, db_schema=None, vocabulary_included=False: { "visit_occurrence": [ ForeignKeyConstraintViolation( @@ -194,11 +194,11 @@ def begin(self): return _FakeConnection() monkeypatch.setattr( - "omop_alchemy.maintenance.foreign_keys._ensure_postgresql_supported", + "omop_alchemy.maintenance.cli_foreign_keys._ensure_postgresql_supported", lambda engine, *, feature: None, ) monkeypatch.setattr( - "omop_alchemy.maintenance.foreign_keys.collect_foreign_key_targets", + "omop_alchemy.maintenance.cli_foreign_keys.collect_foreign_key_targets", lambda engine, *, db_schema=None, vocabulary_included=False: [ type("Target", (), { "table_name": "person", @@ -211,7 +211,7 @@ def begin(self): ], ) monkeypatch.setattr( - "omop_alchemy.maintenance.foreign_keys._collect_strict_validation_failures", + "omop_alchemy.maintenance.cli_foreign_keys._collect_strict_validation_failures", lambda connection, *, db_schema=None, vocabulary_included=False: {}, ) @@ -260,19 +260,19 @@ def fake_manage_foreign_key_triggers( return [] monkeypatch.setattr( - "omop_alchemy.maintenance.cli.load_environment", + "omop_alchemy.maintenance._cli_utils.load_environment", fake_load_environment, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.get_engine_name", + "omop_alchemy.maintenance._cli_utils.get_engine_name", fake_get_engine_name, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.create_engine_with_dependencies", + "omop_alchemy.maintenance._cli_utils.create_engine_with_dependencies", fake_create_engine, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.manage_foreign_key_triggers", + "omop_alchemy.maintenance.cli_foreign_keys.manage_foreign_key_triggers", fake_manage_foreign_key_triggers, ) @@ -301,11 +301,11 @@ def connect(self): return _FakeConnection() monkeypatch.setattr( - 
"omop_alchemy.maintenance.foreign_keys._ensure_postgresql_supported", + "omop_alchemy.maintenance.cli_foreign_keys._ensure_postgresql_supported", lambda engine, *, feature: None, ) monkeypatch.setattr( - "omop_alchemy.maintenance.foreign_keys.collect_foreign_key_targets", + "omop_alchemy.maintenance.cli_foreign_keys.collect_foreign_key_targets", lambda engine, *, db_schema=None, vocabulary_included=False: [ type("Target", (), { "table_name": "person", @@ -326,7 +326,7 @@ def connect(self): ], ) monkeypatch.setattr( - "omop_alchemy.maintenance.foreign_keys._collect_strict_validation_failures", + "omop_alchemy.maintenance.cli_foreign_keys._collect_strict_validation_failures", lambda connection, *, db_schema=None, vocabulary_included=False: { "visit_occurrence": [ ForeignKeyConstraintViolation( @@ -370,7 +370,7 @@ def fake_validate_foreign_key_constraints( db_schema: str | None = None, vocabulary_included: bool = False, ): - from omop_alchemy.maintenance.foreign_keys import ( + from omop_alchemy.maintenance.cli_foreign_keys import ( ForeignKeyConstraintViolation, ForeignKeyValidationReport, ForeignKeyValidationResult, @@ -406,19 +406,19 @@ def fake_validate_foreign_key_constraints( ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.load_environment", + "omop_alchemy.maintenance._cli_utils.load_environment", fake_load_environment, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.get_engine_name", + "omop_alchemy.maintenance._cli_utils.get_engine_name", fake_get_engine_name, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.create_engine_with_dependencies", + "omop_alchemy.maintenance._cli_utils.create_engine_with_dependencies", fake_create_engine, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.validate_foreign_key_constraints", + "omop_alchemy.maintenance.cli_foreign_keys.validate_foreign_key_constraints", fake_validate_foreign_key_constraints, ) diff --git a/tests/test_fulltext.py b/tests/test_fulltext.py index b43684e..5a798c4 100644 --- 
a/tests/test_fulltext.py +++ b/tests/test_fulltext.py @@ -231,11 +231,11 @@ def fake_install_fulltext_columns( ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli._build_engine", + "omop_alchemy.maintenance.cli_fulltext.build_engine", fake_build_engine, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.install_fulltext_columns", + "omop_alchemy.maintenance.cli_fulltext.install_fulltext_columns", fake_install_fulltext_columns, ) diff --git a/tests/test_indexes.py b/tests/test_indexes.py index 4c34b0d..615e980 100644 --- a/tests/test_indexes.py +++ b/tests/test_indexes.py @@ -3,8 +3,8 @@ from omop_alchemy.cdm.base.indexing import OMOP_CLUSTER_INDEX_INFO_KEY, omop_index_name from omop_alchemy.maintenance.cli import app -from omop_alchemy.maintenance.create_tables import create_missing_tables -from omop_alchemy.maintenance.indexes import ( +from omop_alchemy.maintenance.cli_schema import create_missing_tables +from omop_alchemy.maintenance.cli_indexes import ( IndexAction, IndexManagementResult, collect_index_targets, @@ -162,19 +162,19 @@ def fake_manage_indexes( ] monkeypatch.setattr( - "omop_alchemy.maintenance.cli.load_environment", + "omop_alchemy.maintenance._cli_utils.load_environment", fake_load_environment, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.get_engine_name", + "omop_alchemy.maintenance._cli_utils.get_engine_name", fake_get_engine_name, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.create_engine_with_dependencies", + "omop_alchemy.maintenance._cli_utils.create_engine_with_dependencies", fake_create_engine, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.manage_indexes", + "omop_alchemy.maintenance.cli_indexes.manage_indexes", fake_manage_indexes, ) diff --git a/tests/test_load_vocab_postgres.py b/tests/test_load_vocab_postgres.py index d8a2f87..8fc2149 100644 --- a/tests/test_load_vocab_postgres.py +++ b/tests/test_load_vocab_postgres.py @@ -12,7 +12,7 @@ import sqlalchemy as sa from omop_alchemy.cdm.model.vocabulary 
import Concept -from omop_alchemy.maintenance.load_vocab import ( +from omop_alchemy.maintenance.cli_vocab import ( _load_vocab_model_csv, load_vocab_source, ) diff --git a/tests/test_load_vocab_source.py b/tests/test_load_vocab_source.py index fbb8a59..a2b00f2 100644 --- a/tests/test_load_vocab_source.py +++ b/tests/test_load_vocab_source.py @@ -6,8 +6,8 @@ from typer.testing import CliRunner from omop_alchemy.maintenance.cli import app -from omop_alchemy.maintenance.defaults import defaults_path -from omop_alchemy.maintenance.load_vocab import ( +from omop_alchemy.maintenance.cli_config import defaults_path +from omop_alchemy.maintenance.cli_vocab import ( OPTIONAL_VOCAB_MODELS, REQUIRED_VOCAB_MODELS, MergeStrategy, @@ -70,12 +70,13 @@ def fake_load_vocab_model_csv( merge_strategy, quote_mode="auto", chunksize=None, + index_strategy="auto", ) -> int: loaded_tables.append((model.__tablename__, merge_strategy, quote_mode, csv_path)) return 1 monkeypatch.setattr( - "omop_alchemy.maintenance.load_vocab._load_vocab_model_csv", + "omop_alchemy.maintenance.cli_vocab._load_vocab_model_csv", fake_load_vocab_model_csv, ) @@ -171,9 +172,10 @@ def fake_load_vocab_source( dry_run: bool = False, merge_strategy: MergeStrategy = "replace", chunksize: int | None = None, + bulk_mode: bool = True, progress_callback=None, ): - from omop_alchemy.maintenance.load_vocab import VocabularyLoadReport, VocabularyLoadResult + from omop_alchemy.maintenance.cli_vocab import VocabularyLoadReport, VocabularyLoadResult calls["engine"] = engine calls["source_path"] = str(source_path) @@ -200,19 +202,19 @@ def fake_load_vocab_source( ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.load_environment", + "omop_alchemy.maintenance._cli_utils.load_environment", fake_load_environment, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.get_engine_name", + "omop_alchemy.maintenance._cli_utils.get_engine_name", fake_get_engine_name, ) monkeypatch.setattr( - 
"omop_alchemy.maintenance.cli.create_engine_with_dependencies", + "omop_alchemy.maintenance._cli_utils.create_engine_with_dependencies", fake_create_engine, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.load_vocab_source", + "omop_alchemy.maintenance.cli_vocab.load_vocab_source", fake_load_vocab_source, ) @@ -258,7 +260,7 @@ def staging_tablename() -> str: return "_staging_concept" @staticmethod - def load_csv(session, path, *, merge_strategy, quote_mode): + def load_csv(session, path, *, merge_strategy, quote_mode, index_strategy="auto"): return 7 @staticmethod @@ -267,7 +269,7 @@ def create_staging_table(session): calls: dict[str, object] = {} - def fake_load_csv(session, path, *, merge_strategy, quote_mode): + def fake_load_csv(session, path, *, merge_strategy, quote_mode, index_strategy="auto"): calls["merge_strategy"] = merge_strategy calls["quote_mode"] = quote_mode calls["path"] = path @@ -312,12 +314,13 @@ def fake_load_vocab_model_csv( merge_strategy, quote_mode="auto", chunksize=None, + index_strategy="auto", ) -> int: loaded_order.append(model.__tablename__) return 1 monkeypatch.setattr( - "omop_alchemy.maintenance.load_vocab._load_vocab_model_csv", + "omop_alchemy.maintenance.cli_vocab._load_vocab_model_csv", fake_load_vocab_model_csv, ) @@ -344,11 +347,12 @@ def fake_load_vocab_model_csv( merge_strategy, quote_mode="auto", chunksize=None, + index_strategy="auto", ) -> int: return 1 monkeypatch.setattr( - "omop_alchemy.maintenance.load_vocab._load_vocab_model_csv", + "omop_alchemy.maintenance.cli_vocab._load_vocab_model_csv", fake_load_vocab_model_csv, ) @@ -370,7 +374,7 @@ def test_load_vocab_source_wraps_failed_table_load(monkeypatch, tmp_path): engine = sa.create_engine(f"sqlite:///{tmp_path / 'load_vocab_source_error.db'}", future=True) source_path = _build_required_athena_source(tmp_path) - def fake_load_vocab_model_csv(session, *, model, csv_path, merge_strategy, quote_mode="auto", chunksize=None): + def fake_load_vocab_model_csv(session, 
*, model, csv_path, merge_strategy, quote_mode="auto", chunksize=None, index_strategy="auto"): if model.__tablename__ == "domain": raise sa.exc.ProgrammingError( "COPY domain FROM STDIN", @@ -380,7 +384,7 @@ def fake_load_vocab_model_csv(session, *, model, csv_path, merge_strategy, quote return 1 monkeypatch.setattr( - "omop_alchemy.maintenance.load_vocab._load_vocab_model_csv", + "omop_alchemy.maintenance.cli_vocab._load_vocab_model_csv", fake_load_vocab_model_csv, ) @@ -408,7 +412,7 @@ def staging_tablename() -> str: return "_staging_drug_strength" @staticmethod - def load_csv(session, path, *, merge_strategy, quote_mode): + def load_csv(session, path, *, merge_strategy, quote_mode, index_strategy="auto"): raise NotImplementedError @staticmethod @@ -417,7 +421,7 @@ def create_staging_table(session): calls = {"load_csv": 0, "create_staging_table": 0} - def fake_load_csv(session, path, *, merge_strategy, quote_mode): + def fake_load_csv(session, path, *, merge_strategy, quote_mode, index_strategy="auto"): calls["load_csv"] += 1 if calls["load_csv"] == 1: raise sa.exc.ProgrammingError( @@ -460,11 +464,11 @@ def fail_load_vocab_source(*args, **kwargs): ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli._build_engine", + "omop_alchemy.maintenance.cli_vocab.build_engine", fake_build_engine, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.load_vocab_source", + "omop_alchemy.maintenance.cli_vocab.load_vocab_source", fail_load_vocab_source, ) @@ -519,12 +523,13 @@ def fake_load_vocab_model_csv( merge_strategy, quote_mode="auto", chunksize=None, + index_strategy="auto", ) -> int: received_quote_modes.append(quote_mode) return 1 monkeypatch.setattr( - "omop_alchemy.maintenance.load_vocab._load_vocab_model_csv", + "omop_alchemy.maintenance.cli_vocab._load_vocab_model_csv", fake_load_vocab_model_csv, ) diff --git a/tests/test_truncate_tables.py b/tests/test_truncate_tables.py index af348c2..eee3297 100644 --- a/tests/test_truncate_tables.py +++ 
b/tests/test_truncate_tables.py @@ -4,12 +4,12 @@ from typer.testing import CliRunner from omop_alchemy.maintenance.cli import app -from omop_alchemy.maintenance.create_tables import create_missing_tables +from omop_alchemy.maintenance.cli_schema import create_missing_tables from omop_alchemy.maintenance.tables import TableCategory, TableScope -from omop_alchemy.maintenance.truncate_tables import TruncateTableResult, truncate_tables +from omop_alchemy.maintenance.cli_tables import TruncateTableResult, truncate_tables runner = CliRunner() -truncate_tables_module = importlib.import_module("omop_alchemy.maintenance.truncate_tables") +truncate_tables_module = importlib.import_module("omop_alchemy.maintenance.cli_tables") def test_truncate_tables_requires_postgresql(tmp_path): @@ -92,19 +92,19 @@ def fake_truncate_tables( ] monkeypatch.setattr( - "omop_alchemy.maintenance.cli.load_environment", + "omop_alchemy.maintenance._cli_utils.load_environment", fake_load_environment, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.get_engine_name", + "omop_alchemy.maintenance._cli_utils.get_engine_name", fake_get_engine_name, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.create_engine_with_dependencies", + "omop_alchemy.maintenance._cli_utils.create_engine_with_dependencies", fake_create_engine, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli.truncate_tables", + "omop_alchemy.maintenance.cli_tables.truncate_tables", fake_truncate_tables, ) From 903520710a5a0b3349c5f77d8e0ea65a1cefb515 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Fri, 22 May 2026 04:40:33 +0000 Subject: [PATCH 02/25] Big rework regarding backends and CLI, removal of standalone fulltext bit, better UI --- omop_alchemy/__init__.py | 3 +- omop_alchemy/backend_support.py | 50 -- omop_alchemy/backends/__init__.py | 34 ++ omop_alchemy/backends/base.py | 284 +++++++++++ omop_alchemy/backends/postgres.py | 460 ++++++++++++++++++ omop_alchemy/backends/resolve.py | 32 ++ 
omop_alchemy/backends/sqlite.py | 28 ++ omop_alchemy/cdm/handlers/__init__.py | 16 - .../cdm/handlers/fulltext/__init__.py | 33 -- .../cdm/handlers/fulltext/fulltext.py | 422 ---------------- omop_alchemy/config.py | 87 +--- omop_alchemy/db.py | 253 ++++++++++ omop_alchemy/logger_config.py | 26 + omop_alchemy/maintenance/__init__.py | 22 +- omop_alchemy/maintenance/_cli_utils.py | 90 ++-- omop_alchemy/maintenance/cli.py | 5 +- omop_alchemy/maintenance/cli_backup.py | 331 ++++--------- omop_alchemy/maintenance/cli_config.py | 254 ++-------- omop_alchemy/maintenance/cli_foreign_keys.py | 375 +++++++------- omop_alchemy/maintenance/cli_fulltext.py | 298 ++++++++++-- omop_alchemy/maintenance/cli_indexes.py | 111 +++-- omop_alchemy/maintenance/cli_schema.py | 344 ++++++++----- omop_alchemy/maintenance/cli_tables.py | 221 +++++---- omop_alchemy/maintenance/cli_vocab.py | 107 ++-- omop_alchemy/maintenance/help.py | 14 +- omop_alchemy/maintenance/ui.py | 67 +-- omop_alchemy/py.typed | 0 tests/test_analyze_tables.py | 2 +- tests/test_cli_config.py | 20 +- tests/test_config_driver.py | 2 +- tests/test_foreign_keys.py | 96 ++-- tests/test_fulltext.py | 59 +-- tests/test_indexes.py | 6 +- tests/test_load_vocab_source.py | 10 +- tests/test_truncate_tables.py | 10 +- 35 files changed, 2371 insertions(+), 1801 deletions(-) delete mode 100644 omop_alchemy/backend_support.py create mode 100644 omop_alchemy/backends/__init__.py create mode 100644 omop_alchemy/backends/base.py create mode 100644 omop_alchemy/backends/postgres.py create mode 100644 omop_alchemy/backends/resolve.py create mode 100644 omop_alchemy/backends/sqlite.py delete mode 100644 omop_alchemy/cdm/handlers/fulltext/__init__.py delete mode 100644 omop_alchemy/cdm/handlers/fulltext/fulltext.py create mode 100644 omop_alchemy/db.py create mode 100644 omop_alchemy/logger_config.py delete mode 100644 omop_alchemy/py.typed diff --git a/omop_alchemy/__init__.py b/omop_alchemy/__init__.py index 51bdf87..e78895b 100644 --- 
a/omop_alchemy/__init__.py +++ b/omop_alchemy/__init__.py @@ -1,4 +1,5 @@ -from .config import create_engine_with_dependencies, load_environment, get_engine_name, TEST_PATH, ROOT_PATH +from .config import load_environment, TEST_PATH, ROOT_PATH +from .db import get_engine_name, create_engine_with_dependencies from .errors import CDMValidationError diff --git a/omop_alchemy/backend_support.py b/omop_alchemy/backend_support.py deleted file mode 100644 index ce58843..0000000 --- a/omop_alchemy/backend_support.py +++ /dev/null @@ -1,50 +0,0 @@ -from __future__ import annotations - -from collections.abc import Iterable -from enum import StrEnum - -import sqlalchemy as sa - -class Dialect(StrEnum): - POSTGRESQL = "postgresql" - SQLITE = "sqlite" - -POSTGRESQL_ONLY_HELP = "PostgreSQL only" - -_DIALECT_LABELS: dict[Dialect, str] = { - Dialect.POSTGRESQL: "PostgreSQL", - Dialect.SQLITE: "SQLite", -} - -def backend_label(dialect_name: str) -> str: - try: - return _DIALECT_LABELS[Dialect(dialect_name)] - except ValueError: - return dialect_name - -def supports_backend( - engine: sa.Engine, - *, - supported_dialects: Iterable[Dialect], -) -> bool: - return engine.dialect.name in tuple(supported_dialects) - - -def require_backend( - engine: sa.Engine, - *, - feature: str, - supported_dialects: Iterable[Dialect], -) -> None: - supported = tuple(supported_dialects) - if engine.dialect.name in supported: - return - - supported_label = ", ".join( - backend_label(dialect) - for dialect in sorted(supported) - ) - raise RuntimeError( - f"{feature} is only supported for {supported_label} engines. " - f"Current dialect: '{engine.dialect.name}'." 
- ) diff --git a/omop_alchemy/backends/__init__.py b/omop_alchemy/backends/__init__.py new file mode 100644 index 0000000..4958bb9 --- /dev/null +++ b/omop_alchemy/backends/__init__.py @@ -0,0 +1,34 @@ +from .base import ( + Backend, + BackendNotSupportedError, + CONCEPT_NAME_TSVECTOR_COLUMN, + CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN, + FullTextAction, + FullTextError, + FullTextResult, + FullTextTargetConfig, + backend_supports, + require_backend_support, + backend_support_note, +) +from .postgres import PostgresBackend +from .sqlite import SQLiteBackend +from .resolve import resolve_backend, SupportedDialect + +__all__ = [ + "Backend", + "BackendNotSupportedError", + "CONCEPT_NAME_TSVECTOR_COLUMN", + "CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN", + "FullTextAction", + "FullTextError", + "FullTextResult", + "FullTextTargetConfig", + "backend_supports", + "require_backend_support", + "backend_support_note", + "PostgresBackend", + "SQLiteBackend", + "resolve_backend", + "SupportedDialect", +] diff --git a/omop_alchemy/backends/base.py b/omop_alchemy/backends/base.py new file mode 100644 index 0000000..2bdc390 --- /dev/null +++ b/omop_alchemy/backends/base.py @@ -0,0 +1,284 @@ +from __future__ import annotations +from abc import ABC, abstractmethod +from dataclasses import dataclass +from enum import StrEnum +from typing import TYPE_CHECKING, Any +import sqlalchemy as sa + +if TYPE_CHECKING: + from sqlalchemy.sql import ColumnElement + + +# ── Fulltext types ──────────────────────────────────────────────────────────── + +CONCEPT_NAME_TSVECTOR_COLUMN = "concept_name_tsvector" +CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN = "concept_synonym_name_tsvector" + + +@dataclass(frozen=True) +class FullTextTargetConfig: + """Primitive fulltext target owned by the backend — no ORM dependencies.""" + table_name: str + source_column_name: str + vector_column_name: str + index_name: str + + +class FullTextAction(StrEnum): + INSTALL = "install" + POPULATE = "populate" + DROP = "drop" + + 
+@dataclass(frozen=True) +class FullTextResult: + target_name: str + table_name: str + source_column_name: str + vector_column_name: str + index_name: str + action: FullTextAction + status: str + detail: str + row_count: int | None = None + + +class FullTextError(RuntimeError): + """Raised when a full-text search maintenance operation fails.""" + + +# ── Backend errors ──────────────────────────────────────────────────────────── + +class BackendNotSupportedError(RuntimeError): + """Raised when the active backend does not implement a required operation.""" + + def __init__(self, feature: str, backend: "Backend") -> None: + super().__init__(f"'{feature}' is not supported by the {backend.name} backend.") + self.feature = feature + self.backend_name = backend.name + + +def backend_supports(backend: "Backend", method_name: str) -> bool: + """True if this backend class overrides *method_name* from the base Backend class.""" + return getattr(type(backend), method_name) is not getattr(Backend, method_name) + + +def require_backend_support(backend: "Backend", method_name: str, feature: str) -> None: + """Raise BackendNotSupportedError if this backend does not override *method_name*.""" + if not backend_supports(backend, method_name): + raise BackendNotSupportedError(feature, backend) + + +def backend_support_note(method_name: str) -> str: + """Return a help-text note listing which known backends support *method_name*. + + Derived purely from the class hierarchy — no manual list to maintain. + """ + from .resolve import _DIALECT_TO_BACKEND_MAP + + supported = sorted( + b.name for b in _DIALECT_TO_BACKEND_MAP.values() + if backend_supports(b, method_name) + ) + return f"Supported backends: {', '.join(supported)}." if supported else "Not supported by any backend." + + +class Backend(ABC): + + @property + @abstractmethod + def name(self) -> str: ... + + @property + @abstractmethod + def dialect(self) -> str: ... 
+ + # ── FK trigger management ──────────────────────────────────────────────── + + def toggle_fk_triggers( + self, + conn: sa.Connection, + table_name: str, + db_schema: str | None, + *, + enable: bool, + ) -> None: + raise BackendNotSupportedError("FK trigger management", self) + + def get_fk_trigger_counts( + self, + conn: sa.Connection, + table_name: str, + db_schema: str | None, + ) -> tuple[int, int]: + """Return (disabled_count, enabled_count) for RI triggers on the table.""" + raise BackendNotSupportedError("FK trigger status inspection", self) + + def count_fk_violations( + self, + conn: sa.Connection, + source_table: str, + referred_table: str, + constrained_cols: list[str], + referred_cols: list[str], + db_schema: str | None, + ) -> int: + raise BackendNotSupportedError("FK constraint violation counting", self) + + # ── Clustering ─────────────────────────────────────────────────────────── + + def cluster_table( + self, + conn: sa.Connection, + table_name: str, + index_name: str, + db_schema: str | None, + ) -> None: + raise BackendNotSupportedError("Table clustering", self) + + def get_clustered_index_name( + self, + conn: sa.Connection, + table_name: str, + db_schema: str | None, + ) -> str | None: + raise BackendNotSupportedError("Cluster index inspection", self) + + # ── Table operations ───────────────────────────────────────────────────── + + @abstractmethod + def analyze_table( + self, + conn: sa.Connection, + table_name: str, + db_schema: str | None, + *, + vacuum: bool = False, + ) -> None: ... 
+ + def truncate_table_batch( + self, + conn: sa.Connection, + table_names: list[str], + db_schema: str | None, + *, + restart_identities: bool, + cascade: bool, + ) -> None: + raise BackendNotSupportedError("TRUNCATE with RESTART IDENTITY / CASCADE", self) + + # ── Sequence management ────────────────────────────────────────────────── + + def find_sequence_name( + self, + conn: sa.Connection, + table_name: str, + column_name: str, + db_schema: str | None, + ) -> str | None: + raise BackendNotSupportedError("Owned sequence lookup", self) + + def set_sequence_value( + self, + conn: sa.Connection, + sequence_name: str, + value: int, + ) -> None: + raise BackendNotSupportedError("Sequence value reset", self) + + # ── Schema context ─────────────────────────────────────────────────────── + + def configure_schema_context( + self, + conn: sa.Connection, + db_schema: str | None, + ) -> None: + pass # no-op by default; PostgreSQL overrides with SET search_path + + # ── Full-text search ───────────────────────────────────────────────────── + + @property + def fulltext_targets(self) -> tuple[FullTextTargetConfig, ...]: + """Return the fulltext target configs managed by this backend. 
Empty by default.""" + return () + + def register_fulltext_metadata(self) -> None: + """Append tsvector sidecar columns to SQLAlchemy ORM metadata for this backend's targets.""" + raise BackendNotSupportedError("Full-text metadata registration", self) + + def unregister_fulltext_metadata(self) -> None: + """Remove tsvector sidecar columns from SQLAlchemy ORM metadata.""" + raise BackendNotSupportedError("Full-text metadata unregistration", self) + + def concept_name_tsvector_expression( + self, *, regconfig: str = "english" + ) -> "ColumnElement[Any]": + """Return a SQLAlchemy expression for the concept_name tsvector.""" + raise BackendNotSupportedError("Full-text search expression", self) + + def concept_synonym_name_tsvector_expression( + self, *, regconfig: str = "english" + ) -> "ColumnElement[Any]": + """Return a SQLAlchemy expression for the concept_synonym_name tsvector.""" + raise BackendNotSupportedError("Full-text search expression", self) + + def install_fulltext_on_table( + self, + conn: sa.Connection, + *, + table_name: str, + vector_column_name: str, + index_name: str, + db_schema: str | None, + create_indexes: bool, + fastupdate: bool, + ) -> None: + raise BackendNotSupportedError("Full-text search", self) + + def populate_fulltext_on_table( + self, + conn: sa.Connection, + *, + table_name: str, + vector_column_name: str, + source_column_name: str, + db_schema: str | None, + regconfig: str, + ) -> int | None: + raise BackendNotSupportedError("Full-text search", self) + + def drop_fulltext_on_table( + self, + conn: sa.Connection, + *, + table_name: str, + vector_column_name: str, + index_name: str, + db_schema: str | None, + drop_indexes: bool, + ) -> None: + raise BackendNotSupportedError("Full-text search", self) + + # ── Backup / restore ───────────────────────────────────────────────────── + + def prepare_backup( + self, + engine: sa.Engine, + output_path: str, + backup_format: str, + db_schema: str | None, + ) -> tuple[str, list[str], dict[str, 
str], str]: + """Return (tool_path, command, env, database_name). subprocess.run stays in CLI.""" + raise BackendNotSupportedError("Database backup", self) + + def prepare_restore( + self, + engine: sa.Engine, + input_path: str, + backup_format: str, + db_schema: str | None, + ) -> tuple[str, list[str], dict[str, str], str]: + """Return (tool_path, command, env, database_name). subprocess.run stays in CLI.""" + raise BackendNotSupportedError("Database restore", self) + + diff --git a/omop_alchemy/backends/postgres.py b/omop_alchemy/backends/postgres.py new file mode 100644 index 0000000..0cc5f3c --- /dev/null +++ b/omop_alchemy/backends/postgres.py @@ -0,0 +1,460 @@ +from __future__ import annotations + +import os +import shutil + +import sqlalchemy as sa + +from sqlalchemy.dialects.postgresql import TSVECTOR +from sqlalchemy.sql import func + +from .base import Backend, FullTextTargetConfig + + +def _qualified(table_name: str, db_schema: str | None) -> str: + if db_schema: + return f'"{db_schema}"."{table_name}"' + return f'"{table_name}"' + + +class PostgresBackend(Backend): + + @property + def name(self) -> str: + return "PostgreSQL" + + @property + def dialect(self) -> str: + return "postgresql" + + # ── FK trigger management ──────────────────────────────────────────────── + + def toggle_fk_triggers( + self, + conn: sa.Connection, + table_name: str, + db_schema: str | None, + *, + enable: bool, + ) -> None: + action = "ENABLE" if enable else "DISABLE" + conn.exec_driver_sql( + f"ALTER TABLE {_qualified(table_name, db_schema)} {action} TRIGGER ALL" + ) + + def get_fk_trigger_counts( + self, + conn: sa.Connection, + table_name: str, + db_schema: str | None, + ) -> tuple[int, int]: + disabled_count, enabled_count = conn.execute( + sa.text( + """ + SELECT + SUM(CASE WHEN t.tgenabled = 'D' THEN 1 ELSE 0 END), + SUM(CASE WHEN t.tgenabled <> 'D' THEN 1 ELSE 0 END) + FROM pg_trigger t + JOIN pg_class c ON c.oid = t.tgrelid + JOIN pg_namespace n ON n.oid = 
c.relnamespace + WHERE t.tgisinternal + AND t.tgname LIKE 'RI_ConstraintTrigger%' + AND c.relname = :table_name + AND (:db_schema IS NULL OR n.nspname = :db_schema) + """ + ), + {"table_name": table_name, "db_schema": db_schema}, + ).one() + return int(disabled_count or 0), int(enabled_count or 0) + + def count_fk_violations( + self, + conn: sa.Connection, + source_table: str, + referred_table: str, + constrained_cols: list[str], + referred_cols: list[str], + db_schema: str | None, + ) -> int: + source = _qualified(source_table, db_schema) + referred = _qualified(referred_table, db_schema) + non_null_predicate = " AND ".join( + f"src.{col} IS NOT NULL" for col in constrained_cols + ) + join_predicate = " AND ".join( + f"ref.{ref_col} = src.{src_col}" + for src_col, ref_col in zip(constrained_cols, referred_cols, strict=True) + ) + return int( + conn.exec_driver_sql( + f""" + SELECT COUNT(*) + FROM {source} AS src + WHERE {non_null_predicate} + AND NOT EXISTS ( + SELECT 1 + FROM {referred} AS ref + WHERE {join_predicate} + ) + """ + ).scalar_one() + ) + + # ── Clustering ─────────────────────────────────────────────────────────── + + def cluster_table( + self, + conn: sa.Connection, + table_name: str, + index_name: str, + db_schema: str | None, + ) -> None: + conn.exec_driver_sql( + f"CLUSTER {_qualified(table_name, db_schema)} USING {index_name}" + ) + + def get_clustered_index_name( + self, + conn: sa.Connection, + table_name: str, + db_schema: str | None, + ) -> str | None: + result = conn.execute( + sa.text( + """ + SELECT i.relname + FROM pg_index ix + JOIN pg_class t ON t.oid = ix.indrelid + JOIN pg_class i ON i.oid = ix.indexrelid + JOIN pg_namespace n ON n.oid = t.relnamespace + WHERE ix.indisclustered + AND t.relname = :table_name + AND (:db_schema IS NULL OR n.nspname = :db_schema) + """ + ), + {"table_name": table_name, "db_schema": db_schema}, + ).scalar_one_or_none() + return str(result) if result is not None else None + + # ── Table operations 
───────────────────────────────────────────────────── + + def analyze_table( + self, + conn: sa.Connection, + table_name: str, + db_schema: str | None, + *, + vacuum: bool = False, + ) -> None: + operation = "VACUUM ANALYZE" if vacuum else "ANALYZE" + conn.exec_driver_sql(f"{operation} {_qualified(table_name, db_schema)}") + + def truncate_table_batch( + self, + conn: sa.Connection, + table_names: list[str], + db_schema: str | None, + *, + restart_identities: bool, + cascade: bool, + ) -> None: + sql = "TRUNCATE TABLE " + ", ".join( + _qualified(name, db_schema) for name in table_names + ) + if restart_identities: + sql += " RESTART IDENTITY" + if cascade: + sql += " CASCADE" + conn.exec_driver_sql(sql) + + # ── Sequence management ────────────────────────────────────────────────── + + def find_sequence_name( + self, + conn: sa.Connection, + table_name: str, + column_name: str, + db_schema: str | None, + ) -> str | None: + fully_qualified = _qualified(table_name, db_schema) + return conn.execute( + sa.text("SELECT pg_get_serial_sequence(:table_name, :column_name)"), + {"table_name": fully_qualified, "column_name": column_name}, + ).scalar_one_or_none() + + def set_sequence_value( + self, + conn: sa.Connection, + sequence_name: str, + value: int, + ) -> None: + conn.execute( + sa.text("SELECT setval(:sequence_name, :value, false)"), + {"sequence_name": sequence_name, "value": value}, + ) + + # ── Schema context ─────────────────────────────────────────────────────── + + def configure_schema_context( + self, + conn: sa.Connection, + db_schema: str | None, + ) -> None: + if db_schema is None: + return + quoted = '"' + db_schema.replace('"', '""') + '"' + conn.exec_driver_sql(f"SET search_path TO {quoted}") + + # ── Full-text search ───────────────────────────────────────────────────── + + @property + def fulltext_targets(self) -> tuple[FullTextTargetConfig, ...]: + return ( + FullTextTargetConfig( + table_name="concept", + source_column_name="concept_name", + 
vector_column_name="concept_name_tsvector", + index_name="idx_gin_concept_name_tsvector", + ), + FullTextTargetConfig( + table_name="concept_synonym", + source_column_name="concept_synonym_name", + vector_column_name="concept_synonym_name_tsvector", + index_name="idx_gin_concept_synonym_name_tsvector", + ), + ) + + def register_fulltext_metadata(self) -> None: + from typing import cast + from ..cdm.model.vocabulary.concept import Concept + from ..cdm.model.vocabulary.concept_synonym import Concept_Synonym + table_map = { + "concept": cast(sa.Table, Concept.__table__), + "concept_synonym": cast(sa.Table, Concept_Synonym.__table__), + } + for cfg in self.fulltext_targets: + table = table_map[cfg.table_name] + if cfg.vector_column_name not in table.c: + table.append_column(sa.Column(cfg.vector_column_name, TSVECTOR, nullable=True)) + + def unregister_fulltext_metadata(self) -> None: + from typing import cast + from ..cdm.model.vocabulary.concept import Concept + from ..cdm.model.vocabulary.concept_synonym import Concept_Synonym + table_map = { + "concept": cast(sa.Table, Concept.__table__), + "concept_synonym": cast(sa.Table, Concept_Synonym.__table__), + } + for cfg in self.fulltext_targets: + table = table_map[cfg.table_name] + column = table.c.get(cfg.vector_column_name) + if column is not None: + table._columns.remove(column) + + def concept_name_tsvector_expression(self, *, regconfig: str = "english") -> sa.ColumnElement: + from typing import cast + from ..cdm.model.vocabulary.concept import Concept + col = cast(sa.Table, Concept.__table__).c.get("concept_name_tsvector") + if col is not None: + return col + return func.to_tsvector(regconfig, func.coalesce(Concept.concept_name, "")) + + def concept_synonym_name_tsvector_expression(self, *, regconfig: str = "english") -> sa.ColumnElement: + from typing import cast + from ..cdm.model.vocabulary.concept_synonym import Concept_Synonym + col = cast(sa.Table, 
Concept_Synonym.__table__).c.get("concept_synonym_name_tsvector") + if col is not None: + return col + return func.to_tsvector(regconfig, func.coalesce(Concept_Synonym.concept_synonym_name, "")) + + def install_fulltext_on_table( + self, + conn: sa.Connection, + *, + table_name: str, + vector_column_name: str, + index_name: str, + db_schema: str | None, + create_indexes: bool, + fastupdate: bool, + ) -> None: + qualified_table = _qualified(table_name, db_schema) + conn.exec_driver_sql( + f"ALTER TABLE {qualified_table} ADD COLUMN IF NOT EXISTS {vector_column_name} tsvector" + ) + if create_indexes: + qualified_index = f"{db_schema}.{index_name}" if db_schema else index_name + conn.exec_driver_sql( + f"CREATE INDEX IF NOT EXISTS {qualified_index}" + f" ON {qualified_table} USING GIN ({vector_column_name})" + f" WITH (fastupdate = {'on' if fastupdate else 'off'})" + ) + + def populate_fulltext_on_table( + self, + conn: sa.Connection, + *, + table_name: str, + vector_column_name: str, + source_column_name: str, + db_schema: str | None, + regconfig: str, + ) -> int | None: + result = conn.execute( + sa.text( + f"UPDATE {_qualified(table_name, db_schema)}" + f" SET {vector_column_name} = to_tsvector(" + f" CAST(:regconfig AS regconfig), coalesce({source_column_name}, '')" + f" )" + ), + {"regconfig": regconfig}, + ) + if result.rowcount is None or result.rowcount < 0: + return None + return int(result.rowcount) + + def drop_fulltext_on_table( + self, + conn: sa.Connection, + *, + table_name: str, + vector_column_name: str, + index_name: str, + db_schema: str | None, + drop_indexes: bool, + ) -> None: + if drop_indexes: + qualified_index = f"{db_schema}.{index_name}" if db_schema else index_name + conn.exec_driver_sql(f"DROP INDEX IF EXISTS {qualified_index}") + conn.exec_driver_sql( + f"ALTER TABLE {_qualified(table_name, db_schema)}" + f" DROP COLUMN IF EXISTS {vector_column_name}" + ) + + # ── Backup / restore ───────────────────────────────────────────────────── + + 
def prepare_backup( + self, + engine: sa.Engine, + output_path: str, + backup_format: str, + db_schema: str | None, + ) -> tuple[str, list[str], dict[str, str], str]: + tool_path = _pg_dump_path() + url = engine.url + database_name = url.database + if not database_name: + raise RuntimeError( + "Database backup requires a database name in the configured engine URL." + ) + connection_uri = _libpq_connection_uri(url) + command = [ + tool_path, + "--format", backup_format, + "--file", output_path, + "--dbname", connection_uri, + "--no-password", + "--no-owner", + "--no-privileges", + ] + if db_schema: + command.extend(["--schema", db_schema]) + env = os.environ.copy() + if url.password: + env["PGPASSWORD"] = str(url.password) + return tool_path, command, env, database_name + + def prepare_restore( + self, + engine: sa.Engine, + input_path: str, + backup_format: str, + db_schema: str | None, + ) -> tuple[str, list[str], dict[str, str], str]: + url = engine.url + database_name = url.database + if not database_name: + raise RuntimeError( + "Database restore requires a database name in the configured engine URL." 
+ ) + connection_uri = _libpq_connection_uri(url) + + if backup_format == "custom": + tool_path = _pg_restore_path() + command = [ + tool_path, + "--dbname", connection_uri, + "--no-password", + "--no-owner", + "--no-privileges", + "--exit-on-error", + ] + if db_schema: + command.extend(["--schema", db_schema]) + command.append(input_path) + else: + tool_path = _psql_path() + command = [ + tool_path, + "--dbname", connection_uri, + "--no-password", + "--set", "ON_ERROR_STOP=1", + "--single-transaction", + "--file", input_path, + ] + + env = os.environ.copy() + if url.password: + env["PGPASSWORD"] = str(url.password) + return tool_path, command, env, database_name + + +# ── subprocess tool helpers ─────────────────────────────────────────────────── + +def _pg_dump_path() -> str: + tool_path = shutil.which("pg_dump") + if tool_path is None: + raise RuntimeError( + "The `pg_dump` executable is required for database backups but was not found on PATH. " + "Install PostgreSQL client tools and ensure `pg_dump` is available." + ) + return tool_path + + +def _pg_restore_path() -> str: + tool_path = shutil.which("pg_restore") + if tool_path is None: + raise RuntimeError( + "The `pg_restore` executable is required to restore custom PostgreSQL dumps " + "but was not found on PATH. " + "Install PostgreSQL client tools and ensure `pg_restore` is available." + ) + return tool_path + + +def _psql_path() -> str: + tool_path = shutil.which("psql") + if tool_path is None: + raise RuntimeError( + "The `psql` executable is required to restore plain SQL PostgreSQL dumps " + "but was not found on PATH. " + "Install PostgreSQL client tools and ensure `psql` is available." + ) + return tool_path + + +def _libpq_connection_uri(url: sa.engine.URL) -> str: + if not url.database: + raise RuntimeError( + "Database backup requires a database name in the configured engine URL." 
+ ) + libpq_url = sa.engine.URL.create( + drivername="postgresql", + username=url.username, + password=None, + host=url.host, + port=url.port, + database=url.database, + query=url.query, + ) + return libpq_url.render_as_string(hide_password=False) diff --git a/omop_alchemy/backends/resolve.py b/omop_alchemy/backends/resolve.py new file mode 100644 index 0000000..2152cd4 --- /dev/null +++ b/omop_alchemy/backends/resolve.py @@ -0,0 +1,32 @@ +from __future__ import annotations +from enum import StrEnum + +import sqlalchemy as sa + +from .base import Backend +from .postgres import PostgresBackend +from .sqlite import SQLiteBackend + + +class SupportedDialect(StrEnum): + POSTGRESQL = "postgresql" + SQLITE = "sqlite" + +_DIALECT_TO_BACKEND_MAP: dict[SupportedDialect, Backend] = { + SupportedDialect.POSTGRESQL: PostgresBackend(), + SupportedDialect.SQLITE: SQLiteBackend(), +} + +def resolve_backend(engine: sa.Engine) -> Backend: + dialect = engine.dialect.name + try: + supported_dialect = SupportedDialect(dialect) + except ValueError: + raise RuntimeError( + f"Unsupported database dialect: '{dialect}'. " + f"Supported dialects: {', '.join(sorted(SupportedDialect))}." 
+ ) + return _DIALECT_TO_BACKEND_MAP[supported_dialect] + + + diff --git a/omop_alchemy/backends/sqlite.py b/omop_alchemy/backends/sqlite.py new file mode 100644 index 0000000..d7c8316 --- /dev/null +++ b/omop_alchemy/backends/sqlite.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +import sqlalchemy as sa + +from .base import Backend, BackendNotSupportedError + + +class SQLiteBackend(Backend): + + @property + def name(self) -> str: + return "SQLite" + + @property + def dialect(self) -> str: + return "sqlite" + + def analyze_table( + self, + conn: sa.Connection, + table_name: str, + db_schema: str | None, + *, + vacuum: bool = False, + ) -> None: + if vacuum: + raise BackendNotSupportedError("VACUUM ANALYZE", self) + conn.exec_driver_sql(f'ANALYZE "{table_name}"') diff --git a/omop_alchemy/cdm/handlers/__init__.py b/omop_alchemy/cdm/handlers/__init__.py index 8da9c41..a5963ee 100644 --- a/omop_alchemy/cdm/handlers/__init__.py +++ b/omop_alchemy/cdm/handlers/__init__.py @@ -1,22 +1,6 @@ -from .fulltext import ( - concept_name_tsvector_expression, - concept_synonym_name_tsvector_expression, - drop_fulltext_columns, - install_fulltext_columns, - populate_fulltext_columns, - register_optional_fulltext_columns, - unregister_optional_fulltext_columns, -) from .vocabs_and_mappers import make_concept_resolver, ConceptResolverRegistry __all__ = [ - "concept_name_tsvector_expression", - "concept_synonym_name_tsvector_expression", - "drop_fulltext_columns", "make_concept_resolver", - "install_fulltext_columns", - "populate_fulltext_columns", - "register_optional_fulltext_columns", "ConceptResolverRegistry", - "unregister_optional_fulltext_columns", ] diff --git a/omop_alchemy/cdm/handlers/fulltext/__init__.py b/omop_alchemy/cdm/handlers/fulltext/__init__.py deleted file mode 100644 index 7d1e717..0000000 --- a/omop_alchemy/cdm/handlers/fulltext/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -from .fulltext import ( - CONCEPT_NAME_TSVECTOR_COLUMN, - 
CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN, - FULLTEXT_TARGETS, - FullTextAction, - FullTextError, - FullTextResult, - FullTextTarget, - concept_name_tsvector_expression, - concept_synonym_name_tsvector_expression, - drop_fulltext_columns, - install_fulltext_columns, - populate_fulltext_columns, - register_optional_fulltext_columns, - unregister_optional_fulltext_columns, -) - -__all__ = [ - "CONCEPT_NAME_TSVECTOR_COLUMN", - "CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN", - "FULLTEXT_TARGETS", - "FullTextAction", - "FullTextError", - "FullTextResult", - "FullTextTarget", - "concept_name_tsvector_expression", - "concept_synonym_name_tsvector_expression", - "drop_fulltext_columns", - "install_fulltext_columns", - "populate_fulltext_columns", - "register_optional_fulltext_columns", - "unregister_optional_fulltext_columns", -] diff --git a/omop_alchemy/cdm/handlers/fulltext/fulltext.py b/omop_alchemy/cdm/handlers/fulltext/fulltext.py deleted file mode 100644 index 45afca3..0000000 --- a/omop_alchemy/cdm/handlers/fulltext/fulltext.py +++ /dev/null @@ -1,422 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from enum import StrEnum -from typing import Any, cast - -import sqlalchemy as sa -from sqlalchemy.dialects.postgresql import TSVECTOR -from sqlalchemy.engine import Engine -from sqlalchemy.sql import ColumnElement, ColumnExpressionArgument, func - -from ...model.vocabulary.concept import Concept -from ...model.vocabulary.concept_synonym import Concept_Synonym - -from ....backend_support import Dialect - - -@dataclass(frozen=True) -class FullTextTarget: - name: str - table: sa.Table - source_column_name: str - vector_column_name: str - index_name: str - - @property - def table_name(self) -> str: - return self.table.name - - @property - def source_column(self) -> sa.Column[object]: - return self.table.c[self.source_column_name] - - -class FullTextAction(StrEnum): - INSTALL = "install" - POPULATE = "populate" - DROP = "drop" - - 
-@dataclass(frozen=True) -class FullTextResult: - target_name: str - table_name: str - source_column_name: str - vector_column_name: str - index_name: str - action: FullTextAction - status: str - detail: str - row_count: int | None = None - - -class FullTextError(RuntimeError): - """Raised when PostgreSQL full-text maintenance fails.""" - - -CONCEPT_NAME_TSVECTOR_COLUMN = "concept_name_tsvector" -CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN = "concept_synonym_name_tsvector" - -FULLTEXT_TARGETS = ( - FullTextTarget( - name=Concept.__tablename__, - table=cast(sa.Table, Concept.__table__), - source_column_name="concept_name", - vector_column_name=CONCEPT_NAME_TSVECTOR_COLUMN, - index_name="idx_gin_concept_name_tsvector", - ), - FullTextTarget( - name=Concept_Synonym.__tablename__, - table=cast(sa.Table, Concept_Synonym.__table__), - source_column_name="concept_synonym_name", - vector_column_name=CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN, - index_name="idx_gin_concept_synonym_name_tsvector", - ), -) - - -def _fulltext_target_for_table(table_name: str) -> FullTextTarget: - for target in FULLTEXT_TARGETS: - if target.table_name == table_name: - return target - raise RuntimeError(f"No full-text target configured for table `{table_name}`.") - - -def _stored_tsvector_column( - table: sa.FromClause, - column_name: str, -) -> sa.ColumnElement[Any] | None: - column = table.c.get(column_name) - if column is None: - return None - return column - - -def _computed_tsvector( - text_column: ColumnExpressionArgument[str], - *, - regconfig: str, -) -> ColumnElement[object]: - return func.to_tsvector(regconfig, func.coalesce(text_column, "")) - - -def _optional_tsvector_column(target: FullTextTarget) -> sa.Column[Any]: - return sa.Column(target.vector_column_name, TSVECTOR, nullable=True) - - -def register_optional_fulltext_columns() -> None: - """ - Register optional sidecar tsvector columns on SQLAlchemy metadata. 
- - This mutates ORM table metadata for the current process so query builders can - point at stored full-text columns when they exist in the database. - """ - for target in FULLTEXT_TARGETS: - if target.vector_column_name in target.table.c: - continue - target.table.append_column(_optional_tsvector_column(target)) - - -def unregister_optional_fulltext_columns() -> None: - """ - Remove optional sidecar tsvector columns from SQLAlchemy metadata. - - This is useful after explicitly dropping the columns from the database, so - future query builders in the same process fall back to inline expressions. - """ - for target in FULLTEXT_TARGETS: - column = target.table.c.get(target.vector_column_name) - if column is None: - continue - target.table._columns.remove(column) - - -def concept_name_tsvector_expression(*, regconfig: str = "english") -> ColumnElement[object]: - stored = _stored_tsvector_column( - Concept.__table__, - CONCEPT_NAME_TSVECTOR_COLUMN, - ) - if stored is not None: - return stored - return _computed_tsvector(Concept.concept_name, regconfig=regconfig) - - -def concept_synonym_name_tsvector_expression( - *, - regconfig: str = "english", -) -> ColumnElement[object]: - stored = _stored_tsvector_column( - Concept_Synonym.__table__, - CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN, - ) - if stored is not None: - return stored - return _computed_tsvector( - Concept_Synonym.concept_synonym_name, - regconfig=regconfig, - ) - - -def _qualified_index_name(index_name: str, db_schema: str | None) -> str: - if db_schema: - return f"{db_schema}.{index_name}" - return index_name - - -def _qualified_table_name(table_name: str, db_schema: str | None) -> str: - if db_schema: - return f"{db_schema}.{table_name}" - return table_name - - -def _ensure_supported_backend(engine: Engine) -> None: - if engine.dialect.name == Dialect.POSTGRESQL: - return - raise RuntimeError( - "PostgreSQL full-text vector column management is only supported for " - f"PostgreSQL engines. 
Current dialect: '{engine.dialect.name}'." - ) - - -def _install_target( - connection: sa.Connection, - *, - target: FullTextTarget, - db_schema: str | None, - create_indexes: bool, - fastupdate: bool, -) -> None: - connection.exec_driver_sql( - f""" - ALTER TABLE {_qualified_table_name(target.table_name, db_schema)} - ADD COLUMN IF NOT EXISTS {target.vector_column_name} tsvector - """ - ) - - if not create_indexes: - return - - connection.exec_driver_sql( - f""" - CREATE INDEX IF NOT EXISTS {_qualified_index_name(target.index_name, db_schema)} - ON {_qualified_table_name(target.table_name, db_schema)} - USING GIN ({target.vector_column_name}) - WITH (fastupdate = {'on' if fastupdate else 'off'}) - """ - ) - - -def install_fulltext_columns( - engine: Engine, - *, - db_schema: str | None = None, - create_indexes: bool = True, - fastupdate: bool = False, - dry_run: bool = False, -) -> tuple[FullTextResult, ...]: - _ensure_supported_backend(engine) - - results: list[FullTextResult] = [] - try: - if not dry_run: - with engine.begin() as connection: - for target in FULLTEXT_TARGETS: - _install_target( - connection, - target=target, - db_schema=db_schema, - create_indexes=create_indexes, - fastupdate=fastupdate, - ) - register_optional_fulltext_columns() - except Exception as exc: - raise FullTextError( - "Full-text install failed for PostgreSQL sidecar columns. 
" - f"Underlying error: {exc.__class__.__name__}: {exc}" - ) from exc - - for target in FULLTEXT_TARGETS: - results.append( - FullTextResult( - target_name=target.name, - table_name=target.table_name, - source_column_name=target.source_column_name, - vector_column_name=target.vector_column_name, - index_name=target.index_name, - action=FullTextAction.INSTALL, - status="planned" if dry_run else "applied", - detail=( - "tsvector column would be installed" - if dry_run and not create_indexes - else "tsvector column and GIN index would be installed" - if dry_run - else "tsvector column installed" - if not create_indexes - else "tsvector column and GIN index installed" - ), - ) - ) - return tuple(results) - - -def _populate_target( - connection: sa.Connection, - *, - target: FullTextTarget, - db_schema: str | None, - regconfig: str, -) -> int | None: - result = connection.execute( - sa.text( - f""" - UPDATE {_qualified_table_name(target.table_name, db_schema)} - SET {target.vector_column_name} = to_tsvector( - CAST(:regconfig AS regconfig), - coalesce({target.source_column_name}, '') - ) - """ - ), - {"regconfig": regconfig}, - ) - if result.rowcount is None or result.rowcount < 0: - return None - return int(result.rowcount) - - -def populate_fulltext_columns( - engine: Engine, - *, - db_schema: str | None = None, - regconfig: str = "english", - dry_run: bool = False, -) -> tuple[FullTextResult, ...]: - _ensure_supported_backend(engine) - - row_counts: dict[str, int | None] = {} - try: - if not dry_run: - with engine.begin() as connection: - for target in FULLTEXT_TARGETS: - row_counts[target.name] = _populate_target( - connection, - target=target, - db_schema=db_schema, - regconfig=regconfig, - ) - register_optional_fulltext_columns() - except Exception as exc: - raise FullTextError( - "Full-text populate failed for PostgreSQL sidecar columns. 
" - f"Underlying error: {exc.__class__.__name__}: {exc}" - ) from exc - - results: list[FullTextResult] = [] - for target in FULLTEXT_TARGETS: - row_count = None if dry_run else row_counts.get(target.name) - results.append( - FullTextResult( - target_name=target.name, - table_name=target.table_name, - source_column_name=target.source_column_name, - vector_column_name=target.vector_column_name, - index_name=target.index_name, - action=FullTextAction.POPULATE, - status="planned" if dry_run else "applied", - detail=( - "tsvector column would be populated from source text" - if dry_run - else "tsvector column populated from source text" - ), - row_count=row_count, - ) - ) - return tuple(results) - - -def _drop_target( - connection: sa.Connection, - *, - target: FullTextTarget, - db_schema: str | None, - drop_indexes: bool, -) -> None: - if drop_indexes: - connection.exec_driver_sql( - f"DROP INDEX IF EXISTS {_qualified_index_name(target.index_name, db_schema)}" - ) - connection.exec_driver_sql( - f""" - ALTER TABLE {_qualified_table_name(target.table_name, db_schema)} - DROP COLUMN IF EXISTS {target.vector_column_name} - """ - ) - - -def drop_fulltext_columns( - engine: Engine, - *, - db_schema: str | None = None, - drop_indexes: bool = True, - dry_run: bool = False, -) -> tuple[FullTextResult, ...]: - _ensure_supported_backend(engine) - - try: - if not dry_run: - with engine.begin() as connection: - for target in FULLTEXT_TARGETS: - _drop_target( - connection, - target=target, - db_schema=db_schema, - drop_indexes=drop_indexes, - ) - unregister_optional_fulltext_columns() - except Exception as exc: - raise FullTextError( - "Full-text drop failed for PostgreSQL sidecar columns. 
" - f"Underlying error: {exc.__class__.__name__}: {exc}" - ) from exc - - results: list[FullTextResult] = [] - for target in FULLTEXT_TARGETS: - results.append( - FullTextResult( - target_name=target.name, - table_name=target.table_name, - source_column_name=target.source_column_name, - vector_column_name=target.vector_column_name, - index_name=target.index_name, - action=FullTextAction.DROP, - status="planned" if dry_run else "applied", - detail=( - "tsvector column would be dropped" - if dry_run and not drop_indexes - else "tsvector column and GIN index would be dropped" - if dry_run - else "tsvector column dropped" - if not drop_indexes - else "tsvector column and GIN index dropped" - ), - ) - ) - return tuple(results) - - -__all__ = [ - "CONCEPT_NAME_TSVECTOR_COLUMN", - "CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN", - "FULLTEXT_TARGETS", - "FullTextAction", - "FullTextError", - "FullTextResult", - "FullTextTarget", - "concept_name_tsvector_expression", - "concept_synonym_name_tsvector_expression", - "drop_fulltext_columns", - "install_fulltext_columns", - "populate_fulltext_columns", - "register_optional_fulltext_columns", - "unregister_optional_fulltext_columns", -] diff --git a/omop_alchemy/config.py b/omop_alchemy/config.py index 1cbd66f..95149dd 100644 --- a/omop_alchemy/config.py +++ b/omop_alchemy/config.py @@ -1,8 +1,6 @@ -import os -from collections.abc import Mapping from dotenv import load_dotenv from pathlib import Path -import sqlalchemy as sa + from orm_loader.helpers import get_logger ROOT_PATH = Path(__file__).parent @@ -10,14 +8,6 @@ logger = get_logger(__name__) -# from orm-loader 0.4.0 onwards, implicit psycopg2 dependency has been removed in favor of explicit driver modules. -# This mapping is used to provide clearer error messages when a required driver is missing. 
-POSTGRES_DRIVER_MODULES: Mapping[str, str] = { - "postgresql": "psycopg", # bare URL aliased to psycopg - "postgresql+psycopg": "psycopg", - "postgresql+psycopg2": "psycopg2", # retained so missing-driver message is clear -} - def load_environment(dotenv: str = '') -> None: """ Explicitly load environment variables for the application. @@ -29,78 +19,3 @@ def load_environment(dotenv: str = '') -> None: else: logger.debug("No .env file loaded") - -def get_engine_name(schema: str | None = None) -> str: - """ - Resolve database engine URI. - - Resolution order: - 1. ENGINE_ (if schema provided) - 2. ENGINE (fallback / legacy) - - Raises if nothing is configured. - """ - if schema: - key = f"ENGINE_{schema.upper()}" - engine = os.getenv(key) - if engine: - logger.info("Database engine configured for schema '%s'", schema) - return engine - else: - logger.debug( - "No schema-specific engine found for '%s' (%s)", - schema, - key, - ) - - engine = os.getenv("ENGINE") - if engine: - logger.info("Default database engine configured") - return engine - - raise RuntimeError( - f"No database engine configured" - + (f" for schema '{schema}'" if schema else "") - ) - - -def _missing_driver_message( - engine_name: str, - exc: ModuleNotFoundError, -) -> str | None: - drivername = sa.engine.make_url(engine_name).drivername - expected_module = POSTGRES_DRIVER_MODULES.get(drivername) - if expected_module is None: - return None - - missing_module = exc.name - if missing_module is None and expected_module in str(exc): - missing_module = expected_module - - if missing_module != expected_module: - return None - - return ( - f"Database driver '{expected_module}' is required for engine " - f"'{drivername}' but is not installed. " - "Install PostgreSQL support with " - "`uv sync --extra postgres` " - "or " - "`pip install -e '.[postgres]'`." 
- ) - - -def create_engine_with_dependencies( - engine_name: str, - **engine_kwargs, -) -> sa.Engine: - """ - Create a SQLAlchemy engine with clearer dependency errors for postgres. - """ - try: - return sa.create_engine(engine_name, **engine_kwargs) - except ModuleNotFoundError as exc: - message = _missing_driver_message(engine_name, exc) - if message is not None: - raise RuntimeError(message) from exc - raise diff --git a/omop_alchemy/db.py b/omop_alchemy/db.py new file mode 100644 index 0000000..da6517e --- /dev/null +++ b/omop_alchemy/db.py @@ -0,0 +1,253 @@ +from __future__ import annotations +from dataclasses import dataclass, asdict +from typing import Optional +from collections.abc import Mapping +import json +import os +from pathlib import Path +import tomllib + +from sqlalchemy.engine import Engine +import sqlalchemy as sa + +from .config import load_environment +import logging +logger = logging.getLogger(__name__) + +DEFAULTS_FILENAME = ".omop-maint.toml" +DEFAULTS_ENV_VAR = "OMOP_MAINT_DEFAULTS_FILE" +DEFAULTS_SECTION = "defaults" +LEGACY_DEFAULTS_SECTION = "connection" +PROJECT_MARKER = "pyproject.toml" + + +def defaults_path() -> Path: + configured_path = os.getenv(DEFAULTS_ENV_VAR) + if configured_path: + return Path(configured_path).expanduser().resolve() + + current = Path.cwd().resolve() + for directory in (current, *current.parents): + if (directory / PROJECT_MARKER).exists(): + return (directory / DEFAULTS_FILENAME).resolve() + + return (current / DEFAULTS_FILENAME).resolve() + +def _clean(value: object) -> str | None: + if value is None: + return None + value_str = str(value).strip() + return value_str or None + +def _relative_path_for_storage(config_path: Path, value: str | None) -> str | None: + cleaned = _clean(value) + if cleaned is None: + return None + + path_value = Path(cleaned).expanduser() + if not path_value.is_absolute(): + path_value = (Path.cwd() / path_value).resolve() + + return 
path_value.relative_to(config_path.parent).as_posix() + +def _resolve_relative_path(config_path: Path, value: object) -> str | None: + cleaned = _clean(value) + if cleaned is None: + return None + + path_value = Path(cleaned).expanduser() + if path_value.is_absolute(): + return str(path_value) + + return str((config_path.parent / path_value).resolve()) + +@dataclass(frozen=True) +class ConnectionDefaults: + """ + + Returns: + _type_: _description_ + """ + dotenv: Optional[str] = None + engine_schema: Optional[str] = None + db_schema: Optional[str] = None + athena_source: Optional[str] = None + logging: Optional[str] = None + + def to_dict(self) -> dict[str, Optional[str]]: + return asdict(self) + + def save(self) -> Path: + path = defaults_path() + path.parent.mkdir(parents=True, exist_ok=True) + + lines = [f"[{DEFAULTS_SECTION}]"] + dotenv = _relative_path_for_storage(path, self.dotenv) + if dotenv is not None: + lines.append(f"dotenv = {json.dumps(dotenv)}") + if self.engine_schema is not None: + lines.append(f"engine_schema = {json.dumps(self.engine_schema)}") + if self.db_schema is not None: + lines.append(f"db_schema = {json.dumps(self.db_schema)}") + athena_source = _relative_path_for_storage(path, self.athena_source) + if athena_source is not None: + lines.append(f"athena_source = {json.dumps(athena_source)}") + if self.logging is not None: + lines.append(f"logging = {json.dumps(self.logging)}") + lines.append("") + path.write_text("\n".join(lines), encoding="utf-8") + return path + + @classmethod + def load(cls) -> ConnectionDefaults: + path = defaults_path() + if not path.exists(): + return ConnectionDefaults() + + data = tomllib.loads(path.read_text(encoding="utf-8")) + defaults = data.get(DEFAULTS_SECTION, {}) + connection = data.get(LEGACY_DEFAULTS_SECTION, {}) + + if not isinstance(defaults, dict): + defaults = {} + if not isinstance(connection, dict): + connection = {} + + return ConnectionDefaults( + dotenv=_resolve_relative_path( + path, + 
defaults.get("dotenv", connection.get("dotenv")), + ), + engine_schema=_clean(defaults.get("engine_schema", connection.get("engine_schema"))), + db_schema=_clean(defaults.get("db_schema", connection.get("db_schema"))), + athena_source=_resolve_relative_path( + path, + defaults.get("athena_source", connection.get("athena_source")), + ), + logging=_clean(defaults.get("logging", connection.get("logging"))), + ) + + @classmethod + def update_and_save_defaults( + cls, + *, + dotenv: Optional[str] = None, + engine_schema: Optional[str] = None, + db_schema: Optional[str] = None, + athena_source: Optional[str] = None, + logging: Optional[str] = None, + ) -> tuple[ConnectionDefaults, Path]: + """Loads current defaults, allows update of any subset of values, and returns updated defaults after it has been saved.""" + current = cls.load() + updated = ConnectionDefaults( + dotenv=dotenv if dotenv is not None else current.dotenv, + engine_schema=engine_schema if engine_schema is not None else current.engine_schema, + db_schema=db_schema if db_schema is not None else current.db_schema, + athena_source=athena_source if athena_source is not None else current.athena_source, + logging=logging if logging is not None else current.logging, + ) + path = updated.save() + return updated, path + + +def resolve_connection( + *, + dotenv: str | None, + engine_schema: str | None, + db_schema: str | None, + athena_source: str | None = None, +) -> ConnectionDefaults: + saved = ConnectionDefaults.load() + return ConnectionDefaults( + dotenv=dotenv if dotenv is not None else saved.dotenv, + engine_schema=engine_schema if engine_schema is not None else saved.engine_schema, + db_schema=db_schema if db_schema is not None else saved.db_schema, + athena_source=athena_source if athena_source is not None else saved.athena_source, + ) + + +def get_engine_name(schema: str | None = None) -> str: + """ + Resolve database engine URI. + + Resolution order: + 1. ENGINE_ (if schema provided) + 2. 
ENGINE (fallback / legacy) + + Raises if nothing is configured. + """ + if schema: + key = f"ENGINE_{schema.upper()}" + engine = os.getenv(key) + if engine: + logger.info("Database engine configured for schema '%s'", schema) + return engine + else: + logger.debug( + "No schema-specific engine found for '%s' (%s)", + schema, + key, + ) + + engine = os.getenv("ENGINE") + if engine: + logger.info("Default database engine configured") + return engine + + raise RuntimeError(f"No database engine configured for {'schema ' + schema if schema else 'default'}. ") + + +def _missing_driver_message( + engine_name: str, + exc: ModuleNotFoundError, +) -> str | None: + drivername = sa.engine.make_url(engine_name).drivername + expected_module = POSTGRES_DRIVER_MODULES.get(drivername) + if expected_module is None: + return None + + missing_module = exc.name + if missing_module is None and expected_module in str(exc): + missing_module = expected_module + + if missing_module != expected_module: + return None + + return ( + f"Database driver '{expected_module}' is required for engine " + f"'{drivername}' but is not installed. " + "Install PostgreSQL support with " + "`uv sync --extra postgres` " + "or " + "`pip install -e '.[postgres]'`." + ) + +def build_engine(*, dotenv: str | None, engine_schema: str | None) -> Engine: + load_environment(dotenv or "") + return create_engine_with_dependencies(get_engine_name(engine_schema), future=True) + + +def create_engine_with_dependencies( + engine_name: str, + **engine_kwargs, +) -> sa.Engine: + """ + Create a SQLAlchemy engine with clearer dependency errors for postgres. + """ + try: + return sa.create_engine(engine_name, **engine_kwargs) + except ModuleNotFoundError as exc: + message = _missing_driver_message(engine_name, exc) + if message is not None: + raise RuntimeError(message) from exc + raise + +# from orm-loader 0.4.0 onwards, implicit psycopg2 dependency has been removed in favor of explicit driver modules. 
+# This mapping is used to provide clearer error messages when a required driver is missing. +POSTGRES_DRIVER_MODULES: Mapping[str, str] = { + "postgresql": "psycopg", # bare URL aliased to psycopg + "postgresql+psycopg": "psycopg", + "postgresql+psycopg2": "psycopg2", # retained so missing-driver message is clear +} + + diff --git a/omop_alchemy/logger_config.py b/omop_alchemy/logger_config.py new file mode 100644 index 0000000..64e3375 --- /dev/null +++ b/omop_alchemy/logger_config.py @@ -0,0 +1,26 @@ +import functools +import logging + +from .db import ConnectionDefaults, defaults_path + +@functools.lru_cache(maxsize=None) +def configure_logging() -> None: + mode = (ConnectionDefaults.load().logging or "file").strip().lower() + if mode not in {"file", "console", "off"}: + mode = "file" + if mode == "off": + return + + formatter = logging.Formatter("%(asctime)s | %(levelname)-8s | %(name)s | %(message)s") + if mode == "file": + log_path = defaults_path().parent / "logging" / "omop-alchemy.log" + log_path.parent.mkdir(parents=True, exist_ok=True) + handler: logging.Handler = logging.FileHandler(log_path, encoding="utf-8") + else: + handler = logging.StreamHandler() + handler.setFormatter(formatter) + + root_logger = logging.getLogger() + root_logger.addHandler(handler) + if root_logger.level in {logging.NOTSET, logging.WARNING, logging.ERROR, logging.CRITICAL}: + root_logger.setLevel(logging.INFO) \ No newline at end of file diff --git a/omop_alchemy/maintenance/__init__.py b/omop_alchemy/maintenance/__init__.py index 87be939..e7d58f2 100644 --- a/omop_alchemy/maintenance/__init__.py +++ b/omop_alchemy/maintenance/__init__.py @@ -1,26 +1,17 @@ from .cli_backup import ( BackupFormat, - DatabaseBackupResult, - DatabaseRestoreResult, - create_database_backup, - restore_database_backup, + BackupResult, ) from .cli_config import ( ConnectionDefaults, - clear_connection_defaults, defaults_path, - load_connection_defaults, - save_connection_defaults, ) from 
.cli_foreign_keys import ( - ForeignKeyAction, ForeignKeyConstraintViolation, ForeignKeyManagementResult, ForeignKeyStatusResult, - ForeignKeyTarget, ForeignKeyValidationReport, ForeignKeyValidationResult, - collect_foreign_key_targets, collect_foreign_key_trigger_status, manage_foreign_key_triggers, validate_foreign_key_constraints, @@ -68,7 +59,6 @@ "analyze_tables", "collect_data_summary", "collect_doctor_report", - "collect_foreign_key_targets", "collect_foreign_key_trigger_status", "validate_foreign_key_constraints", "collect_maintenance_info", @@ -77,10 +67,7 @@ "collect_missing_tables", "reconcile_schema", "collect_sequence_targets", - "create_database_backup", "create_missing_tables", - "restore_database_backup", - "clear_connection_defaults", "manage_foreign_key_triggers", "manage_indexes", "load_vocab_source", @@ -90,13 +77,10 @@ "defaults_path", "AnalyzeTableResult", "BackupFormat", - "DatabaseBackupResult", - "DatabaseRestoreResult", - "ForeignKeyAction", + "BackupResult", "ForeignKeyConstraintViolation", "ForeignKeyManagementResult", "ForeignKeyStatusResult", - "ForeignKeyTarget", "ForeignKeyValidationReport", "ForeignKeyValidationResult", "IndexAction", @@ -121,6 +105,4 @@ "TableCreationResult", "TableSummaryResult", "TruncateTableResult", - "load_connection_defaults", - "save_connection_defaults", ] diff --git a/omop_alchemy/maintenance/_cli_utils.py b/omop_alchemy/maintenance/_cli_utils.py index bc6d29c..2863194 100644 --- a/omop_alchemy/maintenance/_cli_utils.py +++ b/omop_alchemy/maintenance/_cli_utils.py @@ -1,41 +1,20 @@ from __future__ import annotations - -import functools -import logging - import typer -from sqlalchemy.engine import Engine +from typing import Optional +from sqlalchemy import Engine from sqlalchemy.exc import SQLAlchemyError +from rich.console import Console -from omop_alchemy import create_engine_with_dependencies, get_engine_name, load_environment - -from .cli_config import ConnectionDefaults, defaults_path, 
load_connection_defaults from .tables import TableScope -from .ui import console, render_error - - -def resolve_connection( - *, - dotenv: str | None, - engine_schema: str | None, - db_schema: str | None, - athena_source: str | None = None, -) -> ConnectionDefaults: - saved = load_connection_defaults() - return ConnectionDefaults( - dotenv=dotenv if dotenv is not None else saved.dotenv, - engine_schema=engine_schema if engine_schema is not None else saved.engine_schema, - db_schema=db_schema if db_schema is not None else saved.db_schema, - athena_source=athena_source if athena_source is not None else saved.athena_source, - ) - - -def build_engine(*, dotenv: str | None, engine_schema: str | None) -> Engine: - load_environment(dotenv or "") - return create_engine_with_dependencies(get_engine_name(engine_schema), future=True) +from .ui import console, render_error, render_command_header +from ..db import build_engine, resolve_connection, ConnectionDefaults +from ..backends import BackendNotSupportedError def handle_error(exc: Exception) -> None: + if isinstance(exc, BackendNotSupportedError): + console.print(render_error(f"Not supported: {exc}")) + raise typer.Exit(code=1) from exc if isinstance(exc, RuntimeError): console.print(render_error(str(exc))) raise typer.Exit(code=1) from exc @@ -63,24 +42,35 @@ def resolve_selection( return scope or default_scope, None -@functools.lru_cache(maxsize=None) -def configure_logging() -> None: - mode = (load_connection_defaults().logging or "file").strip().lower() - if mode not in {"file", "console", "off"}: - mode = "file" - if mode == "off": - return - - formatter = logging.Formatter("%(asctime)s | %(levelname)-8s | %(name)s | %(message)s") - if mode == "file": - log_path = defaults_path().parent / "logging" / "omop-alchemy.log" - log_path.parent.mkdir(parents=True, exist_ok=True) - handler: logging.Handler = logging.FileHandler(log_path, encoding="utf-8") - else: - handler = logging.StreamHandler() - 
handler.setFormatter(formatter) +def setup_cli_cmd( + *, + console: Console, + dotenv: Optional[str], + engine_schema: Optional[str], + db_schema: Optional[str], + command_name: str, + vocabulary_included: Optional[bool], + mode_label: str, + athena_source: Optional[str] = None, +) -> tuple[ConnectionDefaults, Engine]: + """Convenience function to resolve connection, print command header, and build engine for CLI commands.""" + + conn = resolve_connection( + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, + athena_source=athena_source, + ) + console.print( + render_command_header( + command_name=command_name, + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=vocabulary_included, + mode_label=mode_label, + ) + ) + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + return conn, engine - root_logger = logging.getLogger() - root_logger.addHandler(handler) - if root_logger.level in {logging.NOTSET, logging.WARNING, logging.ERROR, logging.CRITICAL}: - root_logger.setLevel(logging.INFO) + diff --git a/omop_alchemy/maintenance/cli.py b/omop_alchemy/maintenance/cli.py index a403a51..517a1b6 100644 --- a/omop_alchemy/maintenance/cli.py +++ b/omop_alchemy/maintenance/cli.py @@ -12,7 +12,7 @@ cli_tables as tables, cli_vocab as vocab, ) -from ._cli_utils import configure_logging +from ..logger_config import configure_logging from .help import install_help_customizations install_help_customizations() @@ -20,8 +20,7 @@ app = typer.Typer( help=( "OMOP Alchemy maintenance utilities.\n\n" - "PostgreSQL-only commands: reset-sequences, truncate-tables, " - "foreign-keys, backup-database, restore-database, fulltext." + "Some commands require backend-specific support — see individual command help for details." 
), rich_markup_mode="rich", ) diff --git a/omop_alchemy/maintenance/cli_backup.py b/omop_alchemy/maintenance/cli_backup.py index 57fea20..4721cb3 100644 --- a/omop_alchemy/maintenance/cli_backup.py +++ b/omop_alchemy/maintenance/cli_backup.py @@ -3,27 +3,26 @@ from dataclasses import dataclass from datetime import datetime from enum import StrEnum -import os from pathlib import Path -import shutil import subprocess import sqlalchemy as sa import typer -from ..backend_support import Dialect, POSTGRESQL_ONLY_HELP, require_backend -from ._cli_utils import build_engine, handle_error, resolve_connection +from ..backends import resolve_backend, require_backend_support, backend_support_note +from ._cli_utils import handle_error, setup_cli_cmd from .ui import ( console, render_backup_result, render_backup_summary, - render_command_header, render_restore_result, render_restore_summary, ) class BackupFormat(StrEnum): + """Supported pg_dump/psql output formats.""" + CUSTOM = "custom" PLAIN = "plain" @@ -35,22 +34,11 @@ class BackupFormat(StrEnum): @dataclass(frozen=True) -class DatabaseBackupResult: - output_path: str - format: BackupFormat - status: str - detail: str - database_name: str - backend: str - schema_name: str | None - command: tuple[str, ...] - tool_path: str - +class BackupResult: + """Metadata and outcome for a single backup or restore operation.""" -@dataclass(frozen=True) -class DatabaseRestoreResult: - input_path: str - format: BackupFormat + file_path: str + backup_format: BackupFormat status: str detail: str database_name: str @@ -60,167 +48,28 @@ class DatabaseRestoreResult: tool_path: str -def _pg_dump_path() -> str: - tool_path = shutil.which("pg_dump") - if tool_path is None: - raise RuntimeError( - "The `pg_dump` executable is required for database backups but was not found on PATH. " - "Install PostgreSQL client tools and ensure `pg_dump` is available." 
- ) - return tool_path - - -def _pg_restore_path() -> str: - tool_path = shutil.which("pg_restore") - if tool_path is None: - raise RuntimeError( - "The `pg_restore` executable is required to restore custom PostgreSQL dumps but was not found on PATH. " - "Install PostgreSQL client tools and ensure `pg_restore` is available." - ) - return tool_path - - -def _psql_path() -> str: - tool_path = shutil.which("psql") - if tool_path is None: - raise RuntimeError( - "The `psql` executable is required to restore plain SQL PostgreSQL dumps but was not found on PATH. " - "Install PostgreSQL client tools and ensure `psql` is available." - ) - return tool_path - - -def _default_output_path(format: BackupFormat) -> Path: +def _default_output_path(backup_format: BackupFormat) -> Path: + """Return a timestamped default output path in the current directory matching the chosen backup format.""" timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") - return Path.cwd() / f"omop-alchemy-backup-{timestamp}{FORMAT_SUFFIXES[format]}" - - -def _libpq_connection_uri(url: sa.engine.URL) -> str: - if not url.database: - raise RuntimeError( - "Database backup requires a database name in the configured engine URL." 
- ) - - libpq_url = sa.engine.URL.create( - drivername="postgresql", - username=url.username, - password=None, - host=url.host, - port=url.port, - database=url.database, - query=url.query, - ) - return libpq_url.render_as_string(hide_password=False) - - -def _build_pg_dump_command( - *, - engine: sa.Engine, - output_path: Path, - format: BackupFormat, - db_schema: str | None, - tool_path: str, -) -> tuple[list[str], dict[str, str], str]: - url = engine.url - database_name = url.database - if not database_name: - raise RuntimeError("Database backup requires a database name in the configured engine URL.") - connection_uri = _libpq_connection_uri(url) - - command = [ - tool_path, - "--format", - format.value, - "--file", - str(output_path), - "--dbname", - connection_uri, - "--no-password", - "--no-owner", - "--no-privileges", - ] - - if db_schema: - command.extend(["--schema", db_schema]) - - env = os.environ.copy() - if url.password: - env["PGPASSWORD"] = url.password - - return command, env, database_name - - -def _restore_tool_path(format: BackupFormat) -> str: - if format == BackupFormat.CUSTOM: - return _pg_restore_path() - return _psql_path() - - -def _build_restore_command( - *, - engine: sa.Engine, - input_path: Path, - format: BackupFormat, - db_schema: str | None, - tool_path: str, -) -> tuple[list[str], dict[str, str], str]: - url = engine.url - database_name = url.database - if not database_name: - raise RuntimeError("Database restore requires a database name in the configured engine URL.") - connection_uri = _libpq_connection_uri(url) - - if format == BackupFormat.CUSTOM: - command = [ - tool_path, - "--dbname", - connection_uri, - "--no-password", - "--no-owner", - "--no-privileges", - "--exit-on-error", - ] - if db_schema: - command.extend(["--schema", db_schema]) - command.append(str(input_path)) - else: - command = [ - tool_path, - "--dbname", - connection_uri, - "--no-password", - "--set", - "ON_ERROR_STOP=1", - "--single-transaction", - ] - 
command.extend(["--file", str(input_path)]) - - env = os.environ.copy() - if url.password: - env["PGPASSWORD"] = url.password - - return command, env, database_name + return Path.cwd() / f"omop-alchemy-backup-{timestamp}{FORMAT_SUFFIXES[backup_format]}" def create_database_backup( engine: sa.Engine, *, output_path: str | Path | None = None, - format: BackupFormat = BackupFormat.CUSTOM, + backup_format: BackupFormat = BackupFormat.CUSTOM, db_schema: str | None = None, dry_run: bool = False, -) -> DatabaseBackupResult: - require_backend(engine, feature="Database backup", supported_dialects=(Dialect.POSTGRESQL,)) - tool_path = _pg_dump_path() - resolved_output_path = Path(output_path) if output_path is not None else _default_output_path(format) +) -> BackupResult: + """Create a database backup artifact at output_path; runs the subprocess unless dry_run is True.""" + backend = resolve_backend(engine) + require_backend_support(backend, "prepare_backup", "Database backup") + resolved_output_path = Path(output_path) if output_path is not None else _default_output_path(backup_format) resolved_output_path = resolved_output_path.expanduser().resolve() - command, env, database_name = _build_pg_dump_command( - engine=engine, - output_path=resolved_output_path, - format=format, - db_schema=db_schema, - tool_path=tool_path, + tool_path, command, env, database_name = backend.prepare_backup( + engine, str(resolved_output_path), backup_format.value, db_schema ) if not dry_run: @@ -233,9 +82,9 @@ def create_database_backup( "Database backup failed via `pg_dump`." + (f" {stderr}" if stderr else "") ) from exc - return DatabaseBackupResult( - output_path=str(resolved_output_path), - format=format, + return BackupResult( + file_path=str(resolved_output_path), + backup_format=backup_format, status="planned" if dry_run else "created", detail=( "Database backup would be created with pg_dump." 
@@ -254,22 +103,18 @@ def restore_database_backup( engine: sa.Engine, *, input_path: str | Path, - format: BackupFormat, + backup_format: BackupFormat, db_schema: str | None = None, dry_run: bool = False, -) -> DatabaseRestoreResult: - require_backend(engine, feature="Database restore", supported_dialects=(Dialect.POSTGRESQL,)) +) -> BackupResult: + """Restore a database backup; runs the subprocess unless dry_run is True.""" + backend = resolve_backend(engine) + require_backend_support(backend, "prepare_restore", "Database restore") resolved_input_path = Path(input_path).expanduser().resolve() if not resolved_input_path.exists(): raise RuntimeError(f"Backup artifact not found: {resolved_input_path}") - - tool_path = _restore_tool_path(format) - command, env, database_name = _build_restore_command( - engine=engine, - input_path=resolved_input_path, - format=format, - db_schema=db_schema, - tool_path=tool_path, + tool_path, command, env, database_name = backend.prepare_restore( + engine, str(resolved_input_path), backup_format.value, db_schema ) if not dry_run: @@ -281,9 +126,9 @@ def restore_database_backup( "Database restore failed." + (f" {stderr}" if stderr else "") ) from exc - return DatabaseRestoreResult( - input_path=str(resolved_input_path), - format=format, + return BackupResult( + file_path=str(resolved_input_path), + backup_format=backup_format, status="planned" if dry_run else "applied", detail=( "Database restore would be executed using PostgreSQL client tools." @@ -297,42 +142,58 @@ def restore_database_backup( tool_path=tool_path, ) - -app = typer.Typer(rich_markup_mode="rich") - - -@app.command( - "backup-database", - help=f"Create a PostgreSQL dump artifact that can be restored into another environment. 
{POSTGRESQL_ONLY_HELP}", +# --------------------------------------------------------------------------- +# CLI commands +# --------------------------------------------------------------------------- +app = typer.Typer( + rich_markup_mode="rich", + help=f"Manage database backup and restore operations. {backend_support_note('prepare_backup')}", ) + +@app.command("backup-database") def backup_database_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Optional schema-limited backup."), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Restrict the backup to a single schema (pg_dump --schema). Only supported on PostgreSQL.", + ), output_path: str | None = typer.Option( None, - help="Backup artifact path. Defaults to a timestamped file in the current directory.", + help="Output path for the backup artifact. Defaults to a timestamped file in the current directory.", + ), + backup_format: BackupFormat = typer.Option( + BackupFormat.CUSTOM, + help="pg_dump output format. 
'custom' produces a binary .dump file; 'plain' produces a plain SQL .sql file.", + ), + dry_run: bool = typer.Option( + False, + "--dry-run", + help="Preview planned actions without applying any changes to the database.", ), - format: BackupFormat = typer.Option(BackupFormat.CUSTOM, help="Backup format."), - dry_run: bool = typer.Option(False, "--dry-run"), ) -> None: - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( + """Create a database backup that can be restored with `restore-database`.""" + try: + conn, engine = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="backup-database", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=None, mode_label="dry-run" if dry_run else "apply", ) - ) - try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) - with console.status("Creating restore-ready PostgreSQL backup..."): + with console.status("Creating restore-ready database backup..."): result = create_database_backup( engine, output_path=output_path, - format=format, + backup_format=backup_format, db_schema=conn.db_schema, dry_run=dry_run, ) @@ -342,39 +203,47 @@ def backup_database_command( handle_error(exc) -@app.command( - "restore-database", - help=f"Restore a PostgreSQL backup artifact into the configured target database. 
{POSTGRESQL_ONLY_HELP}", -) +@app.command("restore-database") def restore_database_command( - input_path: str = typer.Argument(..., help="Backup artifact path to restore."), - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), + input_path: str = typer.Argument(help="Path to the backup artifact (.dump or .sql) to restore."), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), db_schema: str | None = typer.Option( - None, help="Optional schema-limited restore for custom-format dumps." + None, + help="Restrict the restore to a single schema (pg_restore --schema). Only valid for custom-format dumps.", ), - format: BackupFormat = typer.Option( - ..., help="Restore format. Required: choose `custom` or `plain`." + backup_format: BackupFormat = typer.Option( + ..., + help="Format of the artifact to restore. 
Must match the format used when the backup was created.", + ), + dry_run: bool = typer.Option( + False, + "--dry-run", + help="Preview planned actions without applying any changes to the database.", ), - dry_run: bool = typer.Option(False, "--dry-run"), ) -> None: - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( + """Restore a database backup that was created with `backup-database`.""" + try: + conn, engine = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="restore-database", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=None, mode_label="dry-run" if dry_run else "apply", ) - ) - try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) - with console.status("Restoring PostgreSQL backup artifact..."): + with console.status("Restoring database backup..."): result = restore_database_backup( engine, input_path=input_path, - format=format, + backup_format=backup_format, db_schema=conn.db_schema, dry_run=dry_run, ) diff --git a/omop_alchemy/maintenance/cli_config.py b/omop_alchemy/maintenance/cli_config.py index 595f570..5c56e01 100644 --- a/omop_alchemy/maintenance/cli_config.py +++ b/omop_alchemy/maintenance/cli_config.py @@ -1,13 +1,11 @@ from __future__ import annotations - -from dataclasses import dataclass -import json -import os -from pathlib import Path -import tomllib - import typer +from typing import Optional +from ..db import ( + ConnectionDefaults, + defaults_path, +) from .ui import console, render_connection_defaults @@ -18,235 +16,49 @@ PROJECT_MARKER = "pyproject.toml" -@dataclass(frozen=True) -class ConnectionDefaults: - dotenv: str | None = None - engine_schema: str | None = None - db_schema: str | None = None - athena_source: str | None = None - logging: str | None = None - - def with_updates( - self, - *, - dotenv: str | None = None, 
- engine_schema: str | None = None, - db_schema: str | None = None, - athena_source: str | None = None, - logging: str | None = None, - ) -> "ConnectionDefaults": - return ConnectionDefaults( - dotenv=self.dotenv if dotenv is None else dotenv, - engine_schema=self.engine_schema if engine_schema is None else engine_schema, - db_schema=self.db_schema if db_schema is None else db_schema, - athena_source=( - self.athena_source if athena_source is None else athena_source - ), - logging=self.logging if logging is None else logging, - ) - - -def defaults_path() -> Path: - configured_path = os.getenv(DEFAULTS_ENV_VAR) - if configured_path: - return Path(configured_path).expanduser().resolve() - - current = Path.cwd().resolve() - for directory in (current, *current.parents): - if (directory / PROJECT_MARKER).exists(): - return (directory / DEFAULTS_FILENAME).resolve() - - return (current / DEFAULTS_FILENAME).resolve() - - -def _clean(value: object) -> str | None: - if value is None: - return None - value_str = str(value).strip() - return value_str or None - - -def _resolve_relative_path(config_path: Path, value: object) -> str | None: - cleaned = _clean(value) - if cleaned is None: - return None - - path_value = Path(cleaned).expanduser() - if path_value.is_absolute(): - return str(path_value) - - return str((config_path.parent / path_value).resolve()) - - -def _relative_path_for_storage(config_path: Path, value: str | None) -> str | None: - if value is None: - return None - - path_value = Path(value).expanduser() - if not path_value.is_absolute(): - path_value = (Path.cwd() / path_value).resolve() - - return os.path.relpath(path_value, start=config_path.parent) - - -def load_connection_defaults() -> ConnectionDefaults: - path = defaults_path() - if not path.exists(): - return ConnectionDefaults() - - data = tomllib.loads(path.read_text(encoding="utf-8")) - defaults = data.get(DEFAULTS_SECTION, {}) - connection = data.get(LEGACY_DEFAULTS_SECTION, {}) - - if not 
isinstance(defaults, dict): - defaults = {} - if not isinstance(connection, dict): - connection = {} - - return ConnectionDefaults( - dotenv=_resolve_relative_path( - path, - defaults.get("dotenv", defaults.get("env_path", connection.get("dotenv"))), - ), - engine_schema=_clean(defaults.get("engine_schema", connection.get("engine_schema"))), - db_schema=_clean(defaults.get("db_schema", connection.get("db_schema"))), - athena_source=_resolve_relative_path( - path, - defaults.get("athena_source", connection.get("athena_source")), - ), - logging=_clean(defaults.get("logging", connection.get("logging"))), - ) - - -def save_connection_defaults(defaults: ConnectionDefaults) -> Path: - path = defaults_path() - path.parent.mkdir(parents=True, exist_ok=True) - - lines = [f"[{DEFAULTS_SECTION}]"] - dotenv = _relative_path_for_storage(path, defaults.dotenv) - if dotenv is not None: - lines.append(f"dotenv = {json.dumps(dotenv)}") - if defaults.engine_schema is not None: - lines.append(f"engine_schema = {json.dumps(defaults.engine_schema)}") - if defaults.db_schema is not None: - lines.append(f"db_schema = {json.dumps(defaults.db_schema)}") - athena_source = _relative_path_for_storage(path, defaults.athena_source) - if athena_source is not None: - lines.append(f"athena_source = {json.dumps(athena_source)}") - if defaults.logging is not None: - lines.append(f"logging = {json.dumps(defaults.logging)}") - lines.append("") - - path.write_text("\n".join(lines), encoding="utf-8") - return path - - -def clear_connection_defaults( - *, - clear_dotenv: bool = False, - clear_engine_schema: bool = False, - clear_db_schema: bool = False, - clear_athena_source: bool = False, -) -> Path | None: - path = defaults_path() - if not path.exists(): - return None - - if not any((clear_dotenv, clear_engine_schema, clear_db_schema, clear_athena_source)): - path.unlink() - return path - - current = load_connection_defaults() - updated = ConnectionDefaults( - dotenv=None if clear_dotenv else 
current.dotenv, - engine_schema=None if clear_engine_schema else current.engine_schema, - db_schema=None if clear_db_schema else current.db_schema, - athena_source=None if clear_athena_source else current.athena_source, - logging=current.logging, - ) - - if all( - value is None - for value in ( - updated.dotenv, - updated.engine_schema, - updated.db_schema, - updated.athena_source, - updated.logging, - ) - ): - path.unlink() - return path - - save_connection_defaults(updated) - return path - - app = typer.Typer( - help="Manage persisted maintenance CLI connection overrides.", + help="Manage persisted maintenance CLI connection overrides stored in .omop-maint.toml.", rich_markup_mode="rich", ) @app.command("show") def config_show_command() -> None: - """Display current saved connection defaults.""" - defaults = load_connection_defaults() + """Display current saved connection defaults from the nearest .omop-maint.toml file.""" + defaults = ConnectionDefaults.load() console.print(render_connection_defaults(defaults, path=str(defaults_path()))) -@app.command("set-overrides") -def config_set_overrides_command( - dotenv: str | None = typer.Option(None, help="Override dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Override engine schema selector."), - db_schema: str | None = typer.Option(None, help="Override database schema."), - athena_source: str | None = typer.Option( - None, help="Override path to unzipped Athena vocabulary files." +@app.command("override") +def config_override_command( + dotenv: Optional[str] = typer.Option( + None, + help=( + "Path to a .env file to load before resolving the connection. " + "Saved as a path relative to .omop-maint.toml and resolved back to absolute on load." + ), + ), + engine_schema: Optional[str] = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). 
Resolves to the ENGINE_ environment variable group.", + ), + db_schema: Optional[str] = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", + ), + athena_source: Optional[str] = typer.Option( + None, + help=( + "Path to the unzipped Athena vocabulary CSV directory. " + "Saved relative to .omop-maint.toml; used by load-vocab-source when --athena-source is omitted." + ), ), ) -> None: - """Save one or more connection defaults to the project config file.""" - current = load_connection_defaults() - updated = current.with_updates( + """Persist one or more connection overrides to .omop-maint.toml for future CLI invocations.""" + updated, path = ConnectionDefaults.update_and_save_defaults( dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema, athena_source=athena_source, ) - path = save_connection_defaults(updated) console.print(render_connection_defaults(updated, path=str(path), title="Saved Overrides")) - - -@app.command("clear-overrides") -def config_clear_overrides_command( - dotenv: bool = typer.Option(False, "--dotenv", help="Clear overridden dotenv."), - engine_schema: bool = typer.Option( - False, "--engine-schema", help="Clear overridden engine schema." - ), - db_schema: bool = typer.Option(False, "--db-schema", help="Clear overridden database schema."), - athena_source: bool = typer.Option( - False, "--athena-source", help="Clear overridden Athena source path." 
- ), -) -> None: - """Clear one or more saved connection overrides.""" - path = clear_connection_defaults( - clear_dotenv=dotenv, - clear_engine_schema=engine_schema, - clear_db_schema=db_schema, - clear_athena_source=athena_source, - ) - if path is None: - console.print( - render_connection_defaults( - ConnectionDefaults(), - path=str(defaults_path()), - title="Overrides Already Clear", - ) - ) - return - console.print( - render_connection_defaults( - load_connection_defaults(), - path=str(path), - title="Overrides Cleared", - ) - ) diff --git a/omop_alchemy/maintenance/cli_foreign_keys.py b/omop_alchemy/maintenance/cli_foreign_keys.py index 1d8f6fd..670efdb 100644 --- a/omop_alchemy/maintenance/cli_foreign_keys.py +++ b/omop_alchemy/maintenance/cli_foreign_keys.py @@ -1,18 +1,16 @@ from __future__ import annotations from dataclasses import dataclass -from enum import StrEnum import sqlalchemy as sa import typer -from ..backend_support import Dialect, POSTGRESQL_ONLY_HELP, require_backend -from ._cli_utils import build_engine, handle_error, resolve_connection +from ..db import build_engine, resolve_connection +from ..backends import Backend, resolve_backend, require_backend_support, backend_support_note +from ._cli_utils import handle_error, setup_cli_cmd from .tables import ( - MaintenanceTable, TableCategory, existing_maintenance_tables, - qualified_table_name, ) from .ui import ( console, @@ -28,60 +26,49 @@ ) -class ForeignKeyAction(StrEnum): - DISABLE = "disable" - ENABLE = "enable" - - @dataclass(frozen=True) -class ForeignKeyTarget: +class ForeignKeyBase: + """Identity and CDM category metadata shared across all foreign key result types.""" + table_name: str category: TableCategory model_name: str model_module: str + + +@dataclass(frozen=True) +class _FKTableInfo(ForeignKeyBase): + """Internal snapshot of a table's outgoing and incoming FK constraint counts, used to drive trigger management.""" + outgoing_constraint_count: int incoming_constraint_count: int 
@dataclass(frozen=True) -class ForeignKeyManagementResult: - table_name: str - category: TableCategory - model_name: str - model_module: str +class ForeignKeyManagementResult(ForeignKeyBase): + """Outcome of a FK trigger enable or disable operation for one table.""" + outgoing_constraint_count: int incoming_constraint_count: int - action: ForeignKeyAction + enable: bool status: str detail: str @dataclass(frozen=True) -class ForeignKeyStatusResult: - table_name: str - category: TableCategory - model_name: str - model_module: str - disabled_trigger_count: int - enabled_trigger_count: int +class ForeignKeyStatusResult(ForeignKeyBase): + """Current FK trigger state for one table: counts of disabled vs enabled PostgreSQL RI triggers.""" + outgoing_constraint_count: int incoming_constraint_count: int + disabled_trigger_count: int + enabled_trigger_count: int @dataclass(frozen=True) -class ForeignKeyConstraintViolation: - source_table_name: str - referred_table_name: str - constraint_name: str - violation_count: int +class ForeignKeyValidationResult(ForeignKeyBase): + """FK constraint validation outcome for one table, with counts of violating constraints and rows.""" - -@dataclass(frozen=True) -class ForeignKeyValidationResult: - table_name: str - category: TableCategory - model_name: str - model_module: str outgoing_constraint_count: int incoming_constraint_count: int violating_constraint_count: int @@ -91,36 +78,33 @@ class ForeignKeyValidationResult: @dataclass(frozen=True) -class ForeignKeyValidationReport: - results: tuple[ForeignKeyValidationResult, ...] - violations: tuple[ForeignKeyConstraintViolation, ...] 
+class ForeignKeyConstraintViolation: + """A single FK constraint that has referential integrity violations, with the violation row count.""" + source_table_name: str + referred_table_name: str + constraint_name: str + violation_count: int -def _ensure_postgresql_supported(engine: sa.Engine, *, feature: str) -> None: - require_backend(engine, feature=feature, supported_dialects=(Dialect.POSTGRESQL,)) +@dataclass(frozen=True) +class ForeignKeyValidationReport: + """Complete FK validation report: per-table results and the full flat list of violations.""" -def _selected_existing_tables( - inspector: sa.Inspector, - *, - db_schema: str | None, - vocabulary_included: bool, -) -> list[MaintenanceTable]: - return existing_maintenance_tables( - inspector, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - ) + results: tuple[ForeignKeyValidationResult, ...] + violations: tuple[ForeignKeyConstraintViolation, ...] -def collect_foreign_key_targets( +def _collect_fk_info( engine: sa.Engine, *, db_schema: str | None = None, vocabulary_included: bool = False, -) -> list[ForeignKeyTarget]: +) -> list[_FKTableInfo]: + """Return all ORM-managed tables that participate in at least one FK relationship (outgoing or incoming).""" inspector = sa.inspect(engine) - selected_tables = _selected_existing_tables( + + selected_tables = existing_maintenance_tables( inspector, db_schema=db_schema, vocabulary_included=vocabulary_included, @@ -143,7 +127,7 @@ def collect_foreign_key_targets( if referred_table is not None: incoming_counts[referred_table] += 1 - results: list[ForeignKeyTarget] = [] + results: list[_FKTableInfo] = [] for table in selected_tables: if table.table_name not in selected_names: continue @@ -152,7 +136,7 @@ def collect_foreign_key_targets( if outgoing_count == 0 and incoming_count == 0: continue results.append( - ForeignKeyTarget( + _FKTableInfo( table_name=table.table_name, category=table.category, model_name=table.model_name, @@ -167,12 +151,18 @@ def 
collect_foreign_key_targets( def _collect_strict_validation_failures( connection: sa.Connection, + backend: Backend, *, db_schema: str | None, vocabulary_included: bool, ) -> dict[str, list[ForeignKeyConstraintViolation]]: + """Query every FK constraint across selected tables and return a mapping of table name → violation list. + + Only tables with at least one violation are included in the returned dict. + Used by manage_foreign_key_triggers (strict=True) and validate_foreign_key_constraints. + """ inspector = sa.inspect(connection) - selected_tables = _selected_existing_tables( + selected_tables = existing_maintenance_tables( inspector, db_schema=db_schema, vocabulary_included=vocabulary_included, @@ -196,34 +186,13 @@ def _collect_strict_validation_failures( ): continue - source_table_name = qualified_table_name(table_name, db_schema) - referred_table_name = qualified_table_name(str(referred_table), db_schema) - non_null_predicate = " AND ".join( - f"src.{column_name} IS NOT NULL" - for column_name in constrained_columns - ) - join_predicate = " AND ".join( - f"ref.{referred_column} = src.{constrained_column}" - for constrained_column, referred_column in zip( - constrained_columns, - referred_columns, - strict=True, - ) - ) - - violation_count = int( - connection.exec_driver_sql( - f""" - SELECT COUNT(*) - FROM {source_table_name} AS src - WHERE {non_null_predicate} - AND NOT EXISTS ( - SELECT 1 - FROM {referred_table_name} AS ref - WHERE {join_predicate} - ) - """ - ).scalar_one() + violation_count = backend.count_fk_violations( + connection, + table_name, + str(referred_table), + list(constrained_columns), + list(referred_columns), + db_schema, ) if violation_count == 0: @@ -246,6 +215,7 @@ def _collect_strict_validation_failures( def _strict_failure_detail(violations: list[ForeignKeyConstraintViolation]) -> str: + """Build the detail string used when strict mode aborts trigger enabling due to FK violations.""" constraint_summary = ", ".join( 
f"{violation.constraint_name} ({violation.violation_count})" for violation in violations[:3] @@ -262,6 +232,7 @@ def _strict_failure_detail(violations: list[ForeignKeyConstraintViolation]) -> s def _validation_failure_detail(violations: list[ForeignKeyConstraintViolation]) -> str: + """Build the per-table detail string for the validate command when violations are found.""" constraint_summary = ", ".join( f"{violation.constraint_name} ({violation.violation_count})" for violation in violations[:3] @@ -282,9 +253,11 @@ def validate_foreign_key_constraints( db_schema: str | None = None, vocabulary_included: bool = False, ) -> ForeignKeyValidationReport: - _ensure_postgresql_supported(engine, feature="Foreign key constraint validation") + """Count rows that violate each FK constraint and return a full per-table validation report.""" + backend = resolve_backend(engine) + require_backend_support(backend, "count_fk_violations", "FK constraint validation") - targets = collect_foreign_key_targets( + targets = _collect_fk_info( engine, db_schema=db_schema, vocabulary_included=vocabulary_included, @@ -293,6 +266,7 @@ def validate_foreign_key_constraints( with engine.connect() as connection: validation_failures = _collect_strict_validation_failures( connection, + backend, db_schema=db_schema, vocabulary_included=vocabulary_included, ) @@ -336,26 +310,28 @@ def validate_foreign_key_constraints( def manage_foreign_key_triggers( engine: sa.Engine, *, - action: ForeignKeyAction, + enable: bool = False, db_schema: str | None = None, vocabulary_included: bool = False, dry_run: bool = False, strict: bool = False, ) -> list[ForeignKeyManagementResult]: - _ensure_postgresql_supported(engine, feature="Foreign key trigger management") + """Enable or disable RI trigger enforcement; with strict=True, aborts on any FK violation.""" + backend = resolve_backend(engine) + require_backend_support(backend, "toggle_fk_triggers", "FK trigger management") - targets = collect_foreign_key_targets( + 
targets = _collect_fk_info( engine, db_schema=db_schema, vocabulary_included=vocabulary_included, ) - trigger_action = "DISABLE" if action is ForeignKeyAction.DISABLE else "ENABLE" results: list[ForeignKeyManagementResult] = [] with engine.begin() as connection: - if action is ForeignKeyAction.ENABLE and strict: + if enable and strict: validation_failures = _collect_strict_validation_failures( connection, + backend, db_schema=db_schema, vocabulary_included=vocabulary_included, ) @@ -370,7 +346,7 @@ def manage_foreign_key_triggers( model_module=target.model_module, outgoing_constraint_count=target.outgoing_constraint_count, incoming_constraint_count=target.incoming_constraint_count, - action=action, + enable=enable, status="failed" if violations else "skipped", detail=( _strict_failure_detail(violations) @@ -384,9 +360,9 @@ def manage_foreign_key_triggers( for target in targets: detail = ( "FK trigger enforcement would be disabled" - if action is ForeignKeyAction.DISABLE and dry_run + if not enable and dry_run else "FK trigger enforcement disabled" - if action is ForeignKeyAction.DISABLE + if not enable else "Strict FK validation passed; trigger enforcement would be enabled" if strict and dry_run else "Strict FK validation passed; trigger enforcement enabled" @@ -396,10 +372,7 @@ def manage_foreign_key_triggers( else "FK trigger enforcement enabled" ) if not dry_run: - connection.exec_driver_sql( - f"ALTER TABLE {qualified_table_name(target.table_name, db_schema)} " - f"{trigger_action} TRIGGER ALL" - ) + backend.toggle_fk_triggers(connection, target.table_name, db_schema, enable=enable) results.append( ForeignKeyManagementResult( @@ -409,7 +382,7 @@ def manage_foreign_key_triggers( model_module=target.model_module, outgoing_constraint_count=target.outgoing_constraint_count, incoming_constraint_count=target.incoming_constraint_count, - action=action, + enable=enable, status="planned" if dry_run else "applied", detail=detail, ) @@ -424,45 +397,30 @@ def 
collect_foreign_key_trigger_status( db_schema: str | None = None, vocabulary_included: bool = False, ) -> list[ForeignKeyStatusResult]: - _ensure_postgresql_supported(engine, feature="Foreign key trigger status inspection") + """Query pg_trigger to count disabled vs enabled RI triggers for each participating table.""" + backend = resolve_backend(engine) + require_backend_support(backend, "get_fk_trigger_counts", "FK trigger status inspection") - targets = collect_foreign_key_targets( + targets = _collect_fk_info( engine, db_schema=db_schema, vocabulary_included=vocabulary_included, ) - results: list[ForeignKeyStatusResult] = [] - query = sa.text( - """ - SELECT - SUM(CASE WHEN t.tgenabled = 'D' THEN 1 ELSE 0 END) AS disabled_count, - SUM(CASE WHEN t.tgenabled <> 'D' THEN 1 ELSE 0 END) AS enabled_count - FROM pg_trigger t - JOIN pg_class c ON c.oid = t.tgrelid - JOIN pg_namespace n ON n.oid = c.relnamespace - WHERE t.tgisinternal - AND t.tgname LIKE 'RI_ConstraintTrigger%' - AND c.relname = :table_name - AND (:db_schema IS NULL OR n.nspname = :db_schema) - """ - ) with engine.connect() as connection: for target in targets: - disabled_count, enabled_count = connection.execute( - query, - {"table_name": target.table_name, "db_schema": db_schema}, - ).one() - + disabled_count, enabled_count = backend.get_fk_trigger_counts( + connection, target.table_name, db_schema + ) results.append( ForeignKeyStatusResult( table_name=target.table_name, category=target.category, model_name=target.model_name, model_module=target.model_module, - disabled_trigger_count=int(disabled_count or 0), - enabled_trigger_count=int(enabled_count or 0), + disabled_trigger_count=disabled_count, + enabled_trigger_count=enabled_count, outgoing_constraint_count=target.outgoing_constraint_count, incoming_constraint_count=target.incoming_constraint_count, ) @@ -476,60 +434,98 @@ def collect_foreign_key_trigger_status( # --------------------------------------------------------------------------- app = 
typer.Typer( - help=f"Manage PostgreSQL RI trigger enforcement for OMOP tables. {POSTGRESQL_ONLY_HELP}", + help=f"Manage RI trigger enforcement for OMOP tables. {backend_support_note('toggle_fk_triggers')}", rich_markup_mode="rich", ) - @app.command("disable") def disable_foreign_keys_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), - dry_run: bool = typer.Option(False, "--dry-run"), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). 
Sets search_path on PostgreSQL; not supported on SQLite.", + ), + vocabulary_included: bool = typer.Option( + False, + "--vocab/--no-vocab", + help="Include OMOP vocabulary tables in the selection.", + ), + dry_run: bool = typer.Option( + False, + "--dry-run", + help="Preview planned actions without applying any changes to the database.", + ), + strict: bool = typer.Option( + False, + "--strict", + help="Validate all FK relationships and report violations before disabling trigger enforcement.", + ), ) -> None: - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( + """Disable PostgreSQL RI trigger enforcement for all participating OMOP tables.""" + try: + conn, engine = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="foreign-keys disable", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=vocabulary_included, mode_label="dry-run" if dry_run else "apply", ) - ) - try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) with console.status("Managing PostgreSQL foreign key trigger enforcement..."): results = manage_foreign_key_triggers( engine, - action=ForeignKeyAction.DISABLE, + enable=False, db_schema=conn.db_schema, vocabulary_included=vocabulary_included, dry_run=dry_run, - strict=False, + strict=strict, ) console.print(render_foreign_key_results(results)) console.print(render_foreign_key_summary(results, dry_run=dry_run)) - console.print(render_foreign_key_note(ForeignKeyAction.DISABLE, strict=False)) + console.print(render_foreign_key_note(enable=False, strict=strict)) except Exception as exc: handle_error(exc) @app.command("enable") def enable_foreign_keys_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = 
typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", + ), + vocabulary_included: bool = typer.Option( + False, + "--vocab/--no-vocab", + help="Include OMOP vocabulary tables in the selection.", + ), strict: bool = typer.Option( False, "--strict", - help="Validate all selected foreign key relationships before enabling trigger enforcement.", + help="Validate all FK relationships before enabling trigger enforcement; aborts if any violations are found.", + ), + dry_run: bool = typer.Option( + False, + "--dry-run", + help="Preview planned actions without applying any changes to the database.", ), - dry_run: bool = typer.Option(False, "--dry-run"), ) -> None: + """Re-enable PostgreSQL RI trigger enforcement; use --strict to abort if any violations exist first.""" conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) console.print( render_command_header( @@ -550,7 +546,7 @@ def enable_foreign_keys_command( with console.status(status_msg): results = manage_foreign_key_triggers( engine, - action=ForeignKeyAction.ENABLE, + enable=True, db_schema=conn.db_schema, vocabulary_included=vocabulary_included, dry_run=dry_run, @@ -558,30 +554,42 @@ def enable_foreign_keys_command( ) console.print(render_foreign_key_results(results)) console.print(render_foreign_key_summary(results, dry_run=dry_run)) - console.print(render_foreign_key_note(ForeignKeyAction.ENABLE, strict=strict)) + 
console.print(render_foreign_key_note(enable=True, strict=strict)) except Exception as exc: handle_error(exc) @app.command("status") def foreign_key_status_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", + ), + vocabulary_included: bool = typer.Option( + False, + "--vocab/--no-vocab", + help="Include OMOP vocabulary tables in the selection.", + ), ) -> None: - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( + """Show the current enabled/disabled state of RI triggers for each participating OMOP table.""" + try: + conn, engine = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="foreign-keys status", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=vocabulary_included, mode_label="inspect", ) - ) - try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) with console.status("Inspecting foreign key trigger status..."): results = collect_foreign_key_trigger_status( engine, @@ -594,28 +602,37 @@ def foreign_key_status_command( handle_error(exc) -@app.command( - "validate", - help="Validate selected 
foreign key relationships and report violating constraints.", -) +@app.command("validate") def foreign_key_validate_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", + ), + vocabulary_included: bool = typer.Option( + False, + "--vocab/--no-vocab", + help="Include OMOP vocabulary tables in the selection.", + ), ) -> None: - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( + """Validate FK constraints on selected tables and report any rows that violate referential integrity.""" + try: + conn, engine = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="foreign-keys validate", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=vocabulary_included, mode_label="inspect", ) - ) - try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) with console.status("Validating selected foreign key relationships..."): report = validate_foreign_key_constraints( engine, diff --git a/omop_alchemy/maintenance/cli_fulltext.py b/omop_alchemy/maintenance/cli_fulltext.py index c87ad53..c4452b4 100644 --- 
a/omop_alchemy/maintenance/cli_fulltext.py +++ b/omop_alchemy/maintenance/cli_fulltext.py @@ -1,56 +1,234 @@ from __future__ import annotations import typer +from sqlalchemy.engine import Engine -from ..backend_support import POSTGRESQL_ONLY_HELP -from ._cli_utils import build_engine, handle_error, resolve_connection -from ..cdm.handlers.fulltext import ( - drop_fulltext_columns, - install_fulltext_columns, - populate_fulltext_columns, -) +from ..backends import backend_support_note as _backend_support_note +from ..backends import resolve_backend, require_backend_support +from ..backends.base import FullTextAction, FullTextError, FullTextResult +from ._cli_utils import handle_error, setup_cli_cmd from .ui import ( console, - render_command_header, render_fulltext_results, render_fulltext_summary, ) app = typer.Typer( - help=f"Manage PostgreSQL full-text sidecar tsvector columns for OMOP vocabulary tables. {POSTGRESQL_ONLY_HELP}", + help=f"Manage full-text search for OMOP vocabulary tables. 
{_backend_support_note('install_fulltext_on_table')}", rich_markup_mode="rich", ) +# ── Orchestrators ───────────────────────────────────────────────────────────── + +def install_fulltext_columns( + engine: Engine, + *, + db_schema: str | None = None, + create_indexes: bool = True, + fastupdate: bool = False, + dry_run: bool = False, +) -> tuple[FullTextResult, ...]: + """Install tsvector sidecar columns (and optionally GIN indexes) on OMOP vocabulary tables.""" + backend = resolve_backend(engine) + require_backend_support(backend, "install_fulltext_on_table", "Full-text search") + targets = backend.fulltext_targets + + try: + if not dry_run: + with engine.begin() as connection: + for cfg in targets: + backend.install_fulltext_on_table( + connection, + table_name=cfg.table_name, + vector_column_name=cfg.vector_column_name, + index_name=cfg.index_name, + db_schema=db_schema, + create_indexes=create_indexes, + fastupdate=fastupdate, + ) + backend.register_fulltext_metadata() + except FullTextError: + raise + except Exception as exc: + raise FullTextError( + f"Full-text install failed. 
Underlying error: {exc.__class__.__name__}: {exc}" + ) from exc + + return tuple( + FullTextResult( + target_name=cfg.table_name, + table_name=cfg.table_name, + source_column_name=cfg.source_column_name, + vector_column_name=cfg.vector_column_name, + index_name=cfg.index_name, + action=FullTextAction.INSTALL, + status="planned" if dry_run else "applied", + detail=( + "tsvector column would be installed" + if dry_run and not create_indexes + else "tsvector column and GIN index would be installed" + if dry_run + else "tsvector column installed" + if not create_indexes + else "tsvector column and GIN index installed" + ), + ) + for cfg in targets + ) + + +def populate_fulltext_columns( + engine: Engine, + *, + db_schema: str | None = None, + regconfig: str = "english", + dry_run: bool = False, +) -> tuple[FullTextResult, ...]: + """Populate tsvector sidecar columns with pre-computed search vectors.""" + backend = resolve_backend(engine) + require_backend_support(backend, "populate_fulltext_on_table", "Full-text search") + targets = backend.fulltext_targets + + row_counts: dict[str, int | None] = {} + try: + if not dry_run: + with engine.begin() as connection: + for cfg in targets: + row_counts[cfg.table_name] = backend.populate_fulltext_on_table( + connection, + table_name=cfg.table_name, + vector_column_name=cfg.vector_column_name, + source_column_name=cfg.source_column_name, + db_schema=db_schema, + regconfig=regconfig, + ) + backend.register_fulltext_metadata() + except FullTextError: + raise + except Exception as exc: + raise FullTextError( + f"Full-text populate failed. 
Underlying error: {exc.__class__.__name__}: {exc}" + ) from exc + + return tuple( + FullTextResult( + target_name=cfg.table_name, + table_name=cfg.table_name, + source_column_name=cfg.source_column_name, + vector_column_name=cfg.vector_column_name, + index_name=cfg.index_name, + action=FullTextAction.POPULATE, + status="planned" if dry_run else "applied", + detail=( + "tsvector column would be populated from source text" + if dry_run + else "tsvector column populated from source text" + ), + row_count=None if dry_run else row_counts.get(cfg.table_name), + ) + for cfg in targets + ) + + +def drop_fulltext_columns( + engine: Engine, + *, + db_schema: str | None = None, + drop_indexes: bool = True, + dry_run: bool = False, +) -> tuple[FullTextResult, ...]: + """Remove tsvector sidecar columns and their associated GIN indexes.""" + backend = resolve_backend(engine) + require_backend_support(backend, "drop_fulltext_on_table", "Full-text search") + targets = backend.fulltext_targets + + try: + if not dry_run: + with engine.begin() as connection: + for cfg in targets: + backend.drop_fulltext_on_table( + connection, + table_name=cfg.table_name, + vector_column_name=cfg.vector_column_name, + index_name=cfg.index_name, + db_schema=db_schema, + drop_indexes=drop_indexes, + ) + backend.unregister_fulltext_metadata() + except FullTextError: + raise + except Exception as exc: + raise FullTextError( + f"Full-text drop failed. 
Underlying error: {exc.__class__.__name__}: {exc}" + ) from exc + + return tuple( + FullTextResult( + target_name=cfg.table_name, + table_name=cfg.table_name, + source_column_name=cfg.source_column_name, + vector_column_name=cfg.vector_column_name, + index_name=cfg.index_name, + action=FullTextAction.DROP, + status="planned" if dry_run else "applied", + detail=( + "tsvector column would be dropped" + if dry_run and not drop_indexes + else "tsvector column and GIN index would be dropped" + if dry_run + else "tsvector column dropped" + if not drop_indexes + else "tsvector column and GIN index dropped" + ), + ) + for cfg in targets + ) + + +# ── CLI commands ────────────────────────────────────────────────────────────── + @app.command("install") def install_fulltext_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). 
Sets search_path on PostgreSQL; not supported on SQLite.", + ), create_indexes: bool = typer.Option( True, "--create-indexes/--no-create-indexes", - help="Create GIN indexes alongside the tsvector columns.", + help="Create GIN indexes alongside the tsvector columns for fast full-text search.", ), fastupdate: bool = typer.Option( False, "--fastupdate/--no-fastupdate", - help="Set PostgreSQL GIN fastupdate on created indexes.", + help="Enable PostgreSQL GIN fastupdate on newly created indexes (trades write speed for query latency).", + ), + dry_run: bool = typer.Option( + False, + "--dry-run", + help="Preview planned actions without applying any changes to the database.", ), - dry_run: bool = typer.Option(False, "--dry-run"), ) -> None: - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( + """Add tsvector sidecar columns to vocabulary tables and optionally create GIN indexes for fast full-text search.""" + try: + conn, engine = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="fulltext install", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=True, mode_label="dry-run" if dry_run else "apply", ) - ) - try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) with console.status("Managing PostgreSQL full-text sidecar columns..."): results = install_fulltext_columns( engine, @@ -67,27 +245,39 @@ def install_fulltext_command( @app.command("populate") def populate_fulltext_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. 
Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", + ), regconfig: str = typer.Option( "english", - help="PostgreSQL text search configuration to use for vector population.", + help="PostgreSQL text search configuration to use when building tsvector values (e.g. 'english', 'simple').", + ), + dry_run: bool = typer.Option( + False, + "--dry-run", + help="Preview planned actions without applying any changes to the database.", ), - dry_run: bool = typer.Option(False, "--dry-run"), ) -> None: - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( + """Fill tsvector sidecar columns with pre-computed search vectors using the specified PostgreSQL text search configuration.""" + try: + conn, engine = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="fulltext populate", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=True, mode_label="dry-run" if dry_run else "apply", ) - ) - try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) with console.status("Managing PostgreSQL full-text sidecar columns..."): results = populate_fulltext_columns( engine, @@ -103,28 +293,40 @@ def populate_fulltext_command( @app.command("drop") def drop_fulltext_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), + dotenv: str | None = typer.Option( + None, + 
help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", + ), drop_indexes: bool = typer.Option( True, "--drop-indexes/--no-drop-indexes", help="Drop managed GIN indexes before dropping the tsvector columns.", ), - dry_run: bool = typer.Option(False, "--dry-run"), + dry_run: bool = typer.Option( + False, + "--dry-run", + help="Preview planned actions without applying any changes to the database.", + ), ) -> None: - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( + """Remove tsvector sidecar columns and their associated GIN indexes from vocabulary tables.""" + try: + conn, engine = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="fulltext drop", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=True, mode_label="dry-run" if dry_run else "apply", ) - ) - try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) with console.status("Managing PostgreSQL full-text sidecar columns..."): results = drop_fulltext_columns( engine, diff --git a/omop_alchemy/maintenance/cli_indexes.py b/omop_alchemy/maintenance/cli_indexes.py index 6e8ac81..69a893f 100644 --- a/omop_alchemy/maintenance/cli_indexes.py +++ b/omop_alchemy/maintenance/cli_indexes.py @@ -8,18 +8,16 @@ from omop_alchemy.cdm.base.indexing import OMOP_CLUSTER_INDEX_INFO_KEY -from ..backend_support import Dialect, backend_label, supports_backend -from ._cli_utils import build_engine, handle_error, resolve_connection 
+from ..backends import resolve_backend, backend_supports +from ._cli_utils import handle_error, setup_cli_cmd from .tables import ( MaintenanceTable, TableCategory, - qualified_table_name, schema_adjusted_metadata, select_omop_tables, ) from .ui import ( console, - render_command_header, render_index_note, render_index_results, render_index_summary, @@ -27,12 +25,16 @@ class IndexAction(StrEnum): + """Whether to create or drop ORM-defined secondary indexes.""" + DISABLE = "disable" ENABLE = "enable" @dataclass(frozen=True) class IndexTarget: + """An ORM-defined index that currently exists in the target database.""" + table_name: str category: TableCategory model_name: str @@ -45,6 +47,8 @@ class IndexTarget: @dataclass(frozen=True) class IndexManagementResult: + """Outcome of creating or dropping one ORM-defined index, or clustering a table.""" + operation: str table_name: str category: TableCategory @@ -63,6 +67,7 @@ def _schema_metadata_indexes( tables: list[MaintenanceTable], db_schema: str | None, ) -> dict[tuple[str, str], sa.Index]: + """Return a (table_name, index_name) → Index mapping from ORM metadata, adjusted for db_schema if provided.""" indexes: dict[tuple[str, str], sa.Index] = {} if db_schema is None: @@ -80,6 +85,7 @@ def _schema_metadata_indexes( def _cluster_target_name(table: MaintenanceTable) -> str | None: + """Return the name of the ORM-designated cluster index for a table, or None if no cluster target is defined.""" cluster_indexes = [ str(index.name) for index in table.table.indexes @@ -99,6 +105,7 @@ def _cluster_column_names( table: MaintenanceTable, cluster_index_name: str, ) -> tuple[str, ...]: + """Return the column names of the named cluster index; falls back to the primary key if the index is not found.""" for index in table.table.indexes: if str(index.name) == cluster_index_name: return tuple(column.name for column in index.columns) @@ -111,6 +118,7 @@ def collect_index_targets( db_schema: str | None = None, vocabulary_included: 
bool = False, ) -> list[IndexTarget]: + """List ORM-defined indexes that currently exist in the target database.""" inspector = sa.inspect(engine) selected_tables = select_omop_tables(vocabulary_included=vocabulary_included) @@ -152,10 +160,12 @@ def manage_indexes( vocabulary_included: bool = False, dry_run: bool = False, ) -> list[IndexManagementResult]: + """Create or drop all ORM-defined indexes; also CLUSTER tables on PostgreSQL when enabling.""" + backend = resolve_backend(engine) inspector = sa.inspect(engine) selected_tables = select_omop_tables(vocabulary_included=vocabulary_included) metadata_indexes = _schema_metadata_indexes(selected_tables, db_schema) - clustering_supported = supports_backend(engine, supported_dialects=(Dialect.POSTGRESQL,)) + clustering_supported = backend_supports(backend, "cluster_table") results: list[IndexManagementResult] = [] @@ -235,17 +245,14 @@ def manage_indexes( status="skipped", detail=( "cluster metadata present but unsupported on " - f"{backend_label(engine.dialect.name)}" + f"{backend.name}" ), ) ) continue if not dry_run: - connection.exec_driver_sql( - f"CLUSTER {qualified_table_name(table.table_name, db_schema)} " - f"USING {cluster_index_name}" - ) + backend.cluster_table(connection, table.table_name, cluster_index_name, db_schema) results.append( IndexManagementResult( @@ -279,24 +286,40 @@ def manage_indexes( @app.command("disable") def disable_indexes_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), - dry_run: bool = typer.Option(False, "--dry-run"), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. 
Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", + ), + vocabulary_included: bool = typer.Option( + False, + "--vocab/--no-vocab", + help="Include OMOP vocabulary tables in the selection.", + ), + dry_run: bool = typer.Option( + False, + "--dry-run", + help="Preview planned actions without applying any changes to the database.", + ), ) -> None: - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( + """Drop all ORM-defined secondary indexes from the target database; useful before bulk data loads.""" + try: + conn, engine = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="indexes disable", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=vocabulary_included, mode_label="dry-run" if dry_run else "apply", ) - ) - try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) with console.status("Managing metadata-defined indexes..."): results = manage_indexes( engine, @@ -314,24 +337,40 @@ def disable_indexes_command( @app.command("enable") def enable_indexes_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), - dry_run: bool = typer.Option(False, "--dry-run"), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the 
connection. Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", + ), + vocabulary_included: bool = typer.Option( + False, + "--vocab/--no-vocab", + help="Include OMOP vocabulary tables in the selection.", + ), + dry_run: bool = typer.Option( + False, + "--dry-run", + help="Preview planned actions without applying any changes to the database.", + ), ) -> None: - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( + """Recreate all ORM-defined secondary indexes; also CLUSTERs tables on PostgreSQL where metadata specifies it.""" + try: + conn, engine = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="indexes enable", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=vocabulary_included, mode_label="dry-run" if dry_run else "apply", ) - ) - try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) with console.status("Managing metadata-defined indexes..."): results = manage_indexes( engine, diff --git a/omop_alchemy/maintenance/cli_schema.py b/omop_alchemy/maintenance/cli_schema.py index 63da74b..ededf2a 100644 --- a/omop_alchemy/maintenance/cli_schema.py +++ b/omop_alchemy/maintenance/cli_schema.py @@ -8,12 +8,15 @@ import sqlalchemy as sa from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.engine.interfaces import ReflectedIndex import typer -from omop_alchemy import create_engine_with_dependencies, get_engine_name, load_environment +from omop_alchemy import create_engine_with_dependencies, load_environment +from 
omop_alchemy.backends.resolve import SupportedDialect +from omop_alchemy.db import get_engine_name -from ..backend_support import Dialect, backend_label -from ._cli_utils import build_engine, handle_error, resolve_connection +from ..backends import resolve_backend +from ._cli_utils import handle_error, setup_cli_cmd from .cli_config import defaults_path from .cli_foreign_keys import ( ForeignKeyStatusResult, @@ -25,7 +28,6 @@ from .tables import ( MaintenanceTable, TableCategory, - TableScope, collect_maintenance_tables, missing_maintenance_tables, qualified_table_name, @@ -35,7 +37,6 @@ ) from .ui import ( console, - render_command_header, render_data_summary_results, render_data_summary_summary, render_doctor_checks, @@ -55,12 +56,22 @@ ) +def _backend_label(dialect_name: str) -> str: + from ..backends.resolve import _DIALECT_TO_BACKEND_MAP, SupportedDialect + try: + return _DIALECT_TO_BACKEND_MAP[SupportedDialect(dialect_name)].name + except (ValueError, KeyError): + return dialect_name + + # --------------------------------------------------------------------------- # info # --------------------------------------------------------------------------- @dataclass(frozen=True) class DependencyStatus: + """Installation status of a Python package or external tool dependency.""" + name: str installed: bool version: str | None @@ -68,6 +79,8 @@ class DependencyStatus: @dataclass(frozen=True) class CommandSupport: + """Readiness assessment for one CLI command given the current backend and connection state.""" + command_name: str requirement: str status: str @@ -76,6 +89,8 @@ class CommandSupport: @dataclass(frozen=True) class MaintenanceInfo: + """Full environment snapshot: package version, connection state, and per-command readiness.""" + package_version: str cli_path: str | None pg_dump_path: str | None @@ -102,10 +117,12 @@ class MaintenanceInfo: def _package_version() -> str: + """Return the installed omop-alchemy package version string.""" return 
importlib.metadata.version("omop-alchemy") def _dependency_status(distribution_name: str, module_name: str) -> DependencyStatus: + """Check whether a Python package is importable and return its installed version if found.""" installed = importlib.util.find_spec(module_name) is not None version: str | None = None if installed: @@ -117,6 +134,7 @@ def _dependency_status(distribution_name: str, module_name: str) -> DependencySt def _external_dependency_status(name: str, executable_name: str) -> DependencyStatus: + """Check whether an external CLI tool is on PATH and return a DependencyStatus (version always None).""" return DependencyStatus( name=name, installed=shutil.which(executable_name) is not None, @@ -125,6 +143,7 @@ def _external_dependency_status(name: str, executable_name: str) -> DependencySt def _command_support_for_unavailable_engine(detail: str) -> tuple[CommandSupport, ...]: + """Return a full CommandSupport tuple with every command marked blocked, used when the engine cannot be created.""" blocked = "blocked" return ( CommandSupport("doctor", "Any SQLAlchemy backend", blocked, detail), @@ -161,7 +180,8 @@ def _command_support_for_backend( pg_restore_path: str | None, psql_path: str | None, ) -> tuple[CommandSupport, ...]: - current_backend = backend_label(backend) + """Compute the readiness status of every CLI command given the current backend, connection state, and tool availability.""" + current_backend = _backend_label(backend) if not engine_created: blocked_detail = ( f"Backend resolved to {current_backend}, but the engine could not be created: {engine_error}" @@ -179,7 +199,7 @@ def _command_support_for_backend( f"Ready on {current_backend}." if connection_ready else blocked_detail ) - if backend == Dialect.POSTGRESQL: + if backend == SupportedDialect.POSTGRESQL: analyze_status = portable_status analyze_detail = ( "Ready on PostgreSQL; ANALYZE and VACUUM ANALYZE are both supported." 
@@ -265,18 +285,18 @@ def _command_support_for_backend( "PostgreSQL + pg_dump", ( "ready" - if connection_ready and backend == Dialect.POSTGRESQL and pg_dump_path is not None + if connection_ready and backend == SupportedDialect.POSTGRESQL and pg_dump_path is not None else "blocked" - if backend == Dialect.POSTGRESQL + if backend == SupportedDialect.POSTGRESQL else "unsupported" if connection_ready else "blocked" ), ( "Ready on PostgreSQL; `pg_dump` is available." - if connection_ready and backend == Dialect.POSTGRESQL and pg_dump_path is not None + if connection_ready and backend == SupportedDialect.POSTGRESQL and pg_dump_path is not None else "PostgreSQL is configured, but `pg_dump` is not on PATH." - if connection_ready and backend == Dialect.POSTGRESQL + if connection_ready and backend == SupportedDialect.POSTGRESQL else f"Requires PostgreSQL. Current backend: {current_backend}." if connection_ready else blocked_detail @@ -287,18 +307,18 @@ def _command_support_for_backend( "PostgreSQL + pg_restore/psql", ( "ready" - if connection_ready and backend == Dialect.POSTGRESQL and (pg_restore_path is not None or psql_path is not None) + if connection_ready and backend == SupportedDialect.POSTGRESQL and (pg_restore_path is not None or psql_path is not None) else "blocked" - if backend == Dialect.POSTGRESQL + if backend == SupportedDialect.POSTGRESQL else "unsupported" if connection_ready else "blocked" ), ( "Ready on PostgreSQL; restore client tooling is available." - if connection_ready and backend == Dialect.POSTGRESQL and (pg_restore_path is not None or psql_path is not None) + if connection_ready and backend == SupportedDialect.POSTGRESQL and (pg_restore_path is not None or psql_path is not None) else "PostgreSQL is configured, but neither `pg_restore` nor `psql` is on PATH." - if connection_ready and backend == Dialect.POSTGRESQL + if connection_ready and backend == SupportedDialect.POSTGRESQL else f"Requires PostgreSQL. Current backend: {current_backend}." 
if connection_ready else blocked_detail @@ -324,6 +344,7 @@ def collect_maintenance_info( dotenv: str | None = None, vocabulary_included: bool = True, ) -> MaintenanceInfo: + """Probe the current environment: resolve config, attempt a connection, and assess per-command readiness.""" load_environment(dotenv or "") pg_dump_path = shutil.which("pg_dump") pg_restore_path = shutil.which("pg_restore") @@ -441,6 +462,8 @@ def collect_maintenance_info( @dataclass(frozen=True) class DoctorCheck: + """Result of a single named maintenance health check (e.g. 'managed tables', 'schema drift').""" + name: str status: str detail: str @@ -448,6 +471,8 @@ class DoctorCheck: @dataclass(frozen=True) class DoctorRecommendation: + """Actionable recommendation derived from health check results, with an optional CLI command hint.""" + status: str summary: str action: str | None @@ -455,6 +480,8 @@ class DoctorRecommendation: @dataclass(frozen=True) class DoctorReport: + """Complete doctor report: health checks, prioritised recommendations, and optional deep-inspection data.""" + info: MaintenanceInfo checks: tuple[DoctorCheck, ...] recommendations: tuple[DoctorRecommendation, ...] @@ -470,6 +497,7 @@ def _build_recommendations( foreign_key_status: tuple[ForeignKeyStatusResult, ...] 
| None, foreign_key_validation: ForeignKeyValidationReport | None, ) -> tuple[DoctorRecommendation, ...]: + """Derive a prioritised list of actionable recommendations from the doctor check results.""" recommendations: list[DoctorRecommendation] = [] if not info.connection_ready: @@ -523,7 +551,7 @@ def _build_recommendations( ) ) - if info.backend == Dialect.POSTGRESQL and info.pg_dump_path is None: + if info.backend == SupportedDialect.POSTGRESQL and info.pg_dump_path is None: recommendations.append( DoctorRecommendation( status="warning", @@ -533,7 +561,7 @@ def _build_recommendations( ) if ( - info.backend == Dialect.POSTGRESQL + info.backend == SupportedDialect.POSTGRESQL and info.pg_restore_path is None and info.psql_path is None ): @@ -565,6 +593,7 @@ def collect_doctor_report( vocabulary_included: bool = True, deep: bool = False, ) -> DoctorReport: + """Run all maintenance health checks and return a prioritised report with recommendations.""" load_environment(dotenv or "") info = collect_maintenance_info( engine_schema=engine_schema, @@ -631,7 +660,7 @@ def collect_doctor_report( ) ) - if info.backend == Dialect.POSTGRESQL: + if info.backend == SupportedDialect.POSTGRESQL: foreign_key_status = tuple( collect_foreign_key_trigger_status( engine, @@ -725,7 +754,7 @@ def collect_doctor_report( ) ) - if info.backend == Dialect.POSTGRESQL: + if info.backend == SupportedDialect.POSTGRESQL: backup_tools_ready = info.pg_dump_path is not None and ( info.pg_restore_path is not None or info.psql_path is not None ) @@ -770,6 +799,8 @@ def collect_doctor_report( @dataclass(frozen=True) class ReconciliationIssue: + """A single schema drift detail: column, index, FK, or cluster mismatch between ORM metadata and the database.""" + table_name: str category: TableCategory component: str @@ -782,6 +813,8 @@ class ReconciliationIssue: @dataclass(frozen=True) class TableReconciliationResult: + """Per-table schema reconciliation summary: whether ORM metadata matches the live 
database.""" + table_name: str category: TableCategory model_name: str @@ -793,12 +826,15 @@ class TableReconciliationResult: @dataclass(frozen=True) class SchemaReconciliationReport: + """Complete reconciliation report across all selected ORM-managed tables.""" + backend: str table_results: tuple[TableReconciliationResult, ...] issues: tuple[ReconciliationIssue, ...] def _schema_table(table: sa.Table, db_schema: str | None) -> sa.Table: + """Return table unchanged when db_schema is None, or a schema-qualified copy when a schema is specified.""" if db_schema is None: return table @@ -813,12 +849,14 @@ def _schema_table(table: sa.Table, db_schema: str | None) -> sa.Table: def _normalized_type(type_: sa.types.TypeEngine[object], dialect: sa.engine.Dialect) -> str: + """Compile a SQLAlchemy type to its dialect-specific string and normalise whitespace/case for comparison.""" return type_.compile(dialect=dialect).lower().replace(" ", "") def _expected_foreign_keys( table: sa.Table, ) -> dict[tuple[tuple[str, ...], str, tuple[str, ...]], sa.ForeignKeyConstraint]: + """Index ORM-defined FK constraints by (constrained_cols, referred_table, referred_cols) for diffing.""" expected: dict[tuple[tuple[str, ...], str, tuple[str, ...]], sa.ForeignKeyConstraint] = {} for constraint in table.foreign_key_constraints: constrained_columns = tuple(element.parent.name for element in constraint.elements) @@ -833,6 +871,7 @@ def _actual_foreign_keys( table_name: str, db_schema: str | None, ) -> dict[tuple[tuple[str, ...], str, tuple[str, ...]], dict[str, object]]: + """Index live FK constraints from the database inspector by the same key tuple used by _expected_foreign_keys.""" actual: dict[tuple[tuple[str, ...], str, tuple[str, ...]], dict[str, object]] = {} for foreign_key in inspector.get_foreign_keys(table_name, schema=db_schema): constrained_columns = tuple(foreign_key.get("constrained_columns") or []) @@ -843,6 +882,7 @@ def _actual_foreign_keys( def _expected_indexes(table: 
sa.Table) -> dict[str, sa.Index]: + """Return ORM-defined named indexes for a table, keyed by index name.""" return { str(index.name): index for index in table.indexes @@ -854,7 +894,8 @@ def _actual_indexes( inspector: sa.Inspector, table_name: str, db_schema: str | None, -) -> dict[str, dict[str, object]]: +) -> dict[str, ReflectedIndex]: + """Return live named indexes from the database inspector, keyed by index name.""" return { str(index["name"]): index for index in inspector.get_indexes(table_name, schema=db_schema) @@ -862,29 +903,6 @@ def _actual_indexes( } -def _actual_cluster_index_name( - connection: sa.Connection, - *, - table_name: str, - db_schema: str | None, -) -> str | None: - result = connection.execute( - sa.text( - """ - SELECT i.relname - FROM pg_index ix - JOIN pg_class t ON t.oid = ix.indrelid - JOIN pg_class i ON i.oid = ix.indexrelid - JOIN pg_namespace n ON n.oid = t.relnamespace - WHERE ix.indisclustered - AND t.relname = :table_name - AND (:db_schema IS NULL OR n.nspname = :db_schema) - """ - ), - {"table_name": table_name, "db_schema": db_schema}, - ).scalar_one_or_none() - return str(result) if result is not None else None - def reconcile_schema( engine: sa.Engine, @@ -892,9 +910,11 @@ def reconcile_schema( db_schema: str | None = None, vocabulary_included: bool = False, ) -> SchemaReconciliationReport: + """Compare ORM metadata against the live database schema; reports missing columns, indexes, FKs, and cluster state.""" excluded_categories: tuple[TableCategory, ...] 
= ( () if vocabulary_included else (TableCategory.VOCABULARY,) ) + _backend = resolve_backend(engine) selected_tables = select_maintenance_tables(exclude_categories=excluded_categories) inspector = sa.inspect(engine) all_issues: list[ReconciliationIssue] = [] @@ -1120,12 +1140,12 @@ def reconcile_schema( ) ) - if engine.dialect.name == Dialect.POSTGRESQL: + if engine.dialect.name == SupportedDialect.POSTGRESQL: expected_cluster = _cluster_target_name(maintenance_table) - actual_cluster = _actual_cluster_index_name( + actual_cluster = _backend.get_clustered_index_name( connection, - table_name=maintenance_table.table_name, - db_schema=db_schema, + maintenance_table.table_name, + db_schema, ) if expected_cluster != actual_cluster: table_issues.append( @@ -1178,6 +1198,8 @@ def reconcile_schema( @dataclass(frozen=True) class TableCreationResult: + """Outcome of attempting to create one missing ORM-managed table from SQLAlchemy metadata.""" + table_name: str category: TableCategory model_name: str @@ -1187,6 +1209,7 @@ class TableCreationResult: def _table_dependencies(table: MaintenanceTable) -> tuple[str, ...]: + """Return the sorted names of tables that this table's ORM FK constraints refer to.""" return tuple( sorted( { @@ -1203,6 +1226,7 @@ def collect_missing_tables( db_schema: str | None = None, vocabulary_included: bool = True, ) -> list[MaintenanceTable]: + """Return ORM-managed tables that are absent from the target database.""" inspector = sa.inspect(engine) return missing_maintenance_tables( inspector, @@ -1218,6 +1242,7 @@ def create_missing_tables( vocabulary_included: bool = True, dry_run: bool = False, ) -> list[TableCreationResult]: + """Create any ORM-managed tables missing from the target database; skips tables with unresolved FK dependencies.""" inspector = sa.inspect(engine) missing_tables = collect_missing_tables( engine, @@ -1291,6 +1316,8 @@ def create_missing_tables( @dataclass(frozen=True) class TableSummaryResult: + """Row count and existence 
data for one ORM-managed OMOP table.""" + table_name: str category: TableCategory model_name: str @@ -1307,6 +1334,7 @@ def collect_data_summary( vocabulary_included: bool = False, existing_only: bool = True, ) -> list[TableSummaryResult]: + """Return row counts and existence state for each ORM-managed table in the target database.""" inspector = sa.inspect(engine) tables = select_omop_tables(vocabulary_included=vocabulary_included) @@ -1349,27 +1377,38 @@ def collect_data_summary( app = typer.Typer(rich_markup_mode="rich") -@app.command( - "info", - help="Inspect maintenance CLI readiness, backend compatibility, and current installation state.", -) +@app.command("info") def info_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). 
Sets search_path on PostgreSQL; not supported on SQLite.", + ), + vocabulary_included: bool = typer.Option( + False, + "--vocab/--no-vocab", + help="Include OMOP vocabulary tables in the managed-table count.", + ), ) -> None: - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( - command_name="info", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, + """Inspect maintenance CLI readiness, backend compatibility, and current installation state.""" + try: + conn, _ = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, + command_name="info", vocabulary_included=vocabulary_included, mode_label="inspect", - ) - ) - try: + ) + load_environment(conn.dotenv or "") with console.status("Inspecting maintenance environment..."): info = collect_maintenance_info( @@ -1387,32 +1426,42 @@ def info_command( handle_error(exc) -@app.command( - "doctor", - help="Run a read-only maintenance health check across connection readiness, schema drift, and FK state.", -) +@app.command("doctor") def doctor_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). 
Sets search_path on PostgreSQL; not supported on SQLite.", + ), + vocabulary_included: bool = typer.Option( + False, + "--vocab/--no-vocab", + help="Include OMOP vocabulary tables in the selection.", + ), deep: bool = typer.Option( False, "--deep", - help="Include heavier checks such as PostgreSQL foreign key validation.", + help="Include heavier checks: FK validation scans every constraint for referential integrity violations.", ), ) -> None: - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( + """Run a read-only maintenance health check across connection readiness, schema drift, and FK state.""" + try: + conn, _ = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="doctor", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=vocabulary_included, mode_label="inspect", ) - ) - try: load_environment(conn.dotenv or "") with console.status("Running maintenance doctor checks..."): report = collect_doctor_report( @@ -1433,28 +1482,37 @@ def doctor_command( handle_error(exc) -@app.command( - "reconcile-schema", - help="Compare ORM-managed SQLAlchemy metadata against the current target database schema.", -) +@app.command("reconcile-schema") def reconcile_schema_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). 
Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", + ), + vocabulary_included: bool = typer.Option( + False, + "--vocab/--no-vocab", + help="Include OMOP vocabulary tables in the reconciliation.", + ), ) -> None: - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( + """Compare ORM-managed SQLAlchemy metadata against the current target database schema.""" + try: + conn, engine = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="reconcile-schema", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=vocabulary_included, mode_label="inspect", ) - ) - try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) with console.status("Reconciling ORM metadata against target database schema..."): report = reconcile_schema(engine, db_schema=conn.db_schema, vocabulary_included=vocabulary_included) console.print(render_reconciliation_results(report.table_results)) @@ -1464,29 +1522,42 @@ def reconcile_schema_command( handle_error(exc) -@app.command( - "create-missing-tables", - help="Create missing ORM-managed OMOP tables from metadata.", -) +@app.command("create-missing-tables") def create_missing_tables_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(True, "--vocab/--no-vocab"), - dry_run: bool = typer.Option(False, "--dry-run"), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. 
Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", + ), + vocabulary_included: bool = typer.Option( + True, + "--vocab/--no-vocab", + help="Include OMOP vocabulary tables in the selection. Enabled by default.", + ), + dry_run: bool = typer.Option( + False, + "--dry-run", + help="Preview planned actions without applying any changes to the database.", + ), ) -> None: - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( + """Create missing ORM-managed OMOP tables from metadata.""" + try: + conn, engine = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="create-missing-tables", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=vocabulary_included, mode_label="dry-run" if dry_run else "apply", ) - ) - try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) with console.status("Creating missing tables..."): results = create_missing_tables( engine, @@ -1500,29 +1571,42 @@ def create_missing_tables_command( handle_error(exc) -@app.command( - "data-summary", - help="Summarise ORM-managed OMOP tables present in the target database.", -) +@app.command("data-summary") def data_summary_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), - include_missing: bool = 
typer.Option(False, "--include-missing"), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", + ), + vocabulary_included: bool = typer.Option( + False, + "--vocab/--no-vocab", + help="Include OMOP vocabulary tables in the summary.", + ), + include_missing: bool = typer.Option( + False, + "--include-missing", + help="Also list ORM-managed tables that are absent from the target database.", + ), ) -> None: - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( + """Summarise ORM-managed OMOP tables present in the target database.""" + try: + conn, engine = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="data-summary", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=vocabulary_included, mode_label="inspect", ) - ) - try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) with console.status("Collecting table summary..."): results = collect_data_summary( engine, diff --git a/omop_alchemy/maintenance/cli_tables.py b/omop_alchemy/maintenance/cli_tables.py index 8e3b689..0bd7f6c 100644 --- a/omop_alchemy/maintenance/cli_tables.py +++ b/omop_alchemy/maintenance/cli_tables.py @@ -5,8 +5,8 @@ import sqlalchemy as sa import typer -from ..backend_support import Dialect, POSTGRESQL_ONLY_HELP, require_backend -from ._cli_utils import build_engine, handle_error, resolve_connection, resolve_selection +from ..backends import 
resolve_backend, require_backend_support, backend_support_note +from ._cli_utils import handle_error, resolve_selection, setup_cli_cmd from .tables import ( TableCategory, TableScope, @@ -19,7 +19,6 @@ render_analyze_note, render_analyze_results, render_analyze_summary, - render_command_header, render_error, render_sequence_reset_results, render_sequence_reset_summary, @@ -35,6 +34,8 @@ @dataclass(frozen=True) class AnalyzeTableResult: + """Outcome of an ANALYZE or VACUUM ANALYZE operation for one ORM-managed table.""" + table_name: str category: TableCategory model_name: str @@ -53,21 +54,11 @@ def analyze_tables( vacuum: bool = False, dry_run: bool = False, ) -> list[AnalyzeTableResult]: + """Run ANALYZE (or VACUUM ANALYZE) on selected ORM-managed tables to refresh planner statistics.""" if scope is not None and table_names is not None: raise RuntimeError("Use either `scope` or `table_names`, not both.") - require_backend( - engine, - feature="Table analysis", - supported_dialects=(Dialect.POSTGRESQL, Dialect.SQLITE), - ) - - if vacuum and engine.dialect.name != Dialect.POSTGRESQL: - raise RuntimeError( - "VACUUM ANALYZE is only supported for PostgreSQL engines. " - f"Current dialect: '{engine.dialect.name}'." 
- ) - + backend = resolve_backend(engine) selected_tables = resolve_maintenance_tables(scope=scope, table_names=table_names) inspector = sa.inspect(engine) operation = "VACUUM ANALYZE" if vacuum else "ANALYZE" @@ -95,9 +86,8 @@ def analyze_tables( ) continue - qualified_name = qualified_table_name(maintenance_table.table_name, db_schema) if not dry_run: - connection.exec_driver_sql(f"{operation} {qualified_name}") + backend.analyze_table(connection, maintenance_table.table_name, db_schema, vacuum=vacuum) results.append( AnalyzeTableResult( @@ -124,6 +114,8 @@ def analyze_tables( @dataclass(frozen=True) class TruncateTableResult: + """Outcome of truncating one ORM-managed table, with the pre-truncation row count.""" + table_name: str category: TableCategory model_name: str @@ -139,6 +131,7 @@ def _blocking_foreign_key_references( db_schema: str | None, selected_table_names: set[str], ) -> dict[str, set[str]]: + """Return tables outside the selection that FK-reference at least one selected table, preventing truncation.""" blockers: dict[str, set[str]] = {} for table_name in inspector.get_table_names(schema=db_schema): @@ -155,6 +148,7 @@ def _blocking_foreign_key_references( def _format_blocking_reference_error(blockers: dict[str, set[str]]) -> str: + """Format a human-readable error message listing which external tables are blocking truncation.""" blocker_parts = [ f"{table_name} <- {', '.join(sorted(referencing_tables))}" for table_name, referencing_tables in sorted(blockers.items()) @@ -180,13 +174,14 @@ def truncate_tables( cascade: bool = False, dry_run: bool = False, ) -> list[TruncateTableResult]: + """Truncate selected ORM-managed tables; raises if non-selected tables hold blocking FK references.""" if scope is not None and table_names is not None: raise RuntimeError("Use either `scope` or `table_names`, not both.") if scope is None and table_names is None: raise RuntimeError("Select tables to truncate with `scope` or `table_names`.") - 
require_backend(engine, feature="Table truncation", supported_dialects=(Dialect.POSTGRESQL,)) - + backend = resolve_backend(engine) + require_backend_support(backend, "truncate_table_batch", "Table truncation") selected_tables = resolve_maintenance_tables(scope=scope, table_names=table_names) inspector = sa.inspect(engine) results: list[TruncateTableResult] = [] @@ -236,18 +231,13 @@ def truncate_tables( raise RuntimeError(_format_blocking_reference_error(blockers)) if existing_tables and not dry_run: - truncate_sql = ( - "TRUNCATE TABLE " - + ", ".join( - qualified_table_name(table_name, db_schema) - for table_name in existing_tables - ) + backend.truncate_table_batch( + connection, + existing_tables, + db_schema, + restart_identities=restart_identities, + cascade=cascade, ) - if restart_identities: - truncate_sql += " RESTART IDENTITY" - if cascade: - truncate_sql += " CASCADE" - connection.exec_driver_sql(truncate_sql) return results @@ -258,6 +248,8 @@ def truncate_tables( @dataclass(frozen=True) class SequenceTarget: + """An ORM-managed table with a single-column integer primary key that owns a PostgreSQL sequence.""" + table_name: str category: TableCategory model_name: str @@ -267,6 +259,8 @@ class SequenceTarget: @dataclass(frozen=True) class SequenceResetResult: + """Outcome of resetting one PostgreSQL sequence to table max + 1.""" + table_name: str category: TableCategory model_name: str @@ -282,6 +276,7 @@ def collect_sequence_targets( *, vocabulary_included: bool = False, ) -> list[SequenceTarget]: + """Return ORM-managed tables that have a single integer primary key and therefore own a sequence.""" targets: list[SequenceTarget] = [] for table in select_omop_tables( vocabulary_included=vocabulary_included, @@ -309,8 +304,9 @@ def reset_model_sequences( vocabulary_included: bool = False, dry_run: bool = False, ) -> list[SequenceResetResult]: - require_backend(engine, feature="Sequence reset", supported_dialects=(Dialect.POSTGRESQL,)) - + """Reset each 
owned sequence to MAX(pk_column) + 1 to prevent insert conflicts after bulk loads.""" + backend = resolve_backend(engine) + require_backend_support(backend, "find_sequence_name", "Sequence reset") inspector = sa.inspect(engine) targets = collect_sequence_targets(vocabulary_included=vocabulary_included) results: list[SequenceResetResult] = [] @@ -320,14 +316,9 @@ def reset_model_sequences( if not inspector.has_table(target.table_name, schema=db_schema): continue - fully_qualified_table_name = qualified_table_name(target.table_name, db_schema) - sequence_name = connection.execute( - sa.text("SELECT pg_get_serial_sequence(:table_name, :column_name)"), - { - "table_name": fully_qualified_table_name, - "column_name": target.pk_column_name, - }, - ).scalar_one_or_none() + sequence_name = backend.find_sequence_name( + connection, target.table_name, target.pk_column_name, db_schema + ) if sequence_name is None: results.append( @@ -345,19 +336,17 @@ def reset_model_sequences( ) continue + fully_qualified = qualified_table_name(target.table_name, db_schema) current_max = connection.execute( sa.text( f"SELECT COALESCE(MAX({target.pk_column_name}), 0) " - f"FROM {fully_qualified_table_name}" + f"FROM {fully_qualified}" ) ).scalar_one() next_value = int(current_max) + 1 if not dry_run: - connection.execute( - sa.text("SELECT setval(:sequence_name, :next_value, false)"), - {"sequence_name": sequence_name, "next_value": next_value}, - ) + backend.set_sequence_value(connection, sequence_name, next_value) results.append( SequenceResetResult( @@ -384,50 +373,58 @@ def reset_model_sequences( # CLI commands # --------------------------------------------------------------------------- -app = typer.Typer(rich_markup_mode="rich") - +app = typer.Typer(rich_markup_mode="rich", help="Manage Database Tables: analyze, truncate, and reset sequences",) -@app.command( - "analyze-tables", - help="Refresh planner statistics for selected ORM-managed tables.", -) +@app.command("analyze-tables") def 
analyze_tables_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", + ), scope: TableScope | None = typer.Option( None, "--scope", - help="Category scope to analyze. Defaults to all ORM-managed tables when omitted.", + help="CDM category scope to analyze (e.g. 'clinical', 'vocabulary'). Defaults to all ORM-managed tables when omitted.", case_sensitive=False, ), table: list[str] | None = typer.Option( None, "--table", - help="Specific ORM-managed table name to analyze. Repeat for multiple tables.", + help="Specific ORM-managed table name to analyze. Repeat to target multiple tables.", ), vacuum: bool = typer.Option( False, "--vacuum", - help="Use VACUUM ANALYZE instead of ANALYZE. PostgreSQL only.", + help="Use VACUUM ANALYZE instead of plain ANALYZE to also reclaim dead tuples. 
Not available on all backends.", + ), + dry_run: bool = typer.Option( + False, + "--dry-run", + help="Preview planned actions without applying any changes to the database.", ), - dry_run: bool = typer.Option(False, "--dry-run"), ) -> None: + """Analyse selected ORM-managed tables to update planner statistics.""" resolved_scope, resolved_tables = resolve_selection( scope=scope, tables=table, default_scope=TableScope.ALL ) - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( + try: + conn, engine = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="analyze-tables", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=None, mode_label="dry-run" if dry_run else "apply", ) - ) - try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) with console.status("Refreshing planner statistics for selected tables..."): results = analyze_tables( engine, @@ -446,27 +443,43 @@ def analyze_tables_command( @app.command( "reset-sequences", - help=f"Reset owned sequences from table max + 1. {POSTGRESQL_ONLY_HELP}", + help=f"Reset each owned sequence to MAX(pk) + 1 to prevent insert conflicts after bulk loads. {backend_support_note('find_sequence_name')}", ) def reset_sequences_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), - vocabulary_included: bool = typer.Option(False, "--vocab/--no-vocab"), - dry_run: bool = typer.Option(False, "--dry-run"), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. 
Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", + ), + vocabulary_included: bool = typer.Option( + False, + "--vocab/--no-vocab", + help="Include OMOP vocabulary tables in the selection.", + ), + dry_run: bool = typer.Option( + False, + "--dry-run", + help="Preview planned actions without applying any changes to the database.", + ), ) -> None: - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( + """Reset each owned sequence to MAX(pk) + 1 to prevent insert conflicts after bulk loads.""" + try: + conn, engine = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="reset-sequences", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=vocabulary_included, mode_label="dry-run" if dry_run else "apply", ) - ) - try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) with console.status("Resetting PostgreSQL sequences..."): results = reset_model_sequences( engine, @@ -482,40 +495,54 @@ def reset_sequences_command( @app.command( "truncate-tables", - help=f"Truncate selected ORM-managed tables. {POSTGRESQL_ONLY_HELP}", + help=f"Truncate selected ORM-managed OMOP tables; aborts if external FK references would block unless --cascade is set. 
{backend_support_note('truncate_table_batch')}", ) def truncate_tables_command( - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), - db_schema: str | None = typer.Option(None, help="Database schema override."), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + db_schema: str | None = typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", + ), scope: TableScope | None = typer.Option( None, "--scope", - help="Category scope to truncate.", + help="CDM category scope to truncate (e.g. 'clinical', 'vocabulary'). Must specify scope or --table.", case_sensitive=False, ), table: list[str] | None = typer.Option( None, "--table", - help="Specific ORM-managed table name to truncate. Repeat for multiple tables.", + help="Specific ORM-managed table name to truncate. Repeat to target multiple tables.", ), restart_identities: bool = typer.Option( False, "--restart-identities", - help="Restart owned identities during truncation.", + help="Reset owned sequences to 1 after truncation (TRUNCATE ... RESTART IDENTITY).", ), cascade: bool = typer.Option( False, "--cascade", - help="Include dependent tables via PostgreSQL CASCADE.", + help="Automatically truncate dependent tables via PostgreSQL CASCADE. Use with care.", ), yes: bool = typer.Option( False, "--yes", - help="Confirm that you want to apply this destructive operation.", + help="Confirm the destructive operation. 
Required when not using --dry-run.", + ), + dry_run: bool = typer.Option( + False, + "--dry-run", + help="Preview planned actions without applying any changes to the database.", ), - dry_run: bool = typer.Option(False, "--dry-run"), ) -> None: + """Truncate selected ORM-managed OMOP tables; aborts if external FK references would block unless --cascade is set.""" resolved_scope, resolved_tables = resolve_selection(scope=scope, tables=table) if resolved_scope is None and resolved_tables is None: console.print( @@ -528,18 +555,16 @@ def truncate_tables_command( ) raise typer.Exit(code=1) - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print( - render_command_header( + try: + conn, engine = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="truncate-tables", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=None, mode_label="dry-run" if dry_run else "apply", ) - ) - try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) with console.status("Truncating selected tables..."): results = truncate_tables( engine, diff --git a/omop_alchemy/maintenance/cli_vocab.py b/omop_alchemy/maintenance/cli_vocab.py index 6d7deae..0c7a517 100644 --- a/omop_alchemy/maintenance/cli_vocab.py +++ b/omop_alchemy/maintenance/cli_vocab.py @@ -11,6 +11,7 @@ from orm_loader.tables.typing import CSVTableProtocol from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn, TimeElapsedColumn +from ..backends.resolve import SupportedDialect from omop_alchemy.cdm.model.vocabulary import ( Concept, Concept_Ancestor, @@ -24,15 +25,14 @@ Vocabulary, ) -from ..backend_support import Dialect, require_backend -from ._cli_utils import build_engine, handle_error, resolve_connection -from .cli_foreign_keys import ForeignKeyAction, manage_foreign_key_triggers +from ..backends import resolve_backend 
+from ._cli_utils import handle_error, setup_cli_cmd +from .cli_foreign_keys import manage_foreign_key_triggers from .cli_indexes import IndexAction, manage_indexes from .cli_tables import reset_model_sequences from .tables import TableCategory, schema_adjusted_metadata, select_maintenance_tables from .ui import ( console, - render_command_header, render_error, render_vocab_load_results, render_vocab_load_summary, @@ -49,6 +49,8 @@ @dataclass(frozen=True) class VocabularyLoadResult: + """Outcome of loading one Athena vocabulary CSV file via the staged ORM CSV loader.""" + table_name: str status: str row_count: int | None @@ -59,6 +61,8 @@ class VocabularyLoadResult: @dataclass(frozen=True) class VocabularyLoadReport: + """Complete vocabulary load report: per-table outcomes and load session metadata.""" + source_path: str backend: str db_schema: str | None @@ -70,6 +74,8 @@ class VocabularyLoadReport: @dataclass(frozen=True) class VocabularyLoadProgress: + """Progress event emitted after each table load phase; drives the CLI progress bar.""" + phase: str table_name: str | None table_index: int @@ -126,6 +132,7 @@ def _emit_progress( total_units: float, detail: str, ) -> None: + """Fire the caller-supplied progress callback with a normalised VocabularyLoadProgress snapshot; no-ops if None.""" if progress_callback is None: return @@ -150,6 +157,7 @@ def _is_missing_staging_table_error( *, model: VocabularyModel, ) -> bool: + """Return True if the exception is a ProgrammingError caused by the staging table not existing yet.""" staging_table_name = model.staging_tablename() message = str(exc).lower() return ( @@ -169,6 +177,7 @@ def _load_vocab_model_csv( chunksize: int | None = None, index_strategy: str = "auto", ) -> int: + """Call model.load_csv; if the staging table is absent, create it and retry once.""" load_kwargs: dict[str, object] = { "merge_strategy": merge_strategy, "quote_mode": quote_mode, @@ -200,15 +209,9 @@ def _load_vocab_model_csv( ) -def 
_ensure_supported_backend(engine: sa.Engine) -> None: - require_backend( - engine, - feature="Vocabulary source loading", - supported_dialects=(Dialect.SQLITE, Dialect.POSTGRESQL), - ) - def _find_vocab_csv_path(source_path: Path, table_name: str) -> Path | None: + """Locate the CSV file for table_name under source_path, trying exact name, lower, upper, and case-insensitive glob.""" direct_candidates = ( source_path / f"{table_name}.csv", source_path / f"{table_name.lower()}.csv", @@ -226,6 +229,7 @@ def _find_vocab_csv_path(source_path: Path, table_name: str) -> Path | None: def _missing_required_files(source_path: Path) -> list[str]: + """Return the table names of required vocabulary CSVs that cannot be found under source_path.""" missing: list[str] = [] for model in REQUIRED_VOCAB_MODELS: if _find_vocab_csv_path(source_path, model.__tablename__) is None: @@ -238,6 +242,7 @@ def _create_missing_vocabulary_tables( *, db_schema: str | None, ) -> int: + """Create any vocabulary-category ORM tables that are absent from the target database; returns the count created.""" vocab_tables = select_maintenance_tables( categories=(TableCategory.VOCABULARY,), ) @@ -270,17 +275,18 @@ def _configure_loader_connection( *, db_schema: str | None, ) -> None: + """Apply db_schema to the connection through the resolved backend's schema context; no-op when db_schema is None, raises RuntimeError on any non-PostgreSQL dialect.""" if db_schema is None: return - if connection.dialect.name != Dialect.POSTGRESQL: + if connection.dialect.name != SupportedDialect.POSTGRESQL: raise RuntimeError( "Vocabulary source loading with `--db-schema` is only supported on PostgreSQL. " "SQLite uses the default database namespace." 
) - quoted_schema = '"' + db_schema.replace('"', '""') + '"' - connection.exec_driver_sql(f"SET search_path TO {quoted_schema}") + backend = resolve_backend(connection.engine) + backend.configure_schema_context(connection, db_schema) def load_vocab_source( @@ -294,8 +300,7 @@ def load_vocab_source( bulk_mode: bool = True, progress_callback: VocabularyLoadProgressCallback | None = None, ) -> VocabularyLoadReport: - _ensure_supported_backend(engine) - + """Load all Athena vocabulary CSVs from source_path; with bulk_mode, indexes and FK triggers are toggled around the load.""" resolved_source_path = Path(source_path).expanduser().resolve() if not resolved_source_path.exists() or not resolved_source_path.is_dir(): raise RuntimeError( @@ -364,12 +369,12 @@ def load_vocab_source( _use_bulk_mode = ( bulk_mode and not dry_run - and engine.dialect.name == Dialect.POSTGRESQL + and engine.dialect.name == SupportedDialect.POSTGRESQL ) if _use_bulk_mode: manage_foreign_key_triggers( engine, - action=ForeignKeyAction.DISABLE, + enable=False, vocabulary_included=True, db_schema=db_schema, dry_run=False, @@ -552,7 +557,7 @@ def load_vocab_source( ) manage_foreign_key_triggers( engine, - action=ForeignKeyAction.ENABLE, + enable=True, vocabulary_included=True, db_schema=db_schema, dry_run=False, @@ -560,7 +565,7 @@ def load_vocab_source( results.extend(missing_optional_results) - if not dry_run and engine.dialect.name == Dialect.POSTGRESQL: + if not dry_run and engine.dialect.name == SupportedDialect.POSTGRESQL: sequence_results = reset_model_sequences( engine, db_schema=db_schema, @@ -592,13 +597,20 @@ def load_vocab_source( ) def load_vocab_source_command( athena_source: str | None = typer.Option( - None, help="Path to unzipped Athena vocabulary CSV files." + None, + help="Path to the unzipped Athena vocabulary CSV directory. 
Falls back to the saved athena-source default.", + ), + dotenv: str | None = typer.Option( + None, + help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", + ), + engine_schema: str | None = typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", ), - dotenv: str | None = typer.Option(None, help="Optional dotenv file to load."), - engine_schema: str | None = typer.Option(None, help="Engine schema selector."), db_schema: str | None = typer.Option( None, - help="Database schema override. PostgreSQL only; uses search_path for ORM CSV loading.", + help="Database schema to target. Sets search_path on PostgreSQL before loading; not supported on SQLite.", ), merge_strategy: MergeStrategy = typer.Option( "replace", @@ -618,40 +630,37 @@ def load_vocab_source_command( help=( "Disable FK triggers and drop indexes globally before loading, then rebuild after. " "Much faster than per-table management for a full vocabulary reload. " - "PostgreSQL only; ignored on SQLite. " + "Requires FK trigger and index management support; ignored on backends that do not support it. " "If the load fails mid-way, run `indexes enable --vocab` and `foreign-keys enable` to recover." 
), ), - dry_run: bool = typer.Option(False, "--dry-run"), + dry_run: bool = typer.Option( + False, + "--dry-run", + help="Preview planned actions without applying any changes to the database.", + ), ) -> None: - conn = resolve_connection( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - athena_source=athena_source, - ) - console.print( - render_command_header( + """Load all Athena vocabulary CSVs from the configured source path, optionally toggling indexes and FK triggers for speed.""" + try: + conn, engine = setup_cli_cmd( + console=console, + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, command_name="load-vocab-source", - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, vocabulary_included=True, mode_label="dry-run" if dry_run else "apply", + athena_source=athena_source ) - ) - - if conn.athena_source is None: - console.print( - render_error( - "No Athena vocabulary source path is configured. " - "Set it with `omop-alchemy config set-overrides --athena-source ` " - "or pass `--athena-source`." + if conn.athena_source is None: + console.print( + render_error( + "No Athena vocabulary source path is configured. " + "Set it with `omop-alchemy config override --athena-source ` " + "or pass `--athena-source`." 
+ ) ) - ) - raise typer.Exit(code=1) - - try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + raise typer.Exit(code=1) with Progress( SpinnerColumn(), diff --git a/omop_alchemy/maintenance/help.py b/omop_alchemy/maintenance/help.py index 3157bbd..bf108ca 100644 --- a/omop_alchemy/maintenance/help.py +++ b/omop_alchemy/maintenance/help.py @@ -11,7 +11,7 @@ from rich.table import Table from rich.text import Text -from ..backend_support import POSTGRESQL_ONLY_HELP +_BACKEND_NOTE_MARKER = "Supported backends:" def _optional_str(value: object) -> str | None: @@ -21,11 +21,13 @@ def _optional_str(value: object) -> str | None: def _strip_backend_flag(helptext: str) -> tuple[str, str | None]: - suffix = f". {POSTGRESQL_ONLY_HELP}" - if helptext.endswith(suffix): - return helptext[: -len(suffix)], POSTGRESQL_ONLY_HELP - if helptext.endswith(POSTGRESQL_ONLY_HELP): - return helptext[: -len(POSTGRESQL_ONLY_HELP)].rstrip(), POSTGRESQL_ONLY_HELP + """Split off a trailing 'Supported backends: ...' note, if present.""" + marker = f". {_BACKEND_NOTE_MARKER}" + idx = helptext.rfind(marker) + if idx != -1: + return helptext[:idx], helptext[idx + 2:] # skip leading ". 
" + if helptext.startswith(_BACKEND_NOTE_MARKER): + return "", helptext return helptext, None diff --git a/omop_alchemy/maintenance/ui.py b/omop_alchemy/maintenance/ui.py index 0d2202b..bfe1244 100644 --- a/omop_alchemy/maintenance/ui.py +++ b/omop_alchemy/maintenance/ui.py @@ -9,17 +9,23 @@ from rich.table import Table from rich.text import Text -from omop_alchemy.cdm.handlers.fulltext import FullTextResult +from omop_alchemy.backends.base import FullTextResult -from ..backend_support import backend_label +from ..backends.resolve import _DIALECT_TO_BACKEND_MAP, SupportedDialect as _SupportedDialect + + +def _backend_label(dialect_name: str) -> str: + try: + return _DIALECT_TO_BACKEND_MAP[_SupportedDialect(dialect_name)].name + except (ValueError, KeyError): + return dialect_name from .ascii import render_banner from .tables import TableCategory if TYPE_CHECKING: - from .cli_backup import DatabaseBackupResult, DatabaseRestoreResult + from .cli_backup import BackupResult from .cli_config import ConnectionDefaults from .cli_foreign_keys import ( - ForeignKeyAction, ForeignKeyConstraintViolation, ForeignKeyManagementResult, ForeignKeyStatusResult, @@ -143,11 +149,8 @@ def render_connection_defaults( grid.add_column(style="bold cyan") grid.add_column() grid.add_row("File", path) - grid.add_row("dotenv", defaults.dotenv or "-") - grid.add_row("engine_schema", defaults.engine_schema or "-") - grid.add_row("db_schema", defaults.db_schema or "-") - grid.add_row("athena_source", defaults.athena_source or "-") - grid.add_row("logging", defaults.logging or "file") + for param, value in defaults.to_dict().items(): + grid.add_row(param.replace("_", " ").title(), value or "-") return Panel.fit(grid, title=f"[bold]{title}[/bold]", border_style="blue") @@ -201,7 +204,7 @@ def render_info_database(info: MaintenanceInfo) -> Panel: grid.add_column(style="bold cyan") grid.add_column() grid.add_row("Engine URL", info.engine_url or "-") - grid.add_row("Backend", 
backend_label(info.backend) if info.backend else "-") + grid.add_row("Backend", _backend_label(info.backend) if info.backend else "-") grid.add_row("Engine created", _bool_label(info.engine_created)) grid.add_row("Connection ready", _bool_label(info.connection_ready)) @@ -238,32 +241,32 @@ def render_info_dependencies(info: MaintenanceInfo) -> RenderableType: return table -def render_backup_result(result: DatabaseBackupResult) -> Panel: +def render_backup_result(result: BackupResult) -> Panel: grid = Table.grid(padding=(0, 2)) grid.add_column(style="bold cyan") grid.add_column() grid.add_row("Status", _status_text(result.status)) - grid.add_row("Backend", backend_label(result.backend)) + grid.add_row("Backend", _backend_label(result.backend)) grid.add_row("Database", result.database_name) grid.add_row("Schema", result.schema_name or "all schemas") - grid.add_row("Format", result.format.value) - grid.add_row("Output", result.output_path) + grid.add_row("Format", result.backup_format.value) + grid.add_row("Output", result.file_path) grid.add_row("Tool", result.tool_path) grid.add_row("Detail", result.detail) return Panel.fit(grid, title="[bold]Backup[/bold]", border_style="green" if result.status == "created" else "cyan") -def render_backup_summary(result: DatabaseBackupResult, *, dry_run: bool) -> Panel: +def render_backup_summary(result: BackupResult, *, dry_run: bool) -> Panel: restore_hint = ( - f"Restore with `pg_restore -d {result.output_path}`." - if result.format.value == "custom" - else f"Restore with `psql -d -f {result.output_path}`." + f"Restore with `pg_restore -d {result.file_path}`." + if result.backup_format.value == "custom" + else f"Restore with `psql -d -f {result.file_path}`." 
) grid = Table.grid(padding=(0, 2)) grid.add_column(style="bold cyan") grid.add_column() - grid.add_row("Artifact", result.output_path) - grid.add_row("Format", result.format.value) + grid.add_row("Artifact", result.file_path) + grid.add_row("Format", result.backup_format.value) grid.add_row("Restore", restore_hint) grid.add_row( "Summary", @@ -272,27 +275,27 @@ def render_backup_summary(result: DatabaseBackupResult, *, dry_run: bool) -> Pan return Panel.fit(grid, title="[bold]Summary[/bold]", border_style="cyan" if dry_run else "green") -def render_restore_result(result: DatabaseRestoreResult) -> Panel: +def render_restore_result(result: BackupResult) -> Panel: grid = Table.grid(padding=(0, 2)) grid.add_column(style="bold cyan") grid.add_column() grid.add_row("Status", _status_text(result.status)) - grid.add_row("Backend", backend_label(result.backend)) + grid.add_row("Backend", _backend_label(result.backend)) grid.add_row("Database", result.database_name) grid.add_row("Schema", result.schema_name or "all schemas") - grid.add_row("Format", result.format.value) - grid.add_row("Input", result.input_path) + grid.add_row("Format", result.backup_format.value) + grid.add_row("Input", result.file_path) grid.add_row("Tool", result.tool_path) grid.add_row("Detail", result.detail) return Panel.fit(grid, title="[bold]Restore[/bold]", border_style="green" if result.status == "applied" else "cyan") -def render_restore_summary(result: DatabaseRestoreResult, *, dry_run: bool) -> Panel: +def render_restore_summary(result: BackupResult, *, dry_run: bool) -> Panel: grid = Table.grid(padding=(0, 2)) grid.add_column(style="bold cyan") grid.add_column() - grid.add_row("Artifact", result.input_path) - grid.add_row("Format", result.format.value) + grid.add_row("Artifact", result.file_path) + grid.add_row("Format", result.backup_format.value) grid.add_row( "Summary", "Restore planned; no changes were applied to the target database." 
@@ -363,7 +366,7 @@ def render_reconciliation_summary(report: SchemaReconciliationReport) -> Panel: grid = Table.grid(padding=(0, 2)) grid.add_column(style="bold cyan") grid.add_column() - grid.add_row("Backend", backend_label(report.backend)) + grid.add_row("Backend", _backend_label(report.backend)) grid.add_row("Tables", str(len(report.table_results))) if matched: grid.add_row("Matched", str(matched)) @@ -657,7 +660,7 @@ def render_foreign_key_results(results: Iterable[ForeignKeyManagementResult]) -> style = STATUS_STYLES.get(result.status, "white") table.add_row( Text(result.status.upper(), style=style), - result.action.value, + "Enable" if result.enable else "Disable", result.table_name, _category_label(result.category), str(result.outgoing_constraint_count), @@ -669,7 +672,7 @@ def render_foreign_key_results(results: Iterable[ForeignKeyManagementResult]) -> def render_foreign_key_summary(results: Iterable[ForeignKeyManagementResult], *, dry_run: bool) -> Panel: items = list(results) - action = items[0].action.value if items else "manage" + action = "Enable" if items and items[0].enable else "Disable" failed = sum(item.status == "failed" for item in items) skipped = sum(item.status == "skipped" for item in items) grid = Table.grid(padding=(0, 2)) @@ -694,8 +697,8 @@ def render_foreign_key_summary(results: Iterable[ForeignKeyManagementResult], *, return Panel.fit(grid, title="[bold]Summary[/bold]", border_style=border_style) -def render_foreign_key_note(action: ForeignKeyAction, *, strict: bool = False) -> Panel: - if action == "disable": +def render_foreign_key_note(enable: bool, *, strict: bool = False) -> Panel: + if not enable: body = ( "PostgreSQL keeps the foreign key constraints defined in metadata. " "This command disables the internal RI triggers that enforce them." 
diff --git a/omop_alchemy/py.typed b/omop_alchemy/py.typed deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_analyze_tables.py b/tests/test_analyze_tables.py index f8bc7a3..06576ea 100644 --- a/tests/test_analyze_tables.py +++ b/tests/test_analyze_tables.py @@ -40,5 +40,5 @@ def test_analyze_tables_rejects_vacuum_on_sqlite(tmp_path): with pytest.raises(RuntimeError) as exc_info: analyze_tables(engine, scope=TableScope.CLINICAL, vacuum=True) - assert "VACUUM ANALYZE is only supported for PostgreSQL" in str(exc_info.value) + assert "not supported by the SQLite backend" in str(exc_info.value) diff --git a/tests/test_cli_config.py b/tests/test_cli_config.py index a90ef97..e760c13 100644 --- a/tests/test_cli_config.py +++ b/tests/test_cli_config.py @@ -1,7 +1,7 @@ from typer.testing import CliRunner from omop_alchemy.maintenance.cli import app -from omop_alchemy.maintenance.cli_config import defaults_path, load_connection_defaults +from omop_alchemy.maintenance.cli_config import defaults_path, ConnectionDefaults from omop_alchemy.maintenance.cli_indexes import IndexAction, IndexManagementResult from omop_alchemy.maintenance.tables import TableCategory @@ -10,13 +10,13 @@ def test_config_set_overrides_and_show(): - """Config set-overrides persists values and config show surfaces them.""" + """Config override persists values and config show surfaces them.""" with runner.isolated_filesystem(): result = runner.invoke( app, [ "config", - "set-overrides", + "override", "--dotenv", ".env.test", "--engine-schema", @@ -30,7 +30,7 @@ def test_config_set_overrides_and_show(): assert result.exit_code == 0 assert defaults_path().exists() - loaded_defaults = load_connection_defaults() + loaded_defaults = ConnectionDefaults.load() assert loaded_defaults.dotenv == str((defaults_path().parent / ".env.test").resolve()) assert loaded_defaults.engine_schema == "cdm" assert loaded_defaults.db_schema == "public" @@ -75,15 +75,15 @@ def fake_manage_indexes( return [] 
monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.load_environment", + "omop_alchemy.db.load_environment", fake_load_environment, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.get_engine_name", + "omop_alchemy.db.get_engine_name", fake_get_engine_name, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.create_engine_with_dependencies", + "omop_alchemy.db.create_engine_with_dependencies", fake_create_engine, ) monkeypatch.setattr( @@ -96,7 +96,7 @@ def fake_manage_indexes( app, [ "config", - "set-overrides", + "override", "--dotenv", ".env.saved", "--engine-schema", @@ -124,10 +124,10 @@ def test_config_show_surfaces_manual_logging_setting() -> None: encoding="utf-8", ) - loaded_defaults = load_connection_defaults() + loaded_defaults = ConnectionDefaults.load() assert loaded_defaults.logging == "off" show_result = runner.invoke(app, ["config", "show"]) assert show_result.exit_code == 0 - assert "logging" in show_result.stdout + assert "Logging" in show_result.stdout assert "off" in show_result.stdout diff --git a/tests/test_config_driver.py b/tests/test_config_driver.py index 7d3522b..7b3d517 100644 --- a/tests/test_config_driver.py +++ b/tests/test_config_driver.py @@ -7,7 +7,7 @@ """ import pytest -from omop_alchemy.config import ( +from omop_alchemy.db import ( POSTGRES_DRIVER_MODULES, _missing_driver_message, create_engine_with_dependencies, diff --git a/tests/test_foreign_keys.py b/tests/test_foreign_keys.py index 61a0609..d97691b 100644 --- a/tests/test_foreign_keys.py +++ b/tests/test_foreign_keys.py @@ -5,10 +5,9 @@ from omop_alchemy.maintenance.cli import app from omop_alchemy.maintenance.cli_schema import create_missing_tables from omop_alchemy.maintenance.cli_foreign_keys import ( - ForeignKeyAction, ForeignKeyConstraintViolation, validate_foreign_key_constraints, - collect_foreign_key_targets, + _collect_fk_info, collect_foreign_key_trigger_status, manage_foreign_key_triggers, ) @@ -20,14 +19,14 @@ def 
_engine(tmp_path): return sa.create_engine(f"sqlite:///{tmp_path / 'foreign_keys.db'}", future=True) -def test_collect_foreign_key_targets_finds_participating_tables(tmp_path): - """Test collect foreign key targets finds participating tables.""" +def test_collect_fk_info_finds_participating_tables(tmp_path): + """Test _collect_fk_info finds participating tables.""" engine = _engine(tmp_path) create_missing_tables(engine) targets = { target.table_name: target - for target in collect_foreign_key_targets(engine) + for target in _collect_fk_info(engine) } assert "person" in targets @@ -42,11 +41,11 @@ def test_manage_foreign_key_triggers_supports_dry_run(tmp_path): with pytest.raises(RuntimeError) as exc_info: manage_foreign_key_triggers( engine, - action=ForeignKeyAction.DISABLE, + enable=False, dry_run=True, ) - assert "only supported for PostgreSQL" in str(exc_info.value) + assert "not supported by the SQLite backend" in str(exc_info.value) def test_collect_foreign_key_trigger_status_is_safe_on_sqlite(tmp_path): @@ -57,7 +56,7 @@ def test_collect_foreign_key_trigger_status_is_safe_on_sqlite(tmp_path): with pytest.raises(RuntimeError) as exc_info: collect_foreign_key_trigger_status(engine) - assert "only supported for PostgreSQL" in str(exc_info.value) + assert "not supported by the SQLite backend" in str(exc_info.value) def test_validate_foreign_key_constraints_is_safe_on_sqlite(tmp_path): @@ -68,7 +67,7 @@ def test_validate_foreign_key_constraints_is_safe_on_sqlite(tmp_path): with pytest.raises(RuntimeError) as exc_info: validate_foreign_key_constraints(engine) - assert "only supported for PostgreSQL" in str(exc_info.value) + assert "not supported by the SQLite backend" in str(exc_info.value) def test_disable_foreign_keys_cli_fails_gracefully_for_sqlite(monkeypatch): @@ -83,15 +82,15 @@ def fake_create_engine(url: str, *, future: bool) -> sa.Engine: return sa.create_engine(url, future=future) monkeypatch.setattr( - 
"omop_alchemy.maintenance._cli_utils.load_environment", + "omop_alchemy.db.load_environment", fake_load_environment, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.get_engine_name", + "omop_alchemy.db.get_engine_name", fake_get_engine_name, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.create_engine_with_dependencies", + "omop_alchemy.db.create_engine_with_dependencies", fake_create_engine, ) @@ -101,7 +100,32 @@ def fake_create_engine(url: str, *, future: bool) -> sa.Engine: ) assert result.exit_code == 1 - assert "only supported for PostgreSQL engines" in result.stdout + assert "not supported by the SQLite" in result.stdout + + +def _make_fake_backend(): + from omop_alchemy.backends.base import Backend + + class _FakeBackend(Backend): + @property + def name(self) -> str: + return "FakePostgres" + + @property + def dialect(self) -> str: + return "postgresql" + + def analyze_table(self, conn, table_name, db_schema, *, vacuum=False) -> None: + pass + + def toggle_fk_triggers(self, conn, table_name, db_schema, *, enable: bool) -> None: + action = "ENABLE" if enable else "DISABLE" + conn.exec_driver_sql(f"ALTER TABLE {table_name} {action} TRIGGER ALL") + + def count_fk_violations(self, conn, *, source_table, referred_table, constraint_name, db_schema=None): + return 0 + + return _FakeBackend() def test_manage_foreign_key_triggers_strict_does_not_enable_on_validation_failure(monkeypatch): @@ -124,11 +148,11 @@ def begin(self): return _FakeConnection() monkeypatch.setattr( - "omop_alchemy.maintenance.cli_foreign_keys._ensure_postgresql_supported", - lambda engine, *, feature: None, + "omop_alchemy.maintenance.cli_foreign_keys.resolve_backend", + lambda engine: _make_fake_backend(), ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli_foreign_keys.collect_foreign_key_targets", + "omop_alchemy.maintenance.cli_foreign_keys._collect_fk_info", lambda engine, *, db_schema=None, vocabulary_included=False: [ type("Target", (), { "table_name": 
"person", @@ -150,7 +174,7 @@ def begin(self): ) monkeypatch.setattr( "omop_alchemy.maintenance.cli_foreign_keys._collect_strict_validation_failures", - lambda connection, *, db_schema=None, vocabulary_included=False: { + lambda connection, backend, *, db_schema=None, vocabulary_included=False: { "visit_occurrence": [ ForeignKeyConstraintViolation( source_table_name="visit_occurrence", @@ -164,7 +188,7 @@ def begin(self): results = manage_foreign_key_triggers( _FakeEngine(), - action=ForeignKeyAction.ENABLE, + enable=True, strict=True, ) @@ -194,11 +218,11 @@ def begin(self): return _FakeConnection() monkeypatch.setattr( - "omop_alchemy.maintenance.cli_foreign_keys._ensure_postgresql_supported", - lambda engine, *, feature: None, + "omop_alchemy.maintenance.cli_foreign_keys.resolve_backend", + lambda engine: _make_fake_backend(), ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli_foreign_keys.collect_foreign_key_targets", + "omop_alchemy.maintenance.cli_foreign_keys._collect_fk_info", lambda engine, *, db_schema=None, vocabulary_included=False: [ type("Target", (), { "table_name": "person", @@ -212,12 +236,12 @@ def begin(self): ) monkeypatch.setattr( "omop_alchemy.maintenance.cli_foreign_keys._collect_strict_validation_failures", - lambda connection, *, db_schema=None, vocabulary_included=False: {}, + lambda connection, backend, *, db_schema=None, vocabulary_included=False: {}, ) results = manage_foreign_key_triggers( _FakeEngine(), - action=ForeignKeyAction.ENABLE, + enable=True, strict=True, ) @@ -245,14 +269,14 @@ def fake_create_engine(url: str, *, future: bool) -> str: def fake_manage_foreign_key_triggers( engine: object, *, - action: ForeignKeyAction, + enable: bool, db_schema: str | None = None, vocabulary_included: bool = False, dry_run: bool = False, strict: bool = False, ): calls["engine"] = engine - calls["action"] = action + calls["enable"] = enable calls["db_schema"] = db_schema calls["vocabulary_included"] = vocabulary_included calls["dry_run"] = 
dry_run @@ -260,15 +284,15 @@ def fake_manage_foreign_key_triggers( return [] monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.load_environment", + "omop_alchemy.db.load_environment", fake_load_environment, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.get_engine_name", + "omop_alchemy.db.get_engine_name", fake_get_engine_name, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.create_engine_with_dependencies", + "omop_alchemy.db.create_engine_with_dependencies", fake_create_engine, ) monkeypatch.setattr( @@ -283,7 +307,7 @@ def fake_manage_foreign_key_triggers( assert result.exit_code == 0 assert calls["strict"] is True - assert calls["action"] is ForeignKeyAction.ENABLE + assert calls["enable"] is True assert "enable --strict" in result.stdout @@ -301,11 +325,11 @@ def connect(self): return _FakeConnection() monkeypatch.setattr( - "omop_alchemy.maintenance.cli_foreign_keys._ensure_postgresql_supported", - lambda engine, *, feature: None, + "omop_alchemy.maintenance.cli_foreign_keys.resolve_backend", + lambda engine: _make_fake_backend(), ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli_foreign_keys.collect_foreign_key_targets", + "omop_alchemy.maintenance.cli_foreign_keys._collect_fk_info", lambda engine, *, db_schema=None, vocabulary_included=False: [ type("Target", (), { "table_name": "person", @@ -327,7 +351,7 @@ def connect(self): ) monkeypatch.setattr( "omop_alchemy.maintenance.cli_foreign_keys._collect_strict_validation_failures", - lambda connection, *, db_schema=None, vocabulary_included=False: { + lambda connection, backend, *, db_schema=None, vocabulary_included=False: { "visit_occurrence": [ ForeignKeyConstraintViolation( source_table_name="visit_occurrence", @@ -406,15 +430,15 @@ def fake_validate_foreign_key_constraints( ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.load_environment", + "omop_alchemy.db.load_environment", fake_load_environment, ) monkeypatch.setattr( - 
"omop_alchemy.maintenance._cli_utils.get_engine_name", + "omop_alchemy.db.get_engine_name", fake_get_engine_name, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.create_engine_with_dependencies", + "omop_alchemy.db.create_engine_with_dependencies", fake_create_engine, ) monkeypatch.setattr( diff --git a/tests/test_fulltext.py b/tests/test_fulltext.py index 5a798c4..c22bc9d 100644 --- a/tests/test_fulltext.py +++ b/tests/test_fulltext.py @@ -2,25 +2,26 @@ import pytest from typer.testing import CliRunner -from omop_alchemy.cdm.handlers.fulltext import ( +from omop_alchemy.backends import ( CONCEPT_NAME_TSVECTOR_COLUMN, CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN, FullTextAction, FullTextResult, - concept_name_tsvector_expression, - drop_fulltext_columns, - install_fulltext_columns, - populate_fulltext_columns, - register_optional_fulltext_columns, - unregister_optional_fulltext_columns, + PostgresBackend, ) from omop_alchemy.cdm.model.vocabulary.concept import Concept from omop_alchemy.cdm.model.vocabulary.concept_synonym import Concept_Synonym from omop_alchemy.maintenance.cli import app - +from omop_alchemy.maintenance.cli_fulltext import ( + drop_fulltext_columns, + install_fulltext_columns, + populate_fulltext_columns, +) runner = CliRunner() +_postgres = PostgresBackend() + class _FakeDialect: name = "postgresql" @@ -74,38 +75,38 @@ def begin(self) -> _FakeBegin: return _FakeBegin(self.connection) -def test_register_and_unregister_optional_fulltext_columns_toggle_metadata(): - """Register/unregister helpers toggle optional tsvector metadata columns.""" - unregister_optional_fulltext_columns() +def test_register_and_unregister_fulltext_metadata_toggle_columns(): + """register/unregister_fulltext_metadata toggle optional tsvector metadata columns.""" + _postgres.unregister_fulltext_metadata() assert CONCEPT_NAME_TSVECTOR_COLUMN not in Concept.__table__.c assert CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN not in Concept_Synonym.__table__.c - 
register_optional_fulltext_columns() + _postgres.register_fulltext_metadata() assert CONCEPT_NAME_TSVECTOR_COLUMN in Concept.__table__.c assert CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN in Concept_Synonym.__table__.c - unregister_optional_fulltext_columns() + _postgres.unregister_fulltext_metadata() assert CONCEPT_NAME_TSVECTOR_COLUMN not in Concept.__table__.c assert CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN not in Concept_Synonym.__table__.c def test_concept_name_tsvector_expression_prefers_registered_column(): - """Expression helper falls back to computed SQL unless the stored column is registered.""" - unregister_optional_fulltext_columns() - fallback = concept_name_tsvector_expression() + """Expression builder falls back to computed SQL unless the stored column is registered.""" + _postgres.unregister_fulltext_metadata() + fallback = _postgres.concept_name_tsvector_expression() assert "to_tsvector" in str(fallback) - register_optional_fulltext_columns() + _postgres.register_fulltext_metadata() try: - stored = concept_name_tsvector_expression() + stored = _postgres.concept_name_tsvector_expression() assert stored is Concept.__table__.c[CONCEPT_NAME_TSVECTOR_COLUMN] finally: - unregister_optional_fulltext_columns() + _postgres.unregister_fulltext_metadata() def test_install_fulltext_columns_builds_postgresql_ddl_and_registers_metadata(): """Install emits expected PostgreSQL DDL and registers optional metadata columns.""" - unregister_optional_fulltext_columns() + _postgres.unregister_fulltext_metadata() engine = _FakeEngine() results = install_fulltext_columns( @@ -119,7 +120,7 @@ def test_install_fulltext_columns_builds_postgresql_ddl_and_registers_metadata() assert all(result.status == "applied" for result in results) statements = [call[1] for call in engine.connection.calls] assert any( - "ALTER TABLE public.concept ADD COLUMN IF NOT EXISTS concept_name_tsvector tsvector" in statement + 'ALTER TABLE "public"."concept" ADD COLUMN IF NOT EXISTS concept_name_tsvector 
tsvector' in statement for statement in statements ) assert any( @@ -127,12 +128,12 @@ def test_install_fulltext_columns_builds_postgresql_ddl_and_registers_metadata() for statement in statements ) assert CONCEPT_NAME_TSVECTOR_COLUMN in Concept.__table__.c - unregister_optional_fulltext_columns() + _postgres.unregister_fulltext_metadata() def test_populate_fulltext_columns_issues_update_with_regconfig_and_row_counts(): """Populate issues parameterized UPDATE statements and reports row counts.""" - unregister_optional_fulltext_columns() + _postgres.unregister_fulltext_metadata() engine = _FakeEngine(rowcount=11) results = populate_fulltext_columns( @@ -144,15 +145,15 @@ def test_populate_fulltext_columns_issues_update_with_regconfig_and_row_counts() assert all(result.status == "applied" for result in results) assert [result.row_count for result in results] == [11, 11] execute_calls = [call for call in engine.connection.calls if call[0] == "execute"] - assert any("UPDATE public.concept" in call[1] for call in execute_calls) + assert any('UPDATE "public"."concept"' in call[1] for call in execute_calls) assert any("CAST(:regconfig AS regconfig)" in call[1] for call in execute_calls) assert all(call[2] == {"regconfig": "simple"} for call in execute_calls) - unregister_optional_fulltext_columns() + _postgres.unregister_fulltext_metadata() def test_drop_fulltext_columns_drops_schema_objects_and_unregisters_metadata(): """Drop removes fulltext schema objects and unregisters optional metadata columns.""" - register_optional_fulltext_columns() + _postgres.register_fulltext_metadata() engine = _FakeEngine() results = drop_fulltext_columns( @@ -166,7 +167,7 @@ def test_drop_fulltext_columns_drops_schema_objects_and_unregisters_metadata(): statements = [call[1] for call in engine.connection.calls] assert any("DROP INDEX IF EXISTS public.idx_gin_concept_name_tsvector" in statement for statement in statements) assert any( - "ALTER TABLE public.concept DROP COLUMN IF EXISTS 
concept_name_tsvector" in statement + 'ALTER TABLE "public"."concept" DROP COLUMN IF EXISTS concept_name_tsvector' in statement for statement in statements ) assert CONCEPT_NAME_TSVECTOR_COLUMN not in Concept.__table__.c @@ -192,7 +193,7 @@ def test_fulltext_management_requires_postgresql(tmp_path, fn_name): with pytest.raises(RuntimeError) as exc_info: fn(engine) - assert "only supported for PostgreSQL engines" in str(exc_info.value) + assert "not supported by the SQLite backend" in str(exc_info.value) def test_fulltext_install_cli_passes_options(monkeypatch): @@ -231,7 +232,7 @@ def fake_install_fulltext_columns( ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli_fulltext.build_engine", + "omop_alchemy.maintenance._cli_utils.build_engine", fake_build_engine, ) monkeypatch.setattr( diff --git a/tests/test_indexes.py b/tests/test_indexes.py index 615e980..bde25df 100644 --- a/tests/test_indexes.py +++ b/tests/test_indexes.py @@ -162,15 +162,15 @@ def fake_manage_indexes( ] monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.load_environment", + "omop_alchemy.db.load_environment", fake_load_environment, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.get_engine_name", + "omop_alchemy.db.get_engine_name", fake_get_engine_name, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.create_engine_with_dependencies", + "omop_alchemy.db.create_engine_with_dependencies", fake_create_engine, ) monkeypatch.setattr( diff --git a/tests/test_load_vocab_source.py b/tests/test_load_vocab_source.py index a2b00f2..f65364d 100644 --- a/tests/test_load_vocab_source.py +++ b/tests/test_load_vocab_source.py @@ -202,15 +202,15 @@ def fake_load_vocab_source( ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.load_environment", + "omop_alchemy.db.load_environment", fake_load_environment, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.get_engine_name", + "omop_alchemy.db.get_engine_name", fake_get_engine_name, ) 
monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.create_engine_with_dependencies", + "omop_alchemy.db.create_engine_with_dependencies", fake_create_engine, ) monkeypatch.setattr( @@ -225,7 +225,7 @@ def fake_load_vocab_source( app, [ "config", - "set-overrides", + "override", "--athena-source", str(athena_dir), "--engine-schema", @@ -464,7 +464,7 @@ def fail_load_vocab_source(*args, **kwargs): ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli_vocab.build_engine", + "omop_alchemy.maintenance._cli_utils.build_engine", fake_build_engine, ) monkeypatch.setattr( diff --git a/tests/test_truncate_tables.py b/tests/test_truncate_tables.py index eee3297..584e197 100644 --- a/tests/test_truncate_tables.py +++ b/tests/test_truncate_tables.py @@ -19,7 +19,7 @@ def test_truncate_tables_requires_postgresql(tmp_path): with pytest.raises(RuntimeError) as exc_info: truncate_tables(engine, scope=TableScope.CLINICAL, dry_run=True) - assert "only supported for PostgreSQL engines" in str(exc_info.value) + assert "not supported by the SQLite backend" in str(exc_info.value) def test_truncate_tables_reports_blocking_foreign_key_references(monkeypatch, tmp_path): @@ -27,7 +27,7 @@ def test_truncate_tables_reports_blocking_foreign_key_references(monkeypatch, tm engine = sa.create_engine(f"sqlite:///{tmp_path / 'truncate_fk.db'}", future=True) create_missing_tables(engine, vocabulary_included=True) - monkeypatch.setattr(truncate_tables_module, "require_backend", lambda *args, **kwargs: None) + monkeypatch.setattr(truncate_tables_module, "require_backend_support", lambda *args, **kwargs: None) with pytest.raises(RuntimeError) as exc_info: truncate_tables(engine, scope=TableScope.CLINICAL, dry_run=False) @@ -92,15 +92,15 @@ def fake_truncate_tables( ] monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.load_environment", + "omop_alchemy.db.load_environment", fake_load_environment, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.get_engine_name", + 
"omop_alchemy.db.get_engine_name", fake_get_engine_name, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.create_engine_with_dependencies", + "omop_alchemy.db.create_engine_with_dependencies", fake_create_engine, ) monkeypatch.setattr( From a3f4cc3c09bf2f8b879188110487d84824290f71 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Sun, 24 May 2026 22:51:56 +0000 Subject: [PATCH 03/25] Wrap common interface with omop_command --- omop_alchemy/maintenance/_cli_utils.py | 163 ++++++++++--- omop_alchemy/maintenance/cli_backup.py | 104 +++------ omop_alchemy/maintenance/cli_foreign_keys.py | 135 +++-------- omop_alchemy/maintenance/cli_fulltext.py | 151 +++--------- omop_alchemy/maintenance/cli_indexes.py | 104 +++------ omop_alchemy/maintenance/cli_schema.py | 234 +++++-------------- omop_alchemy/maintenance/cli_tables.py | 174 ++++---------- omop_alchemy/maintenance/cli_vocab.py | 108 ++++----- tests/test_truncate_tables.py | 10 +- 9 files changed, 413 insertions(+), 770 deletions(-) diff --git a/omop_alchemy/maintenance/_cli_utils.py b/omop_alchemy/maintenance/_cli_utils.py index 2863194..2a690ff 100644 --- a/omop_alchemy/maintenance/_cli_utils.py +++ b/omop_alchemy/maintenance/_cli_utils.py @@ -1,16 +1,138 @@ from __future__ import annotations + +import functools +import inspect +from typing import Any, Callable, Optional, TypeVar + import typer -from typing import Optional -from sqlalchemy import Engine from sqlalchemy.exc import SQLAlchemyError -from rich.console import Console from .tables import TableScope from .ui import console, render_error, render_command_header -from ..db import build_engine, resolve_connection, ConnectionDefaults +from ..db import build_engine, resolve_connection from ..backends import BackendNotSupportedError +_F = TypeVar("_F", bound=Callable[..., Any]) + +# ── Shared injected CLI params ──────────────────────────────────────────────── +# Built once and reused so every decorated command gets identical help text. 
+ +_DOTENV_PARAM = inspect.Parameter( + "dotenv", + inspect.Parameter.POSITIONAL_OR_KEYWORD, + default=typer.Option( + None, + help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", + ), + annotation=Optional[str], +) +_ENGINE_SCHEMA_PARAM = inspect.Parameter( + "engine_schema", + inspect.Parameter.POSITIONAL_OR_KEYWORD, + default=typer.Option( + None, + help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", + ), + annotation=Optional[str], +) +_DB_SCHEMA_PARAM = inspect.Parameter( + "db_schema", + inspect.Parameter.POSITIONAL_OR_KEYWORD, + default=typer.Option( + None, + help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", + ), + annotation=Optional[str], +) +_DRY_RUN_PARAM = inspect.Parameter( + "dry_run", + inspect.Parameter.POSITIONAL_OR_KEYWORD, + default=typer.Option( + False, + "--dry-run", + help="Preview planned actions without applying any changes to the database.", + ), + annotation=bool, +) + +_INJECTED_NAMES = {"dotenv", "engine_schema", "db_schema"} + + +# ── Decorator ───────────────────────────────────────────────────────────────── + +def omop_command( + command_name: str, + *, + vocabulary_included: bool | None = None, + dry_run: bool = False, + mode_label: str | None = None, +) -> Callable[[_F], _F]: + """Decorator that eliminates CLI boilerplate for every omop-alchemy command. + + Injects ``dotenv``, ``engine_schema``, ``db_schema`` (and optionally + ``dry_run``) into the Typer CLI signature, calls :func:`setup_cli_cmd`, + and wraps the body in ``try/except handle_error``. + + The decorated function must accept ``(conn, engine, ...)`` as its first + two positional parameters; the decorator supplies them. Any + ``vocabulary_included`` or ``athena_source`` parameter declared in the + function is automatically forwarded to :func:`setup_cli_cmd`. 
+ """ + def decorator(func: _F) -> _F: + @functools.wraps(func) + def wrapper(**kwargs: Any) -> Any: + dotenv = kwargs.pop("dotenv", None) + engine_schema = kwargs.pop("engine_schema", None) + db_schema = kwargs.pop("db_schema", None) + athena_source = kwargs.pop("athena_source", None) + _dry_run = kwargs.pop("dry_run", False) if dry_run else False + _vocab = kwargs.get("vocabulary_included", vocabulary_included) + _mode = mode_label if mode_label is not None else ("dry-run" if _dry_run else "apply") + try: + conn = resolve_connection( + dotenv=dotenv, + engine_schema=engine_schema, + db_schema=db_schema, + athena_source=athena_source, + ) + console.print( + render_command_header( + command_name=command_name, + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, + vocabulary_included=_vocab, + mode_label=_mode, + ) + ) + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + if dry_run: + return func(conn, engine, dry_run=_dry_run, **kwargs) # type: ignore[arg-type] + return func(conn, engine, **kwargs) # type: ignore[arg-type] + except Exception as exc: + handle_error(exc) + + # Rebuild the Typer-visible signature: + # • skip conn/engine (decorator supplies them) + # • skip dotenv/engine_schema/db_schema (decorator injects them) + # • skip dry_run if the decorator owns it (to avoid duplication) + orig_params = list(inspect.signature(func).parameters.values()) + func_params = [ + p for p in orig_params[2:] + if p.name not in _INJECTED_NAMES + and not (dry_run and p.name == "dry_run") + ] + new_params = [_DOTENV_PARAM, _ENGINE_SCHEMA_PARAM, _DB_SCHEMA_PARAM] + func_params + if dry_run: + new_params.append(_DRY_RUN_PARAM) + wrapper.__signature__ = inspect.signature(func).replace(parameters=new_params) # type: ignore[attr-defined] + + return wrapper # type: ignore[return-value] + return decorator # type: ignore[return-value] + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + def handle_error(exc: 
Exception) -> None: if isinstance(exc, BackendNotSupportedError): console.print(render_error(f"Not supported: {exc}")) @@ -41,36 +163,3 @@ def resolve_selection( return None, selected return scope or default_scope, None - -def setup_cli_cmd( - *, - console: Console, - dotenv: Optional[str], - engine_schema: Optional[str], - db_schema: Optional[str], - command_name: str, - vocabulary_included: Optional[bool], - mode_label: str, - athena_source: Optional[str] = None, -) -> tuple[ConnectionDefaults, Engine]: - """Convenience function to resolve connection, print command header, and build engine for CLI commands.""" - - conn = resolve_connection( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - athena_source=athena_source, - ) - console.print( - render_command_header( - command_name=command_name, - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, - vocabulary_included=vocabulary_included, - mode_label=mode_label, - ) - ) - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) - return conn, engine - - diff --git a/omop_alchemy/maintenance/cli_backup.py b/omop_alchemy/maintenance/cli_backup.py index 4721cb3..e8dfb40 100644 --- a/omop_alchemy/maintenance/cli_backup.py +++ b/omop_alchemy/maintenance/cli_backup.py @@ -10,7 +10,7 @@ import typer from ..backends import resolve_backend, require_backend_support, backend_support_note -from ._cli_utils import handle_error, setup_cli_cmd +from ._cli_utils import omop_command from .ui import ( console, render_backup_result, @@ -151,19 +151,10 @@ def restore_database_backup( ) @app.command("backup-database") +@omop_command("backup-database", dry_run=True) def backup_database_command( - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). 
Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Restrict the backup to a single schema (pg_dump --schema). Only supported on PostgreSQL.", - ), + conn, + engine, output_path: str | None = typer.Option( None, help="Output path for the backup artifact. Defaults to a timestamped file in the current directory.", @@ -172,82 +163,41 @@ def backup_database_command( BackupFormat.CUSTOM, help="pg_dump output format. 'custom' produces a binary .dump file; 'plain' produces a plain SQL .sql file.", ), - dry_run: bool = typer.Option( - False, - "--dry-run", - help="Preview planned actions without applying any changes to the database.", - ), + dry_run: bool = False, ) -> None: """Create a database backup that can be restored with `restore-database`.""" - try: - conn, engine = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="backup-database", - vocabulary_included=None, - mode_label="dry-run" if dry_run else "apply", + with console.status("Creating restore-ready database backup..."): + result = create_database_backup( + engine, + output_path=output_path, + backup_format=backup_format, + db_schema=conn.db_schema, + dry_run=dry_run, ) - with console.status("Creating restore-ready database backup..."): - result = create_database_backup( - engine, - output_path=output_path, - backup_format=backup_format, - db_schema=conn.db_schema, - dry_run=dry_run, - ) - console.print(render_backup_result(result)) - console.print(render_backup_summary(result, dry_run=dry_run)) - except Exception as exc: - handle_error(exc) + console.print(render_backup_result(result)) + console.print(render_backup_summary(result, dry_run=dry_run)) @app.command("restore-database") +@omop_command("restore-database", dry_run=True) def restore_database_command( + conn, + engine, input_path: str = typer.Argument(help="Path to the backup artifact (.dump or .sql) to restore."), - 
dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Restrict the restore to a single schema (pg_restore --schema). Only valid for custom-format dumps.", - ), backup_format: BackupFormat = typer.Option( ..., help="Format of the artifact to restore. Must match the format used when the backup was created.", ), - dry_run: bool = typer.Option( - False, - "--dry-run", - help="Preview planned actions without applying any changes to the database.", - ), + dry_run: bool = False, ) -> None: """Restore a database backup that was created with `backup-database`.""" - try: - conn, engine = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="restore-database", - vocabulary_included=None, - mode_label="dry-run" if dry_run else "apply", + with console.status("Restoring database backup..."): + result = restore_database_backup( + engine, + input_path=input_path, + backup_format=backup_format, + db_schema=conn.db_schema, + dry_run=dry_run, ) - with console.status("Restoring database backup..."): - result = restore_database_backup( - engine, - input_path=input_path, - backup_format=backup_format, - db_schema=conn.db_schema, - dry_run=dry_run, - ) - console.print(render_restore_result(result)) - console.print(render_restore_summary(result, dry_run=dry_run)) - except Exception as exc: - handle_error(exc) + console.print(render_restore_result(result)) + console.print(render_restore_summary(result, dry_run=dry_run)) diff --git a/omop_alchemy/maintenance/cli_foreign_keys.py b/omop_alchemy/maintenance/cli_foreign_keys.py index 670efdb..8e99691 100644 --- 
a/omop_alchemy/maintenance/cli_foreign_keys.py +++ b/omop_alchemy/maintenance/cli_foreign_keys.py @@ -7,7 +7,7 @@ from ..db import build_engine, resolve_connection from ..backends import Backend, resolve_backend, require_backend_support, backend_support_note -from ._cli_utils import handle_error, setup_cli_cmd +from ._cli_utils import handle_error, omop_command from .tables import ( TableCategory, existing_maintenance_tables, @@ -439,60 +439,35 @@ def collect_foreign_key_trigger_status( ) @app.command("disable") +@omop_command("foreign-keys disable", dry_run=True) def disable_foreign_keys_command( - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). 
Sets search_path on PostgreSQL; not supported on SQLite.", - ), + conn, + engine, vocabulary_included: bool = typer.Option( False, "--vocab/--no-vocab", help="Include OMOP vocabulary tables in the selection.", ), - dry_run: bool = typer.Option( - False, - "--dry-run", - help="Preview planned actions without applying any changes to the database.", - ), strict: bool = typer.Option( False, "--strict", help="Validate all FK relationships and report violations before disabling trigger enforcement.", ), + dry_run: bool = False, ) -> None: """Disable PostgreSQL RI trigger enforcement for all participating OMOP tables.""" - try: - conn, engine = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="foreign-keys disable", + with console.status("Managing PostgreSQL foreign key trigger enforcement..."): + results = manage_foreign_key_triggers( + engine, + enable=False, + db_schema=conn.db_schema, vocabulary_included=vocabulary_included, - mode_label="dry-run" if dry_run else "apply", + dry_run=dry_run, + strict=strict, ) - with console.status("Managing PostgreSQL foreign key trigger enforcement..."): - results = manage_foreign_key_triggers( - engine, - enable=False, - db_schema=conn.db_schema, - vocabulary_included=vocabulary_included, - dry_run=dry_run, - strict=strict, - ) - console.print(render_foreign_key_results(results)) - console.print(render_foreign_key_summary(results, dry_run=dry_run)) - console.print(render_foreign_key_note(enable=False, strict=strict)) - except Exception as exc: - handle_error(exc) + console.print(render_foreign_key_results(results)) + console.print(render_foreign_key_summary(results, dry_run=dry_run)) + console.print(render_foreign_key_note(enable=False, strict=strict)) @app.command("enable") @@ -560,19 +535,10 @@ def enable_foreign_keys_command( @app.command("status") +@omop_command("foreign-keys status", mode_label="inspect") def foreign_key_status_command( - dotenv: str | None 
= typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", - ), + conn, + engine, vocabulary_included: bool = typer.Option( False, "--vocab/--no-vocab", @@ -580,42 +546,21 @@ def foreign_key_status_command( ), ) -> None: """Show the current enabled/disabled state of RI triggers for each participating OMOP table.""" - try: - conn, engine = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="foreign-keys status", + with console.status("Inspecting foreign key trigger status..."): + results = collect_foreign_key_trigger_status( + engine, + db_schema=conn.db_schema, vocabulary_included=vocabulary_included, - mode_label="inspect", ) - with console.status("Inspecting foreign key trigger status..."): - results = collect_foreign_key_trigger_status( - engine, - db_schema=conn.db_schema, - vocabulary_included=vocabulary_included, - ) - console.print(render_foreign_key_status_results(results)) - console.print(render_foreign_key_status_summary(results)) - except Exception as exc: - handle_error(exc) + console.print(render_foreign_key_status_results(results)) + console.print(render_foreign_key_status_summary(results)) @app.command("validate") +@omop_command("foreign-keys validate", mode_label="inspect") def foreign_key_validate_command( - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 
'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", - ), + conn, + engine, vocabulary_included: bool = typer.Option( False, "--vocab/--no-vocab", @@ -623,24 +568,12 @@ def foreign_key_validate_command( ), ) -> None: """Validate FK constraints on selected tables and report any rows that violate referential integrity.""" - try: - conn, engine = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="foreign-keys validate", + with console.status("Validating selected foreign key relationships..."): + report = validate_foreign_key_constraints( + engine, + db_schema=conn.db_schema, vocabulary_included=vocabulary_included, - mode_label="inspect", ) - with console.status("Validating selected foreign key relationships..."): - report = validate_foreign_key_constraints( - engine, - db_schema=conn.db_schema, - vocabulary_included=vocabulary_included, - ) - console.print(render_foreign_key_validation_results(report.results)) - console.print(render_foreign_key_validation_issues(report.violations)) - console.print(render_foreign_key_validation_summary(report)) - except Exception as exc: - handle_error(exc) + console.print(render_foreign_key_validation_results(report.results)) + console.print(render_foreign_key_validation_issues(report.violations)) + console.print(render_foreign_key_validation_summary(report)) diff --git a/omop_alchemy/maintenance/cli_fulltext.py b/omop_alchemy/maintenance/cli_fulltext.py index c4452b4..bcbba86 100644 --- a/omop_alchemy/maintenance/cli_fulltext.py +++ b/omop_alchemy/maintenance/cli_fulltext.py @@ -6,7 +6,7 @@ from ..backends import backend_support_note as _backend_support_note from ..backends import resolve_backend, require_backend_support from ..backends.base import FullTextAction, 
FullTextError, FullTextResult -from ._cli_utils import handle_error, setup_cli_cmd +from ._cli_utils import omop_command from .ui import ( console, render_fulltext_results, @@ -189,19 +189,10 @@ def drop_fulltext_columns( # ── CLI commands ────────────────────────────────────────────────────────────── @app.command("install") +@omop_command("fulltext install", vocabulary_included=True, dry_run=True) def install_fulltext_command( - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", - ), + conn, + engine, create_indexes: bool = typer.Option( True, "--create-indexes/--no-create-indexes", @@ -212,129 +203,63 @@ def install_fulltext_command( "--fastupdate/--no-fastupdate", help="Enable PostgreSQL GIN fastupdate on newly created indexes (trades write speed for query latency).", ), - dry_run: bool = typer.Option( - False, - "--dry-run", - help="Preview planned actions without applying any changes to the database.", - ), + dry_run: bool = False, ) -> None: """Add tsvector sidecar columns to vocabulary tables and optionally create GIN indexes for fast full-text search.""" - try: - conn, engine = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="fulltext install", - vocabulary_included=True, - mode_label="dry-run" if dry_run else "apply", + with console.status("Managing PostgreSQL full-text sidecar columns..."): + results = install_fulltext_columns( + engine, + db_schema=conn.db_schema, + create_indexes=create_indexes, + fastupdate=fastupdate, + dry_run=dry_run, ) - 
with console.status("Managing PostgreSQL full-text sidecar columns..."): - results = install_fulltext_columns( - engine, - db_schema=conn.db_schema, - create_indexes=create_indexes, - fastupdate=fastupdate, - dry_run=dry_run, - ) - console.print(render_fulltext_results(results)) - console.print(render_fulltext_summary(results, action="install", dry_run=dry_run)) - except Exception as exc: - handle_error(exc) + console.print(render_fulltext_results(results)) + console.print(render_fulltext_summary(results, action="install", dry_run=dry_run)) @app.command("populate") +@omop_command("fulltext populate", vocabulary_included=True, dry_run=True) def populate_fulltext_command( - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", - ), + conn, + engine, regconfig: str = typer.Option( "english", help="PostgreSQL text search configuration to use when building tsvector values (e.g. 
'english', 'simple').", ), - dry_run: bool = typer.Option( - False, - "--dry-run", - help="Preview planned actions without applying any changes to the database.", - ), + dry_run: bool = False, ) -> None: """Fill tsvector sidecar columns with pre-computed search vectors using the specified PostgreSQL text search configuration.""" - try: - conn, engine = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="fulltext populate", - vocabulary_included=True, - mode_label="dry-run" if dry_run else "apply", + with console.status("Managing PostgreSQL full-text sidecar columns..."): + results = populate_fulltext_columns( + engine, + db_schema=conn.db_schema, + regconfig=regconfig, + dry_run=dry_run, ) - with console.status("Managing PostgreSQL full-text sidecar columns..."): - results = populate_fulltext_columns( - engine, - db_schema=conn.db_schema, - regconfig=regconfig, - dry_run=dry_run, - ) - console.print(render_fulltext_results(results)) - console.print(render_fulltext_summary(results, action="populate", dry_run=dry_run)) - except Exception as exc: - handle_error(exc) + console.print(render_fulltext_results(results)) + console.print(render_fulltext_summary(results, action="populate", dry_run=dry_run)) @app.command("drop") +@omop_command("fulltext drop", vocabulary_included=True, dry_run=True) def drop_fulltext_command( - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). 
Sets search_path on PostgreSQL; not supported on SQLite.", - ), + conn, + engine, drop_indexes: bool = typer.Option( True, "--drop-indexes/--no-drop-indexes", help="Drop managed GIN indexes before dropping the tsvector columns.", ), - dry_run: bool = typer.Option( - False, - "--dry-run", - help="Preview planned actions without applying any changes to the database.", - ), + dry_run: bool = False, ) -> None: """Remove tsvector sidecar columns and their associated GIN indexes from vocabulary tables.""" - try: - conn, engine = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="fulltext drop", - vocabulary_included=True, - mode_label="dry-run" if dry_run else "apply", + with console.status("Managing PostgreSQL full-text sidecar columns..."): + results = drop_fulltext_columns( + engine, + db_schema=conn.db_schema, + drop_indexes=drop_indexes, + dry_run=dry_run, ) - with console.status("Managing PostgreSQL full-text sidecar columns..."): - results = drop_fulltext_columns( - engine, - db_schema=conn.db_schema, - drop_indexes=drop_indexes, - dry_run=dry_run, - ) - console.print(render_fulltext_results(results)) - console.print(render_fulltext_summary(results, action="drop", dry_run=dry_run)) - except Exception as exc: - handle_error(exc) + console.print(render_fulltext_results(results)) + console.print(render_fulltext_summary(results, action="drop", dry_run=dry_run)) diff --git a/omop_alchemy/maintenance/cli_indexes.py b/omop_alchemy/maintenance/cli_indexes.py index 69a893f..1d7e858 100644 --- a/omop_alchemy/maintenance/cli_indexes.py +++ b/omop_alchemy/maintenance/cli_indexes.py @@ -9,7 +9,7 @@ from omop_alchemy.cdm.base.indexing import OMOP_CLUSTER_INDEX_INFO_KEY from ..backends import resolve_backend, backend_supports -from ._cli_utils import handle_error, setup_cli_cmd +from ._cli_utils import omop_command from .tables import ( MaintenanceTable, TableCategory, @@ -285,102 +285,52 @@ def 
manage_indexes( @app.command("disable") +@omop_command("indexes disable", dry_run=True) def disable_indexes_command( - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", - ), + conn, + engine, vocabulary_included: bool = typer.Option( False, "--vocab/--no-vocab", help="Include OMOP vocabulary tables in the selection.", ), - dry_run: bool = typer.Option( - False, - "--dry-run", - help="Preview planned actions without applying any changes to the database.", - ), + dry_run: bool = False, ) -> None: """Drop all ORM-defined secondary indexes from the target database; useful before bulk data loads.""" - try: - conn, engine = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="indexes disable", + with console.status("Managing metadata-defined indexes..."): + results = manage_indexes( + engine, + action=IndexAction.DISABLE, + db_schema=conn.db_schema, vocabulary_included=vocabulary_included, - mode_label="dry-run" if dry_run else "apply", + dry_run=dry_run, ) - with console.status("Managing metadata-defined indexes..."): - results = manage_indexes( - engine, - action=IndexAction.DISABLE, - db_schema=conn.db_schema, - vocabulary_included=vocabulary_included, - dry_run=dry_run, - ) - console.print(render_index_results(results)) - console.print(render_index_summary(results, dry_run=dry_run)) - console.print(render_index_note(IndexAction.DISABLE)) - except Exception as exc: - handle_error(exc) + console.print(render_index_results(results)) + 
console.print(render_index_summary(results, dry_run=dry_run)) + console.print(render_index_note(IndexAction.DISABLE)) @app.command("enable") +@omop_command("indexes enable", dry_run=True) def enable_indexes_command( - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", - ), + conn, + engine, vocabulary_included: bool = typer.Option( False, "--vocab/--no-vocab", help="Include OMOP vocabulary tables in the selection.", ), - dry_run: bool = typer.Option( - False, - "--dry-run", - help="Preview planned actions without applying any changes to the database.", - ), + dry_run: bool = False, ) -> None: """Recreate all ORM-defined secondary indexes; also CLUSTERs tables on PostgreSQL where metadata specifies it.""" - try: - conn, engine = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="indexes enable", + with console.status("Managing metadata-defined indexes..."): + results = manage_indexes( + engine, + action=IndexAction.ENABLE, + db_schema=conn.db_schema, vocabulary_included=vocabulary_included, - mode_label="dry-run" if dry_run else "apply", + dry_run=dry_run, ) - with console.status("Managing metadata-defined indexes..."): - results = manage_indexes( - engine, - action=IndexAction.ENABLE, - db_schema=conn.db_schema, - vocabulary_included=vocabulary_included, - dry_run=dry_run, - ) - console.print(render_index_results(results)) - console.print(render_index_summary(results, dry_run=dry_run)) - console.print(render_index_note(IndexAction.ENABLE)) - except Exception 
as exc: - handle_error(exc) + console.print(render_index_results(results)) + console.print(render_index_summary(results, dry_run=dry_run)) + console.print(render_index_note(IndexAction.ENABLE)) diff --git a/omop_alchemy/maintenance/cli_schema.py b/omop_alchemy/maintenance/cli_schema.py index ededf2a..71bedb9 100644 --- a/omop_alchemy/maintenance/cli_schema.py +++ b/omop_alchemy/maintenance/cli_schema.py @@ -16,7 +16,7 @@ from omop_alchemy.db import get_engine_name from ..backends import resolve_backend -from ._cli_utils import handle_error, setup_cli_cmd +from ._cli_utils import omop_command from .cli_config import defaults_path from .cli_foreign_keys import ( ForeignKeyStatusResult, @@ -1378,19 +1378,10 @@ def collect_data_summary( @app.command("info") +@omop_command("info", mode_label="inspect") def info_command( - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). 
Sets search_path on PostgreSQL; not supported on SQLite.", - ), + conn, + engine, vocabulary_included: bool = typer.Option( False, "--vocab/--no-vocab", @@ -1398,48 +1389,26 @@ def info_command( ), ) -> None: """Inspect maintenance CLI readiness, backend compatibility, and current installation state.""" - try: - conn, _ = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="info", + load_environment(conn.dotenv or "") + with console.status("Inspecting maintenance environment..."): + info = collect_maintenance_info( + dotenv=conn.dotenv, + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, vocabulary_included=vocabulary_included, - mode_label="inspect", - ) - - load_environment(conn.dotenv or "") - with console.status("Inspecting maintenance environment..."): - info = collect_maintenance_info( - dotenv=conn.dotenv, - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, - vocabulary_included=vocabulary_included, - ) - console.print(render_info_environment(info)) - console.print(render_info_database(info)) - console.print(render_info_dependencies(info)) - console.print(render_info_command_support(info.command_support)) - console.print(render_info_summary(info)) - except Exception as exc: - handle_error(exc) + ) + console.print(render_info_environment(info)) + console.print(render_info_database(info)) + console.print(render_info_dependencies(info)) + console.print(render_info_command_support(info.command_support)) + console.print(render_info_summary(info)) @app.command("doctor") +@omop_command("doctor", mode_label="inspect") def doctor_command( - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). 
Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", - ), + conn, + engine, vocabulary_included: bool = typer.Option( False, "--vocab/--no-vocab", @@ -1452,50 +1421,29 @@ def doctor_command( ), ) -> None: """Run a read-only maintenance health check across connection readiness, schema drift, and FK state.""" - try: - conn, _ = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="doctor", + load_environment(conn.dotenv or "") + with console.status("Running maintenance doctor checks..."): + report = collect_doctor_report( + dotenv=conn.dotenv, + engine_schema=conn.engine_schema, + db_schema=conn.db_schema, vocabulary_included=vocabulary_included, - mode_label="inspect", + deep=deep, ) - load_environment(conn.dotenv or "") - with console.status("Running maintenance doctor checks..."): - report = collect_doctor_report( - dotenv=conn.dotenv, - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, - vocabulary_included=vocabulary_included, - deep=deep, - ) - console.print(render_info_environment(report.info)) - console.print(render_info_database(report.info)) - console.print(render_doctor_checks(report.checks)) - if deep and report.foreign_key_validation is not None: - console.print(render_foreign_key_validation_issues(report.foreign_key_validation.violations)) - console.print(render_doctor_recommendations(report.recommendations)) - console.print(render_doctor_summary(report, deep=deep)) - except Exception as exc: - handle_error(exc) + console.print(render_info_environment(report.info)) + console.print(render_info_database(report.info)) + console.print(render_doctor_checks(report.checks)) + if deep and report.foreign_key_validation is not None: + 
console.print(render_foreign_key_validation_issues(report.foreign_key_validation.violations)) + console.print(render_doctor_recommendations(report.recommendations)) + console.print(render_doctor_summary(report, deep=deep)) @app.command("reconcile-schema") +@omop_command("reconcile-schema", mode_label="inspect") def reconcile_schema_command( - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", - ), + conn, + engine, vocabulary_included: bool = typer.Option( False, "--vocab/--no-vocab", @@ -1503,88 +1451,42 @@ def reconcile_schema_command( ), ) -> None: """Compare ORM-managed SQLAlchemy metadata against the current target database schema.""" - try: - conn, engine = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="reconcile-schema", - vocabulary_included=vocabulary_included, - mode_label="inspect", - ) - with console.status("Reconciling ORM metadata against target database schema..."): - report = reconcile_schema(engine, db_schema=conn.db_schema, vocabulary_included=vocabulary_included) - console.print(render_reconciliation_results(report.table_results)) - console.print(render_reconciliation_issues(report.issues)) - console.print(render_reconciliation_summary(report)) - except Exception as exc: - handle_error(exc) + with console.status("Reconciling ORM metadata against target database schema..."): + report = reconcile_schema(engine, db_schema=conn.db_schema, vocabulary_included=vocabulary_included) + 
console.print(render_reconciliation_results(report.table_results)) + console.print(render_reconciliation_issues(report.issues)) + console.print(render_reconciliation_summary(report)) @app.command("create-missing-tables") +@omop_command("create-missing-tables", dry_run=True) def create_missing_tables_command( - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", - ), + conn, + engine, vocabulary_included: bool = typer.Option( True, "--vocab/--no-vocab", help="Include OMOP vocabulary tables in the selection. Enabled by default.", ), - dry_run: bool = typer.Option( - False, - "--dry-run", - help="Preview planned actions without applying any changes to the database.", - ), + dry_run: bool = False, ) -> None: """Create missing ORM-managed OMOP tables from metadata.""" - try: - conn, engine = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="create-missing-tables", + with console.status("Creating missing tables..."): + results = create_missing_tables( + engine, + db_schema=conn.db_schema, vocabulary_included=vocabulary_included, - mode_label="dry-run" if dry_run else "apply", + dry_run=dry_run, ) - with console.status("Creating missing tables..."): - results = create_missing_tables( - engine, - db_schema=conn.db_schema, - vocabulary_included=vocabulary_included, - dry_run=dry_run, - ) - console.print(render_table_creation_results(results)) - console.print(render_table_creation_summary(results, dry_run=dry_run)) - except Exception as exc: - handle_error(exc) + 
console.print(render_table_creation_results(results)) + console.print(render_table_creation_summary(results, dry_run=dry_run)) @app.command("data-summary") +@omop_command("data-summary", mode_label="inspect") def data_summary_command( - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", - ), + conn, + engine, vocabulary_included: bool = typer.Option( False, "--vocab/--no-vocab", @@ -1597,24 +1499,12 @@ def data_summary_command( ), ) -> None: """Summarise ORM-managed OMOP tables present in the target database.""" - try: - conn, engine = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="data-summary", + with console.status("Collecting table summary..."): + results = collect_data_summary( + engine, + db_schema=conn.db_schema, vocabulary_included=vocabulary_included, - mode_label="inspect", + existing_only=not include_missing, ) - with console.status("Collecting table summary..."): - results = collect_data_summary( - engine, - db_schema=conn.db_schema, - vocabulary_included=vocabulary_included, - existing_only=not include_missing, - ) - console.print(render_data_summary_results(results)) - console.print(render_data_summary_summary(results)) - except Exception as exc: - handle_error(exc) + console.print(render_data_summary_results(results)) + console.print(render_data_summary_summary(results)) diff --git a/omop_alchemy/maintenance/cli_tables.py b/omop_alchemy/maintenance/cli_tables.py index 0bd7f6c..6ef78c0 100644 --- a/omop_alchemy/maintenance/cli_tables.py +++ 
b/omop_alchemy/maintenance/cli_tables.py @@ -6,7 +6,7 @@ import typer from ..backends import resolve_backend, require_backend_support, backend_support_note -from ._cli_utils import handle_error, resolve_selection, setup_cli_cmd +from ._cli_utils import omop_command, resolve_selection from .tables import ( TableCategory, TableScope, @@ -376,19 +376,10 @@ def reset_model_sequences( app = typer.Typer(rich_markup_mode="rich", help="Manage Database Tables: analyze, truncate, and reset sequences",) @app.command("analyze-tables") +@omop_command("analyze-tables", dry_run=True) def analyze_tables_command( - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", - ), + conn, + engine, scope: TableScope | None = typer.Option( None, "--scope", @@ -405,111 +396,61 @@ def analyze_tables_command( "--vacuum", help="Use VACUUM ANALYZE instead of plain ANALYZE to also reclaim dead tuples. 
Not available on all backends.", ), - dry_run: bool = typer.Option( - False, - "--dry-run", - help="Preview planned actions without applying any changes to the database.", - ), + dry_run: bool = False, ) -> None: """Analyse selected ORM-managed tables to update planner statistics.""" resolved_scope, resolved_tables = resolve_selection( scope=scope, tables=table, default_scope=TableScope.ALL ) - try: - conn, engine = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="analyze-tables", - vocabulary_included=None, - mode_label="dry-run" if dry_run else "apply", + with console.status("Refreshing planner statistics for selected tables..."): + results = analyze_tables( + engine, + db_schema=conn.db_schema, + scope=resolved_scope, + table_names=resolved_tables, + vacuum=vacuum, + dry_run=dry_run, ) - with console.status("Refreshing planner statistics for selected tables..."): - results = analyze_tables( - engine, - db_schema=conn.db_schema, - scope=resolved_scope, - table_names=resolved_tables, - vacuum=vacuum, - dry_run=dry_run, - ) - console.print(render_analyze_results(results)) - console.print(render_analyze_summary(results, dry_run=dry_run)) - console.print(render_analyze_note()) - except Exception as exc: - handle_error(exc) + console.print(render_analyze_results(results)) + console.print(render_analyze_summary(results, dry_run=dry_run)) + console.print(render_analyze_note()) @app.command( "reset-sequences", help=f"Reset each owned sequence to MAX(pk) + 1 to prevent insert conflicts after bulk loads. {backend_support_note('find_sequence_name')}", ) +@omop_command("reset-sequences", dry_run=True) def reset_sequences_command( - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). 
Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", - ), + conn, + engine, vocabulary_included: bool = typer.Option( False, "--vocab/--no-vocab", help="Include OMOP vocabulary tables in the selection.", ), - dry_run: bool = typer.Option( - False, - "--dry-run", - help="Preview planned actions without applying any changes to the database.", - ), + dry_run: bool = False, ) -> None: """Reset each owned sequence to MAX(pk) + 1 to prevent insert conflicts after bulk loads.""" - try: - conn, engine = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="reset-sequences", + with console.status("Resetting PostgreSQL sequences..."): + results = reset_model_sequences( + engine, + db_schema=conn.db_schema, vocabulary_included=vocabulary_included, - mode_label="dry-run" if dry_run else "apply", + dry_run=dry_run, ) - with console.status("Resetting PostgreSQL sequences..."): - results = reset_model_sequences( - engine, - db_schema=conn.db_schema, - vocabulary_included=vocabulary_included, - dry_run=dry_run, - ) - console.print(render_sequence_reset_results(results)) - console.print(render_sequence_reset_summary(results, dry_run=dry_run)) - except Exception as exc: - handle_error(exc) + console.print(render_sequence_reset_results(results)) + console.print(render_sequence_reset_summary(results, dry_run=dry_run)) @app.command( "truncate-tables", help=f"Truncate selected ORM-managed OMOP tables; aborts if external FK references would block unless --cascade is set. {backend_support_note('truncate_table_batch')}", ) +@omop_command("truncate-tables", dry_run=True) def truncate_tables_command( - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. 
Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", - ), + conn, + engine, scope: TableScope | None = typer.Option( None, "--scope", @@ -536,11 +477,7 @@ def truncate_tables_command( "--yes", help="Confirm the destructive operation. Required when not using --dry-run.", ), - dry_run: bool = typer.Option( - False, - "--dry-run", - help="Preview planned actions without applying any changes to the database.", - ), + dry_run: bool = False, ) -> None: """Truncate selected ORM-managed OMOP tables; aborts if external FK references would block unless --cascade is set.""" resolved_scope, resolved_tables = resolve_selection(scope=scope, tables=table) @@ -554,36 +491,23 @@ def truncate_tables_command( render_error("Truncation is destructive. 
Re-run with `--yes`, or use `--dry-run` first.") ) raise typer.Exit(code=1) - - try: - conn, engine = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="truncate-tables", - vocabulary_included=None, - mode_label="dry-run" if dry_run else "apply", + with console.status("Truncating selected tables..."): + results = truncate_tables( + engine, + db_schema=conn.db_schema, + scope=resolved_scope, + table_names=resolved_tables, + restart_identities=restart_identities, + cascade=cascade, + dry_run=dry_run, ) - with console.status("Truncating selected tables..."): - results = truncate_tables( - engine, - db_schema=conn.db_schema, - scope=resolved_scope, - table_names=resolved_tables, - restart_identities=restart_identities, - cascade=cascade, - dry_run=dry_run, - ) - console.print(render_truncate_results(results)) - console.print( - render_truncate_summary( - results, - dry_run=dry_run, - restart_identities=restart_identities, - cascade=cascade, - ) + console.print(render_truncate_results(results)) + console.print( + render_truncate_summary( + results, + dry_run=dry_run, + restart_identities=restart_identities, + cascade=cascade, ) - console.print(render_truncate_note()) - except Exception as exc: - handle_error(exc) + ) + console.print(render_truncate_note()) diff --git a/omop_alchemy/maintenance/cli_vocab.py b/omop_alchemy/maintenance/cli_vocab.py index 0c7a517..18dcf92 100644 --- a/omop_alchemy/maintenance/cli_vocab.py +++ b/omop_alchemy/maintenance/cli_vocab.py @@ -26,7 +26,7 @@ ) from ..backends import resolve_backend -from ._cli_utils import handle_error, setup_cli_cmd +from ._cli_utils import omop_command from .cli_foreign_keys import manage_foreign_key_triggers from .cli_indexes import IndexAction, manage_indexes from .cli_tables import reset_model_sequences @@ -595,23 +595,14 @@ def load_vocab_source( "load-vocab-source", help="Load Athena vocabulary CSV files from a configured source path using 
the ORM staged CSV loader.", ) +@omop_command("load-vocab-source", vocabulary_included=True, dry_run=True) def load_vocab_source_command( + conn, + engine, athena_source: str | None = typer.Option( None, help="Path to the unzipped Athena vocabulary CSV directory. Falls back to the saved athena-source default.", ), - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target. Sets search_path on PostgreSQL before loading; not supported on SQLite.", - ), merge_strategy: MergeStrategy = typer.Option( "replace", help=( @@ -634,25 +625,10 @@ def load_vocab_source_command( "If the load fails mid-way, run `indexes enable --vocab` and `foreign-keys enable` to recover." ), ), - dry_run: bool = typer.Option( - False, - "--dry-run", - help="Preview planned actions without applying any changes to the database.", - ), + dry_run: bool = False, ) -> None: """Load all Athena vocabulary CSVs from the configured source path, optionally toggling indexes and FK triggers for speed.""" - try: - conn, engine = setup_cli_cmd( - console=console, - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - command_name="load-vocab-source", - vocabulary_included=True, - mode_label="dry-run" if dry_run else "apply", - athena_source=athena_source - ) - if conn.athena_source is None: + if conn.athena_source is None: console.print( render_error( "No Athena vocabulary source path is configured. 
" @@ -662,42 +638,40 @@ def load_vocab_source_command( ) raise typer.Exit(code=1) - with Progress( - SpinnerColumn(), - TextColumn("[bold cyan]{task.description}"), - BarColumn(bar_width=None), - TaskProgressColumn(), - TimeElapsedColumn(), - console=console, - transient=False, - ) as progress: - task_id = progress.add_task( - "Preparing Athena vocabulary load...", total=100.0, completed=0 - ) - completed_tables: list[str] = [] - - def _update_progress(event: VocabularyLoadProgress) -> None: - progress.update(task_id, completed=event.percent, description=event.detail) - if event.phase == "commit-complete" and event.table_name is not None: - completed_tables.append(event.table_name) - progress.console.print( - f"[green]loaded[/green] [bold]{event.table_name}[/bold] " - f"({len(completed_tables)}/{event.table_count})" - ) + with Progress( + SpinnerColumn(), + TextColumn("[bold cyan]{task.description}"), + BarColumn(bar_width=None), + TaskProgressColumn(), + TimeElapsedColumn(), + console=console, + transient=False, + ) as progress: + task_id = progress.add_task( + "Preparing Athena vocabulary load...", total=100.0, completed=0 + ) + completed_tables: list[str] = [] + + def _update_progress(event: VocabularyLoadProgress) -> None: + progress.update(task_id, completed=event.percent, description=event.detail) + if event.phase == "commit-complete" and event.table_name is not None: + completed_tables.append(event.table_name) + progress.console.print( + f"[green]loaded[/green] [bold]{event.table_name}[/bold] " + f"({len(completed_tables)}/{event.table_count})" + ) - report = load_vocab_source( - engine, - source_path=conn.athena_source, - db_schema=conn.db_schema, - dry_run=dry_run, - merge_strategy=merge_strategy, - chunksize=None if chunksize == 0 else chunksize, - bulk_mode=bulk_mode, - progress_callback=_update_progress, - ) - progress.update(task_id, completed=100.0, description="Athena vocabulary load complete") + report = load_vocab_source( + engine, + 
source_path=conn.athena_source, + db_schema=conn.db_schema, + dry_run=dry_run, + merge_strategy=merge_strategy, + chunksize=None if chunksize == 0 else chunksize, + bulk_mode=bulk_mode, + progress_callback=_update_progress, + ) + progress.update(task_id, completed=100.0, description="Athena vocabulary load complete") - console.print(render_vocab_load_results(report.results)) - console.print(render_vocab_load_summary(report, dry_run=dry_run)) - except Exception as exc: - handle_error(exc) + console.print(render_vocab_load_results(report.results)) + console.print(render_vocab_load_summary(report, dry_run=dry_run)) diff --git a/tests/test_truncate_tables.py b/tests/test_truncate_tables.py index 584e197..ab9a29c 100644 --- a/tests/test_truncate_tables.py +++ b/tests/test_truncate_tables.py @@ -38,8 +38,16 @@ def test_truncate_tables_reports_blocking_foreign_key_references(monkeypatch, tm assert "--cascade" in message -def test_truncate_tables_cli_requires_confirmation(): +def test_truncate_tables_cli_requires_confirmation(monkeypatch): """Test truncate tables cli requires confirmation.""" + monkeypatch.setattr( + "omop_alchemy.maintenance._cli_utils.build_engine", + lambda *, dotenv, engine_schema: "ENGINE", + ) + monkeypatch.setattr( + "omop_alchemy.db.resolve_connection", + lambda **kwargs: type("C", (), {"dotenv": None, "engine_schema": None, "db_schema": None, "athena_source": None})(), + ) result = runner.invoke(app, ["truncate-tables", "--scope", "clinical"]) assert result.exit_code == 1 From 9f560150f487dfa2a20e2d29480cf631131d28ef Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Sun, 24 May 2026 23:12:38 +0000 Subject: [PATCH 04/25] Split up cli_schema into various smaller ones --- omop_alchemy/maintenance/cli_schema.py | 1369 +---------------- omop_alchemy/maintenance/cli_schema_doctor.py | 359 +++++ omop_alchemy/maintenance/cli_schema_info.py | 421 +++++ .../maintenance/cli_schema_reconcile.py | 408 +++++ .../maintenance/cli_schema_summary.py | 63 + 
omop_alchemy/maintenance/cli_schema_tables.py | 127 ++ 6 files changed, 1404 insertions(+), 1343 deletions(-) create mode 100644 omop_alchemy/maintenance/cli_schema_doctor.py create mode 100644 omop_alchemy/maintenance/cli_schema_info.py create mode 100644 omop_alchemy/maintenance/cli_schema_reconcile.py create mode 100644 omop_alchemy/maintenance/cli_schema_summary.py create mode 100644 omop_alchemy/maintenance/cli_schema_tables.py diff --git a/omop_alchemy/maintenance/cli_schema.py b/omop_alchemy/maintenance/cli_schema.py index 71bedb9..19daf55 100644 --- a/omop_alchemy/maintenance/cli_schema.py +++ b/omop_alchemy/maintenance/cli_schema.py @@ -1,39 +1,36 @@ from __future__ import annotations -from dataclasses import dataclass -import importlib.metadata -import importlib.util -import os -import shutil - -import sqlalchemy as sa -from sqlalchemy.exc import SQLAlchemyError -from sqlalchemy.engine.interfaces import ReflectedIndex import typer -from omop_alchemy import create_engine_with_dependencies, load_environment -from omop_alchemy.backends.resolve import SupportedDialect -from omop_alchemy.db import get_engine_name +from omop_alchemy import load_environment -from ..backends import resolve_backend from ._cli_utils import omop_command -from .cli_config import defaults_path -from .cli_foreign_keys import ( - ForeignKeyStatusResult, - ForeignKeyValidationReport, - collect_foreign_key_trigger_status, - validate_foreign_key_constraints, +from .cli_schema_doctor import ( + DoctorCheck as DoctorCheck, + DoctorReport as DoctorReport, + DoctorRecommendation as DoctorRecommendation, + collect_doctor_report, +) +from .cli_schema_info import ( + CommandSupport as CommandSupport, + DependencyStatus as DependencyStatus, + MaintenanceInfo as MaintenanceInfo, + collect_maintenance_info, +) +from .cli_schema_reconcile import ( + ReconciliationIssue as ReconciliationIssue, + SchemaReconciliationReport as SchemaReconciliationReport, + TableReconciliationResult as 
TableReconciliationResult, + reconcile_schema, ) -from .cli_indexes import _cluster_target_name -from .tables import ( - MaintenanceTable, - TableCategory, - collect_maintenance_tables, - missing_maintenance_tables, - qualified_table_name, - schema_adjusted_metadata, - select_maintenance_tables, - select_omop_tables, +from .cli_schema_summary import ( + TableSummaryResult as TableSummaryResult, + collect_data_summary, +) +from .cli_schema_tables import ( + TableCreationResult as TableCreationResult, + collect_missing_tables as collect_missing_tables, + create_missing_tables, ) from .ui import ( console, @@ -56,1320 +53,6 @@ ) -def _backend_label(dialect_name: str) -> str: - from ..backends.resolve import _DIALECT_TO_BACKEND_MAP, SupportedDialect - try: - return _DIALECT_TO_BACKEND_MAP[SupportedDialect(dialect_name)].name - except (ValueError, KeyError): - return dialect_name - - -# --------------------------------------------------------------------------- -# info -# --------------------------------------------------------------------------- - -@dataclass(frozen=True) -class DependencyStatus: - """Installation status of a Python package or external tool dependency.""" - - name: str - installed: bool - version: str | None - - -@dataclass(frozen=True) -class CommandSupport: - """Readiness assessment for one CLI command given the current backend and connection state.""" - - command_name: str - requirement: str - status: str - detail: str - - -@dataclass(frozen=True) -class MaintenanceInfo: - """Full environment snapshot: package version, connection state, and per-command readiness.""" - - package_version: str - cli_path: str | None - pg_dump_path: str | None - pg_restore_path: str | None - psql_path: str | None - defaults_file: str - defaults_exists: bool - dotenv_path: str | None - dotenv_exists: bool | None - engine_schema: str | None - db_schema: str | None - engine_url: str | None - backend: str | None - engine_created: bool - engine_error: str | None - 
connection_ready: bool - connection_error: str | None - managed_table_count: int - existing_table_count: int | None - missing_table_count: int | None - vocabulary_included: bool - dependencies: tuple[DependencyStatus, ...] - command_support: tuple[CommandSupport, ...] - - -def _package_version() -> str: - """Return the installed omop-alchemy package version string.""" - return importlib.metadata.version("omop-alchemy") - - -def _dependency_status(distribution_name: str, module_name: str) -> DependencyStatus: - """Check whether a Python package is importable and return its installed version if found.""" - installed = importlib.util.find_spec(module_name) is not None - version: str | None = None - if installed: - try: - version = importlib.metadata.version(distribution_name) - except importlib.metadata.PackageNotFoundError: - version = None - return DependencyStatus(name=distribution_name, installed=installed, version=version) - - -def _external_dependency_status(name: str, executable_name: str) -> DependencyStatus: - """Check whether an external CLI tool is on PATH and return a DependencyStatus (version always None).""" - return DependencyStatus( - name=name, - installed=shutil.which(executable_name) is not None, - version=None, - ) - - -def _command_support_for_unavailable_engine(detail: str) -> tuple[CommandSupport, ...]: - """Return a full CommandSupport tuple with every command marked blocked, used when the engine cannot be created.""" - blocked = "blocked" - return ( - CommandSupport("doctor", "Any SQLAlchemy backend", blocked, detail), - CommandSupport("data-summary", "Any SQLAlchemy backend", blocked, detail), - CommandSupport("analyze-tables", "PostgreSQL/SQLite", blocked, detail), - CommandSupport("create-missing-tables", "Any SQLAlchemy backend", blocked, detail), - CommandSupport("indexes disable", "Any SQLAlchemy backend", blocked, detail), - CommandSupport("indexes enable", "Any SQLAlchemy backend", blocked, detail), - CommandSupport("reconcile-schema", 
"Any SQLAlchemy backend", blocked, detail), - CommandSupport("load-vocab-source", "SQLite/PostgreSQL + Athena CSV source", blocked, detail), - CommandSupport("backup-database", "PostgreSQL + pg_dump", blocked, detail), - CommandSupport("restore-database", "PostgreSQL + pg_restore/psql", blocked, detail), - CommandSupport("fulltext install", "PostgreSQL", blocked, detail), - CommandSupport("fulltext populate", "PostgreSQL", blocked, detail), - CommandSupport("fulltext drop", "PostgreSQL", blocked, detail), - CommandSupport("reset-sequences", "PostgreSQL", blocked, detail), - CommandSupport("truncate-tables", "PostgreSQL", blocked, detail), - CommandSupport("foreign-keys disable", "PostgreSQL", blocked, detail), - CommandSupport("foreign-keys enable", "PostgreSQL", blocked, detail), - CommandSupport("foreign-keys enable --strict", "PostgreSQL", blocked, detail), - CommandSupport("foreign-keys status", "PostgreSQL", blocked, detail), - CommandSupport("foreign-keys validate", "PostgreSQL", blocked, detail), - ) - - -def _command_support_for_backend( - *, - backend: str, - engine_created: bool, - engine_error: str | None, - connection_ready: bool, - connection_error: str | None, - pg_dump_path: str | None, - pg_restore_path: str | None, - psql_path: str | None, -) -> tuple[CommandSupport, ...]: - """Compute the readiness status of every CLI command given the current backend, connection state, and tool availability.""" - current_backend = _backend_label(backend) - if not engine_created: - blocked_detail = ( - f"Backend resolved to {current_backend}, but the engine could not be created: {engine_error}" - if engine_error - else f"Backend resolved to {current_backend}, but the engine could not be created." - ) - else: - blocked_detail = ( - f"Backend resolved to {current_backend}, but the connection test failed: {connection_error}" - if connection_error - else f"Backend resolved to {current_backend}, but the connection test failed." 
- ) - portable_status = "ready" if connection_ready else "blocked" - portable_detail = ( - f"Ready on {current_backend}." if connection_ready else blocked_detail - ) - - if backend == SupportedDialect.POSTGRESQL: - analyze_status = portable_status - analyze_detail = ( - "Ready on PostgreSQL; ANALYZE and VACUUM ANALYZE are both supported." - if connection_ready - else blocked_detail - ) - enable_indexes_status = portable_status - enable_indexes_detail = ( - "Ready on PostgreSQL; index DDL and clustering metadata are both supported." - if connection_ready - else blocked_detail - ) - postgresql_status = portable_status - postgresql_detail = "Ready on PostgreSQL." if connection_ready else blocked_detail - vocab_load_status = portable_status - vocab_load_detail = ( - "Ready on PostgreSQL when an Athena source path is configured." - if connection_ready - else blocked_detail - ) - elif backend == "sqlite": - analyze_status = "limited" if connection_ready else "blocked" - analyze_detail = ( - "Ready on SQLite; ANALYZE is supported, but `--vacuum` is unavailable." - if connection_ready - else blocked_detail - ) - enable_indexes_status = "limited" if connection_ready else "blocked" - enable_indexes_detail = ( - "Ready on SQLite; index DDL is supported, but clustering metadata will be skipped." - if connection_ready - else blocked_detail - ) - postgresql_status = "unsupported" if connection_ready else "blocked" - postgresql_detail = ( - f"Requires PostgreSQL. Current backend: {current_backend}." - if connection_ready - else blocked_detail - ) - vocab_load_status = portable_status - vocab_load_detail = ( - "Ready on SQLite when an Athena source path is configured." - if connection_ready - else blocked_detail - ) - else: - analyze_status = "unsupported" if connection_ready else "blocked" - analyze_detail = ( - f"Requires PostgreSQL or SQLite. Current backend: {current_backend}." 
- if connection_ready - else blocked_detail - ) - enable_indexes_status = "limited" if connection_ready else "blocked" - enable_indexes_detail = ( - f"Ready on {current_backend}; index DDL is supported, but clustering metadata will be skipped." - if connection_ready - else blocked_detail - ) - postgresql_status = "unsupported" if connection_ready else "blocked" - postgresql_detail = ( - f"Requires PostgreSQL. Current backend: {current_backend}." - if connection_ready - else blocked_detail - ) - vocab_load_status = "unsupported" if connection_ready else "blocked" - vocab_load_detail = ( - f"Requires SQLite or PostgreSQL plus a configured Athena source path. Current backend: {current_backend}." - if connection_ready - else blocked_detail - ) - - return ( - CommandSupport("doctor", "Any SQLAlchemy backend", portable_status, portable_detail), - CommandSupport("data-summary", "Any SQLAlchemy backend", portable_status, portable_detail), - CommandSupport("analyze-tables", "PostgreSQL/SQLite", analyze_status, analyze_detail), - CommandSupport("create-missing-tables", "Any SQLAlchemy backend", portable_status, portable_detail), - CommandSupport("indexes disable", "Any SQLAlchemy backend", portable_status, portable_detail), - CommandSupport("indexes enable", "Any SQLAlchemy backend", enable_indexes_status, enable_indexes_detail), - CommandSupport("reconcile-schema", "Any SQLAlchemy backend", portable_status, portable_detail), - CommandSupport("load-vocab-source", "SQLite/PostgreSQL + Athena CSV source", vocab_load_status, vocab_load_detail), - CommandSupport( - "backup-database", - "PostgreSQL + pg_dump", - ( - "ready" - if connection_ready and backend == SupportedDialect.POSTGRESQL and pg_dump_path is not None - else "blocked" - if backend == SupportedDialect.POSTGRESQL - else "unsupported" - if connection_ready - else "blocked" - ), - ( - "Ready on PostgreSQL; `pg_dump` is available." 
- if connection_ready and backend == SupportedDialect.POSTGRESQL and pg_dump_path is not None - else "PostgreSQL is configured, but `pg_dump` is not on PATH." - if connection_ready and backend == SupportedDialect.POSTGRESQL - else f"Requires PostgreSQL. Current backend: {current_backend}." - if connection_ready - else blocked_detail - ), - ), - CommandSupport( - "restore-database", - "PostgreSQL + pg_restore/psql", - ( - "ready" - if connection_ready and backend == SupportedDialect.POSTGRESQL and (pg_restore_path is not None or psql_path is not None) - else "blocked" - if backend == SupportedDialect.POSTGRESQL - else "unsupported" - if connection_ready - else "blocked" - ), - ( - "Ready on PostgreSQL; restore client tooling is available." - if connection_ready and backend == SupportedDialect.POSTGRESQL and (pg_restore_path is not None or psql_path is not None) - else "PostgreSQL is configured, but neither `pg_restore` nor `psql` is on PATH." - if connection_ready and backend == SupportedDialect.POSTGRESQL - else f"Requires PostgreSQL. Current backend: {current_backend}." 
- if connection_ready - else blocked_detail - ), - ), - CommandSupport("fulltext install", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("fulltext populate", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("fulltext drop", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("reset-sequences", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("truncate-tables", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("foreign-keys disable", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("foreign-keys enable", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("foreign-keys enable --strict", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("foreign-keys status", "PostgreSQL", postgresql_status, postgresql_detail), - CommandSupport("foreign-keys validate", "PostgreSQL", postgresql_status, postgresql_detail), - ) - - -def collect_maintenance_info( - *, - engine_schema: str | None = None, - db_schema: str | None = None, - dotenv: str | None = None, - vocabulary_included: bool = True, -) -> MaintenanceInfo: - """Probe the current environment: resolve config, attempt a connection, and assess per-command readiness.""" - load_environment(dotenv or "") - pg_dump_path = shutil.which("pg_dump") - pg_restore_path = shutil.which("pg_restore") - psql_path = shutil.which("psql") - defaults_file = defaults_path() - dependencies = ( - _dependency_status("sqlalchemy", "sqlalchemy"), - _dependency_status("typer", "typer"), - _dependency_status("rich", "rich"), - _dependency_status("psycopg", "psycopg"), - _dependency_status("psycopg2-binary", "psycopg2"), - _external_dependency_status("pg_dump", "pg_dump"), - _external_dependency_status("pg_restore", "pg_restore"), - _external_dependency_status("psql", "psql"), - ) - managed_tables = select_maintenance_tables( - exclude_categories=(() if vocabulary_included else 
(TableCategory.VOCABULARY,)) - ) - cli_path = shutil.which("omop-alchemy") - dotenv_exists = None if dotenv is None else os.path.exists(dotenv) - - engine_name: str | None = None - engine_url: str | None = None - backend: str | None = None - engine_created = False - engine_error: str | None = None - connection_ready = False - connection_error: str | None = None - existing_table_count: int | None = None - missing_table_count: int | None = None - - try: - engine_name = get_engine_name(engine_schema) - url = sa.engine.make_url(engine_name) - engine_url = url.render_as_string(hide_password=True) - backend = url.get_backend_name() - except RuntimeError as exc: - engine_error = str(exc) - except Exception as exc: - engine_error = f"Could not resolve engine configuration: {exc}" - - if engine_name is not None: - try: - engine = create_engine_with_dependencies(engine_name, future=True) - engine_created = True - except RuntimeError as exc: - engine_error = str(exc) - except Exception as exc: - engine_error = f"Could not create engine: {exc}" - else: - try: - with engine.connect() as connection: - connection.exec_driver_sql("SELECT 1") - connection_ready = True - missing_tables = collect_missing_tables( - engine, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - ) - missing_table_count = len(missing_tables) - existing_table_count = len(managed_tables) - missing_table_count - except SQLAlchemyError as exc: - connection_error = f"{exc.__class__.__name__}: {exc}" - except Exception as exc: - connection_error = str(exc) - finally: - engine.dispose() - - if backend is None: - command_support = _command_support_for_unavailable_engine( - engine_error or "No engine configuration could be resolved." 
- ) - else: - command_support = _command_support_for_backend( - backend=backend, - engine_created=engine_created, - engine_error=engine_error, - connection_ready=connection_ready, - connection_error=connection_error, - pg_dump_path=pg_dump_path, - pg_restore_path=pg_restore_path, - psql_path=psql_path, - ) - - return MaintenanceInfo( - package_version=_package_version(), - cli_path=cli_path, - pg_dump_path=pg_dump_path, - pg_restore_path=pg_restore_path, - psql_path=psql_path, - defaults_file=str(defaults_file), - defaults_exists=defaults_file.exists(), - dotenv_path=dotenv, - dotenv_exists=dotenv_exists, - engine_schema=engine_schema, - db_schema=db_schema, - engine_url=engine_url, - backend=backend, - engine_created=engine_created, - engine_error=engine_error, - connection_ready=connection_ready, - connection_error=connection_error, - managed_table_count=len(managed_tables), - existing_table_count=existing_table_count, - missing_table_count=missing_table_count, - vocabulary_included=vocabulary_included, - dependencies=dependencies, - command_support=command_support, - ) - - -# --------------------------------------------------------------------------- -# doctor -# --------------------------------------------------------------------------- - -@dataclass(frozen=True) -class DoctorCheck: - """Result of a single named maintenance health check (e.g. 'managed tables', 'schema drift').""" - - name: str - status: str - detail: str - - -@dataclass(frozen=True) -class DoctorRecommendation: - """Actionable recommendation derived from health check results, with an optional CLI command hint.""" - - status: str - summary: str - action: str | None - - -@dataclass(frozen=True) -class DoctorReport: - """Complete doctor report: health checks, prioritised recommendations, and optional deep-inspection data.""" - - info: MaintenanceInfo - checks: tuple[DoctorCheck, ...] - recommendations: tuple[DoctorRecommendation, ...] 
- reconciliation: SchemaReconciliationReport | None - foreign_key_status: tuple[ForeignKeyStatusResult, ...] | None - foreign_key_validation: ForeignKeyValidationReport | None - - -def _build_recommendations( - *, - info: MaintenanceInfo, - reconciliation: SchemaReconciliationReport | None, - foreign_key_status: tuple[ForeignKeyStatusResult, ...] | None, - foreign_key_validation: ForeignKeyValidationReport | None, -) -> tuple[DoctorRecommendation, ...]: - """Derive a prioritised list of actionable recommendations from the doctor check results.""" - recommendations: list[DoctorRecommendation] = [] - - if not info.connection_ready: - recommendations.append( - DoctorRecommendation( - status="failed", - summary="Database connection is not ready for maintenance operations.", - action="Check the engine configuration, backend driver, and target database reachability.", - ) - ) - return tuple(recommendations) - - if info.missing_table_count: - recommendations.append( - DoctorRecommendation( - status="warning", - summary=f"{info.missing_table_count} ORM-managed table(s) are missing from the target database.", - action="Run `omop-alchemy create-missing-tables` before attempting bulk operations.", - ) - ) - - if reconciliation is not None and reconciliation.issues: - recommendations.append( - DoctorRecommendation( - status="warning", - summary=f"Schema reconciliation found {len(reconciliation.issues)} difference(s) against ORM metadata.", - action="Review `omop-alchemy reconcile-schema` output before continuing with ETL or maintenance work.", - ) - ) - - if foreign_key_status is not None and any( - item.disabled_trigger_count > 0 for item in foreign_key_status - ): - recommendations.append( - DoctorRecommendation( - status="warning", - summary="Some PostgreSQL RI triggers are currently disabled.", - action="If loading is complete, run `omop-alchemy foreign-keys validate` and then `omop-alchemy foreign-keys enable --strict`.", - ) - ) - - if ( - foreign_key_validation is not 
None - and any(result.status == "failed" for result in foreign_key_validation.results) - ): - recommendations.append( - DoctorRecommendation( - status="failed", - summary="Foreign key validation found violating rows.", - action="Fix the reported rows, then rerun `omop-alchemy foreign-keys enable --strict`.", - ) - ) - - if info.backend == SupportedDialect.POSTGRESQL and info.pg_dump_path is None: - recommendations.append( - DoctorRecommendation( - status="warning", - summary="`pg_dump` is not on PATH, so backup-database is unavailable from this machine.", - action="Install PostgreSQL client tools on the machine running `omop-alchemy`.", - ) - ) - - if ( - info.backend == SupportedDialect.POSTGRESQL - and info.pg_restore_path is None - and info.psql_path is None - ): - recommendations.append( - DoctorRecommendation( - status="warning", - summary="Neither `pg_restore` nor `psql` is on PATH, so restore-database is unavailable from this machine.", - action="Install PostgreSQL client tools on the machine running `omop-alchemy`.", - ) - ) - - if not recommendations: - recommendations.append( - DoctorRecommendation( - status="passed", - summary="No obvious maintenance blockers were detected.", - action=None, - ) - ) - - return tuple(recommendations) - - -def collect_doctor_report( - *, - engine_schema: str | None = None, - db_schema: str | None = None, - dotenv: str | None = None, - vocabulary_included: bool = True, - deep: bool = False, -) -> DoctorReport: - """Run all maintenance health checks and return a prioritised report with recommendations.""" - load_environment(dotenv or "") - info = collect_maintenance_info( - engine_schema=engine_schema, - db_schema=db_schema, - dotenv=dotenv, - vocabulary_included=vocabulary_included, - ) - - checks = [ - DoctorCheck( - name="connection", - status="passed" if info.connection_ready else "failed", - detail=( - "Target database connection succeeded." 
- if info.connection_ready - else info.connection_error or info.engine_error or "Connection could not be established." - ), - ) - ] - - reconciliation: SchemaReconciliationReport | None = None - foreign_key_status: tuple[ForeignKeyStatusResult, ...] | None = None - foreign_key_validation: ForeignKeyValidationReport | None = None - - if info.connection_ready: - engine = create_engine_with_dependencies(get_engine_name(engine_schema), future=True) - try: - missing_table_count = info.missing_table_count or 0 - checks.append( - DoctorCheck( - name="managed tables", - status="passed" if missing_table_count == 0 else "warning", - detail=( - "All selected ORM-managed tables exist." - if missing_table_count == 0 - else f"{missing_table_count} selected table(s) are missing." - ), - ) - ) - - if deep: - reconciliation = reconcile_schema( - engine, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - ) - checks.append( - DoctorCheck( - name="schema drift", - status="passed" if not reconciliation.issues else "warning", - detail=( - "ORM metadata matches the target database." - if not reconciliation.issues - else f"{len(reconciliation.issues)} difference(s) detected." - ), - ) - ) - else: - checks.append( - DoctorCheck( - name="schema drift", - status="skipped", - detail="Run `omop-alchemy doctor --deep` to reconcile ORM metadata against the target database.", - ) - ) - - if info.backend == SupportedDialect.POSTGRESQL: - foreign_key_status = tuple( - collect_foreign_key_trigger_status( - engine, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - ) - ) - disabled_tables = sum( - item.disabled_trigger_count > 0 for item in foreign_key_status - ) - checks.append( - DoctorCheck( - name="foreign keys", - status="passed" if disabled_tables == 0 else "warning", - detail=( - "All inspected RI triggers are enabled." - if disabled_tables == 0 - else f"{disabled_tables} table(s) still have disabled RI triggers." 
- ), - ) - ) - - if deep: - foreign_key_validation = validate_foreign_key_constraints( - engine, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - ) - violating_tables = sum( - result.status == "failed" for result in foreign_key_validation.results - ) - checks.append( - DoctorCheck( - name="foreign key validation", - status="passed" if violating_tables == 0 else "failed", - detail=( - "All selected foreign key relationships passed validation." - if violating_tables == 0 - else f"{violating_tables} table(s) have violating foreign key rows." - ), - ) - ) - else: - checks.append( - DoctorCheck( - name="foreign key validation", - status="skipped", - detail="Run `omop-alchemy doctor --deep` to validate selected foreign key relationships.", - ) - ) - else: - checks.append( - DoctorCheck( - name="foreign keys", - status="skipped", - detail="Foreign key trigger inspection is only available on PostgreSQL.", - ) - ) - checks.append( - DoctorCheck( - name="foreign key validation", - status="skipped", - detail="Foreign key validation is only available on PostgreSQL.", - ) - ) - finally: - engine.dispose() - else: - checks.extend( - ( - DoctorCheck( - name="managed tables", - status="skipped", - detail="Skipped because the database connection is not ready.", - ), - DoctorCheck( - name="foreign keys", - status="skipped", - detail="Skipped because the database connection is not ready.", - ), - DoctorCheck( - name="schema drift", - status="skipped", - detail="Skipped because the database connection is not ready.", - ), - DoctorCheck( - name="foreign key validation", - status="skipped", - detail="Skipped because the database connection is not ready.", - ), - ) - ) - - if info.backend == SupportedDialect.POSTGRESQL: - backup_tools_ready = info.pg_dump_path is not None and ( - info.pg_restore_path is not None or info.psql_path is not None - ) - checks.append( - DoctorCheck( - name="backup tooling", - status="passed" if backup_tools_ready else "warning", - detail=( - 
"PostgreSQL backup and restore client tools are available." - if backup_tools_ready - else "PostgreSQL client tools are incomplete on this machine." - ), - ) - ) - else: - checks.append( - DoctorCheck( - name="backup tooling", - status="skipped", - detail="Backup and restore tooling checks are only relevant for PostgreSQL targets.", - ) - ) - - return DoctorReport( - info=info, - checks=tuple(checks), - recommendations=_build_recommendations( - info=info, - reconciliation=reconciliation, - foreign_key_status=foreign_key_status, - foreign_key_validation=foreign_key_validation, - ), - reconciliation=reconciliation, - foreign_key_status=foreign_key_status, - foreign_key_validation=foreign_key_validation, - ) - - -# --------------------------------------------------------------------------- -# reconcile_schema -# --------------------------------------------------------------------------- - -@dataclass(frozen=True) -class ReconciliationIssue: - """A single schema drift detail: column, index, FK, or cluster mismatch between ORM metadata and the database.""" - - table_name: str - category: TableCategory - component: str - object_name: str - status: str - expected: str | None - actual: str | None - detail: str - - -@dataclass(frozen=True) -class TableReconciliationResult: - """Per-table schema reconciliation summary: whether ORM metadata matches the live database.""" - - table_name: str - category: TableCategory - model_name: str - model_module: str - status: str - issue_count: int - detail: str - - -@dataclass(frozen=True) -class SchemaReconciliationReport: - """Complete reconciliation report across all selected ORM-managed tables.""" - - backend: str - table_results: tuple[TableReconciliationResult, ...] - issues: tuple[ReconciliationIssue, ...] 
- - -def _schema_table(table: sa.Table, db_schema: str | None) -> sa.Table: - """Return table unchanged when db_schema is None, or a schema-qualified copy when a schema is specified.""" - if db_schema is None: - return table - - metadata = sa.MetaData() - return table.to_metadata( - metadata, - schema=db_schema, - referred_schema_fn=( - lambda _table, to_schema, _constraint, _referred_schema: to_schema - ), - ) - - -def _normalized_type(type_: sa.types.TypeEngine[object], dialect: sa.engine.Dialect) -> str: - """Compile a SQLAlchemy type to its dialect-specific string and normalise whitespace/case for comparison.""" - return type_.compile(dialect=dialect).lower().replace(" ", "") - - -def _expected_foreign_keys( - table: sa.Table, -) -> dict[tuple[tuple[str, ...], str, tuple[str, ...]], sa.ForeignKeyConstraint]: - """Index ORM-defined FK constraints by (constrained_cols, referred_table, referred_cols) for diffing.""" - expected: dict[tuple[tuple[str, ...], str, tuple[str, ...]], sa.ForeignKeyConstraint] = {} - for constraint in table.foreign_key_constraints: - constrained_columns = tuple(element.parent.name for element in constraint.elements) - referred_columns = tuple(element.column.name for element in constraint.elements) - referred_table = constraint.referred_table.name - expected[(constrained_columns, referred_table, referred_columns)] = constraint - return expected - - -def _actual_foreign_keys( - inspector: sa.Inspector, - table_name: str, - db_schema: str | None, -) -> dict[tuple[tuple[str, ...], str, tuple[str, ...]], dict[str, object]]: - """Index live FK constraints from the database inspector by the same key tuple used by _expected_foreign_keys.""" - actual: dict[tuple[tuple[str, ...], str, tuple[str, ...]], dict[str, object]] = {} - for foreign_key in inspector.get_foreign_keys(table_name, schema=db_schema): - constrained_columns = tuple(foreign_key.get("constrained_columns") or []) - referred_columns = tuple(foreign_key.get("referred_columns") or []) - 
referred_table = str(foreign_key.get("referred_table")) - actual[(constrained_columns, referred_table, referred_columns)] = foreign_key - return actual - - -def _expected_indexes(table: sa.Table) -> dict[str, sa.Index]: - """Return ORM-defined named indexes for a table, keyed by index name.""" - return { - str(index.name): index - for index in table.indexes - if index.name is not None - } - - -def _actual_indexes( - inspector: sa.Inspector, - table_name: str, - db_schema: str | None, -) -> dict[str, ReflectedIndex]: - """Return live named indexes from the database inspector, keyed by index name.""" - return { - str(index["name"]): index - for index in inspector.get_indexes(table_name, schema=db_schema) - if index.get("name") is not None - } - - - -def reconcile_schema( - engine: sa.Engine, - *, - db_schema: str | None = None, - vocabulary_included: bool = False, -) -> SchemaReconciliationReport: - """Compare ORM metadata against the live database schema; reports missing columns, indexes, FKs, and cluster state.""" - excluded_categories: tuple[TableCategory, ...] 
= ( - () if vocabulary_included else (TableCategory.VOCABULARY,) - ) - _backend = resolve_backend(engine) - selected_tables = select_maintenance_tables(exclude_categories=excluded_categories) - inspector = sa.inspect(engine) - all_issues: list[ReconciliationIssue] = [] - table_results: list[TableReconciliationResult] = [] - - with engine.connect() as connection: - for maintenance_table in selected_tables: - table_issues: list[ReconciliationIssue] = [] - exists = inspector.has_table(maintenance_table.table_name, schema=db_schema) - if not exists: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="table", - object_name=maintenance_table.table_name, - status="missing", - expected="present", - actual="absent", - detail="ORM-managed table is missing from the target database.", - ) - ) - table_results.append( - TableReconciliationResult( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - model_name=maintenance_table.model_name, - model_module=maintenance_table.model_module, - status="missing", - issue_count=1, - detail="Table is missing from the target database.", - ) - ) - all_issues.extend(table_issues) - continue - - expected_table = _schema_table(maintenance_table.table, db_schema) - expected_columns = {column.name: column for column in expected_table.columns} - actual_columns = { - str(column["name"]): column - for column in inspector.get_columns(maintenance_table.table_name, schema=db_schema) - } - actual_pk_names = tuple( - inspector.get_pk_constraint(maintenance_table.table_name, schema=db_schema).get("constrained_columns") or [] - ) - expected_pk_names = tuple(column.name for column in expected_table.primary_key.columns) - - for column_name, column in expected_columns.items(): - if column_name not in actual_columns: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, 
- component="column", - object_name=column_name, - status="missing", - expected=_normalized_type(column.type, engine.dialect), - actual=None, - detail="Column is defined in ORM metadata but missing from the database.", - ) - ) - - for column_name, column in actual_columns.items(): - if column_name not in expected_columns: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="column", - object_name=column_name, - status="unexpected", - expected=None, - actual=_normalized_type(column["type"], engine.dialect), - detail="Column exists in the database but is not defined in ORM metadata.", - ) - ) - - for column_name in sorted(set(expected_columns).intersection(actual_columns)): - expected_column = expected_columns[column_name] - actual_column = actual_columns[column_name] - expected_type = _normalized_type(expected_column.type, engine.dialect) - actual_type = _normalized_type(actual_column["type"], engine.dialect) - if expected_type != actual_type: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="column", - object_name=column_name, - status="mismatch", - expected=expected_type, - actual=actual_type, - detail="Column type differs from ORM metadata.", - ) - ) - - expected_nullable = False if column_name in expected_pk_names else bool(expected_column.nullable) - actual_nullable = False if column_name in actual_pk_names else bool(actual_column["nullable"]) - if expected_nullable != actual_nullable: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="column", - object_name=column_name, - status="mismatch", - expected="nullable" if expected_nullable else "not nullable", - actual="nullable" if actual_nullable else "not nullable", - detail="Column nullability differs from ORM metadata.", - ) - ) - - if 
expected_pk_names != actual_pk_names: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="primary_key", - object_name=maintenance_table.table_name, - status="mismatch", - expected=", ".join(expected_pk_names), - actual=", ".join(actual_pk_names) if actual_pk_names else None, - detail="Primary key columns differ from ORM metadata.", - ) - ) - - expected_fks = _expected_foreign_keys(expected_table) - actual_fks = _actual_foreign_keys(inspector, maintenance_table.table_name, db_schema) - - for signature, constraint in expected_fks.items(): - if signature not in actual_fks: - constrained_columns, referred_table, referred_columns = signature - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="foreign_key", - object_name=constraint.name or ",".join(constrained_columns), - status="missing", - expected=f"{','.join(constrained_columns)} -> {referred_table}({','.join(referred_columns)})", - actual=None, - detail="Foreign key is defined in ORM metadata but missing from the database.", - ) - ) - - for signature, foreign_key in actual_fks.items(): - if signature not in expected_fks: - constrained_columns, referred_table, referred_columns = signature - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="foreign_key", - object_name=str(foreign_key.get("name") or ",".join(constrained_columns)), - status="unexpected", - expected=None, - actual=f"{','.join(constrained_columns)} -> {referred_table}({','.join(referred_columns)})", - detail="Foreign key exists in the database but is not defined in ORM metadata.", - ) - ) - - expected_idxs = _expected_indexes(expected_table) - actual_idxs = _actual_indexes(inspector, maintenance_table.table_name, db_schema) - - for index_name, index in expected_idxs.items(): - if index_name 
not in actual_idxs: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="index", - object_name=index_name, - status="missing", - expected=", ".join(column.name for column in index.columns), - actual=None, - detail="Index is defined in ORM metadata but missing from the database.", - ) - ) - continue - - actual_index = actual_idxs[index_name] - expected_columns_for_index = tuple(column.name for column in index.columns) - actual_columns_for_index = tuple(actual_index.get("column_names") or []) - if expected_columns_for_index != actual_columns_for_index: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="index", - object_name=index_name, - status="mismatch", - expected=", ".join(expected_columns_for_index), - actual=", ".join(actual_columns_for_index) if actual_columns_for_index else None, - detail="Index columns differ from ORM metadata.", - ) - ) - if bool(index.unique) != bool(actual_index.get("unique")): - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="index", - object_name=index_name, - status="mismatch", - expected="unique" if index.unique else "non-unique", - actual="unique" if actual_index.get("unique") else "non-unique", - detail="Index uniqueness differs from ORM metadata.", - ) - ) - - for index_name, index in actual_idxs.items(): - if index_name not in expected_idxs: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="index", - object_name=index_name, - status="unexpected", - expected=None, - actual=", ".join(index.get("column_names") or []), - detail="Index exists in the database but is not defined in ORM metadata.", - ) - ) - - if engine.dialect.name == SupportedDialect.POSTGRESQL: - expected_cluster = 
_cluster_target_name(maintenance_table) - actual_cluster = _backend.get_clustered_index_name( - connection, - maintenance_table.table_name, - db_schema, - ) - if expected_cluster != actual_cluster: - table_issues.append( - ReconciliationIssue( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - component="cluster", - object_name=maintenance_table.table_name, - status=( - "missing" - if expected_cluster and not actual_cluster - else "unexpected" - if actual_cluster and not expected_cluster - else "mismatch" - ), - expected=expected_cluster, - actual=actual_cluster, - detail="Table clustering differs from ORM metadata.", - ) - ) - - table_status = "matched" if not table_issues else "drifted" - table_results.append( - TableReconciliationResult( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - model_name=maintenance_table.model_name, - model_module=maintenance_table.model_module, - status=table_status, - issue_count=len(table_issues), - detail=( - "No differences detected." - if not table_issues - else f"{len(table_issues)} difference(s) detected." 
- ), - ) - ) - all_issues.extend(table_issues) - - return SchemaReconciliationReport( - backend=engine.dialect.name, - table_results=tuple(table_results), - issues=tuple(all_issues), - ) - - -# --------------------------------------------------------------------------- -# create_missing_tables -# --------------------------------------------------------------------------- - -@dataclass(frozen=True) -class TableCreationResult: - """Outcome of attempting to create one missing ORM-managed table from SQLAlchemy metadata.""" - - table_name: str - category: TableCategory - model_name: str - model_module: str - status: str - detail: str - - -def _table_dependencies(table: MaintenanceTable) -> tuple[str, ...]: - """Return the sorted names of tables that this table's ORM FK constraints refer to.""" - return tuple( - sorted( - { - constraint.referred_table.name - for constraint in table.table.foreign_key_constraints - } - ) - ) - - -def collect_missing_tables( - engine: sa.Engine, - *, - db_schema: str | None = None, - vocabulary_included: bool = True, -) -> list[MaintenanceTable]: - """Return ORM-managed tables that are absent from the target database.""" - inspector = sa.inspect(engine) - return missing_maintenance_tables( - inspector, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - ) - - -def create_missing_tables( - engine: sa.Engine, - *, - db_schema: str | None = None, - vocabulary_included: bool = True, - dry_run: bool = False, -) -> list[TableCreationResult]: - """Create any ORM-managed tables missing from the target database; skips tables with unresolved FK dependencies.""" - inspector = sa.inspect(engine) - missing_tables = collect_missing_tables( - engine, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - ) - existing_table_names = set(inspector.get_table_names(schema=db_schema)) - missing_table_names = {table.table_name for table in missing_tables} - - blocked_dependencies: dict[str, tuple[str, ...]] = {} - for 
maintenance_table in missing_tables: - unresolved_dependencies = tuple( - dependency_name - for dependency_name in _table_dependencies(maintenance_table) - if dependency_name not in existing_table_names - and dependency_name not in missing_table_names - ) - if unresolved_dependencies: - blocked_dependencies[maintenance_table.table_name] = unresolved_dependencies - - creatable_tables = [ - table - for table in missing_tables - if table.table_name not in blocked_dependencies - ] - - results: list[TableCreationResult] = [] - with engine.begin() as connection: - if creatable_tables and not dry_run: - metadata, adjusted_tables = schema_adjusted_metadata( - collect_maintenance_tables(), - db_schema=db_schema, - ) - metadata.create_all( - bind=connection, - tables=[adjusted_tables[table.table_name] for table in creatable_tables], - checkfirst=True, - ) - - for maintenance_table in missing_tables: - blocked = blocked_dependencies.get(maintenance_table.table_name) - results.append( - TableCreationResult( - table_name=maintenance_table.table_name, - category=maintenance_table.category, - model_name=maintenance_table.model_name, - model_module=maintenance_table.model_module, - status=( - "blocked" - if blocked is not None - else "planned" - if dry_run - else "created" - ), - detail=( - "table blocked by unresolved dependencies: " + ", ".join(blocked) - if blocked is not None - else "table would be created from ORM metadata" - if dry_run - else "table created from ORM metadata" - ), - ) - ) - - return results - - -# --------------------------------------------------------------------------- -# data_summary -# --------------------------------------------------------------------------- - -@dataclass(frozen=True) -class TableSummaryResult: - """Row count and existence data for one ORM-managed OMOP table.""" - - table_name: str - category: TableCategory - model_name: str - model_module: str - primary_key_columns: tuple[str, ...] 
- exists: bool - row_count: int | None - - -def collect_data_summary( - engine: sa.Engine, - *, - db_schema: str | None = None, - vocabulary_included: bool = False, - existing_only: bool = True, -) -> list[TableSummaryResult]: - """Return row counts and existence state for each ORM-managed table in the target database.""" - inspector = sa.inspect(engine) - tables = select_omop_tables(vocabulary_included=vocabulary_included) - - results: list[TableSummaryResult] = [] - with engine.connect() as connection: - for table in tables: - exists = inspector.has_table(table.table_name, schema=db_schema) - if not exists and existing_only: - continue - - row_count: int | None = None - if exists: - row_count = int( - connection.execute( - sa.text( - f"SELECT COUNT(*) FROM {qualified_table_name(table.table_name, db_schema)}" - ) - ).scalar_one() - ) - - results.append( - TableSummaryResult( - table_name=table.table_name, - category=table.category, - model_name=table.model_name, - model_module=table.model_module, - primary_key_columns=table.primary_key_names, - exists=exists, - row_count=row_count, - ) - ) - - return results - - # --------------------------------------------------------------------------- # CLI commands # --------------------------------------------------------------------------- diff --git a/omop_alchemy/maintenance/cli_schema_doctor.py b/omop_alchemy/maintenance/cli_schema_doctor.py new file mode 100644 index 0000000..e60c140 --- /dev/null +++ b/omop_alchemy/maintenance/cli_schema_doctor.py @@ -0,0 +1,359 @@ +from __future__ import annotations + +from dataclasses import dataclass + +from omop_alchemy import create_engine_with_dependencies, load_environment +from omop_alchemy.backends.resolve import SupportedDialect +from omop_alchemy.db import get_engine_name + +from .cli_foreign_keys import ( + ForeignKeyStatusResult, + ForeignKeyValidationReport, + collect_foreign_key_trigger_status, + validate_foreign_key_constraints, +) +from .cli_schema_info import ( + 
@dataclass(frozen=True)
class DoctorCheck:
    """Outcome of one named doctor check (for example 'managed tables' or 'schema drift')."""

    name: str
    status: str
    detail: str


@dataclass(frozen=True)
class DoctorRecommendation:
    """A follow-up suggested by the doctor, optionally carrying a concrete action or command hint."""

    status: str
    summary: str
    action: str | None


@dataclass(frozen=True)
class DoctorReport:
    """Everything the doctor produced: environment info, checks, recommendations, and deep-inspection results."""

    info: MaintenanceInfo
    checks: tuple[DoctorCheck, ...]
    recommendations: tuple[DoctorRecommendation, ...]
    reconciliation: SchemaReconciliationReport | None
    foreign_key_status: tuple[ForeignKeyStatusResult, ...] | None
    foreign_key_validation: ForeignKeyValidationReport | None


def _build_recommendations(
    *,
    info: MaintenanceInfo,
    reconciliation: SchemaReconciliationReport | None,
    foreign_key_status: tuple[ForeignKeyStatusResult, ...] | None,
    foreign_key_validation: ForeignKeyValidationReport | None,
) -> tuple[DoctorRecommendation, ...]:
    """Turn the collected doctor findings into recommendations.

    A connection that is not ready short-circuits everything else and yields
    a single "failed" recommendation. Otherwise one recommendation is emitted
    per detected problem, in a fixed order, and a lone "passed" entry is
    returned when nothing was flagged.
    """
    # Nothing else can be assessed without a working connection.
    if not info.connection_ready:
        return (
            DoctorRecommendation(
                status="failed",
                summary="Database connection is not ready for maintenance operations.",
                action="Check the engine configuration, backend driver, and target database reachability.",
            ),
        )

    advice: list[DoctorRecommendation] = []

    def advise(status: str, summary: str, action: str | None) -> None:
        advice.append(DoctorRecommendation(status=status, summary=summary, action=action))

    if info.missing_table_count:
        advise(
            "warning",
            f"{info.missing_table_count} ORM-managed table(s) are missing from the target database.",
            "Run `omop-alchemy create-missing-tables` before attempting bulk operations.",
        )

    if reconciliation is not None and reconciliation.issues:
        advise(
            "warning",
            f"Schema reconciliation found {len(reconciliation.issues)} difference(s) against ORM metadata.",
            "Review `omop-alchemy reconcile-schema` output before continuing with ETL or maintenance work.",
        )

    if foreign_key_status is not None:
        triggers_disabled = any(
            item.disabled_trigger_count > 0 for item in foreign_key_status
        )
        if triggers_disabled:
            advise(
                "warning",
                "Some PostgreSQL RI triggers are currently disabled.",
                "If loading is complete, run `omop-alchemy foreign-keys validate` and then `omop-alchemy foreign-keys enable --strict`.",
            )

    if foreign_key_validation is not None:
        violations_found = any(
            result.status == "failed" for result in foreign_key_validation.results
        )
        if violations_found:
            advise(
                "failed",
                "Foreign key validation found violating rows.",
                "Fix the reported rows, then rerun `omop-alchemy foreign-keys enable --strict`.",
            )

    # Backup/restore tooling only matters for PostgreSQL targets.
    if info.backend == SupportedDialect.POSTGRESQL:
        install_hint = "Install PostgreSQL client tools on the machine running `omop-alchemy`."
        if info.pg_dump_path is None:
            advise(
                "warning",
                "`pg_dump` is not on PATH, so backup-database is unavailable from this machine.",
                install_hint,
            )
        if info.pg_restore_path is None and info.psql_path is None:
            advise(
                "warning",
                "Neither `pg_restore` nor `psql` is on PATH, so restore-database is unavailable from this machine.",
                install_hint,
            )

    if not advice:
        advise("passed", "No obvious maintenance blockers were detected.", None)

    return tuple(advice)
def _managed_tables_outcome(
    missing_table_count: int | None,
    inspection_error: str | None = None,
) -> tuple[str, str]:
    """Return the ``(status, detail)`` pair for the 'managed tables' check.

    ``missing_table_count`` is ``None`` when the table inspection did not
    produce a count. That case must not be reported as "all tables exist",
    so it is surfaced as a warning, with ``inspection_error`` appended when
    one is available.
    """
    if missing_table_count is None:
        reason = f": {inspection_error}" if inspection_error else "."
        return ("warning", f"Could not determine which selected tables exist{reason}")
    if missing_table_count == 0:
        return ("passed", "All selected ORM-managed tables exist.")
    return ("warning", f"{missing_table_count} selected table(s) are missing.")


def collect_doctor_report(
    *,
    engine_schema: str | None = None,
    db_schema: str | None = None,
    dotenv: str | None = None,
    vocabulary_included: bool = True,
    deep: bool = False,
) -> DoctorReport:
    """Run the maintenance health checks and return them with recommendations.

    Args:
        engine_schema: Name used to select the engine configuration.
        db_schema: Target schema passed to the inspection helpers.
        dotenv: Optional path of a ``.env`` file to load first.
        vocabulary_included: Whether vocabulary tables take part in the checks.
        deep: When true, also reconcile the schema against ORM metadata and
            (on PostgreSQL) validate foreign key relationships.

    Returns:
        A ``DoctorReport`` holding the environment info, one ``DoctorCheck``
        per check (skipped checks are still listed) and the recommendations
        derived from them.
    """
    load_environment(dotenv or "")
    info = collect_maintenance_info(
        engine_schema=engine_schema,
        db_schema=db_schema,
        dotenv=dotenv,
        vocabulary_included=vocabulary_included,
    )

    checks = [
        DoctorCheck(
            name="connection",
            status="passed" if info.connection_ready else "failed",
            detail=(
                "Target database connection succeeded."
                if info.connection_ready
                else info.connection_error or info.engine_error or "Connection could not be established."
            ),
        )
    ]

    reconciliation: SchemaReconciliationReport | None = None
    foreign_key_status: tuple[ForeignKeyStatusResult, ...] | None = None
    foreign_key_validation: ForeignKeyValidationReport | None = None

    if info.connection_ready:
        engine = create_engine_with_dependencies(get_engine_name(engine_schema), future=True)
        try:
            # A missing count means "unknown", not "zero missing": the probe
            # can connect successfully and still fail while inspecting tables.
            tables_status, tables_detail = _managed_tables_outcome(
                info.missing_table_count,
                info.connection_error,
            )
            checks.append(
                DoctorCheck(
                    name="managed tables",
                    status=tables_status,
                    detail=tables_detail,
                )
            )

            if deep:
                reconciliation = reconcile_schema(
                    engine,
                    db_schema=db_schema,
                    vocabulary_included=vocabulary_included,
                )
                checks.append(
                    DoctorCheck(
                        name="schema drift",
                        status="passed" if not reconciliation.issues else "warning",
                        detail=(
                            "ORM metadata matches the target database."
                            if not reconciliation.issues
                            else f"{len(reconciliation.issues)} difference(s) detected."
                        ),
                    )
                )
            else:
                checks.append(
                    DoctorCheck(
                        name="schema drift",
                        status="skipped",
                        detail="Run `omop-alchemy doctor --deep` to reconcile ORM metadata against the target database.",
                    )
                )

            if info.backend == SupportedDialect.POSTGRESQL:
                foreign_key_status = tuple(
                    collect_foreign_key_trigger_status(
                        engine,
                        db_schema=db_schema,
                        vocabulary_included=vocabulary_included,
                    )
                )
                # Count tables (not triggers) that still have something disabled.
                disabled_tables = sum(
                    item.disabled_trigger_count > 0 for item in foreign_key_status
                )
                checks.append(
                    DoctorCheck(
                        name="foreign keys",
                        status="passed" if disabled_tables == 0 else "warning",
                        detail=(
                            "All inspected RI triggers are enabled."
                            if disabled_tables == 0
                            else f"{disabled_tables} table(s) still have disabled RI triggers."
                        ),
                    )
                )

                if deep:
                    foreign_key_validation = validate_foreign_key_constraints(
                        engine,
                        db_schema=db_schema,
                        vocabulary_included=vocabulary_included,
                    )
                    violating_tables = sum(
                        result.status == "failed" for result in foreign_key_validation.results
                    )
                    checks.append(
                        DoctorCheck(
                            name="foreign key validation",
                            status="passed" if violating_tables == 0 else "failed",
                            detail=(
                                "All selected foreign key relationships passed validation."
                                if violating_tables == 0
                                else f"{violating_tables} table(s) have violating foreign key rows."
                            ),
                        )
                    )
                else:
                    checks.append(
                        DoctorCheck(
                            name="foreign key validation",
                            status="skipped",
                            detail="Run `omop-alchemy doctor --deep` to validate selected foreign key relationships.",
                        )
                    )
            else:
                checks.append(
                    DoctorCheck(
                        name="foreign keys",
                        status="skipped",
                        detail="Foreign key trigger inspection is only available on PostgreSQL.",
                    )
                )
                checks.append(
                    DoctorCheck(
                        name="foreign key validation",
                        status="skipped",
                        detail="Foreign key validation is only available on PostgreSQL.",
                    )
                )
        finally:
            # Always release pooled connections, even when a check raised.
            engine.dispose()
    else:
        checks.extend(
            (
                DoctorCheck(
                    name="managed tables",
                    status="skipped",
                    detail="Skipped because the database connection is not ready.",
                ),
                DoctorCheck(
                    name="foreign keys",
                    status="skipped",
                    detail="Skipped because the database connection is not ready.",
                ),
                DoctorCheck(
                    name="schema drift",
                    status="skipped",
                    detail="Skipped because the database connection is not ready.",
                ),
                DoctorCheck(
                    name="foreign key validation",
                    status="skipped",
                    detail="Skipped because the database connection is not ready.",
                ),
            )
        )

    if info.backend == SupportedDialect.POSTGRESQL:
        # Backup needs pg_dump; restore works with either pg_restore or psql.
        backup_tools_ready = info.pg_dump_path is not None and (
            info.pg_restore_path is not None or info.psql_path is not None
        )
        checks.append(
            DoctorCheck(
                name="backup tooling",
                status="passed" if backup_tools_ready else "warning",
                detail=(
                    "PostgreSQL backup and restore client tools are available."
                    if backup_tools_ready
                    else "PostgreSQL client tools are incomplete on this machine."
                ),
            )
        )
    else:
        checks.append(
            DoctorCheck(
                name="backup tooling",
                status="skipped",
                detail="Backup and restore tooling checks are only relevant for PostgreSQL targets.",
            )
        )

    return DoctorReport(
        info=info,
        checks=tuple(checks),
        recommendations=_build_recommendations(
            info=info,
            reconciliation=reconciliation,
            foreign_key_status=foreign_key_status,
            foreign_key_validation=foreign_key_validation,
        ),
        reconciliation=reconciliation,
        foreign_key_status=foreign_key_status,
        foreign_key_validation=foreign_key_validation,
    )
@dataclass(frozen=True)
class DependencyStatus:
    """Installation status of a Python package or external tool dependency."""

    # Distribution name for Python packages, or the tool name for external executables.
    name: str
    # True when the module is importable / the executable is found on PATH.
    installed: bool
    # Installed distribution version; None when unknown (always None for external tools).
    version: str | None


@dataclass(frozen=True)
class CommandSupport:
    """Readiness assessment for one CLI command given the current backend and connection state."""

    # CLI command as typed by the user, e.g. "foreign-keys enable --strict".
    command_name: str
    # Human-readable backend/tool requirement, e.g. "PostgreSQL + pg_dump".
    requirement: str
    # One of "ready", "limited", "unsupported" or "blocked" in this module.
    status: str
    # Explanation shown alongside the status.
    detail: str


@dataclass(frozen=True)
class MaintenanceInfo:
    """Full environment snapshot: package version, connection state, and per-command readiness."""

    package_version: str
    # Locations resolved via PATH lookup; None when the executable is not found.
    cli_path: str | None
    pg_dump_path: str | None
    pg_restore_path: str | None
    psql_path: str | None
    defaults_file: str
    defaults_exists: bool
    # The dotenv path as passed by the caller; dotenv_exists is None when no path was given.
    dotenv_path: str | None
    dotenv_exists: bool | None
    engine_schema: str | None
    db_schema: str | None
    # Engine URL rendered with the password hidden; None when resolution failed.
    engine_url: str | None
    # Backend name taken from the engine URL; None when resolution failed.
    backend: str | None
    engine_created: bool
    engine_error: str | None
    # True once the `SELECT 1` probe succeeded; connection_error may still be set
    # if the subsequent table inspection failed.
    connection_ready: bool
    connection_error: str | None
    managed_table_count: int
    # Both counts stay None when the table inspection did not complete.
    existing_table_count: int | None
    missing_table_count: int | None
    vocabulary_included: bool
    dependencies: tuple[DependencyStatus, ...]
    command_support: tuple[CommandSupport, ...]
def _package_version() -> str:
    """Return the installed omop-alchemy version, or ``"unknown"`` without distribution metadata.

    The environment probe should keep working when the package is imported
    from a source tree that was never installed, so a missing distribution
    is reported rather than raised.
    """
    try:
        return importlib.metadata.version("omop-alchemy")
    except importlib.metadata.PackageNotFoundError:
        return "unknown"


def _dependency_status(distribution_name: str, module_name: str) -> DependencyStatus:
    """Report whether ``module_name`` is importable and, if so, the version of ``distribution_name``.

    Args:
        distribution_name: Name used for the metadata lookup and for display.
        module_name: Import name passed to ``importlib.util.find_spec``.

    Returns:
        A ``DependencyStatus`` whose ``version`` is ``None`` when the module
        is absent or no distribution metadata can be found for it.
    """
    try:
        installed = importlib.util.find_spec(module_name) is not None
    except (ImportError, ValueError):
        # find_spec imports parent packages for dotted names (ImportError when
        # one is missing) and raises ValueError for modules whose __spec__ is
        # unset; both mean the dependency is not usable.
        installed = False

    version: str | None = None
    if installed:
        try:
            version = importlib.metadata.version(distribution_name)
        except importlib.metadata.PackageNotFoundError:
            # Importable but shipped without metadata under this name.
            version = None
    return DependencyStatus(name=distribution_name, installed=installed, version=version)


def _external_dependency_status(name: str, executable_name: str) -> DependencyStatus:
    """Report whether ``executable_name`` is on PATH; the version is never probed and is always ``None``."""
    return DependencyStatus(
        name=name,
        installed=shutil.which(executable_name) is not None,
        version=None,
    )
def _command_support_for_unavailable_engine(detail: str) -> tuple[CommandSupport, ...]:
    """Mark every CLI command as blocked with the same explanation (no usable engine configuration)."""
    any_backend = "Any SQLAlchemy backend"
    postgres_only = "PostgreSQL"
    catalogue = (
        ("doctor", any_backend),
        ("data-summary", any_backend),
        ("analyze-tables", "PostgreSQL/SQLite"),
        ("create-missing-tables", any_backend),
        ("indexes disable", any_backend),
        ("indexes enable", any_backend),
        ("reconcile-schema", any_backend),
        ("load-vocab-source", "SQLite/PostgreSQL + Athena CSV source"),
        ("backup-database", "PostgreSQL + pg_dump"),
        ("restore-database", "PostgreSQL + pg_restore/psql"),
        ("fulltext install", postgres_only),
        ("fulltext populate", postgres_only),
        ("fulltext drop", postgres_only),
        ("reset-sequences", postgres_only),
        ("truncate-tables", postgres_only),
        ("foreign-keys disable", postgres_only),
        ("foreign-keys enable", postgres_only),
        ("foreign-keys enable --strict", postgres_only),
        ("foreign-keys status", postgres_only),
        ("foreign-keys validate", postgres_only),
    )
    return tuple(
        CommandSupport(command, requirement, "blocked", detail)
        for command, requirement in catalogue
    )


def _command_support_for_backend(
    *,
    backend: str,
    engine_created: bool,
    engine_error: str | None,
    connection_ready: bool,
    connection_error: str | None,
    pg_dump_path: str | None,
    pg_restore_path: str | None,
    psql_path: str | None,
) -> tuple[CommandSupport, ...]:
    """Assess each CLI command against the resolved backend, connection state and client tools.

    Without a working connection every command is "blocked" and carries the
    reason the engine or connection failed. With one, commands are "ready",
    "limited" or "unsupported" depending on the backend, and backup/restore
    additionally require their PostgreSQL client executables.
    """
    label = _backend_label(backend)
    on_postgres = backend == SupportedDialect.POSTGRESQL

    # Explain *why* things are blocked: engine creation vs. connection test.
    if engine_created:
        failure, cause = "the connection test failed", connection_error
    else:
        failure, cause = "the engine could not be created", engine_error
    blocked_detail = f"Backend resolved to {label}, but {failure}" + (
        f": {cause}" if cause else "."
    )

    def gated(status: str, ready_detail: str) -> tuple[str, str]:
        """Keep the given outcome when connected; otherwise collapse it to blocked."""
        if connection_ready:
            return status, ready_detail
        return "blocked", blocked_detail

    def tool_gated(tool_found: bool, ready_detail: str, missing_detail: str) -> tuple[str, str]:
        """Outcome for a PostgreSQL command that also needs a client executable on PATH."""
        if not on_postgres:
            return gated("unsupported", requires_postgres)
        if not connection_ready:
            return "blocked", blocked_detail
        if tool_found:
            return "ready", ready_detail
        return "blocked", missing_detail

    requires_postgres = f"Requires PostgreSQL. Current backend: {label}."
    portable = gated("ready", f"Ready on {label}.")

    if on_postgres:
        analyze = gated("ready", "Ready on PostgreSQL; ANALYZE and VACUUM ANALYZE are both supported.")
        enable_indexes = gated(
            "ready",
            "Ready on PostgreSQL; index DDL and clustering metadata are both supported.",
        )
        postgres_only = gated("ready", "Ready on PostgreSQL.")
        vocab_load = gated("ready", "Ready on PostgreSQL when an Athena source path is configured.")
    elif backend == "sqlite":
        analyze = gated("limited", "Ready on SQLite; ANALYZE is supported, but `--vacuum` is unavailable.")
        enable_indexes = gated(
            "limited",
            "Ready on SQLite; index DDL is supported, but clustering metadata will be skipped.",
        )
        postgres_only = gated("unsupported", requires_postgres)
        vocab_load = gated("ready", "Ready on SQLite when an Athena source path is configured.")
    else:
        analyze = gated("unsupported", f"Requires PostgreSQL or SQLite. Current backend: {label}.")
        enable_indexes = gated(
            "limited",
            f"Ready on {label}; index DDL is supported, but clustering metadata will be skipped.",
        )
        postgres_only = gated("unsupported", requires_postgres)
        vocab_load = gated(
            "unsupported",
            f"Requires SQLite or PostgreSQL plus a configured Athena source path. Current backend: {label}.",
        )

    backup = tool_gated(
        pg_dump_path is not None,
        "Ready on PostgreSQL; `pg_dump` is available.",
        "PostgreSQL is configured, but `pg_dump` is not on PATH.",
    )
    restore = tool_gated(
        pg_restore_path is not None or psql_path is not None,
        "Ready on PostgreSQL; restore client tooling is available.",
        "PostgreSQL is configured, but neither `pg_restore` nor `psql` is on PATH.",
    )

    any_backend = "Any SQLAlchemy backend"
    rows = (
        ("doctor", any_backend, portable),
        ("data-summary", any_backend, portable),
        ("analyze-tables", "PostgreSQL/SQLite", analyze),
        ("create-missing-tables", any_backend, portable),
        ("indexes disable", any_backend, portable),
        ("indexes enable", any_backend, enable_indexes),
        ("reconcile-schema", any_backend, portable),
        ("load-vocab-source", "SQLite/PostgreSQL + Athena CSV source", vocab_load),
        ("backup-database", "PostgreSQL + pg_dump", backup),
        ("restore-database", "PostgreSQL + pg_restore/psql", restore),
        ("fulltext install", "PostgreSQL", postgres_only),
        ("fulltext populate", "PostgreSQL", postgres_only),
        ("fulltext drop", "PostgreSQL", postgres_only),
        ("reset-sequences", "PostgreSQL", postgres_only),
        ("truncate-tables", "PostgreSQL", postgres_only),
        ("foreign-keys disable", "PostgreSQL", postgres_only),
        ("foreign-keys enable", "PostgreSQL", postgres_only),
        ("foreign-keys enable --strict", "PostgreSQL", postgres_only),
        ("foreign-keys status", "PostgreSQL", postgres_only),
        ("foreign-keys validate", "PostgreSQL", postgres_only),
    )
    return tuple(
        CommandSupport(command, requirement, status, detail)
        for command, requirement, (status, detail) in rows
    )
def collect_maintenance_info(
    *,
    engine_schema: str | None = None,
    db_schema: str | None = None,
    dotenv: str | None = None,
    vocabulary_included: bool = True,
) -> MaintenanceInfo:
    """Take a snapshot of the maintenance environment without raising on connection problems.

    The probe resolves the engine configuration, tries to create the engine,
    runs a ``SELECT 1`` and counts missing ORM-managed tables. Failures at
    each stage are recorded on the returned ``MaintenanceInfo`` (as
    ``engine_error`` / ``connection_error``) instead of propagating.
    """
    load_environment(dotenv or "")

    # Client executables and configuration files visible from this machine.
    pg_dump_path = shutil.which("pg_dump")
    pg_restore_path = shutil.which("pg_restore")
    psql_path = shutil.which("psql")
    defaults_file = defaults_path()

    python_packages = (
        ("sqlalchemy", "sqlalchemy"),
        ("typer", "typer"),
        ("rich", "rich"),
        ("psycopg", "psycopg"),
        ("psycopg2-binary", "psycopg2"),
    )
    external_tools = ("pg_dump", "pg_restore", "psql")
    dependencies = (
        *(_dependency_status(dist, module) for dist, module in python_packages),
        *(_external_dependency_status(tool, tool) for tool in external_tools),
    )

    if vocabulary_included:
        excluded_categories = ()
    else:
        excluded_categories = (TableCategory.VOCABULARY,)
    managed_tables = select_maintenance_tables(exclude_categories=excluded_categories)

    cli_path = shutil.which("omop-alchemy")
    dotenv_exists = os.path.exists(dotenv) if dotenv is not None else None

    engine_name: str | None = None
    engine_url: str | None = None
    backend: str | None = None
    engine_error: str | None = None
    engine_created = False
    connection_error: str | None = None
    connection_ready = False
    existing_table_count: int | None = None
    missing_table_count: int | None = None

    # Stage 1: resolve the engine URL and the backend it names.
    try:
        engine_name = get_engine_name(engine_schema)
        url = sa.engine.make_url(engine_name)
        engine_url = url.render_as_string(hide_password=True)
        backend = url.get_backend_name()
    except RuntimeError as exc:
        engine_error = str(exc)
    except Exception as exc:
        engine_error = f"Could not resolve engine configuration: {exc}"

    # Stage 2: create the engine, then probe the connection and the tables.
    if engine_name is not None:
        try:
            engine = create_engine_with_dependencies(engine_name, future=True)
            engine_created = True
        except RuntimeError as exc:
            engine_error = str(exc)
        except Exception as exc:
            engine_error = f"Could not create engine: {exc}"
        else:
            try:
                with engine.connect() as connection:
                    connection.exec_driver_sql("SELECT 1")
                # The connection works from here on, even if inspection fails below.
                connection_ready = True
                missing_table_count = len(
                    collect_missing_tables(
                        engine,
                        db_schema=db_schema,
                        vocabulary_included=vocabulary_included,
                    )
                )
                existing_table_count = len(managed_tables) - missing_table_count
            except SQLAlchemyError as exc:
                connection_error = f"{exc.__class__.__name__}: {exc}"
            except Exception as exc:
                connection_error = str(exc)
            finally:
                engine.dispose()

    # Stage 3: per-command readiness, which needs at least a resolved backend.
    if backend is not None:
        command_support = _command_support_for_backend(
            backend=backend,
            engine_created=engine_created,
            engine_error=engine_error,
            connection_ready=connection_ready,
            connection_error=connection_error,
            pg_dump_path=pg_dump_path,
            pg_restore_path=pg_restore_path,
            psql_path=psql_path,
        )
    else:
        command_support = _command_support_for_unavailable_engine(
            engine_error or "No engine configuration could be resolved."
        )

    return MaintenanceInfo(
        package_version=_package_version(),
        cli_path=cli_path,
        pg_dump_path=pg_dump_path,
        pg_restore_path=pg_restore_path,
        psql_path=psql_path,
        defaults_file=str(defaults_file),
        defaults_exists=defaults_file.exists(),
        dotenv_path=dotenv,
        dotenv_exists=dotenv_exists,
        engine_schema=engine_schema,
        db_schema=db_schema,
        engine_url=engine_url,
        backend=backend,
        engine_created=engine_created,
        engine_error=engine_error,
        connection_ready=connection_ready,
        connection_error=connection_error,
        managed_table_count=len(managed_tables),
        existing_table_count=existing_table_count,
        missing_table_count=missing_table_count,
        vocabulary_included=vocabulary_included,
        dependencies=dependencies,
        command_support=command_support,
    )
@dataclass(frozen=True)
class ReconciliationIssue:
    """One difference between ORM metadata and the live database (table, column, key, index or cluster)."""

    table_name: str
    category: TableCategory
    component: str
    object_name: str
    status: str
    expected: str | None
    actual: str | None
    detail: str


@dataclass(frozen=True)
class TableReconciliationResult:
    """Reconciliation outcome for a single ORM-managed table, with its issue count."""

    table_name: str
    category: TableCategory
    model_name: str
    model_module: str
    status: str
    issue_count: int
    detail: str


@dataclass(frozen=True)
class SchemaReconciliationReport:
    """Reconciliation results for every selected table plus the flat list of issues found."""

    backend: str
    table_results: tuple[TableReconciliationResult, ...]
    issues: tuple[ReconciliationIssue, ...]


def _schema_table(table: sa.Table, db_schema: str | None) -> sa.Table:
    """Return ``table`` itself without a schema, otherwise a copy placed in ``db_schema``.

    The copy lives in a fresh ``MetaData`` and its foreign keys are pointed
    at the same target schema.
    """
    if db_schema is not None:

        def follow_target_schema(_table, to_schema, _constraint, _referred_schema):
            return to_schema

        return table.to_metadata(
            sa.MetaData(),
            schema=db_schema,
            referred_schema_fn=follow_target_schema,
        )
    return table


def _normalized_type(type_: sa.types.TypeEngine[object], dialect: sa.engine.Dialect) -> str:
    """Render a type for ``dialect``, lower-cased and with spaces removed, so two types compare as text."""
    rendered = type_.compile(dialect=dialect)
    return rendered.lower().replace(" ", "")


def _expected_foreign_keys(
    table: sa.Table,
) -> dict[tuple[tuple[str, ...], str, tuple[str, ...]], sa.ForeignKeyConstraint]:
    """Key the table's ORM foreign keys by ``(constrained columns, referred table, referred columns)``."""
    return {
        (
            tuple(element.parent.name for element in constraint.elements),
            constraint.referred_table.name,
            tuple(element.column.name for element in constraint.elements),
        ): constraint
        for constraint in table.foreign_key_constraints
    }
def _actual_foreign_keys(
    inspector: sa.Inspector,
    table_name: str,
    db_schema: str | None,
) -> dict[tuple[tuple[str, ...], str, tuple[str, ...]], ReflectedForeignKeyConstraint]:
    """Key the reflected foreign keys of a table by ``(constrained columns, referred table, referred columns)``.

    Missing column lists become empty tuples and the referred table is
    always passed through ``str``.
    """

    def signature(reflected):
        return (
            tuple(reflected.get("constrained_columns") or []),
            str(reflected.get("referred_table")),
            tuple(reflected.get("referred_columns") or []),
        )

    return {
        signature(reflected): reflected
        for reflected in inspector.get_foreign_keys(table_name, schema=db_schema)
    }


def _expected_indexes(table: sa.Table) -> dict[str, sa.Index]:
    """Map index name to index for every named index declared on the ORM table."""
    named: dict[str, sa.Index] = {}
    for index in table.indexes:
        if index.name is None:
            continue
        named[str(index.name)] = index
    return named


def _actual_indexes(
    inspector: sa.Inspector,
    table_name: str,
    db_schema: str | None,
) -> dict[str, ReflectedIndex]:
    """Map index name to reflected index for every named index the inspector reports."""
    named: dict[str, ReflectedIndex] = {}
    for reflected in inspector.get_indexes(table_name, schema=db_schema):
        if reflected.get("name") is None:
            continue
        named[str(reflected["name"])] = reflected
    return named
= ( + () if vocabulary_included else (TableCategory.VOCABULARY,) + ) + _backend = resolve_backend(engine) + selected_tables = select_maintenance_tables(exclude_categories=excluded_categories) + inspector = sa.inspect(engine) + all_issues: list[ReconciliationIssue] = [] + table_results: list[TableReconciliationResult] = [] + + with engine.connect() as connection: + for maintenance_table in selected_tables: + table_issues: list[ReconciliationIssue] = [] + exists = inspector.has_table(maintenance_table.table_name, schema=db_schema) + if not exists: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="table", + object_name=maintenance_table.table_name, + status="missing", + expected="present", + actual="absent", + detail="ORM-managed table is missing from the target database.", + ) + ) + table_results.append( + TableReconciliationResult( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + model_name=maintenance_table.model_name, + model_module=maintenance_table.model_module, + status="missing", + issue_count=1, + detail="Table is missing from the target database.", + ) + ) + all_issues.extend(table_issues) + continue + + expected_table = _schema_table(maintenance_table.table, db_schema) + expected_columns = {column.name: column for column in expected_table.columns} + actual_columns = { + str(column["name"]): column + for column in inspector.get_columns(maintenance_table.table_name, schema=db_schema) + } + actual_pk_names = tuple( + inspector.get_pk_constraint(maintenance_table.table_name, schema=db_schema).get("constrained_columns") or [] + ) + expected_pk_names = tuple(column.name for column in expected_table.primary_key.columns) + + for column_name, column in expected_columns.items(): + if column_name not in actual_columns: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, 
+ component="column", + object_name=column_name, + status="missing", + expected=_normalized_type(column.type, engine.dialect), + actual=None, + detail="Column is defined in ORM metadata but missing from the database.", + ) + ) + + for column_name, column in actual_columns.items(): + if column_name not in expected_columns: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="column", + object_name=column_name, + status="unexpected", + expected=None, + actual=_normalized_type(column["type"], engine.dialect), + detail="Column exists in the database but is not defined in ORM metadata.", + ) + ) + + for column_name in sorted(set(expected_columns).intersection(actual_columns)): + expected_column = expected_columns[column_name] + actual_column = actual_columns[column_name] + expected_type = _normalized_type(expected_column.type, engine.dialect) + actual_type = _normalized_type(actual_column["type"], engine.dialect) + if expected_type != actual_type: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="column", + object_name=column_name, + status="mismatch", + expected=expected_type, + actual=actual_type, + detail="Column type differs from ORM metadata.", + ) + ) + + expected_nullable = False if column_name in expected_pk_names else bool(expected_column.nullable) + actual_nullable = False if column_name in actual_pk_names else bool(actual_column["nullable"]) + if expected_nullable != actual_nullable: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="column", + object_name=column_name, + status="mismatch", + expected="nullable" if expected_nullable else "not nullable", + actual="nullable" if actual_nullable else "not nullable", + detail="Column nullability differs from ORM metadata.", + ) + ) + + if 
expected_pk_names != actual_pk_names: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="primary_key", + object_name=maintenance_table.table_name, + status="mismatch", + expected=", ".join(expected_pk_names), + actual=", ".join(actual_pk_names) if actual_pk_names else None, + detail="Primary key columns differ from ORM metadata.", + ) + ) + + expected_fks = _expected_foreign_keys(expected_table) + actual_fks = _actual_foreign_keys(inspector, maintenance_table.table_name, db_schema) + + for signature, constraint in expected_fks.items(): + if signature not in actual_fks: + constrained_columns, referred_table, referred_columns = signature + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="foreign_key", + object_name=constraint.name if isinstance(constraint.name, str) else ",".join(constrained_columns), + status="missing", + expected=f"{','.join(constrained_columns)} -> {referred_table}({','.join(referred_columns)})", + actual=None, + detail="Foreign key is defined in ORM metadata but missing from the database.", + ) + ) + + for signature, foreign_key in actual_fks.items(): + if signature not in expected_fks: + constrained_columns, referred_table, referred_columns = signature + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="foreign_key", + object_name=str(foreign_key.get("name") or ",".join(constrained_columns)), + status="unexpected", + expected=None, + actual=f"{','.join(constrained_columns)} -> {referred_table}({','.join(referred_columns)})", + detail="Foreign key exists in the database but is not defined in ORM metadata.", + ) + ) + + expected_idxs = _expected_indexes(expected_table) + actual_idxs = _actual_indexes(inspector, maintenance_table.table_name, db_schema) + + for index_name, index in 
expected_idxs.items(): + if index_name not in actual_idxs: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="index", + object_name=index_name, + status="missing", + expected=", ".join(column.name for column in index.columns), + actual=None, + detail="Index is defined in ORM metadata but missing from the database.", + ) + ) + continue + + actual_index = actual_idxs[index_name] + expected_columns_for_index = tuple(column.name for column in index.columns) + actual_columns_for_index = tuple(c for c in (actual_index.get("column_names") or []) if c is not None) + if expected_columns_for_index != actual_columns_for_index: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="index", + object_name=index_name, + status="mismatch", + expected=", ".join(expected_columns_for_index), + actual=", ".join(actual_columns_for_index) if actual_columns_for_index else None, + detail="Index columns differ from ORM metadata.", + ) + ) + if bool(index.unique) != bool(actual_index.get("unique")): + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="index", + object_name=index_name, + status="mismatch", + expected="unique" if index.unique else "non-unique", + actual="unique" if actual_index.get("unique") else "non-unique", + detail="Index uniqueness differs from ORM metadata.", + ) + ) + + for index_name, index in actual_idxs.items(): + if index_name not in expected_idxs: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="index", + object_name=index_name, + status="unexpected", + expected=None, + actual=", ".join(c for c in (index.get("column_names") or []) if c is not None), + detail="Index exists in the database but is not defined in ORM 
metadata.", + ) + ) + + if engine.dialect.name == SupportedDialect.POSTGRESQL: + expected_cluster = _cluster_target_name(maintenance_table) + actual_cluster = _backend.get_clustered_index_name( + connection, + maintenance_table.table_name, + db_schema, + ) + if expected_cluster != actual_cluster: + table_issues.append( + ReconciliationIssue( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + component="cluster", + object_name=maintenance_table.table_name, + status=( + "missing" + if expected_cluster and not actual_cluster + else "unexpected" + if actual_cluster and not expected_cluster + else "mismatch" + ), + expected=expected_cluster, + actual=actual_cluster, + detail="Table clustering differs from ORM metadata.", + ) + ) + + table_status = "matched" if not table_issues else "drifted" + table_results.append( + TableReconciliationResult( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + model_name=maintenance_table.model_name, + model_module=maintenance_table.model_module, + status=table_status, + issue_count=len(table_issues), + detail=( + "No differences detected." + if not table_issues + else f"{len(table_issues)} difference(s) detected." 
+ ), + ) + ) + all_issues.extend(table_issues) + + return SchemaReconciliationReport( + backend=engine.dialect.name, + table_results=tuple(table_results), + issues=tuple(all_issues), + ) diff --git a/omop_alchemy/maintenance/cli_schema_summary.py b/omop_alchemy/maintenance/cli_schema_summary.py new file mode 100644 index 0000000..bae8c40 --- /dev/null +++ b/omop_alchemy/maintenance/cli_schema_summary.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import sqlalchemy as sa + +from .tables import TableCategory, qualified_table_name, select_omop_tables + + +@dataclass(frozen=True) +class TableSummaryResult: + """Row count and existence data for one ORM-managed OMOP table.""" + + table_name: str + category: TableCategory + model_name: str + model_module: str + primary_key_columns: tuple[str, ...] + exists: bool + row_count: int | None + + +def collect_data_summary( + engine: sa.Engine, + *, + db_schema: str | None = None, + vocabulary_included: bool = False, + existing_only: bool = True, +) -> list[TableSummaryResult]: + """Return row counts and existence state for each ORM-managed table in the target database.""" + inspector = sa.inspect(engine) + tables = select_omop_tables(vocabulary_included=vocabulary_included) + + results: list[TableSummaryResult] = [] + with engine.connect() as connection: + for table in tables: + exists = inspector.has_table(table.table_name, schema=db_schema) + if not exists and existing_only: + continue + + row_count: int | None = None + if exists: + row_count = int( + connection.execute( + sa.text( + f"SELECT COUNT(*) FROM {qualified_table_name(table.table_name, db_schema)}" + ) + ).scalar_one() + ) + + results.append( + TableSummaryResult( + table_name=table.table_name, + category=table.category, + model_name=table.model_name, + model_module=table.model_module, + primary_key_columns=table.primary_key_names, + exists=exists, + row_count=row_count, + ) + ) + + return results diff --git 
a/omop_alchemy/maintenance/cli_schema_tables.py b/omop_alchemy/maintenance/cli_schema_tables.py new file mode 100644 index 0000000..46b99e6 --- /dev/null +++ b/omop_alchemy/maintenance/cli_schema_tables.py @@ -0,0 +1,127 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import sqlalchemy as sa + +from .tables import ( + MaintenanceTable, + TableCategory, + collect_maintenance_tables, + missing_maintenance_tables, + schema_adjusted_metadata, +) + + +@dataclass(frozen=True) +class TableCreationResult: + """Outcome of attempting to create one missing ORM-managed table from SQLAlchemy metadata.""" + + table_name: str + category: TableCategory + model_name: str + model_module: str + status: str + detail: str + + +def _table_dependencies(table: MaintenanceTable) -> tuple[str, ...]: + """Return the sorted names of tables that this table's ORM FK constraints refer to.""" + return tuple( + sorted( + { + constraint.referred_table.name + for constraint in table.table.foreign_key_constraints + } + ) + ) + + +def collect_missing_tables( + engine: sa.Engine, + *, + db_schema: str | None = None, + vocabulary_included: bool = True, +) -> list[MaintenanceTable]: + """Return ORM-managed tables that are absent from the target database.""" + inspector = sa.inspect(engine) + return missing_maintenance_tables( + inspector, + db_schema=db_schema, + vocabulary_included=vocabulary_included, + ) + + +def create_missing_tables( + engine: sa.Engine, + *, + db_schema: str | None = None, + vocabulary_included: bool = True, + dry_run: bool = False, +) -> list[TableCreationResult]: + """Create any ORM-managed tables missing from the target database; skips tables with unresolved FK dependencies.""" + inspector = sa.inspect(engine) + missing_tables = collect_missing_tables( + engine, + db_schema=db_schema, + vocabulary_included=vocabulary_included, + ) + existing_table_names = set(inspector.get_table_names(schema=db_schema)) + missing_table_names = {table.table_name for 
table in missing_tables} + + blocked_dependencies: dict[str, tuple[str, ...]] = {} + for maintenance_table in missing_tables: + unresolved_dependencies = tuple( + dependency_name + for dependency_name in _table_dependencies(maintenance_table) + if dependency_name not in existing_table_names + and dependency_name not in missing_table_names + ) + if unresolved_dependencies: + blocked_dependencies[maintenance_table.table_name] = unresolved_dependencies + + creatable_tables = [ + table + for table in missing_tables + if table.table_name not in blocked_dependencies + ] + + results: list[TableCreationResult] = [] + with engine.begin() as connection: + if creatable_tables and not dry_run: + metadata, adjusted_tables = schema_adjusted_metadata( + collect_maintenance_tables(), + db_schema=db_schema, + ) + metadata.create_all( + bind=connection, + tables=[adjusted_tables[table.table_name] for table in creatable_tables], + checkfirst=True, + ) + + for maintenance_table in missing_tables: + blocked = blocked_dependencies.get(maintenance_table.table_name) + results.append( + TableCreationResult( + table_name=maintenance_table.table_name, + category=maintenance_table.category, + model_name=maintenance_table.model_name, + model_module=maintenance_table.model_module, + status=( + "blocked" + if blocked is not None + else "planned" + if dry_run + else "created" + ), + detail=( + "table blocked by unresolved dependencies: " + ", ".join(blocked) + if blocked is not None + else "table would be created from ORM metadata" + if dry_run + else "table created from ORM metadata" + ), + ) + ) + + return results From d93cb54800bc1784184fc6bd6e33e15d85e50a5d Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Sun, 24 May 2026 23:15:52 +0000 Subject: [PATCH 05/25] Utilise backend_supports in schema_reconcile instead of dialect --- omop_alchemy/maintenance/cli_schema_reconcile.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/omop_alchemy/maintenance/cli_schema_reconcile.py 
b/omop_alchemy/maintenance/cli_schema_reconcile.py index f458639..4c43fd5 100644 --- a/omop_alchemy/maintenance/cli_schema_reconcile.py +++ b/omop_alchemy/maintenance/cli_schema_reconcile.py @@ -5,8 +5,7 @@ import sqlalchemy as sa from sqlalchemy.engine.interfaces import ReflectedForeignKeyConstraint, ReflectedIndex -from ..backends.resolve import SupportedDialect -from ..backends import resolve_backend +from ..backends import backend_supports, resolve_backend from .cli_indexes import _cluster_target_name from .tables import ( TableCategory, @@ -356,7 +355,7 @@ def reconcile_schema( ) ) - if engine.dialect.name == SupportedDialect.POSTGRESQL: + if backend_supports(_backend, "get_clustered_index_name"): expected_cluster = _cluster_target_name(maintenance_table) actual_cluster = _backend.get_clustered_index_name( connection, From 46f435da343cfb2464b99fadaa4d95f357db1fa1 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Sun, 24 May 2026 23:28:28 +0000 Subject: [PATCH 06/25] Get rid of redundant IndexAction --- omop_alchemy/maintenance/__init__.py | 2 -- omop_alchemy/maintenance/cli_indexes.py | 38 ++++++++++--------------- omop_alchemy/maintenance/cli_vocab.py | 6 ++-- omop_alchemy/maintenance/ui.py | 23 ++++++++------- 4 files changed, 30 insertions(+), 39 deletions(-) diff --git a/omop_alchemy/maintenance/__init__.py b/omop_alchemy/maintenance/__init__.py index e7d58f2..a00068f 100644 --- a/omop_alchemy/maintenance/__init__.py +++ b/omop_alchemy/maintenance/__init__.py @@ -17,7 +17,6 @@ validate_foreign_key_constraints, ) from .cli_indexes import ( - IndexAction, IndexManagementResult, IndexTarget, collect_index_targets, @@ -83,7 +82,6 @@ "ForeignKeyStatusResult", "ForeignKeyValidationReport", "ForeignKeyValidationResult", - "IndexAction", "IndexManagementResult", "IndexTarget", "ConnectionDefaults", diff --git a/omop_alchemy/maintenance/cli_indexes.py b/omop_alchemy/maintenance/cli_indexes.py index 1d7e858..4ba0290 100644 --- 
a/omop_alchemy/maintenance/cli_indexes.py +++ b/omop_alchemy/maintenance/cli_indexes.py @@ -1,7 +1,6 @@ from __future__ import annotations from dataclasses import dataclass -from enum import StrEnum import sqlalchemy as sa import typer @@ -24,13 +23,6 @@ ) -class IndexAction(StrEnum): - """Whether to create or drop ORM-defined secondary indexes.""" - - DISABLE = "disable" - ENABLE = "enable" - - @dataclass(frozen=True) class IndexTarget: """An ORM-defined index that currently exists in the target database.""" @@ -58,7 +50,7 @@ class IndexManagementResult: column_names: tuple[str, ...] unique: bool clustered: bool - action: IndexAction + enable: bool status: str detail: str @@ -155,7 +147,7 @@ def collect_index_targets( def manage_indexes( engine: sa.Engine, *, - action: IndexAction, + enable: bool, db_schema: str | None = None, vocabulary_included: bool = False, dry_run: bool = False, @@ -183,9 +175,9 @@ def manage_indexes( index_name = str(metadata_index.name) exists = index_name in existing_index_names should_apply = ( - action is IndexAction.DISABLE and exists + not enable and exists ) or ( - action is IndexAction.ENABLE and not exists + enable and not exists ) if not should_apply: @@ -193,7 +185,7 @@ def manage_indexes( schema_index = metadata_indexes[(table.table_name, index_name)] if not dry_run: - if action is IndexAction.DISABLE: + if not enable: schema_index.drop(bind=connection, checkfirst=True) else: schema_index.create(bind=connection, checkfirst=True) @@ -209,13 +201,13 @@ def manage_indexes( column_names=tuple(column.name for column in metadata_index.columns), unique=bool(metadata_index.unique), clustered=metadata_index.info.get(OMOP_CLUSTER_INDEX_INFO_KEY) is True, - action=action, + enable=enable, status="planned" if dry_run else "applied", detail=( "metadata-defined index would be dropped" - if action is IndexAction.DISABLE and dry_run + if not enable and dry_run else "metadata-defined index dropped" - if action is IndexAction.DISABLE + if not 
enable else "metadata-defined index would be created" if dry_run else "metadata-defined index created" @@ -223,7 +215,7 @@ def manage_indexes( ) ) - if action is IndexAction.ENABLE: + if enable: cluster_index_name = _cluster_target_name(table) if cluster_index_name is None: continue @@ -241,7 +233,7 @@ def manage_indexes( column_names=cluster_columns, unique=False, clustered=True, - action=action, + enable=enable, status="skipped", detail=( "cluster metadata present but unsupported on " @@ -265,7 +257,7 @@ def manage_indexes( column_names=cluster_columns, unique=False, clustered=True, - action=action, + enable=enable, status="planned" if dry_run else "applied", detail=( "table would be clustered using ORM-defined metadata" @@ -300,14 +292,14 @@ def disable_indexes_command( with console.status("Managing metadata-defined indexes..."): results = manage_indexes( engine, - action=IndexAction.DISABLE, + enable=False, db_schema=conn.db_schema, vocabulary_included=vocabulary_included, dry_run=dry_run, ) console.print(render_index_results(results)) console.print(render_index_summary(results, dry_run=dry_run)) - console.print(render_index_note(IndexAction.DISABLE)) + console.print(render_index_note(enable=False)) @app.command("enable") @@ -326,11 +318,11 @@ def enable_indexes_command( with console.status("Managing metadata-defined indexes..."): results = manage_indexes( engine, - action=IndexAction.ENABLE, + enable=True, db_schema=conn.db_schema, vocabulary_included=vocabulary_included, dry_run=dry_run, ) console.print(render_index_results(results)) console.print(render_index_summary(results, dry_run=dry_run)) - console.print(render_index_note(IndexAction.ENABLE)) + console.print(render_index_note(enable=True)) diff --git a/omop_alchemy/maintenance/cli_vocab.py b/omop_alchemy/maintenance/cli_vocab.py index 18dcf92..ee542cd 100644 --- a/omop_alchemy/maintenance/cli_vocab.py +++ b/omop_alchemy/maintenance/cli_vocab.py @@ -28,7 +28,7 @@ from ..backends import resolve_backend 
from ._cli_utils import omop_command from .cli_foreign_keys import manage_foreign_key_triggers -from .cli_indexes import IndexAction, manage_indexes +from .cli_indexes import manage_indexes from .cli_tables import reset_model_sequences from .tables import TableCategory, schema_adjusted_metadata, select_maintenance_tables from .ui import ( @@ -381,7 +381,7 @@ def load_vocab_source( ) manage_indexes( engine, - action=IndexAction.DISABLE, + enable=False, vocabulary_included=True, db_schema=db_schema, dry_run=False, @@ -550,7 +550,7 @@ def load_vocab_source( if _use_bulk_mode: manage_indexes( engine, - action=IndexAction.ENABLE, + enable=True, vocabulary_included=True, db_schema=db_schema, dry_run=False, diff --git a/omop_alchemy/maintenance/ui.py b/omop_alchemy/maintenance/ui.py index bfe1244..4617690 100644 --- a/omop_alchemy/maintenance/ui.py +++ b/omop_alchemy/maintenance/ui.py @@ -13,12 +13,6 @@ from ..backends.resolve import _DIALECT_TO_BACKEND_MAP, SupportedDialect as _SupportedDialect - -def __backend_label(dialect_name: str) -> str: - try: - return _DIALECT_TO_BACKEND_MAP[_SupportedDialect(dialect_name)].name - except (ValueError, KeyError): - return dialect_name from .ascii import render_banner from .tables import TableCategory @@ -32,7 +26,7 @@ def __backend_label(dialect_name: str) -> str: ForeignKeyValidationReport, ForeignKeyValidationResult, ) - from .cli_indexes import IndexAction, IndexManagementResult + from .cli_indexes import IndexManagementResult from .cli_schema import ( CommandSupport, DoctorCheck, @@ -82,6 +76,13 @@ def __backend_label(dialect_name: str) -> str: } +def _backend_label(dialect_name: str) -> str: + try: + return _DIALECT_TO_BACKEND_MAP[_SupportedDialect(dialect_name)].name + except (ValueError, KeyError): + return dialect_name + + def _bool_label(value: bool) -> Text: return Text("yes" if value else "no", style="green" if value else "dim") @@ -897,7 +898,7 @@ def render_index_results(results: Iterable[IndexManagementResult]) -> 
Renderable table.add_row( Text(result.status.upper(), style=style), result.operation, - result.action.value, + "Enable" if result.enable else "Disable", result.table_name, result.index_name, _category_label(result.category), @@ -906,10 +907,10 @@ def render_index_results(results: Iterable[IndexManagementResult]) -> Renderable return table -def render_index_note(action: IndexAction) -> Panel: +def render_index_note(enable: bool) -> Panel: body = ( "This command drops SQLAlchemy metadata-defined secondary indexes that currently exist in the database. Primary keys and constraints are not removed." - if action == "disable" + if not enable else "This command recreates SQLAlchemy metadata-defined secondary indexes that are currently missing from the database and applies PostgreSQL clustering declared in ORM metadata when the backend supports it." ) return Panel.fit(body, title="[bold]Note[/bold]", border_style="yellow") @@ -930,7 +931,7 @@ def render_index_summary(results: Iterable[IndexManagementResult], *, dry_run: b grid.add_row("Tables", str(len({item.table_name for item in items}))) grid.add_row( "Summary", - f"{'Planned' if dry_run else 'Applied'} {(items[0].action.value if items else 'manage')} on {len(items)} metadata operation(s).", + f"{'Planned' if dry_run else 'Applied'} {(items[0].enable if items else 'manage')} on {len(items)} metadata operation(s).", ) return Panel.fit(grid, title="[bold]Summary[/bold]", border_style="green" if not dry_run else "cyan") From 00e106d93f76ac18fa5759012ef458b33a4b19c0 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Sun, 24 May 2026 23:50:22 +0000 Subject: [PATCH 07/25] Update the docs and docstrings --- docs/cli/index.md | 114 ++++++++ docs/cli/reference.md | 270 ++++++++++++++++++ mkdocs.yml | 6 +- omop_alchemy/maintenance/_cli_utils.py | 4 +- omop_alchemy/maintenance/cli.py | 2 + omop_alchemy/maintenance/cli_backup.py | 6 +- omop_alchemy/maintenance/cli_foreign_keys.py | 6 +- omop_alchemy/maintenance/cli_fulltext.py | 2 + 
omop_alchemy/maintenance/cli_indexes.py | 10 +- omop_alchemy/maintenance/cli_schema.py | 2 + omop_alchemy/maintenance/cli_schema_doctor.py | 2 + omop_alchemy/maintenance/cli_schema_info.py | 2 + .../maintenance/cli_schema_reconcile.py | 4 +- .../maintenance/cli_schema_summary.py | 2 + omop_alchemy/maintenance/cli_schema_tables.py | 4 +- omop_alchemy/maintenance/cli_tables.py | 6 +- omop_alchemy/maintenance/cli_vocab.py | 14 +- omop_alchemy/maintenance/ui.py | 3 +- tests/test_cli_config.py | 7 +- tests/test_indexes.py | 11 +- 20 files changed, 446 insertions(+), 31 deletions(-) create mode 100644 docs/cli/index.md create mode 100644 docs/cli/reference.md diff --git a/docs/cli/index.md b/docs/cli/index.md new file mode 100644 index 0000000..56b0db5 --- /dev/null +++ b/docs/cli/index.md @@ -0,0 +1,114 @@ +# CLI Overview + +The `omop-alchemy` command-line interface provides a suite of maintenance utilities for OMOP CDM databases. It is installed as part of the `omop-alchemy` package and is available on `PATH` after installation. 
+ +```bash +pip install omop-alchemy +omop-alchemy --help +``` + +## Command groups and flat commands + +| Group / Command | What it covers | +|---|---| +| `info` | Environment inspection: package version, dependency status, connection state, per-command readiness | +| `doctor` | Health check: connection, schema drift, FK trigger state, FK violations, backup tooling | +| `reconcile-schema` | Compare ORM metadata against live column types, indexes, FK constraints, and cluster state | +| `create-missing-tables` | Detect and create ORM-managed OMOP tables that are absent from the database | +| `data-summary` | Row counts and existence state for ORM-managed tables | +| `load-vocab-source` | Load Athena CDM vocabulary CSV files | +| `analyze-tables` | ANALYZE or VACUUM ANALYZE selected tables to refresh planner statistics | +| `reset-sequences` | Reset owned PostgreSQL sequences to MAX(pk) + 1 | +| `truncate-tables` | Truncate selected ORM-managed tables | +| `indexes disable` / `enable` | Drop or recreate ORM-defined secondary indexes | +| `foreign-keys disable` / `enable` / `status` / `validate` | Manage PostgreSQL RI trigger enforcement | +| `fulltext install` / `populate` / `drop` | Manage tsvector sidecar columns on vocabulary tables | +| `backup-database` | Create a pg_dump backup artifact | +| `restore-database` | Restore a pg_dump or psql backup artifact | +| `config show` / `override` | View and persist saved connection defaults | + +See the [Command Reference](reference.md) for full parameter details. + +--- + +## The `@omop_command` decorator + +Most commands are decorated with `@omop_command`. This decorator handles all connection boilerplate so the command function body only needs to work with `conn` and `engine`. 
+ +### What it injects + +Every decorated command receives three additional CLI flags, wired to identical Typer `Option` definitions across all commands: + +| Flag | Type | Description | +|---|---|---| +| `--dotenv` | `str` (optional) | Path to a `.env` file loaded before connection resolution. Overrides the saved `DOTENV` default. | +| `--engine-schema` | `str` (optional) | Named engine configuration (e.g. `cdm`, `results`). Resolves to the `ENGINE_` environment variable group. | +| `--db-schema` | `str` (optional) | Database schema to target (e.g. `cdm5`, `vocab`). Sets `search_path` on PostgreSQL. Not supported on SQLite. | + +Commands that support preview mode also receive `--dry-run` via the decorator. + +### What it does behind the scenes + +When a decorated command is invoked: + +1. The decorator pops `dotenv`, `engine_schema`, and `db_schema` from the Typer kwargs. +2. It calls `resolve_connection(...)` to produce a `conn` object carrying those values merged with any saved defaults. +3. It prints a header showing the command name, engine schema, database schema, and mode label (apply / dry-run / inspect). +4. It calls `build_engine(...)` to create a SQLAlchemy `Engine`. +5. It calls the original function body with `(conn, engine, **remaining_kwargs)`. +6. Any `RuntimeError`, `SQLAlchemyError`, or `BackendNotSupportedError` raised by the body is caught and rendered as a formatted error, then exits with code 1. 
+ +### Before and after + +Without the decorator, every command would need this boilerplate: + +```python +def my_command( + dotenv: str | None = typer.Option(None, help="..."), + engine_schema: str | None = typer.Option(None, help="..."), + db_schema: str | None = typer.Option(None, help="..."), +) -> None: + conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + console.print(render_command_header(...)) + try: + engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) + # actual work here + except Exception as exc: + handle_error(exc) +``` + +With the decorator, the function body is all that matters: + +```python +@app.command("my-command") +@omop_command("my-command") +def my_command(conn, engine) -> None: + # conn and engine are ready to use + results = do_work(engine, db_schema=conn.db_schema) + console.print(render_results(results)) +``` + +--- + +## The `conn` object + +`conn` is a `ConnectionDefaults` instance. It exposes: + +| Attribute | Description | +|---|---| +| `conn.dotenv` | Resolved dotenv path (from CLI flag or saved default) | +| `conn.engine_schema` | Resolved engine schema name | +| `conn.db_schema` | Resolved database schema name | +| `conn.athena_source` | Resolved Athena vocabulary CSV directory path | + +--- + +## Connection resolution order + +When the CLI resolves a connection parameter, it uses this precedence (highest to lowest): + +1. Explicit CLI flag (e.g. `--db-schema cdm5`) +2. Saved default in the nearest `.omop-maint.toml` file +3. Command default (e.g. `vocabulary_included` defaults to `False` on most commands) + +Use `omop-alchemy config override` to persist defaults so you do not need to repeat connection flags on every invocation. 
diff --git a/docs/cli/reference.md b/docs/cli/reference.md new file mode 100644 index 0000000..0557e72 --- /dev/null +++ b/docs/cli/reference.md @@ -0,0 +1,270 @@ +# Command Reference + +Every command listed here also accepts `--dotenv`, `--engine-schema`, and `--db-schema`. These are injected by the `@omop_command` decorator and control connection resolution. See the [CLI Overview](index.md) for a full description of those flags. + +--- + +## Schema inspection + +### `info` + +Inspect maintenance CLI readiness, backend compatibility, and current installation state. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--vocab` / `--no-vocab` | bool | `False` | Include OMOP vocabulary tables in the managed-table count. | + +--- + +### `doctor` + +Run a read-only maintenance health check across connection readiness, schema drift, and FK state. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--vocab` / `--no-vocab` | bool | `False` | Include OMOP vocabulary tables in the selection. | +| `--deep` | bool | `False` | Include heavier checks: FK validation scans every constraint for referential integrity violations. | + +--- + +### `reconcile-schema` + +Compare ORM-managed SQLAlchemy metadata against the current target database schema. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--vocab` / `--no-vocab` | bool | `False` | Include OMOP vocabulary tables in the reconciliation. | + +--- + +### `create-missing-tables` + +Create missing ORM-managed OMOP tables from metadata. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--vocab` / `--no-vocab` | bool | `True` | Include OMOP vocabulary tables in the selection. Enabled by default. | +| `--dry-run` | bool | `False` | Preview planned actions without applying any changes to the database. | + +--- + +### `data-summary` + +Summarise ORM-managed OMOP tables present in the target database. 
+ +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--vocab` / `--no-vocab` | bool | `False` | Include OMOP vocabulary tables in the summary. | +| `--include-missing` | bool | `False` | Also list ORM-managed tables that are absent from the target database. | + +--- + +## Vocabulary + +### `load-vocab-source` + +Load all Athena vocabulary CSVs from the configured source path, optionally toggling indexes and FK triggers for speed. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--athena-source` | str (optional) | (saved default) | Path to the unzipped Athena vocabulary CSV directory. Falls back to the saved athena-source default. | +| `--merge-strategy` | `replace` / `upsert` / `insert_if_empty` | `replace` | CSV merge strategy. `replace` keeps the DB in sync with the source. `upsert` is incremental and non-destructive. `insert_if_empty` is the fast path for a fresh empty target. | +| `--chunksize` | int (optional) | `100000` | Chunk size for fallback ORM CSV loading. Defaults to 100 000 rows. Pass `0` to disable chunking. | +| `--bulk-mode` / `--no-bulk-mode` | bool | `True` | Disable FK triggers and drop indexes globally before loading, then rebuild after. Much faster for a full vocabulary reload. Ignored on backends that do not support it. | +| `--dry-run` | bool | `False` | Preview planned actions without applying any changes to the database. | + +--- + +## Tables + +### `analyze-tables` + +Analyse selected ORM-managed tables to update planner statistics. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--scope` | `clinical` / `vocabulary` / `derived` / ... (optional) | all tables | CDM category scope to analyze. Defaults to all ORM-managed tables when omitted. | +| `--table` | str (repeatable, optional) | (none) | Specific ORM-managed table name to analyze. Repeat to target multiple tables. 
| +| `--vacuum` | bool | `False` | Use VACUUM ANALYZE instead of plain ANALYZE to also reclaim dead tuples. Not available on all backends. | +| `--dry-run` | bool | `False` | Preview planned actions without applying any changes to the database. | + +--- + +### `reset-sequences` + +Reset each owned sequence to MAX(pk) + 1 to prevent insert conflicts after bulk loads. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--vocab` / `--no-vocab` | bool | `False` | Include OMOP vocabulary tables in the selection. | +| `--dry-run` | bool | `False` | Preview planned actions without applying any changes to the database. | + +--- + +### `truncate-tables` + +Truncate selected ORM-managed OMOP tables. Aborts if external FK references would block the truncation, unless `--cascade` is set. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--scope` | str (optional) | (none) | CDM category scope to truncate (e.g. `clinical`, `vocabulary`). Either `--scope` or `--table` must be specified. | +| `--table` | str (repeatable, optional) | (none) | Specific ORM-managed table name to truncate. Repeat to target multiple tables. | +| `--restart-identities` | bool | `False` | Reset owned sequences to 1 after truncation (`TRUNCATE ... RESTART IDENTITY`). | +| `--cascade` | bool | `False` | Automatically truncate dependent tables via PostgreSQL CASCADE. Use with care. | +| `--yes` | bool | `False` | Confirm the destructive operation. Required when not using `--dry-run`. | +| `--dry-run` | bool | `False` | Preview planned actions without applying any changes to the database. | + +--- + +## Indexes + +### `indexes disable` + +Drop all ORM-defined secondary indexes from the target database. Useful before bulk data loads. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--vocab` / `--no-vocab` | bool | `False` | Include OMOP vocabulary tables in the selection.
| +| `--dry-run` | bool | `False` | Preview planned actions without applying any changes to the database. | + +--- + +### `indexes enable` + +Recreate all ORM-defined secondary indexes. Also CLUSTERs tables on PostgreSQL where metadata specifies it. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--vocab` / `--no-vocab` | bool | `False` | Include OMOP vocabulary tables in the selection. | +| `--dry-run` | bool | `False` | Preview planned actions without applying any changes to the database. | + +--- + +## Foreign keys + +### `foreign-keys disable` + +Disable PostgreSQL RI trigger enforcement for all participating OMOP tables. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--vocab` / `--no-vocab` | bool | `False` | Include OMOP vocabulary tables in the selection. | +| `--strict` | bool | `False` | Validate all FK relationships and report violations before disabling trigger enforcement. | +| `--dry-run` | bool | `False` | Preview planned actions without applying any changes to the database. | + +--- + +### `foreign-keys enable` + +Re-enable PostgreSQL RI trigger enforcement. Use `--strict` to abort if any violations exist first. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--vocab` / `--no-vocab` | bool | `False` | Include OMOP vocabulary tables in the selection. | +| `--strict` | bool | `False` | Validate all FK relationships before enabling trigger enforcement. Aborts if any violations are found. | +| `--dry-run` | bool | `False` | Preview planned actions without applying any changes to the database. | + +--- + +### `foreign-keys status` + +Show the current enabled/disabled state of RI triggers for each participating OMOP table. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--vocab` / `--no-vocab` | bool | `False` | Include OMOP vocabulary tables in the selection. 
| + +--- + +### `foreign-keys validate` + +Validate FK constraints on selected tables and report any rows that violate referential integrity. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--vocab` / `--no-vocab` | bool | `False` | Include OMOP vocabulary tables in the selection. | + +--- + +## Full-text search + +### `fulltext install` + +Add tsvector sidecar columns to vocabulary tables and optionally create GIN indexes for fast full-text search. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--create-indexes` / `--no-create-indexes` | bool | `True` | Create GIN indexes alongside the tsvector columns for fast full-text search. | +| `--fastupdate` / `--no-fastupdate` | bool | `False` | Enable PostgreSQL GIN fastupdate on newly created indexes (faster writes at the cost of slower searches while entries remain in the pending list). | +| `--dry-run` | bool | `False` | Preview planned actions without applying any changes to the database. | + +--- + +### `fulltext populate` + +Fill tsvector sidecar columns with pre-computed search vectors using the specified PostgreSQL text search configuration. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--regconfig` | str | `english` | PostgreSQL text search configuration to use when building tsvector values (e.g. `english`, `simple`). | +| `--dry-run` | bool | `False` | Preview planned actions without applying any changes to the database. | + +--- + +### `fulltext drop` + +Remove tsvector sidecar columns and their associated GIN indexes from vocabulary tables. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--drop-indexes` / `--no-drop-indexes` | bool | `True` | Drop managed GIN indexes before dropping the tsvector columns. | +| `--dry-run` | bool | `False` | Preview planned actions without applying any changes to the database. | + +--- + +## Backup and restore + +### `backup-database` + +Create a database backup that can be restored with `restore-database`.
+ +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--output-path` | str (optional) | timestamped file in cwd | Output path for the backup artifact. Defaults to a timestamped file in the current directory. | +| `--format` | `custom` / `plain` | `custom` | pg_dump output format. `custom` produces a binary `.dump` file. `plain` produces a plain SQL `.sql` file. | +| `--dry-run` | bool | `False` | Preview planned actions without applying any changes to the database. | + +--- + +### `restore-database` + +Restore a database backup that was created with `backup-database`. + +| Argument / Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `PATH` (argument) | str | (required) | Path to the backup artifact (`.dump` or `.sql`) to restore. | +| `--format` | `custom` / `plain` | (required) | Format of the artifact to restore. Must match the format used when the backup was created. | +| `--dry-run` | bool | `False` | Preview planned actions without applying any changes to the database. | + +--- + +## Configuration + +### `config show` + +Display current saved connection defaults from the nearest `.omop-maint.toml` file. + +No additional options beyond the connection flags. + +--- + +### `config override` + +Persist one or more connection overrides to `.omop-maint.toml` for future CLI invocations. + +| Flag | Type / Choices | Default | Description | +|---|---|---|---| +| `--dotenv` | str (optional) | (none) | Path to a `.env` file. Saved relative to `.omop-maint.toml` and resolved back to absolute on load. | +| `--engine-schema` | str (optional) | (none) | Named engine configuration to use (e.g. `cdm`, `results`). | +| `--db-schema` | str (optional) | (none) | Database schema to target (e.g. `cdm5`, `vocab`). | +| `--athena-source` | str (optional) | (none) | Path to the unzipped Athena vocabulary CSV directory. Used by `load-vocab-source` when `--athena-source` is omitted. 
| diff --git a/mkdocs.yml b/mkdocs.yml index b25719b..18b14e4 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -56,7 +56,11 @@ nav: - Maintenance CLI: getting-started/maintenance.md - Quickstart: getting-started/quickstart.md - Sessions: getting-started/sessions.md - + + - CLI Reference: + - Overview: cli/index.md + - Command Reference: cli/reference.md + - API Reference: - Overview: api/index.md - CDM Base: api/base.md diff --git a/omop_alchemy/maintenance/_cli_utils.py b/omop_alchemy/maintenance/_cli_utils.py index 2a690ff..e7625ed 100644 --- a/omop_alchemy/maintenance/_cli_utils.py +++ b/omop_alchemy/maintenance/_cli_utils.py @@ -1,3 +1,5 @@ +"""Shared utilities: the @omop_command decorator, error handling, connection resolution, and injected CLI parameter definitions.""" + from __future__ import annotations import functools @@ -75,7 +77,7 @@ def omop_command( and wraps the body in ``try/except handle_error``. The decorated function must accept ``(conn, engine, ...)`` as its first - two positional parameters; the decorator supplies them. Any + two positional parameters. The decorator supplies them. Any ``vocabulary_included`` or ``athena_source`` parameter declared in the function is automatically forwarded to :func:`setup_cli_cmd`. 
""" diff --git a/omop_alchemy/maintenance/cli.py b/omop_alchemy/maintenance/cli.py index 517a1b6..b62baba 100644 --- a/omop_alchemy/maintenance/cli.py +++ b/omop_alchemy/maintenance/cli.py @@ -1,3 +1,5 @@ +"""Entry point that assembles all command subgroups and flat commands into the top-level omop-alchemy Typer app.""" + from __future__ import annotations import typer diff --git a/omop_alchemy/maintenance/cli_backup.py b/omop_alchemy/maintenance/cli_backup.py index e8dfb40..c8985bf 100644 --- a/omop_alchemy/maintenance/cli_backup.py +++ b/omop_alchemy/maintenance/cli_backup.py @@ -1,3 +1,5 @@ +"""Backup and restore commands wrapping pg_dump, pg_restore, and psql.""" + from __future__ import annotations from dataclasses import dataclass @@ -62,7 +64,7 @@ def create_database_backup( db_schema: str | None = None, dry_run: bool = False, ) -> BackupResult: - """Create a database backup artifact at output_path; runs the subprocess unless dry_run is True.""" + """Create a database backup artifact at output_path. Runs the subprocess unless dry_run is True.""" backend = resolve_backend(engine) require_backend_support(backend, "prepare_backup", "Database backup") resolved_output_path = Path(output_path) if output_path is not None else _default_output_path(backup_format) @@ -107,7 +109,7 @@ def restore_database_backup( db_schema: str | None = None, dry_run: bool = False, ) -> BackupResult: - """Restore a database backup; runs the subprocess unless dry_run is True.""" + """Restore a database backup. 
Runs the subprocess unless dry_run is True.""" backend = resolve_backend(engine) require_backend_support(backend, "prepare_restore", "Database restore") resolved_input_path = Path(input_path).expanduser().resolve() diff --git a/omop_alchemy/maintenance/cli_foreign_keys.py b/omop_alchemy/maintenance/cli_foreign_keys.py index 8e99691..70a06f8 100644 --- a/omop_alchemy/maintenance/cli_foreign_keys.py +++ b/omop_alchemy/maintenance/cli_foreign_keys.py @@ -1,3 +1,5 @@ +"""Foreign key trigger management commands for PostgreSQL RI trigger enforcement.""" + from __future__ import annotations from dataclasses import dataclass @@ -316,7 +318,7 @@ def manage_foreign_key_triggers( dry_run: bool = False, strict: bool = False, ) -> list[ForeignKeyManagementResult]: - """Enable or disable RI trigger enforcement; with strict=True, aborts on any FK violation.""" + """Enable or disable RI trigger enforcement. With strict=True, aborts on any FK violation.""" backend = resolve_backend(engine) require_backend_support(backend, "toggle_fk_triggers", "FK trigger management") @@ -500,7 +502,7 @@ def enable_foreign_keys_command( help="Preview planned actions without applying any changes to the database.", ), ) -> None: - """Re-enable PostgreSQL RI trigger enforcement; use --strict to abort if any violations exist first.""" + """Re-enable PostgreSQL RI trigger enforcement. 
Use --strict to abort if any violations exist first.""" conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) console.print( render_command_header( diff --git a/omop_alchemy/maintenance/cli_fulltext.py b/omop_alchemy/maintenance/cli_fulltext.py index bcbba86..2dd9588 100644 --- a/omop_alchemy/maintenance/cli_fulltext.py +++ b/omop_alchemy/maintenance/cli_fulltext.py @@ -1,3 +1,5 @@ +"""Full-text search sidecar column commands for PostgreSQL tsvector columns on vocabulary tables.""" + from __future__ import annotations import typer diff --git a/omop_alchemy/maintenance/cli_indexes.py b/omop_alchemy/maintenance/cli_indexes.py index 4ba0290..b4dbe2e 100644 --- a/omop_alchemy/maintenance/cli_indexes.py +++ b/omop_alchemy/maintenance/cli_indexes.py @@ -1,3 +1,5 @@ +"""Index management commands for dropping and recreating ORM-defined secondary indexes.""" + from __future__ import annotations from dataclasses import dataclass @@ -97,7 +99,7 @@ def _cluster_column_names( table: MaintenanceTable, cluster_index_name: str, ) -> tuple[str, ...]: - """Return the column names of the named cluster index; falls back to the primary key if the index is not found.""" + """Return the column names of the named cluster index. Falls back to the primary key if the index is not found.""" for index in table.table.indexes: if str(index.name) == cluster_index_name: return tuple(column.name for column in index.columns) @@ -152,7 +154,7 @@ def manage_indexes( vocabulary_included: bool = False, dry_run: bool = False, ) -> list[IndexManagementResult]: - """Create or drop all ORM-defined indexes; also CLUSTER tables on PostgreSQL when enabling.""" + """Create or drop all ORM-defined indexes. 
Also CLUSTERs tables on PostgreSQL when enabling.""" backend = resolve_backend(engine) inspector = sa.inspect(engine) selected_tables = select_omop_tables(vocabulary_included=vocabulary_included) @@ -288,7 +290,7 @@ def disable_indexes_command( ), dry_run: bool = False, ) -> None: - """Drop all ORM-defined secondary indexes from the target database; useful before bulk data loads.""" + """Drop all ORM-defined secondary indexes from the target database. Useful before bulk data loads.""" with console.status("Managing metadata-defined indexes..."): results = manage_indexes( engine, @@ -314,7 +316,7 @@ def enable_indexes_command( ), dry_run: bool = False, ) -> None: - """Recreate all ORM-defined secondary indexes; also CLUSTERs tables on PostgreSQL where metadata specifies it.""" + """Recreate all ORM-defined secondary indexes. Also CLUSTERs tables on PostgreSQL where metadata specifies it.""" with console.status("Managing metadata-defined indexes..."): results = manage_indexes( engine, diff --git a/omop_alchemy/maintenance/cli_schema.py b/omop_alchemy/maintenance/cli_schema.py index 19daf55..4631235 100644 --- a/omop_alchemy/maintenance/cli_schema.py +++ b/omop_alchemy/maintenance/cli_schema.py @@ -1,3 +1,5 @@ +"""Schema subapp: thin shim re-exporting all domain types and wiring five CLI commands.""" + from __future__ import annotations import typer diff --git a/omop_alchemy/maintenance/cli_schema_doctor.py b/omop_alchemy/maintenance/cli_schema_doctor.py index e60c140..95d10f0 100644 --- a/omop_alchemy/maintenance/cli_schema_doctor.py +++ b/omop_alchemy/maintenance/cli_schema_doctor.py @@ -1,3 +1,5 @@ +"""Health check domain: connection readiness, schema drift, FK trigger state, and FK validation checks with prioritised recommendations.""" + from __future__ import annotations from dataclasses import dataclass diff --git a/omop_alchemy/maintenance/cli_schema_info.py b/omop_alchemy/maintenance/cli_schema_info.py index 2812fb8..55f568e 100644 --- 
a/omop_alchemy/maintenance/cli_schema_info.py +++ b/omop_alchemy/maintenance/cli_schema_info.py @@ -1,3 +1,5 @@ +"""Environment inspection domain: package version, dependency status, connection state, and per-command readiness checks.""" + from __future__ import annotations from dataclasses import dataclass diff --git a/omop_alchemy/maintenance/cli_schema_reconcile.py b/omop_alchemy/maintenance/cli_schema_reconcile.py index 4c43fd5..d700521 100644 --- a/omop_alchemy/maintenance/cli_schema_reconcile.py +++ b/omop_alchemy/maintenance/cli_schema_reconcile.py @@ -1,3 +1,5 @@ +"""Schema reconciliation domain: comparing ORM metadata against the live database column types, indexes, FK constraints, and cluster state.""" + from __future__ import annotations from dataclasses import dataclass @@ -125,7 +127,7 @@ def reconcile_schema( db_schema: str | None = None, vocabulary_included: bool = False, ) -> SchemaReconciliationReport: - """Compare ORM metadata against the live database schema; reports missing columns, indexes, FKs, and cluster state.""" + """Compare ORM metadata against the live database schema. Reports missing columns, indexes, FKs, and cluster state.""" excluded_categories: tuple[TableCategory, ...] 
= ( () if vocabulary_included else (TableCategory.VOCABULARY,) ) diff --git a/omop_alchemy/maintenance/cli_schema_summary.py b/omop_alchemy/maintenance/cli_schema_summary.py index bae8c40..caaef80 100644 --- a/omop_alchemy/maintenance/cli_schema_summary.py +++ b/omop_alchemy/maintenance/cli_schema_summary.py @@ -1,3 +1,5 @@ +"""Data summary domain: collecting row counts and existence state for ORM-managed OMOP tables.""" + from __future__ import annotations from dataclasses import dataclass diff --git a/omop_alchemy/maintenance/cli_schema_tables.py b/omop_alchemy/maintenance/cli_schema_tables.py index 46b99e6..4aa0504 100644 --- a/omop_alchemy/maintenance/cli_schema_tables.py +++ b/omop_alchemy/maintenance/cli_schema_tables.py @@ -1,3 +1,5 @@ +"""Table creation domain: detecting and creating ORM-managed OMOP tables that are absent from the target database.""" + from __future__ import annotations from dataclasses import dataclass @@ -59,7 +61,7 @@ def create_missing_tables( vocabulary_included: bool = True, dry_run: bool = False, ) -> list[TableCreationResult]: - """Create any ORM-managed tables missing from the target database; skips tables with unresolved FK dependencies.""" + """Create any ORM-managed tables missing from the target database. 
Skips tables with unresolved FK dependencies.""" inspector = sa.inspect(engine) missing_tables = collect_missing_tables( engine, diff --git a/omop_alchemy/maintenance/cli_tables.py b/omop_alchemy/maintenance/cli_tables.py index 6ef78c0..e146b25 100644 --- a/omop_alchemy/maintenance/cli_tables.py +++ b/omop_alchemy/maintenance/cli_tables.py @@ -1,3 +1,5 @@ +"""Table operations: ANALYZE statistics refresh, TRUNCATE, and sequence reset commands.""" + from __future__ import annotations from dataclasses import dataclass @@ -174,7 +176,7 @@ def truncate_tables( cascade: bool = False, dry_run: bool = False, ) -> list[TruncateTableResult]: - """Truncate selected ORM-managed tables; raises if non-selected tables hold blocking FK references.""" + """Truncate selected ORM-managed tables. Raises if non-selected tables hold blocking FK references.""" if scope is not None and table_names is not None: raise RuntimeError("Use either `scope` or `table_names`, not both.") if scope is None and table_names is None: @@ -479,7 +481,7 @@ def truncate_tables_command( ), dry_run: bool = False, ) -> None: - """Truncate selected ORM-managed OMOP tables; aborts if external FK references would block unless --cascade is set.""" + """Truncate selected ORM-managed OMOP tables. 
Aborts if external FK references would block unless --cascade is set.""" resolved_scope, resolved_tables = resolve_selection(scope=scope, tables=table) if resolved_scope is None and resolved_tables is None: console.print( diff --git a/omop_alchemy/maintenance/cli_vocab.py b/omop_alchemy/maintenance/cli_vocab.py index ee542cd..1019bfd 100644 --- a/omop_alchemy/maintenance/cli_vocab.py +++ b/omop_alchemy/maintenance/cli_vocab.py @@ -1,3 +1,5 @@ +"""Vocabulary loading command for Athena CDM CSV files.""" + from __future__ import annotations from collections.abc import Callable @@ -74,7 +76,7 @@ class VocabularyLoadReport: @dataclass(frozen=True) class VocabularyLoadProgress: - """Progress event emitted after each table load phase; drives the CLI progress bar.""" + """Progress event emitted after each table load phase. Drives the CLI progress bar.""" phase: str table_name: str | None @@ -132,7 +134,7 @@ def _emit_progress( total_units: float, detail: str, ) -> None: - """Fire the caller-supplied progress callback with a normalised VocabularyLoadProgress snapshot; no-ops if None.""" + """Fire the caller-supplied progress callback with a normalised VocabularyLoadProgress snapshot. No-ops if None.""" if progress_callback is None: return @@ -177,7 +179,7 @@ def _load_vocab_model_csv( chunksize: int | None = None, index_strategy: str = "auto", ) -> int: - """Call model.load_csv; if the staging table is absent, create it and retry once.""" + """Call model.load_csv. If the staging table is absent, create it and retry once.""" load_kwargs: dict[str, object] = { "merge_strategy": merge_strategy, "quote_mode": quote_mode, @@ -242,7 +244,7 @@ def _create_missing_vocabulary_tables( *, db_schema: str | None, ) -> int: - """Create any vocabulary-category ORM tables that are absent from the target database; returns the count created.""" + """Create any vocabulary-category ORM tables that are absent from the target database. 
Returns the count created.""" vocab_tables = select_maintenance_tables( categories=(TableCategory.VOCABULARY,), ) @@ -275,7 +277,7 @@ def _configure_loader_connection( *, db_schema: str | None, ) -> None: - """Set search_path on PostgreSQL connections when a db_schema is requested; raises on SQLite with a schema.""" + """Set search_path on PostgreSQL connections when a db_schema is requested. Raises on SQLite with a schema.""" if db_schema is None: return @@ -300,7 +302,7 @@ def load_vocab_source( bulk_mode: bool = True, progress_callback: VocabularyLoadProgressCallback | None = None, ) -> VocabularyLoadReport: - """Load all Athena vocabulary CSVs from source_path; with bulk_mode, indexes and FK triggers are toggled around the load.""" + """Load all Athena vocabulary CSVs from source_path. With bulk_mode, indexes and FK triggers are toggled around the load.""" resolved_source_path = Path(source_path).expanduser().resolve() if not resolved_source_path.exists() or not resolved_source_path.is_dir(): raise RuntimeError( diff --git a/omop_alchemy/maintenance/ui.py b/omop_alchemy/maintenance/ui.py index 4617690..0be28e5 100644 --- a/omop_alchemy/maintenance/ui.py +++ b/omop_alchemy/maintenance/ui.py @@ -929,9 +929,10 @@ def render_index_summary(results: Iterable[IndexManagementResult], *, dry_run: b if skipped: grid.add_row("Skipped", str(skipped)) grid.add_row("Tables", str(len({item.table_name for item in items}))) + action = ("enable" if items[0].enable else "disable") if items else "manage" grid.add_row( "Summary", - f"{'Planned' if dry_run else 'Applied'} {(items[0].enable if items else 'manage')} on {len(items)} metadata operation(s).", + f"{'Planned' if dry_run else 'Applied'} {action} on {len(items)} metadata operation(s).", ) return Panel.fit(grid, title="[bold]Summary[/bold]", border_style="green" if not dry_run else "cyan") diff --git a/tests/test_cli_config.py b/tests/test_cli_config.py index e760c13..2f84928 100644 --- a/tests/test_cli_config.py +++ 
b/tests/test_cli_config.py @@ -2,8 +2,7 @@ from omop_alchemy.maintenance.cli import app from omop_alchemy.maintenance.cli_config import defaults_path, ConnectionDefaults -from omop_alchemy.maintenance.cli_indexes import IndexAction, IndexManagementResult -from omop_alchemy.maintenance.tables import TableCategory +from omop_alchemy.maintenance.cli_indexes import IndexManagementResult runner = CliRunner() @@ -62,13 +61,13 @@ def fake_create_engine(url: str, *, future: bool) -> str: def fake_manage_indexes( engine: object, *, - action: IndexAction, + enable: bool, db_schema: str | None = None, vocabulary_included: bool = False, dry_run: bool = False, ) -> list[IndexManagementResult]: calls["engine"] = engine - calls["action"] = action + calls["enable"] = enable calls["db_schema"] = db_schema calls["vocabulary_included"] = vocabulary_included calls["dry_run"] = dry_run diff --git a/tests/test_indexes.py b/tests/test_indexes.py index bde25df..4187dd2 100644 --- a/tests/test_indexes.py +++ b/tests/test_indexes.py @@ -5,7 +5,6 @@ from omop_alchemy.maintenance.cli import app from omop_alchemy.maintenance.cli_schema import create_missing_tables from omop_alchemy.maintenance.cli_indexes import ( - IndexAction, IndexManagementResult, collect_index_targets, manage_indexes, @@ -81,7 +80,7 @@ def test_manage_indexes_disable_and_enable_on_sqlite(tmp_path): disabled = manage_indexes( engine, - action=IndexAction.DISABLE, + enable=False, ) assert disabled @@ -94,7 +93,7 @@ def test_manage_indexes_disable_and_enable_on_sqlite(tmp_path): enabled = manage_indexes( engine, - action=IndexAction.ENABLE, + enable=True, ) assert enabled assert any( @@ -134,13 +133,13 @@ def fake_create_engine(url: str, *, future: bool) -> str: def fake_manage_indexes( engine: object, *, - action: IndexAction, + enable: bool, db_schema: str | None = None, vocabulary_included: bool = False, dry_run: bool = False, ) -> list[IndexManagementResult]: calls["engine"] = engine - calls["action"] = action + 
calls["enable"] = enable calls["db_schema"] = db_schema calls["vocabulary_included"] = vocabulary_included calls["dry_run"] = dry_run @@ -155,7 +154,7 @@ def fake_manage_indexes( column_names=("gender_concept_id",), unique=False, clustered=False, - action=IndexAction.DISABLE, + enable=enable, status="planned", detail="metadata-defined index would be dropped", ) From 51473d4e26715c5e7f53e05cdfc530bd2145c21f Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Sun, 24 May 2026 23:54:43 +0000 Subject: [PATCH 08/25] Ruff check: Remove unused imports, other code issues --- omop_alchemy/__init__.py | 1 - omop_alchemy/cdm/base/column_helpers.py | 1 - omop_alchemy/cdm/base/column_mixins.py | 4 +--- omop_alchemy/cdm/base/decorators.py | 2 +- omop_alchemy/cdm/base/indexing.py | 1 - omop_alchemy/cdm/base/reference_context.py | 2 -- omop_alchemy/cdm/base/typing.py | 1 - omop_alchemy/cdm/handlers/timeline/event_timeline.py | 6 ++---- .../cdm/handlers/vocabs_and_mappers/concept_registry.py | 1 - .../cdm/handlers/vocabs_and_mappers/concept_resolver.py | 1 - omop_alchemy/cdm/model/typing.py | 1 - omop_alchemy/cdm/model/unstructured/note_nlp.py | 7 ++----- omop_alchemy/cdm/model/vocabulary/vocabulary.py | 1 - tests/test_analyze_tables.py | 5 ++--- tests/test_conditions_basic.py | 1 - tests/test_load_vocab.py | 2 +- 16 files changed, 9 insertions(+), 28 deletions(-) diff --git a/omop_alchemy/__init__.py b/omop_alchemy/__init__.py index e78895b..b25b5a7 100644 --- a/omop_alchemy/__init__.py +++ b/omop_alchemy/__init__.py @@ -1,6 +1,5 @@ from .config import load_environment, TEST_PATH, ROOT_PATH from .db import get_engine_name, create_engine_with_dependencies -from .errors import CDMValidationError __all__ = [ diff --git a/omop_alchemy/cdm/base/column_helpers.py b/omop_alchemy/cdm/base/column_helpers.py index 629d814..d865eab 100644 --- a/omop_alchemy/cdm/base/column_helpers.py +++ b/omop_alchemy/cdm/base/column_helpers.py @@ -1,6 +1,5 @@ import sqlalchemy as sa import sqlalchemy.orm as
so -from typing import Optional def required_concept_fk(*, index: bool = False): """ diff --git a/omop_alchemy/cdm/base/column_mixins.py b/omop_alchemy/cdm/base/column_mixins.py index 796af6d..b2ba3cc 100644 --- a/omop_alchemy/cdm/base/column_mixins.py +++ b/omop_alchemy/cdm/base/column_mixins.py @@ -1,11 +1,9 @@ from __future__ import annotations from datetime import date, datetime -from typing import Optional, List, TYPE_CHECKING, Type, Any -from dataclasses import dataclass +from typing import Optional, Any import sqlalchemy as sa import sqlalchemy.orm as so -from orm_loader.helpers import get_model_by_tablename """ diff --git a/omop_alchemy/cdm/base/decorators.py b/omop_alchemy/cdm/base/decorators.py index 899b496..b8e6247 100644 --- a/omop_alchemy/cdm/base/decorators.py +++ b/omop_alchemy/cdm/base/decorators.py @@ -1,4 +1,4 @@ -from typing import Type, TypeVar +from typing import TypeVar from .cdm_table_base import CDMTableBase T = TypeVar("T", bound=type) diff --git a/omop_alchemy/cdm/base/indexing.py b/omop_alchemy/cdm/base/indexing.py index 361a039..902380b 100644 --- a/omop_alchemy/cdm/base/indexing.py +++ b/omop_alchemy/cdm/base/indexing.py @@ -2,7 +2,6 @@ from hashlib import sha1 from typing import Union, Mapping, Tuple, TypedDict, Any, cast -from collections.abc import Mapping import sqlalchemy as sa from sqlalchemy.sql.schema import SchemaItem diff --git a/omop_alchemy/cdm/base/reference_context.py b/omop_alchemy/cdm/base/reference_context.py index 6644a97..6948abe 100644 --- a/omop_alchemy/cdm/base/reference_context.py +++ b/omop_alchemy/cdm/base/reference_context.py @@ -1,8 +1,6 @@ from __future__ import annotations import sqlalchemy.orm as so -from typing import Type, Any -from orm_loader.helpers import Base, get_model_by_tablename import sqlalchemy as sa class ReferenceContext: diff --git a/omop_alchemy/cdm/base/typing.py b/omop_alchemy/cdm/base/typing.py index 63073f0..7640217 100644 --- a/omop_alchemy/cdm/base/typing.py +++ 
b/omop_alchemy/cdm/base/typing.py @@ -1,4 +1,3 @@ - from typing import Protocol, ClassVar, runtime_checkable, TYPE_CHECKING, Optional, Iterable from sqlalchemy.orm import DeclarativeMeta from datetime import date diff --git a/omop_alchemy/cdm/handlers/timeline/event_timeline.py b/omop_alchemy/cdm/handlers/timeline/event_timeline.py index a4ecd8f..dd4c569 100644 --- a/omop_alchemy/cdm/handlers/timeline/event_timeline.py +++ b/omop_alchemy/cdm/handlers/timeline/event_timeline.py @@ -2,13 +2,11 @@ from ...model.clinical import Measurement, Person, Condition_Occurrence, Drug_Exposure from sqlalchemy.orm import object_session from sqlalchemy import select -from omop_alchemy.cdm.base import HasPersonId from datetime import datetime, time, date -from typing import Optional, Mapping, Any, Type, List +from typing import Optional, Mapping, Any, List import json from dataclasses import dataclass -from datetime import date, datetime -from typing import Protocol, Union, Mapping, Any, Optional, Literal +from typing import Protocol, Union, Literal TemporalKind = Literal["point", "interval"] diff --git a/omop_alchemy/cdm/handlers/vocabs_and_mappers/concept_registry.py b/omop_alchemy/cdm/handlers/vocabs_and_mappers/concept_registry.py index ba42251..e1ca8ae 100644 --- a/omop_alchemy/cdm/handlers/vocabs_and_mappers/concept_registry.py +++ b/omop_alchemy/cdm/handlers/vocabs_and_mappers/concept_registry.py @@ -1,4 +1,3 @@ -from collections.abc import Mapping from typing import Callable import sqlalchemy as sa import sqlalchemy.orm as so diff --git a/omop_alchemy/cdm/handlers/vocabs_and_mappers/concept_resolver.py b/omop_alchemy/cdm/handlers/vocabs_and_mappers/concept_resolver.py index 84bd249..30a38b6 100644 --- a/omop_alchemy/cdm/handlers/vocabs_and_mappers/concept_resolver.py +++ b/omop_alchemy/cdm/handlers/vocabs_and_mappers/concept_resolver.py @@ -1,6 +1,5 @@ import sqlalchemy as sa import sqlalchemy.orm as so -from typing import Iterable, Any from ...model.vocabulary.concept 
import Concept diff --git a/omop_alchemy/cdm/model/typing.py b/omop_alchemy/cdm/model/typing.py index b51b6c9..d89f202 100644 --- a/omop_alchemy/cdm/model/typing.py +++ b/omop_alchemy/cdm/model/typing.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from typing import Protocol, Iterable, Optional, Union, Callable """ This module contains type definitions for the OMOP CDM model. diff --git a/omop_alchemy/cdm/model/unstructured/note_nlp.py b/omop_alchemy/cdm/model/unstructured/note_nlp.py index 22ca819..0308728 100644 --- a/omop_alchemy/cdm/model/unstructured/note_nlp.py +++ b/omop_alchemy/cdm/model/unstructured/note_nlp.py @@ -1,7 +1,7 @@ import sqlalchemy as sa import sqlalchemy.orm as so -from typing import Optional, TYPE_CHECKING -from datetime import date, datetime +from typing import Optional +from datetime import date from orm_loader.helpers import Base @@ -13,11 +13,8 @@ omop_index, ) -if TYPE_CHECKING: - from ..vocabulary import Concept @cdm_table - class Note_NLP(CDMTableBase, Base): __tablename__ = "note_nlp" __table_args__ = merge_table_args( diff --git a/omop_alchemy/cdm/model/vocabulary/vocabulary.py b/omop_alchemy/cdm/model/vocabulary/vocabulary.py index 9ee5256..99a27db 100644 --- a/omop_alchemy/cdm/model/vocabulary/vocabulary.py +++ b/omop_alchemy/cdm/model/vocabulary/vocabulary.py @@ -9,7 +9,6 @@ from __future__ import annotations -from datetime import date from typing import Optional import sqlalchemy as sa diff --git a/tests/test_analyze_tables.py b/tests/test_analyze_tables.py index 06576ea..10527a1 100644 --- a/tests/test_analyze_tables.py +++ b/tests/test_analyze_tables.py @@ -2,10 +2,9 @@ import pytest from typer.testing import CliRunner -from omop_alchemy.maintenance.cli_tables import AnalyzeTableResult, analyze_tables -from omop_alchemy.maintenance.cli import app +from omop_alchemy.maintenance.cli_tables import analyze_tables from omop_alchemy.maintenance.cli_schema import create_missing_tables -from omop_alchemy.maintenance.tables 
import TableCategory, TableScope +from omop_alchemy.maintenance.tables import TableScope runner = CliRunner() diff --git a/tests/test_conditions_basic.py b/tests/test_conditions_basic.py index c5bd71b..b2f8de0 100644 --- a/tests/test_conditions_basic.py +++ b/tests/test_conditions_basic.py @@ -1,6 +1,5 @@ from datetime import date from omop_alchemy.cdm.model.clinical import Condition_OccurrenceView -from datetime import date from omop_alchemy.cdm.base import ModifierFieldConcepts diff --git a/tests/test_load_vocab.py b/tests/test_load_vocab.py index 80b75bc..bcbd10d 100644 --- a/tests/test_load_vocab.py +++ b/tests/test_load_vocab.py @@ -89,7 +89,7 @@ def test_all_concepts_reference_valid_domain(db_session, athena_vocab): invalid = ( db_session.query(Concept) .outerjoin(Domain, Concept.domain_id == Domain.domain_id) - .filter(Domain.domain_id == None) + .filter(Domain.domain_id.is_(None)) .count() ) From c8097f66d68a5e0bd3e597aaeb2132546bd6b45d Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Sun, 24 May 2026 23:57:54 +0000 Subject: [PATCH 09/25] Clean up mkdocs and utilise correct files created --- docs/index.md | 2 +- mkdocs.yml | 57 ++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/docs/index.md b/docs/index.md index 36c7a63..62a4118 100644 --- a/docs/index.md +++ b/docs/index.md @@ -26,7 +26,7 @@ How to install OMOP Alchemy, create sessions, and start querying safely. 
- [Installation](getting-started/installation.md) - [Quickstart](getting-started/quickstart.md) -- [Sessions & engines](getting-started/sessions.md) +- [Maintenance CLI](getting-started/maintenance.md) --- diff --git a/mkdocs.yml b/mkdocs.yml index 18b14e4..ee9ce8c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -53,24 +53,70 @@ nav: - Getting Started: - Overview: getting-started/index.md - Installation: getting-started/installation.md - - Maintenance CLI: getting-started/maintenance.md - Quickstart: getting-started/quickstart.md - - Sessions: getting-started/sessions.md + - Maintenance CLI: getting-started/maintenance.md - CLI Reference: - Overview: cli/index.md - Command Reference: cli/reference.md - - API Reference: + - API Reference: - Overview: api/index.md + - Architecture: api/architecture.md - CDM Base: api/base.md - Columns: api/columns.md - Configuration: api/configuration.md - Relationships: api/relationships.md + - Typing: api/typing.md - Object-Relational Mappings: - - CDM Objects: models/index.md - + - Overview: models/index.md + - Clinical: + - Overview: models/clinical/index.md + - Condition Occurrence: models/clinical/condition_occurrence.md + - Drug Exposure: models/clinical/drug_exposure.md + - Measurement: models/clinical/measurement.md + - Observation: models/clinical/observation.md + - Person: models/clinical/person.md + - Derived: + - Overview: models/derived/index.md + - Cohort: models/derived/cohort.md + - Condition Era: models/derived/condition_era.md + - Dose Era: models/derived/dose_era.md + - Drug Era: models/derived/drug_era.md + - Health Economic: + - Overview: models/health_economic/index.md + - Cost: models/health_economic/cost.md + - Payer Plan Period: models/health_economic/payer_plan_period.md + - Health System: + - Overview: models/health_system/index.md + - Care Site: models/health_system/care_site.md + - Location: models/health_system/location.md + - Provider: models/health_system/provider.md + - Visit Detail: 
models/health_system/visit_detail.md + - Visit Occurrence: models/health_system/visit_occurrence.md + - Metadata: + - Overview: models/metadata/index.md + - CDM Source: models/metadata/cdm_source.md + - Metadata: models/metadata/metadata.md + - Structural: + - Overview: models/structural/index.md + - Episode: models/structural/episode.md + - Episode Event: models/structural/episode_event.md + - Fact Relationship: models/structural/fact_relationship.md + - Unstructured: + - Overview: models/unstructured/index.md + - Image: models/unstructured/image.md + - Image Feature: models/unstructured/image_feature.md + - Note: models/unstructured/note.md + - Note NLP: models/unstructured/note_nlp.md + - Vocabulary: + - Overview: models/vocabulary/index.md + - Concept: models/vocabulary/concept.md + - Concept Relationship: models/vocabulary/concept_relationship.md + - Domain: models/vocabulary/domain.md + - Vocabulary: models/vocabulary/vocabulary.md + - OMOP-Specific Validation: - Overview: validation/index.md - Domain Rules: validation/domain-rules.md @@ -78,6 +124,7 @@ nav: - Advanced: - Overview: advanced/index.md + - Backends: advanced/backends.md - Views & Analytics: advanced/views.md - Event Timelines: advanced/timelines.md - PostgreSQL Full-Text Search: advanced/fulltext.md From 0533caaa3e9c91fa5e3438743e0bd161eeaed280 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Mon, 25 May 2026 04:47:27 +0000 Subject: [PATCH 10/25] Accelerate load with pool and reload of conn --- omop_alchemy/maintenance/cli_vocab.py | 325 +++++++++++++------------- tests/test_load_vocab_postgres.py | 30 --- tests/test_load_vocab_source.py | 12 +- 3 files changed, 172 insertions(+), 195 deletions(-) diff --git a/omop_alchemy/maintenance/cli_vocab.py b/omop_alchemy/maintenance/cli_vocab.py index 1019bfd..c4f594e 100644 --- a/omop_alchemy/maintenance/cli_vocab.py +++ b/omop_alchemy/maintenance/cli_vocab.py @@ -2,6 +2,7 @@ from __future__ import annotations +import time from collections.abc import 
Callable from dataclasses import dataclass from pathlib import Path @@ -9,7 +10,9 @@ import sqlalchemy as sa import sqlalchemy.orm as so +from sqlalchemy.exc import OperationalError import typer +from sqlalchemy.pool import NullPool from orm_loader.tables.typing import CSVTableProtocol from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn, TimeElapsedColumn @@ -45,8 +48,8 @@ VocabularyModel: TypeAlias = type[CSVTableProtocol] VocabularyLoadProgressCallback: TypeAlias = Callable[["VocabularyLoadProgress"], None] -LOAD_PROGRESS_FRACTION = 0.30 -COMMIT_PROGRESS_FRACTION = 0.70 +LOAD_PROGRESS_FRACTION = 0.85 +COMMIT_PROGRESS_FRACTION = 0.15 @dataclass(frozen=True) @@ -86,6 +89,8 @@ class VocabularyLoadProgress: total_units: float percent: float detail: str + rows_this_table: int | None = None + rows_cumulative: int = 0 @dataclass(frozen=True) @@ -123,6 +128,19 @@ class VocabularyLoadError(RuntimeError): """Raised when a single Athena vocabulary table load fails.""" +_RETRYABLE_FRAGMENTS: tuple[str, ...] = ( + "recovery mode", + "connection reset", + "server closed the connection", +) + + +def _is_retryable_error(exc: Exception) -> bool: + """True for transient PostgreSQL connection failures that are worth retrying.""" + msg = str(exc).lower() + return isinstance(exc, OperationalError) and any(s in msg for s in _RETRYABLE_FRAGMENTS) + + def _emit_progress( progress_callback: VocabularyLoadProgressCallback | None, *, @@ -133,6 +151,8 @@ def _emit_progress( completed_units: float, total_units: float, detail: str, + rows_this_table: int | None = None, + rows_cumulative: int = 0, ) -> None: """Fire the caller-supplied progress callback with a normalised VocabularyLoadProgress snapshot. 
No-ops if None.""" if progress_callback is None: @@ -150,6 +170,8 @@ def _emit_progress( total_units=bounded_total, percent=(bounded_completed / bounded_total) * 100.0, detail=detail, + rows_this_table=rows_this_table, + rows_cumulative=rows_cumulative, ) ) @@ -277,18 +299,10 @@ def _configure_loader_connection( *, db_schema: str | None, ) -> None: - """Set search_path on PostgreSQL connections when a db_schema is requested. Raises on SQLite with a schema.""" + """Apply schema context to a connection when db_schema is requested. Delegates to the active backend.""" if db_schema is None: return - - if connection.dialect.name != SupportedDialect.POSTGRESQL: - raise RuntimeError( - "Vocabulary source loading with `--db-schema` is only supported on PostgreSQL. " - "SQLite uses the default database namespace." - ) - - backend = resolve_backend(connection.engine) - backend.configure_schema_context(connection, db_schema) + resolve_backend(connection.engine).configure_schema_context(connection, db_schema) def load_vocab_source( @@ -316,6 +330,11 @@ def load_vocab_source( + ", ".join(sorted(missing_required)) ) + # NullPool: each session/connection is opened fresh and closed immediately after + # use. No stale pooled connections survive between tables, which prevents + # "connection in recovery mode" failures on subsequent tables after a heavy load. 
+ load_engine = sa.create_engine(engine.url, poolclass=NullPool) + results: list[VocabularyLoadResult] = [] created_table_count = 0 sequence_reset_count = 0 @@ -351,8 +370,6 @@ def load_vocab_source( ) ) - load_items.sort(key=lambda item: (item.size_bytes, item.model.__tablename__)) - total_units = float(sum(item.size_bytes for item in load_items) or 1) completed_units = 0.0 table_count = len(load_items) @@ -389,165 +406,151 @@ def load_vocab_source( dry_run=False, ) - with engine.connect() as connection: - _configure_loader_connection( - connection, - db_schema=db_schema, - ) - - if not dry_run: - created_table_count = _create_missing_vocabulary_tables( - connection, - db_schema=db_schema, - ) - - Session = so.sessionmaker(bind=connection, future=True) - session = Session() - current_model_name: str | None = None - current_csv_path: str | None = None - try: - for table_index, item in enumerate(load_items, start=1): - model = item.model - csv_path = item.csv_path - required = item.required - current_model_name = model.__tablename__ - current_csv_path = str(csv_path) - if dry_run: - _emit_progress( - progress_callback, - phase="plan", - table_name=model.__tablename__, - table_index=table_index, - table_count=table_count, - completed_units=completed_units, - total_units=total_units, - detail=( - f"Planning {model.__tablename__} ({table_index}/{table_count})" - ), - ) - completed_units += item.size_bytes - _emit_progress( - progress_callback, - phase="planned", - table_name=model.__tablename__, - table_index=table_index, - table_count=table_count, - completed_units=completed_units, - total_units=total_units, - detail=( - f"Planned {model.__tablename__} ({table_index}/{table_count})" - ), - ) - results.append( - VocabularyLoadResult( - table_name=model.__tablename__, - status="planned", - row_count=None, - csv_path=str(csv_path), - required=required, - detail="Athena CSV would be loaded via staged ORM CSV loader using tab-delimited input and auto-detected quote mode", 
- ) - ) - continue - - loader_kwargs: dict[str, object] = { - "model": model, - "csv_path": csv_path, - "merge_strategy": merge_strategy, - "quote_mode": "auto", - "index_strategy": "keep" if _use_bulk_mode else "auto", - } - if chunksize is not None: - loader_kwargs["chunksize"] = chunksize - - _emit_progress( - progress_callback, - phase="load", - table_name=model.__tablename__, - table_index=table_index, - table_count=table_count, - completed_units=completed_units, - total_units=total_units, - detail=( - f"Loading {model.__tablename__} ({table_index}/{table_count})" - ), - ) - - row_count = _load_vocab_model_csv( - session, - **loader_kwargs, # type: ignore[arg-type] - ) + if not dry_run: + with load_engine.connect() as pre_conn: + _configure_loader_connection(pre_conn, db_schema=db_schema) + created_table_count = _create_missing_vocabulary_tables(pre_conn, db_schema=db_schema) + pre_conn.commit() - completed_units += item.size_bytes * LOAD_PROGRESS_FRACTION - _emit_progress( - progress_callback, - phase="load-complete", - table_name=model.__tablename__, - table_index=table_index, - table_count=table_count, - completed_units=completed_units, - total_units=total_units, - detail=( - f"Loaded {model.__tablename__}; committing ({table_index}/{table_count})" - ), - ) + rows_cumulative = 0 - session.commit() + for table_index, item in enumerate(load_items, start=1): + model = item.model + csv_path = item.csv_path + required = item.required - completed_units += item.size_bytes * COMMIT_PROGRESS_FRACTION - _emit_progress( - progress_callback, - phase="commit-complete", + if dry_run: + _emit_progress( + progress_callback, + phase="plan", + table_name=model.__tablename__, + table_index=table_index, + table_count=table_count, + completed_units=completed_units, + total_units=total_units, + detail=f"Planning {model.__tablename__} ({table_index}/{table_count})", + ) + completed_units += item.size_bytes + _emit_progress( + progress_callback, + phase="planned", + 
table_name=model.__tablename__, + table_index=table_index, + table_count=table_count, + completed_units=completed_units, + total_units=total_units, + detail=f"Planned {model.__tablename__} ({table_index}/{table_count})", + ) + results.append( + VocabularyLoadResult( table_name=model.__tablename__, - table_index=table_index, - table_count=table_count, - completed_units=completed_units, - total_units=total_units, - detail=( - f"Committed {model.__tablename__} ({table_index}/{table_count})" - ), + status="planned", + row_count=None, + csv_path=str(csv_path), + required=required, + detail="Athena CSV would be loaded via staged ORM CSV loader using tab-delimited input and auto-detected quote mode", ) + ) + else: + loader_kwargs: dict[str, object] = { + "model": model, + "csv_path": csv_path, + "merge_strategy": merge_strategy, + "quote_mode": "auto", + "index_strategy": "keep" if _use_bulk_mode else "auto", + } + if chunksize is not None: + loader_kwargs["chunksize"] = chunksize - results.append( - VocabularyLoadResult( - table_name=model.__tablename__, - status="loaded", - row_count=row_count, - csv_path=str(csv_path), - required=required, - detail="Athena CSV loaded via staged ORM CSV loader using tab-delimited input and auto-detected quote mode", - ) - ) - if not dry_run: - connection.commit() _emit_progress( progress_callback, - phase="complete", - table_name=None, - table_index=table_count, + phase="load", + table_name=model.__tablename__, + table_index=table_index, table_count=table_count, - completed_units=total_units, + completed_units=completed_units, total_units=total_units, - detail="Athena vocabulary load complete", + detail=f"Loading {model.__tablename__} ({table_index}/{table_count})", ) - except Exception as exc: - session.rollback() - if not dry_run: - connection.rollback() - recovery = ( + + recovery_hint = ( " Indexes and FK triggers may still be disabled; run " "'omop-alchemy indexes enable --vocab' and 'omop-alchemy foreign-keys enable' to recover." 
if _use_bulk_mode else "" ) - raise VocabularyLoadError( - "Athena vocabulary load failed for " - f"table `{current_model_name or 'unknown'}` from `{current_csv_path or '-'}` " - f"using merge strategy `{merge_strategy}` on backend `{engine.dialect.name}`. " - f"Underlying error: {exc.__class__.__name__}: {exc}" - + recovery - ) from exc - finally: - session.close() + row_count = 0 + for attempt in range(3): + try: + with so.Session(load_engine) as session: + _configure_loader_connection(session.connection(), db_schema=db_schema) + row_count = _load_vocab_model_csv( + session, + **loader_kwargs, # type: ignore[arg-type] + ) + session.commit() + break + except Exception as exc: + if attempt < 2 and _is_retryable_error(exc): + time.sleep(10) + continue + raise VocabularyLoadError( + "Athena vocabulary load failed for " + f"table `{model.__tablename__}` from `{csv_path}` " + f"using merge strategy `{merge_strategy}` on backend `{engine.dialect.name}`. " + f"Underlying error: {exc.__class__.__name__}: {exc}" + + recovery_hint + ) from exc + + rows_cumulative += row_count + completed_units += item.size_bytes * LOAD_PROGRESS_FRACTION + _emit_progress( + progress_callback, + phase="load-complete", + table_name=model.__tablename__, + table_index=table_index, + table_count=table_count, + completed_units=completed_units, + total_units=total_units, + detail=f"Loaded {model.__tablename__}; committing ({table_index}/{table_count})", + rows_this_table=row_count, + rows_cumulative=rows_cumulative, + ) + completed_units += item.size_bytes * COMMIT_PROGRESS_FRACTION + _emit_progress( + progress_callback, + phase="commit-complete", + table_name=model.__tablename__, + table_index=table_index, + table_count=table_count, + completed_units=completed_units, + total_units=total_units, + detail=f"Committed {model.__tablename__} ({table_index}/{table_count})", + rows_this_table=row_count, + rows_cumulative=rows_cumulative, + ) + + results.append( + VocabularyLoadResult( + 
table_name=model.__tablename__, + status="loaded", + row_count=row_count, + csv_path=str(csv_path), + required=required, + detail="Athena CSV loaded via staged ORM CSV loader using tab-delimited input and auto-detected quote mode", + ) + ) + + _emit_progress( + progress_callback, + phase="complete", + table_name=None, + table_index=table_count, + table_count=table_count, + completed_units=total_units, + total_units=total_units, + detail="Athena vocabulary load complete", + rows_cumulative=rows_cumulative, + ) if _use_bulk_mode: manage_indexes( @@ -658,8 +661,12 @@ def _update_progress(event: VocabularyLoadProgress) -> None: progress.update(task_id, completed=event.percent, description=event.detail) if event.phase == "commit-complete" and event.table_name is not None: completed_tables.append(event.table_name) + row_info = ( + f": [dim]{event.rows_this_table:,} rows[/dim]" + if event.rows_this_table is not None else "" + ) progress.console.print( - f"[green]loaded[/green] [bold]{event.table_name}[/bold] " + f"[green]loaded[/green] [bold]{event.table_name}[/bold]{row_info} " f"({len(completed_tables)}/{event.table_count})" ) diff --git a/tests/test_load_vocab_postgres.py b/tests/test_load_vocab_postgres.py index 8fc2149..7740a3d 100644 --- a/tests/test_load_vocab_postgres.py +++ b/tests/test_load_vocab_postgres.py @@ -177,36 +177,6 @@ def test_upsert_strategy_is_non_destructive(pg_session, pg_engine, tmp_path): -def test_chunksize_forwarded_to_loader(pg_session, pg_engine, monkeypatch, tmp_path): - """chunksize is forwarded from load_vocab_source through to _load_vocab_model_csv.""" - from omop_alchemy.maintenance import load_vocab as _lv_module - - source_path = _copy_fixture_source(tmp_path) - received_chunksizes: list[int | None] = [] - original = _lv_module._load_vocab_model_csv - - def tracking_load(session, *, model, csv_path, merge_strategy, quote_mode="auto", chunksize=None): - received_chunksizes.append(chunksize) - return original( - session, - model=model, 
- csv_path=csv_path, - merge_strategy=merge_strategy, - quote_mode=quote_mode, - chunksize=chunksize, - ) - - monkeypatch.setattr(_lv_module, "_load_vocab_model_csv", tracking_load) - - load_vocab_source(pg_engine, source_path=source_path, chunksize=500) - - assert received_chunksizes, "Expected at least one table to be loaded" - assert all(c == 500 for c in received_chunksizes), ( - f"Expected chunksize=500 for all tables, got: {received_chunksizes}" - ) - - - def test_db_schema_search_path_on_postgres(pg_engine, tmp_path): """ load_vocab_source with db_schema creates vocabulary tables in the requested diff --git a/tests/test_load_vocab_source.py b/tests/test_load_vocab_source.py index f65364d..b3115be 100644 --- a/tests/test_load_vocab_source.py +++ b/tests/test_load_vocab_source.py @@ -292,14 +292,13 @@ def fake_load_csv(session, path, *, merge_strategy, quote_mode, index_strategy=" assert calls["quote_mode"] == "literal" -def test_load_vocab_source_loads_smallest_files_first(monkeypatch, tmp_path): - """Test load vocab source loads smallest files first.""" +def test_load_vocab_source_loads_in_fk_dependency_order(monkeypatch, tmp_path): + """Tables must be loaded in REQUIRED_VOCAB_MODELS order to respect FK dependencies.""" engine = sa.create_engine(f"sqlite:///{tmp_path / 'load_vocab_source_order.db'}", future=True) source_path = _build_required_athena_source(tmp_path) - for model in REQUIRED_VOCAB_MODELS: - _write_csv_with_size(source_path, model.__tablename__, 500) - + # Give domain a tiny file and concept_class a large one — if size-sorting were still in place + # concept_class would come before vocabulary, but FK order requires domain → vocabulary → concept_class. 
_write_csv_with_size(source_path, "domain", 10) _write_csv_with_size(source_path, "vocabulary", 200) _write_csv_with_size(source_path, "concept_class", 50) @@ -326,7 +325,8 @@ def fake_load_vocab_model_csv( load_vocab_source(engine, source_path=source_path) - assert loaded_order[:3] == ["domain", "concept_class", "vocabulary"] + expected_order = [m.__tablename__ for m in REQUIRED_VOCAB_MODELS] + assert loaded_order[: len(expected_order)] == expected_order def test_load_vocab_source_reports_weighted_progress(monkeypatch, tmp_path): From 2f92a5d1ac8b4c22703cc29a931021ab2c7de7e7 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Wed, 27 May 2026 05:02:07 +0000 Subject: [PATCH 11/25] Updated CLI for vocab ingestion and speed --- omop_alchemy/maintenance/cli_indexes.py | 161 ++++----- omop_alchemy/maintenance/cli_vocab.py | 423 +++++++++++------------- tests/test_load_vocab_source.py | 11 +- 3 files changed, 276 insertions(+), 319 deletions(-) diff --git a/omop_alchemy/maintenance/cli_indexes.py b/omop_alchemy/maintenance/cli_indexes.py index b4dbe2e..c2ddd26 100644 --- a/omop_alchemy/maintenance/cli_indexes.py +++ b/omop_alchemy/maintenance/cli_indexes.py @@ -163,91 +163,71 @@ def manage_indexes( results: list[IndexManagementResult] = [] - with engine.begin() as connection: - for table in selected_tables: - if not inspector.has_table(table.table_name, schema=db_schema): - continue + for table in selected_tables: + if not inspector.has_table(table.table_name, schema=db_schema): + continue - existing_index_names = { - index["name"] - for index in inspector.get_indexes(table.table_name, schema=db_schema) - } - - for metadata_index in sorted(table.table.indexes, key=lambda idx: idx.name or ""): - index_name = str(metadata_index.name) - exists = index_name in existing_index_names - should_apply = ( - not enable and exists - ) or ( - enable and not exists - ) + existing_index_names = { + index["name"] + for index in inspector.get_indexes(table.table_name, schema=db_schema) + } 
- if not should_apply: - continue + for metadata_index in sorted(table.table.indexes, key=lambda idx: idx.name or ""): + index_name = str(metadata_index.name) + exists = index_name in existing_index_names + should_apply = ( + not enable and exists + ) or ( + enable and not exists + ) - schema_index = metadata_indexes[(table.table_name, index_name)] - if not dry_run: + if not should_apply: + continue + + schema_index = metadata_indexes[(table.table_name, index_name)] + if not dry_run: + # Each index gets its own transaction so WAL is committed and + # checkpointable before the next index build begins. One large + # transaction for all indexes would accumulate 5+ GB of WAL + # on vocabulary tables before any checkpoint can reclaim it. + with engine.begin() as connection: if not enable: schema_index.drop(bind=connection, checkfirst=True) else: schema_index.create(bind=connection, checkfirst=True) - results.append( - IndexManagementResult( - operation="index", - table_name=table.table_name, - category=table.category, - model_name=table.model_name, - model_module=table.model_module, - index_name=index_name, - column_names=tuple(column.name for column in metadata_index.columns), - unique=bool(metadata_index.unique), - clustered=metadata_index.info.get(OMOP_CLUSTER_INDEX_INFO_KEY) is True, - enable=enable, - status="planned" if dry_run else "applied", - detail=( - "metadata-defined index would be dropped" - if not enable and dry_run - else "metadata-defined index dropped" - if not enable - else "metadata-defined index would be created" - if dry_run - else "metadata-defined index created" - ), - ) + results.append( + IndexManagementResult( + operation="index", + table_name=table.table_name, + category=table.category, + model_name=table.model_name, + model_module=table.model_module, + index_name=index_name, + column_names=tuple(column.name for column in metadata_index.columns), + unique=bool(metadata_index.unique), + 
clustered=metadata_index.info.get(OMOP_CLUSTER_INDEX_INFO_KEY) is True, + enable=enable, + status="planned" if dry_run else "applied", + detail=( + "metadata-defined index would be dropped" + if not enable and dry_run + else "metadata-defined index dropped" + if not enable + else "metadata-defined index would be created" + if dry_run + else "metadata-defined index created" + ), ) + ) - if enable: - cluster_index_name = _cluster_target_name(table) - if cluster_index_name is None: - continue - - cluster_columns = _cluster_column_names(table, cluster_index_name) - if not clustering_supported: - results.append( - IndexManagementResult( - operation="cluster", - table_name=table.table_name, - category=table.category, - model_name=table.model_name, - model_module=table.model_module, - index_name=cluster_index_name, - column_names=cluster_columns, - unique=False, - clustered=True, - enable=enable, - status="skipped", - detail=( - "cluster metadata present but unsupported on " - f"{backend.name}" - ), - ) - ) - continue - - if not dry_run: - backend.cluster_table(connection, table.table_name, cluster_index_name, db_schema) + if enable: + cluster_index_name = _cluster_target_name(table) + if cluster_index_name is None: + continue + cluster_columns = _cluster_column_names(table, cluster_index_name) + if not clustering_supported: results.append( IndexManagementResult( operation="cluster", @@ -260,14 +240,39 @@ def manage_indexes( unique=False, clustered=True, enable=enable, - status="planned" if dry_run else "applied", + status="skipped", detail=( - "table would be clustered using ORM-defined metadata" - if dry_run - else "table clustered using ORM-defined metadata" + "cluster metadata present but unsupported on " + f"{backend.name}" ), ) ) + continue + + if not dry_run: + with engine.begin() as connection: + backend.cluster_table(connection, table.table_name, cluster_index_name, db_schema) + + results.append( + IndexManagementResult( + operation="cluster", + 
table_name=table.table_name, + category=table.category, + model_name=table.model_name, + model_module=table.model_module, + index_name=cluster_index_name, + column_names=cluster_columns, + unique=False, + clustered=True, + enable=enable, + status="planned" if dry_run else "applied", + detail=( + "table would be clustered using ORM-defined metadata" + if dry_run + else "table clustered using ORM-defined metadata" + ), + ) + ) return results diff --git a/omop_alchemy/maintenance/cli_vocab.py b/omop_alchemy/maintenance/cli_vocab.py index c4f594e..8418806 100644 --- a/omop_alchemy/maintenance/cli_vocab.py +++ b/omop_alchemy/maintenance/cli_vocab.py @@ -8,6 +8,7 @@ from pathlib import Path from typing import Literal, TypeAlias, cast +from enum import StrEnum import sqlalchemy as sa import sqlalchemy.orm as so from sqlalchemy.exc import OperationalError @@ -48,9 +49,6 @@ VocabularyModel: TypeAlias = type[CSVTableProtocol] VocabularyLoadProgressCallback: TypeAlias = Callable[["VocabularyLoadProgress"], None] -LOAD_PROGRESS_FRACTION = 0.85 -COMMIT_PROGRESS_FRACTION = 0.15 - @dataclass(frozen=True) class VocabularyLoadResult: @@ -77,30 +75,30 @@ class VocabularyLoadReport: results: tuple[VocabularyLoadResult, ...] +class VocabularyLoadPhase(StrEnum): + START = "start" + DISABLING_FK = "disabling_fk" + DISABLING_INDEXES = "disabling_indexes" + LOADING = "loading" + DONE = "done" + REBUILDING_INDEXES = "rebuilding_indexes" + REBUILDING_FK = "rebuilding_fk" + COMPLETE = "complete" + @dataclass(frozen=True) class VocabularyLoadProgress: - """Progress event emitted after each table load phase. Drives the CLI progress bar.""" + """Progress event emitted after each table load. 
Drives the CLI progress bar.""" - phase: str + phase: VocabularyLoadPhase table_name: str | None table_index: int table_count: int - completed_units: float - total_units: float percent: float detail: str rows_this_table: int | None = None rows_cumulative: int = 0 -@dataclass(frozen=True) -class _VocabularyLoadItem: - model: VocabularyModel - csv_path: Path - required: bool - size_bytes: int - - REQUIRED_VOCAB_MODELS: tuple[VocabularyModel, ...] = cast( tuple[VocabularyModel, ...], ( @@ -141,41 +139,6 @@ def _is_retryable_error(exc: Exception) -> bool: return isinstance(exc, OperationalError) and any(s in msg for s in _RETRYABLE_FRAGMENTS) -def _emit_progress( - progress_callback: VocabularyLoadProgressCallback | None, - *, - phase: str, - table_name: str | None, - table_index: int, - table_count: int, - completed_units: float, - total_units: float, - detail: str, - rows_this_table: int | None = None, - rows_cumulative: int = 0, -) -> None: - """Fire the caller-supplied progress callback with a normalised VocabularyLoadProgress snapshot. No-ops if None.""" - if progress_callback is None: - return - - bounded_total = total_units if total_units > 0 else 1.0 - bounded_completed = min(max(completed_units, 0.0), bounded_total) - progress_callback( - VocabularyLoadProgress( - phase=phase, - table_name=table_name, - table_index=table_index, - table_count=table_count, - completed_units=bounded_completed, - total_units=bounded_total, - percent=(bounded_completed / bounded_total) * 100.0, - detail=detail, - rows_this_table=rows_this_table, - rows_cumulative=rows_cumulative, - ) - ) - - def _is_missing_staging_table_error( exc: Exception, *, @@ -200,38 +163,27 @@ def _load_vocab_model_csv( quote_mode: str = "auto", chunksize: int | None = None, index_strategy: str = "auto", + merge_batch_size: int = 1_000_000, ) -> int: """Call model.load_csv. 
If the staging table is absent, create it and retry once.""" load_kwargs: dict[str, object] = { "merge_strategy": merge_strategy, "quote_mode": quote_mode, "index_strategy": index_strategy, + "merge_batch_size": merge_batch_size, } if chunksize is not None: load_kwargs["chunksize"] = chunksize try: - return int( - model.load_csv( - session, - csv_path, - **load_kwargs, # type: ignore[arg-type] - ) - ) + return int(model.load_csv(session, csv_path, **load_kwargs)) # type: ignore[arg-type] except Exception as exc: if not _is_missing_staging_table_error(exc, model=model): raise session.rollback() model.create_staging_table(session) - return int( - model.load_csv( - session, - csv_path, - **load_kwargs, # type: ignore[arg-type] - ) - ) - + return int(model.load_csv(session, csv_path, **load_kwargs)) # type: ignore[arg-type] def _find_vocab_csv_path(source_path: Path, table_name: str) -> Path | None: @@ -314,6 +266,7 @@ def load_vocab_source( merge_strategy: MergeStrategy = "replace", chunksize: int | None = 100_000, bulk_mode: bool = True, + merge_batch_size: int = 1_000_000, progress_callback: VocabularyLoadProgressCallback | None = None, ) -> VocabularyLoadReport: """Load all Athena vocabulary CSVs from source_path. With bulk_mode, indexes and FK triggers are toggled around the load.""" @@ -335,55 +288,27 @@ def load_vocab_source( # "connection in recovery mode" failures on subsequent tables after a heavy load. 
load_engine = sa.create_engine(engine.url, poolclass=NullPool) + all_models = REQUIRED_VOCAB_MODELS + OPTIONAL_VOCAB_MODELS + table_count = sum( + 1 for m in all_models + if _find_vocab_csv_path(resolved_source_path, m.__tablename__) is not None + ) + results: list[VocabularyLoadResult] = [] created_table_count = 0 sequence_reset_count = 0 + rows_cumulative = 0 + table_index = 0 - load_items: list[_VocabularyLoadItem] = [] - missing_optional_results: list[VocabularyLoadResult] = [] - for model in REQUIRED_VOCAB_MODELS + OPTIONAL_VOCAB_MODELS: - csv_path = _find_vocab_csv_path( - resolved_source_path, - model.__tablename__, - ) - required = model in REQUIRED_VOCAB_MODELS - if csv_path is None: - missing_optional_results.append( - VocabularyLoadResult( - table_name=model.__tablename__, - status="skipped", - row_count=None, - csv_path=None, - required=required, - detail="optional Athena CSV not found; table skipped", - ) - ) - continue - - file_size = csv_path.stat().st_size - load_items.append( - _VocabularyLoadItem( - model=model, - csv_path=csv_path, - required=required, - size_bytes=file_size if file_size > 0 else 1, - ) - ) - - total_units = float(sum(item.size_bytes for item in load_items) or 1) - completed_units = 0.0 - table_count = len(load_items) - - _emit_progress( - progress_callback, - phase="start", - table_name=None, - table_index=0, - table_count=table_count, - completed_units=completed_units, - total_units=total_units, - detail=f"Preparing Athena vocabulary load for {table_count} CSV file(s)", - ) + if progress_callback is not None: + progress_callback(VocabularyLoadProgress( + phase=VocabularyLoadPhase.START, + table_name=None, + table_index=0, + table_count=table_count, + percent=0.0, + detail=f"Preparing Athena vocabulary load for {table_count} CSV file(s)", + )) _use_bulk_mode = ( bulk_mode @@ -391,6 +316,15 @@ def load_vocab_source( and engine.dialect.name == SupportedDialect.POSTGRESQL ) if _use_bulk_mode: + if progress_callback is not None: + 
progress_callback(VocabularyLoadProgress( + phase=VocabularyLoadPhase.DISABLING_FK, + table_name=None, + table_index=0, + table_count=table_count, + percent=0.0, + detail="Disabling FK trigger checks for bulk load...", + )) manage_foreign_key_triggers( engine, enable=False, @@ -398,6 +332,15 @@ def load_vocab_source( db_schema=db_schema, dry_run=False, ) + if progress_callback is not None: + progress_callback(VocabularyLoadProgress( + phase=VocabularyLoadPhase.DISABLING_INDEXES, + table_name=None, + table_index=0, + table_count=table_count, + percent=0.0, + detail="Dropping indexes for bulk load...", + )) manage_indexes( engine, enable=False, @@ -412,147 +355,123 @@ def load_vocab_source( created_table_count = _create_missing_vocabulary_tables(pre_conn, db_schema=db_schema) pre_conn.commit() - rows_cumulative = 0 - - for table_index, item in enumerate(load_items, start=1): - model = item.model - csv_path = item.csv_path - required = item.required + for model in all_models: + csv_path = _find_vocab_csv_path(resolved_source_path, model.__tablename__) + required = model in REQUIRED_VOCAB_MODELS - if dry_run: - _emit_progress( - progress_callback, - phase="plan", + if csv_path is None: + results.append(VocabularyLoadResult( table_name=model.__tablename__, - table_index=table_index, - table_count=table_count, - completed_units=completed_units, - total_units=total_units, - detail=f"Planning {model.__tablename__} ({table_index}/{table_count})", - ) - completed_units += item.size_bytes - _emit_progress( - progress_callback, - phase="planned", + status="skipped", + row_count=None, + csv_path=None, + required=required, + detail="optional Athena CSV not found; table skipped", + )) + continue + + table_index += 1 + + if progress_callback is not None: + progress_callback(VocabularyLoadProgress( + phase=VocabularyLoadPhase.LOADING, table_name=model.__tablename__, table_index=table_index, table_count=table_count, - completed_units=completed_units, - total_units=total_units, - 
detail=f"Planned {model.__tablename__} ({table_index}/{table_count})", - ) - results.append( - VocabularyLoadResult( - table_name=model.__tablename__, - status="planned", - row_count=None, - csv_path=str(csv_path), - required=required, - detail="Athena CSV would be loaded via staged ORM CSV loader using tab-delimited input and auto-detected quote mode", - ) - ) - else: - loader_kwargs: dict[str, object] = { - "model": model, - "csv_path": csv_path, - "merge_strategy": merge_strategy, - "quote_mode": "auto", - "index_strategy": "keep" if _use_bulk_mode else "auto", - } - if chunksize is not None: - loader_kwargs["chunksize"] = chunksize - - _emit_progress( - progress_callback, - phase="load", + percent=(table_index - 1) / table_count * 100, + detail=f"Loading {model.__tablename__} ({table_index}/{table_count})...", + rows_cumulative=rows_cumulative, + )) + + if dry_run: + results.append(VocabularyLoadResult( table_name=model.__tablename__, - table_index=table_index, - table_count=table_count, - completed_units=completed_units, - total_units=total_units, - detail=f"Loading {model.__tablename__} ({table_index}/{table_count})", - ) + status="planned", + row_count=None, + csv_path=str(csv_path), + required=required, + detail="Athena CSV would be loaded via staged ORM CSV loader using tab-delimited input and auto-detected quote mode", + )) + continue - recovery_hint = ( - " Indexes and FK triggers may still be disabled; run " - "'omop-alchemy indexes enable --vocab' and 'omop-alchemy foreign-keys enable' to recover." 
- if _use_bulk_mode else "" - ) - row_count = 0 - for attempt in range(3): - try: - with so.Session(load_engine) as session: - _configure_loader_connection(session.connection(), db_schema=db_schema) - row_count = _load_vocab_model_csv( - session, - **loader_kwargs, # type: ignore[arg-type] - ) + recovery_hint = ( + " Indexes and FK triggers may still be disabled; run " + "'omop-alchemy indexes enable --vocab' and 'omop-alchemy foreign-keys enable' to recover." + if _use_bulk_mode else "" + ) + + row_count = 0 + _prev_attempt_was_crash = False + for attempt in range(3): + try: + with so.Session(load_engine) as session: + _configure_loader_connection(session.connection(), db_schema=db_schema) + if _prev_attempt_was_crash and merge_strategy == "insert_if_empty": + # A DB crash left partial data committed in this table. + # Truncate so insert_if_empty can retry cleanly. Safe because + # bulk_mode's manage_foreign_key_triggers ran ALTER TABLE ... + # DISABLE TRIGGER ALL on all vocabulary tables, and that state + # persists across crash+recovery in pg_trigger.tgenabled. + session.execute(sa.text(f'TRUNCATE TABLE "{model.__tablename__}"')) session.commit() - break - except Exception as exc: - if attempt < 2 and _is_retryable_error(exc): - time.sleep(10) - continue - raise VocabularyLoadError( - "Athena vocabulary load failed for " - f"table `{model.__tablename__}` from `{csv_path}` " - f"using merge strategy `{merge_strategy}` on backend `{engine.dialect.name}`. 
" - f"Underlying error: {exc.__class__.__name__}: {exc}" - + recovery_hint - ) from exc - - rows_cumulative += row_count - completed_units += item.size_bytes * LOAD_PROGRESS_FRACTION - _emit_progress( - progress_callback, - phase="load-complete", + row_count = _load_vocab_model_csv( + session, + model=model, + csv_path=csv_path, + merge_strategy=merge_strategy, + quote_mode="auto", + index_strategy="keep" if _use_bulk_mode else "auto", + chunksize=chunksize, + merge_batch_size=merge_batch_size, + ) + session.commit() + break + except Exception as exc: + if attempt < 2 and _is_retryable_error(exc): + _prev_attempt_was_crash = True + time.sleep(10) + continue + raise VocabularyLoadError( + "Athena vocabulary load failed for " + f"table `{model.__tablename__}` from `{csv_path}` " + f"using merge strategy `{merge_strategy}` on backend `{engine.dialect.name}`. " + f"Underlying error: {exc.__class__.__name__}: {exc}" + + recovery_hint + ) from exc + + rows_cumulative += row_count + results.append(VocabularyLoadResult( + table_name=model.__tablename__, + status="loaded", + row_count=row_count, + csv_path=str(csv_path), + required=required, + detail="Athena CSV loaded via staged ORM CSV loader using tab-delimited input and auto-detected quote mode", + )) + + if progress_callback is not None: + progress_callback(VocabularyLoadProgress( + phase=VocabularyLoadPhase.DONE, table_name=model.__tablename__, table_index=table_index, table_count=table_count, - completed_units=completed_units, - total_units=total_units, - detail=f"Loaded {model.__tablename__}; committing ({table_index}/{table_count})", + percent=table_index / table_count * 100, + detail=f"Loaded {model.__tablename__} ({table_index}/{table_count})", rows_this_table=row_count, rows_cumulative=rows_cumulative, - ) - completed_units += item.size_bytes * COMMIT_PROGRESS_FRACTION - _emit_progress( - progress_callback, - phase="commit-complete", - table_name=model.__tablename__, - table_index=table_index, - 
table_count=table_count, - completed_units=completed_units, - total_units=total_units, - detail=f"Committed {model.__tablename__} ({table_index}/{table_count})", - rows_this_table=row_count, - rows_cumulative=rows_cumulative, - ) - - results.append( - VocabularyLoadResult( - table_name=model.__tablename__, - status="loaded", - row_count=row_count, - csv_path=str(csv_path), - required=required, - detail="Athena CSV loaded via staged ORM CSV loader using tab-delimited input and auto-detected quote mode", - ) - ) - - _emit_progress( - progress_callback, - phase="complete", - table_name=None, - table_index=table_count, - table_count=table_count, - completed_units=total_units, - total_units=total_units, - detail="Athena vocabulary load complete", - rows_cumulative=rows_cumulative, - ) + )) if _use_bulk_mode: + if progress_callback is not None: + progress_callback(VocabularyLoadProgress( + phase=VocabularyLoadPhase.REBUILDING_INDEXES, + table_name=None, + table_index=table_count, + table_count=table_count, + percent=100.0, + detail="Rebuilding indexes on vocabulary tables (may take 15+ min)...", + rows_cumulative=rows_cumulative, + )) manage_indexes( engine, enable=True, @@ -560,6 +479,16 @@ def load_vocab_source( db_schema=db_schema, dry_run=False, ) + if progress_callback is not None: + progress_callback(VocabularyLoadProgress( + phase=VocabularyLoadPhase.REBUILDING_FK, + table_name=None, + table_index=table_count, + table_count=table_count, + percent=100.0, + detail="Re-enabling FK trigger checks...", + rows_cumulative=rows_cumulative, + )) manage_foreign_key_triggers( engine, enable=True, @@ -568,7 +497,16 @@ def load_vocab_source( dry_run=False, ) - results.extend(missing_optional_results) + if progress_callback is not None: + progress_callback(VocabularyLoadProgress( + phase=VocabularyLoadPhase.COMPLETE, + table_name=None, + table_index=table_count, + table_count=table_count, + percent=100.0, + detail="Athena vocabulary load complete", + 
rows_cumulative=rows_cumulative, + )) if not dry_run and engine.dialect.name == SupportedDialect.POSTGRESQL: sequence_results = reset_model_sequences( @@ -630,6 +568,14 @@ def load_vocab_source_command( "If the load fails mid-way, run `indexes enable --vocab` and `foreign-keys enable` to recover." ), ), + merge_batch_size: int = typer.Option( + 1_000_000, + help=( + "Maximum rows per INSERT/DELETE transaction during the staging-to-target merge. " + "Lower values reduce peak WAL pressure (safer on memory-constrained systems). " + "Raise to 5 000 000+ on machines with ample RAM for faster throughput." + ), + ), dry_run: bool = False, ) -> None: """Load all Athena vocabulary CSVs from the configured source path, optionally toggling indexes and FK triggers for speed.""" @@ -659,7 +605,7 @@ def load_vocab_source_command( def _update_progress(event: VocabularyLoadProgress) -> None: progress.update(task_id, completed=event.percent, description=event.detail) - if event.phase == "commit-complete" and event.table_name is not None: + if event.phase == VocabularyLoadPhase.DONE and event.table_name is not None: completed_tables.append(event.table_name) row_info = ( f": [dim]{event.rows_this_table:,} rows[/dim]" @@ -678,6 +624,7 @@ def _update_progress(event: VocabularyLoadProgress) -> None: merge_strategy=merge_strategy, chunksize=None if chunksize == 0 else chunksize, bulk_mode=bulk_mode, + merge_batch_size=merge_batch_size, progress_callback=_update_progress, ) progress.update(task_id, completed=100.0, description="Athena vocabulary load complete") diff --git a/tests/test_load_vocab_source.py b/tests/test_load_vocab_source.py index b3115be..4e44175 100644 --- a/tests/test_load_vocab_source.py +++ b/tests/test_load_vocab_source.py @@ -71,6 +71,7 @@ def fake_load_vocab_model_csv( quote_mode="auto", chunksize=None, index_strategy="auto", + merge_batch_size: int = 1_000_000, ) -> int: loaded_tables.append((model.__tablename__, merge_strategy, quote_mode, csv_path)) return 1 @@ 
-173,6 +174,7 @@ def fake_load_vocab_source( merge_strategy: MergeStrategy = "replace", chunksize: int | None = None, bulk_mode: bool = True, + merge_batch_size: int = 1_000_000, progress_callback=None, ): from omop_alchemy.maintenance.cli_vocab import VocabularyLoadReport, VocabularyLoadResult @@ -269,7 +271,7 @@ def create_staging_table(session): calls: dict[str, object] = {} - def fake_load_csv(session, path, *, merge_strategy, quote_mode, index_strategy="auto"): + def fake_load_csv(session, path, *, merge_strategy, quote_mode, index_strategy="auto", merge_batch_size: int = 1_000_000): calls["merge_strategy"] = merge_strategy calls["quote_mode"] = quote_mode calls["path"] = path @@ -314,6 +316,7 @@ def fake_load_vocab_model_csv( quote_mode="auto", chunksize=None, index_strategy="auto", + merge_batch_size: int = 1_000_000, ) -> int: loaded_order.append(model.__tablename__) return 1 @@ -348,6 +351,7 @@ def fake_load_vocab_model_csv( quote_mode="auto", chunksize=None, index_strategy="auto", + merge_batch_size: int = 1_000_000, ) -> int: return 1 @@ -374,7 +378,7 @@ def test_load_vocab_source_wraps_failed_table_load(monkeypatch, tmp_path): engine = sa.create_engine(f"sqlite:///{tmp_path / 'load_vocab_source_error.db'}", future=True) source_path = _build_required_athena_source(tmp_path) - def fake_load_vocab_model_csv(session, *, model, csv_path, merge_strategy, quote_mode="auto", chunksize=None, index_strategy="auto"): + def fake_load_vocab_model_csv(session, *, model, csv_path, merge_strategy, quote_mode="auto", chunksize=None, index_strategy="auto", merge_batch_size: int = 1_000_000): if model.__tablename__ == "domain": raise sa.exc.ProgrammingError( "COPY domain FROM STDIN", @@ -421,7 +425,7 @@ def create_staging_table(session): calls = {"load_csv": 0, "create_staging_table": 0} - def fake_load_csv(session, path, *, merge_strategy, quote_mode, index_strategy="auto"): + def fake_load_csv(session, path, *, merge_strategy, quote_mode, index_strategy="auto", 
merge_batch_size: int = 1_000_000): calls["load_csv"] += 1 if calls["load_csv"] == 1: raise sa.exc.ProgrammingError( @@ -524,6 +528,7 @@ def fake_load_vocab_model_csv( quote_mode="auto", chunksize=None, index_strategy="auto", + merge_batch_size: int = 1_000_000, ) -> int: received_quote_modes.append(quote_mode) return 1 From 624c936e5bba4b6a874c2aa53c1fcdc26861aa59 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Wed, 27 May 2026 22:41:44 +0000 Subject: [PATCH 12/25] First pass inclusion of oa-configurator --- docs/getting-started/configuration.md | 54 +++++ docs/getting-started/installation.md | 70 +----- mkdocs.yml | 1 + omop_alchemy/__init__.py | 11 +- omop_alchemy/config.py | 32 +-- omop_alchemy/db.py | 208 +----------------- omop_alchemy/logger_config.py | 27 +-- omop_alchemy/maintenance/__init__.py | 6 - omop_alchemy/maintenance/_cli_utils.py | 109 ++++----- omop_alchemy/maintenance/cli.py | 13 +- omop_alchemy/maintenance/cli_config.py | 66 +----- omop_alchemy/maintenance/cli_foreign_keys.py | 25 +-- omop_alchemy/maintenance/cli_schema.py | 19 +- omop_alchemy/maintenance/cli_schema_doctor.py | 20 +- omop_alchemy/maintenance/cli_schema_info.py | 85 +++---- omop_alchemy/maintenance/cli_vocab.py | 19 +- omop_alchemy/maintenance/ui.py | 27 +-- pyproject.toml | 5 +- tests/test_cli_config.py | 131 +---------- tests/test_foreign_keys.py | 98 +++------ tests/test_fulltext.py | 25 ++- tests/test_indexes.py | 41 ++-- tests/test_load_vocab_source.py | 102 ++++----- tests/test_truncate_tables.py | 51 ++--- 24 files changed, 352 insertions(+), 893 deletions(-) create mode 100644 docs/getting-started/configuration.md diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md new file mode 100644 index 0000000..2843030 --- /dev/null +++ b/docs/getting-started/configuration.md @@ -0,0 +1,54 @@ +# Configuration + +OMOP_Alchemy reads all database connection and schema settings from 
+[oa_configurator](https://github.com/AustralianCancerDataNetwork/oa-configurator) — no +`.env` files or `ENGINE` environment variables needed. + +## Minimal config + +Create `~/.config/omop/config.toml` with at least one connection and one resource: + +```toml +[connections.cdm] +dialect = "postgresql+psycopg2" +host = "localhost" +port = 5432 +user = "omop" +password = "changeme" +database = "omop_cdm" + +[resources.default] +primary_db = "cdm" +cdm_schema = "omop" +``` + +Run `omop-config init` to create this file interactively, or write it manually. + +## Vocabulary loading + +If you plan to load OMOP vocabulary from Athena CSV files, add the path to the package +extras section: + +```toml +[tools.omop_alchemy.extra] +athena_source_path = "/path/to/athena/csvs" +``` + +Or set it interactively: + +```bash +omop-config configure omop_alchemy +``` + +## Verify + +```bash +omop-alchemy info +``` + +This prints the resolved config file path, connection details, and schema. A successful +run confirms that OMOP_Alchemy can reach your database. + +## Further reading + +- [oa_configurator quickstart](https://AustralianCancerDataNetwork.github.io/oa-configurator/) — full config reference, multiple profiles, env var export diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md index d8b7a47..48730f8 100644 --- a/docs/getting-started/installation.md +++ b/docs/getting-started/installation.md @@ -33,75 +33,9 @@ with so.Session(engine) as session: session.commit() ``` -## Connecting with OMOP_Alchemy-specific helpers +**Database configuration** is handled by oa_configurator. See [Configuration](configuration.md) for how to set up `~/.config/omop/config.toml`. -### Environment-based config - -```python -def load_environment(dotenv: str = '') -> None: -``` - -Loads environment variables from a .env file into the process environment. 
- -* If a specific .env path is provided, it is loaded first -* Otherwise, a default .env file is searched for - -```python -load_environment() - -load_environment("/etc/myapp/.env") -``` - -### Database engine resolution - - -```python -def get_engine_name(schema: str | None = None) -> str: -``` -Resolves a SQLAlchemy database engine URI from environment variables. - -If a `schema` is provided, resolution proceeds as follows: - -1. `ENGINE_` -2. `ENGINE` as fallback (if only one) - -Single DB .env example: - -``` -ENGINE=postgresql+psycopg://user:password@localhost:5432/omop - -engine_url = get_engine_name() -``` - -Multi-schema routing - -``` -ENGINE_CDM=postgresql+psycopg://user:password@localhost:5432/cdm -ENGINE_SOURCE=postgresql+psycopg://user:password@localhost:5432/source -ENGINE=postgresql+psycopg://user:password@localhost:5432/default - -cdm_engine = get_engine_name("cdm") -source_engine = get_engine_name("source") -default_engine = get_engine_name() -``` - -### Recommended patterns - -```python -from orm_loader.helpers import configure_logging, bootstrap -from omop_alchemy import get_engine_name, load_environment -import sqlalchemy as sa - -configure_logging() -load_environment() - -engine_string = get_engine_name('cdm') -engine = sa.create_engine(engine_string, future=True, echo=False) - -bootstrap(engine, create=True) -``` - -### Session & Engine Management for Bulk Operations +## Session & Engine Management for Bulk Operations ORM-loader module provides context managers for safely relaxing database constraints during high-volume operations such as CSV loads, staging-table merges, and backfills. 
diff --git a/mkdocs.yml b/mkdocs.yml index ee9ce8c..c4ce4b7 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -53,6 +53,7 @@ nav: - Getting Started: - Overview: getting-started/index.md - Installation: getting-started/installation.md + - Configuration: getting-started/configuration.md - Quickstart: getting-started/quickstart.md - Maintenance CLI: getting-started/maintenance.md diff --git a/omop_alchemy/__init__.py b/omop_alchemy/__init__.py index b25b5a7..fd3bad5 100644 --- a/omop_alchemy/__init__.py +++ b/omop_alchemy/__init__.py @@ -1,11 +1,12 @@ -from .config import load_environment, TEST_PATH, ROOT_PATH -from .db import get_engine_name, create_engine_with_dependencies +from .config import ROOT_PATH, TEST_PATH, OmopAlchemyConfig, get_resolver, get_config +from .db import create_engine_with_dependencies __all__ = [ + "OmopAlchemyConfig", "create_engine_with_dependencies", - "load_environment", - "get_engine_name", - "TEST_PATH", + "get_config", + "get_resolver", "ROOT_PATH", + "TEST_PATH", ] diff --git a/omop_alchemy/config.py b/omop_alchemy/config.py index 95149dd..d8946a0 100644 --- a/omop_alchemy/config.py +++ b/omop_alchemy/config.py @@ -1,21 +1,27 @@ -from dotenv import load_dotenv +from __future__ import annotations + from pathlib import Path +from typing import ClassVar -from orm_loader.helpers import get_logger +from pydantic import Field +from oa_configurator import PackageConfigBase, Resolver, load_stack_config ROOT_PATH = Path(__file__).parent TEST_PATH = Path(__file__).parent.parent / "tests" -logger = get_logger(__name__) -def load_environment(dotenv: str = '') -> None: - """ - Explicitly load environment variables for the application. - Safe: does not log sensitive values. - """ - # Dotenv values should take precedence over inherited shell env vars. 
- if load_dotenv(dotenv, override=True) or load_dotenv(override=True): - logger.info("Environment variables loaded from .env file") - else: - logger.debug("No .env file loaded") +class OmopAlchemyConfig(PackageConfigBase): + tool_name: ClassVar[str] = "omop_alchemy" + + athena_source_path: str | None = Field( + default=None, + description="Path to Athena vocabulary CSV files.", + ) + + +def get_resolver() -> Resolver: + return Resolver(load_stack_config()) + +def get_config() -> OmopAlchemyConfig: + return OmopAlchemyConfig.from_stack(load_stack_config()) diff --git a/omop_alchemy/db.py b/omop_alchemy/db.py index da6517e..dc97d1e 100644 --- a/omop_alchemy/db.py +++ b/omop_alchemy/db.py @@ -1,200 +1,9 @@ from __future__ import annotations -from dataclasses import dataclass, asdict -from typing import Optional + from collections.abc import Mapping -import json -import os -from pathlib import Path -import tomllib -from sqlalchemy.engine import Engine import sqlalchemy as sa - -from .config import load_environment -import logging -logger = logging.getLogger(__name__) - -DEFAULTS_FILENAME = ".omop-maint.toml" -DEFAULTS_ENV_VAR = "OMOP_MAINT_DEFAULTS_FILE" -DEFAULTS_SECTION = "defaults" -LEGACY_DEFAULTS_SECTION = "connection" -PROJECT_MARKER = "pyproject.toml" - - -def defaults_path() -> Path: - configured_path = os.getenv(DEFAULTS_ENV_VAR) - if configured_path: - return Path(configured_path).expanduser().resolve() - - current = Path.cwd().resolve() - for directory in (current, *current.parents): - if (directory / PROJECT_MARKER).exists(): - return (directory / DEFAULTS_FILENAME).resolve() - - return (current / DEFAULTS_FILENAME).resolve() - -def _clean(value: object) -> str | None: - if value is None: - return None - value_str = str(value).strip() - return value_str or None - -def _relative_path_for_storage(config_path: Path, value: str | None) -> str | None: - cleaned = _clean(value) - if cleaned is None: - return None - - path_value = Path(cleaned).expanduser() - if 
not path_value.is_absolute(): - path_value = (Path.cwd() / path_value).resolve() - - return path_value.relative_to(config_path.parent).as_posix() - -def _resolve_relative_path(config_path: Path, value: object) -> str | None: - cleaned = _clean(value) - if cleaned is None: - return None - - path_value = Path(cleaned).expanduser() - if path_value.is_absolute(): - return str(path_value) - - return str((config_path.parent / path_value).resolve()) - -@dataclass(frozen=True) -class ConnectionDefaults: - """ - - Returns: - _type_: _description_ - """ - dotenv: Optional[str] = None - engine_schema: Optional[str] = None - db_schema: Optional[str] = None - athena_source: Optional[str] = None - logging: Optional[str] = None - - def to_dict(self) -> dict[str, Optional[str]]: - return asdict(self) - - def save(self) -> Path: - path = defaults_path() - path.parent.mkdir(parents=True, exist_ok=True) - - lines = [f"[{DEFAULTS_SECTION}]"] - dotenv = _relative_path_for_storage(path, self.dotenv) - if dotenv is not None: - lines.append(f"dotenv = {json.dumps(dotenv)}") - if self.engine_schema is not None: - lines.append(f"engine_schema = {json.dumps(self.engine_schema)}") - if self.db_schema is not None: - lines.append(f"db_schema = {json.dumps(self.db_schema)}") - athena_source = _relative_path_for_storage(path, self.athena_source) - if athena_source is not None: - lines.append(f"athena_source = {json.dumps(athena_source)}") - if self.logging is not None: - lines.append(f"logging = {json.dumps(self.logging)}") - lines.append("") - path.write_text("\n".join(lines), encoding="utf-8") - return path - - @classmethod - def load(cls) -> ConnectionDefaults: - path = defaults_path() - if not path.exists(): - return ConnectionDefaults() - - data = tomllib.loads(path.read_text(encoding="utf-8")) - defaults = data.get(DEFAULTS_SECTION, {}) - connection = data.get(LEGACY_DEFAULTS_SECTION, {}) - - if not isinstance(defaults, dict): - defaults = {} - if not isinstance(connection, dict): - 
connection = {} - - return ConnectionDefaults( - dotenv=_resolve_relative_path( - path, - defaults.get("dotenv", connection.get("dotenv")), - ), - engine_schema=_clean(defaults.get("engine_schema", connection.get("engine_schema"))), - db_schema=_clean(defaults.get("db_schema", connection.get("db_schema"))), - athena_source=_resolve_relative_path( - path, - defaults.get("athena_source", connection.get("athena_source")), - ), - logging=_clean(defaults.get("logging", connection.get("logging"))), - ) - - @classmethod - def update_and_save_defaults( - cls, - *, - dotenv: Optional[str] = None, - engine_schema: Optional[str] = None, - db_schema: Optional[str] = None, - athena_source: Optional[str] = None, - logging: Optional[str] = None, - ) -> tuple[ConnectionDefaults, Path]: - """Loads current defaults, allows update of any subset of values, and returns updated defaults after it has been saved.""" - current = cls.load() - updated = ConnectionDefaults( - dotenv=dotenv if dotenv is not None else current.dotenv, - engine_schema=engine_schema if engine_schema is not None else current.engine_schema, - db_schema=db_schema if db_schema is not None else current.db_schema, - athena_source=athena_source if athena_source is not None else current.athena_source, - logging=logging if logging is not None else current.logging, - ) - path = updated.save() - return updated, path - - -def resolve_connection( - *, - dotenv: str | None, - engine_schema: str | None, - db_schema: str | None, - athena_source: str | None = None, -) -> ConnectionDefaults: - saved = ConnectionDefaults.load() - return ConnectionDefaults( - dotenv=dotenv if dotenv is not None else saved.dotenv, - engine_schema=engine_schema if engine_schema is not None else saved.engine_schema, - db_schema=db_schema if db_schema is not None else saved.db_schema, - athena_source=athena_source if athena_source is not None else saved.athena_source, - ) - - -def get_engine_name(schema: str | None = None) -> str: - """ - Resolve 
database engine URI. - - Resolution order: - 1. ENGINE_ (if schema provided) - 2. ENGINE (fallback / legacy) - - Raises if nothing is configured. - """ - if schema: - key = f"ENGINE_{schema.upper()}" - engine = os.getenv(key) - if engine: - logger.info("Database engine configured for schema '%s'", schema) - return engine - else: - logger.debug( - "No schema-specific engine found for '%s' (%s)", - schema, - key, - ) - - engine = os.getenv("ENGINE") - if engine: - logger.info("Default database engine configured") - return engine - - raise RuntimeError(f"No database engine configured for {'schema ' + schema if schema else 'default'}. ") +from sqlalchemy.engine import Engine def _missing_driver_message( @@ -222,18 +31,12 @@ def _missing_driver_message( "`pip install -e '.[postgres]'`." ) -def build_engine(*, dotenv: str | None, engine_schema: str | None) -> Engine: - load_environment(dotenv or "") - return create_engine_with_dependencies(get_engine_name(engine_schema), future=True) - def create_engine_with_dependencies( engine_name: str, **engine_kwargs, ) -> sa.Engine: - """ - Create a SQLAlchemy engine with clearer dependency errors for postgres. - """ + """Create a SQLAlchemy engine with clearer dependency errors for postgres.""" try: return sa.create_engine(engine_name, **engine_kwargs) except ModuleNotFoundError as exc: @@ -242,12 +45,11 @@ def create_engine_with_dependencies( raise RuntimeError(message) from exc raise -# from orm-loader 0.4.0 onwards, implicit psycopg2 dependency has been removed in favor of explicit driver modules. + +# from orm-loader 0.4.0 onwards, implicit psycopg2 dependency has been removed in favor of explicit driver modules. # This mapping is used to provide clearer error messages when a required driver is missing. 
POSTGRES_DRIVER_MODULES: Mapping[str, str] = { "postgresql": "psycopg", # bare URL aliased to psycopg "postgresql+psycopg": "psycopg", "postgresql+psycopg2": "psycopg2", # retained so missing-driver message is clear } - - diff --git a/omop_alchemy/logger_config.py b/omop_alchemy/logger_config.py index 64e3375..36870f7 100644 --- a/omop_alchemy/logger_config.py +++ b/omop_alchemy/logger_config.py @@ -1,26 +1,5 @@ -import functools -import logging +from oa_configurator import configure_logging as _configure_logging -from .db import ConnectionDefaults, defaults_path -@functools.lru_cache(maxsize=None) -def configure_logging() -> None: - mode = (ConnectionDefaults.load().logging or "file").strip().lower() - if mode not in {"file", "console", "off"}: - mode = "file" - if mode == "off": - return - - formatter = logging.Formatter("%(asctime)s | %(levelname)-8s | %(name)s | %(message)s") - if mode == "file": - log_path = defaults_path().parent / "logging" / "omop-alchemy.log" - log_path.parent.mkdir(parents=True, exist_ok=True) - handler: logging.Handler = logging.FileHandler(log_path, encoding="utf-8") - else: - handler = logging.StreamHandler() - handler.setFormatter(formatter) - - root_logger = logging.getLogger() - root_logger.addHandler(handler) - if root_logger.level in {logging.NOTSET, logging.WARNING, logging.ERROR, logging.CRITICAL}: - root_logger.setLevel(logging.INFO) \ No newline at end of file +def configure_logging(verbosity: int = 0) -> None: + _configure_logging(verbosity=verbosity, extra_namespaces=["omop_alchemy"]) diff --git a/omop_alchemy/maintenance/__init__.py b/omop_alchemy/maintenance/__init__.py index a00068f..8d3d442 100644 --- a/omop_alchemy/maintenance/__init__.py +++ b/omop_alchemy/maintenance/__init__.py @@ -2,10 +2,6 @@ BackupFormat, BackupResult, ) -from .cli_config import ( - ConnectionDefaults, - defaults_path, -) from .cli_foreign_keys import ( ForeignKeyConstraintViolation, ForeignKeyManagementResult, @@ -73,7 +69,6 @@ 
"reset_model_sequences", "select_maintenance_tables", "truncate_tables", - "defaults_path", "AnalyzeTableResult", "BackupFormat", "BackupResult", @@ -84,7 +79,6 @@ "ForeignKeyValidationResult", "IndexManagementResult", "IndexTarget", - "ConnectionDefaults", "CommandSupport", "DoctorCheck", "DoctorRecommendation", diff --git a/omop_alchemy/maintenance/_cli_utils.py b/omop_alchemy/maintenance/_cli_utils.py index e7625ed..925d9df 100644 --- a/omop_alchemy/maintenance/_cli_utils.py +++ b/omop_alchemy/maintenance/_cli_utils.py @@ -1,64 +1,32 @@ -"""Shared utilities: the @omop_command decorator, error handling, connection resolution, and injected CLI parameter definitions.""" +"""Shared utilities: the @omop_command decorator, error handling, and injected CLI parameter definitions.""" from __future__ import annotations import functools import inspect -from typing import Any, Callable, Optional, TypeVar +from dataclasses import dataclass +from typing import Any, Callable, TypeVar import typer from sqlalchemy.exc import SQLAlchemyError +from oa_configurator import Resolver, load_stack_config + from .tables import TableScope from .ui import console, render_error, render_command_header -from ..db import build_engine, resolve_connection from ..backends import BackendNotSupportedError _F = TypeVar("_F", bound=Callable[..., Any]) -# ── Shared injected CLI params ──────────────────────────────────────────────── -# Built once and reused so every decorated command gets identical help text. - -_DOTENV_PARAM = inspect.Parameter( - "dotenv", - inspect.Parameter.POSITIONAL_OR_KEYWORD, - default=typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - annotation=Optional[str], -) -_ENGINE_SCHEMA_PARAM = inspect.Parameter( - "engine_schema", - inspect.Parameter.POSITIONAL_OR_KEYWORD, - default=typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). 
Resolves to the ENGINE_ environment variable group.", - ), - annotation=Optional[str], -) -_DB_SCHEMA_PARAM = inspect.Parameter( - "db_schema", - inspect.Parameter.POSITIONAL_OR_KEYWORD, - default=typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", - ), - annotation=Optional[str], -) -_DRY_RUN_PARAM = inspect.Parameter( - "dry_run", - inspect.Parameter.POSITIONAL_OR_KEYWORD, - default=typer.Option( - False, - "--dry-run", - help="Preview planned actions without applying any changes to the database.", - ), - annotation=bool, -) - -_INJECTED_NAMES = {"dotenv", "engine_schema", "db_schema"} + +@dataclass(frozen=True) +class _ConnContext: + """Connection context derived from the oa_configurator resolved resource.""" + db_schema: str | None + engine_schema: str = "default" + dotenv: None = None # kept so existing conn.dotenv references compile + athena_source: str | None = None # from OmopAlchemyConfig.athena_source_path # ── Decorator ───────────────────────────────────────────────────────────────── @@ -72,31 +40,27 @@ def omop_command( ) -> Callable[[_F], _F]: """Decorator that eliminates CLI boilerplate for every omop-alchemy command. - Injects ``dotenv``, ``engine_schema``, ``db_schema`` (and optionally - ``dry_run``) into the Typer CLI signature, calls :func:`setup_cli_cmd`, - and wraps the body in ``try/except handle_error``. + Resolves the database connection from oa_configurator, calls + :func:`render_command_header`, and wraps the body in ``try/except handle_error``. - The decorated function must accept ``(conn, engine, ...)`` as its first - two positional parameters. The decorator supplies them. Any - ``vocabulary_included`` or ``athena_source`` parameter declared in the - function is automatically forwarded to :func:`setup_cli_cmd`. + The decorated function must accept ``(conn, engine, ...)`` as its first two + positional parameters. The decorator supplies them. 
""" def decorator(func: _F) -> _F: @functools.wraps(func) def wrapper(**kwargs: Any) -> Any: - dotenv = kwargs.pop("dotenv", None) - engine_schema = kwargs.pop("engine_schema", None) - db_schema = kwargs.pop("db_schema", None) - athena_source = kwargs.pop("athena_source", None) _dry_run = kwargs.pop("dry_run", False) if dry_run else False _vocab = kwargs.get("vocabulary_included", vocabulary_included) _mode = mode_label if mode_label is not None else ("dry-run" if _dry_run else "apply") try: - conn = resolve_connection( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - athena_source=athena_source, + from ..config import get_config + resolver = Resolver(load_stack_config()) + resolved = resolver.resolve_resource("default") + pkg_config = get_config() + engine = resolved.create_engine() + conn = _ConnContext( + db_schema=resolved.cdm_schema, + athena_source=pkg_config.athena_source_path, ) console.print( render_command_header( @@ -107,7 +71,6 @@ def wrapper(**kwargs: Any) -> Any: mode_label=_mode, ) ) - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) if dry_run: return func(conn, engine, dry_run=_dry_run, **kwargs) # type: ignore[arg-type] return func(conn, engine, **kwargs) # type: ignore[arg-type] @@ -116,17 +79,26 @@ def wrapper(**kwargs: Any) -> Any: # Rebuild the Typer-visible signature: # • skip conn/engine (decorator supplies them) - # • skip dotenv/engine_schema/db_schema (decorator injects them) - # • skip dry_run if the decorator owns it (to avoid duplication) + # • skip dry_run if the decorator owns it orig_params = list(inspect.signature(func).parameters.values()) func_params = [ p for p in orig_params[2:] - if p.name not in _INJECTED_NAMES - and not (dry_run and p.name == "dry_run") + if not (dry_run and p.name == "dry_run") ] - new_params = [_DOTENV_PARAM, _ENGINE_SCHEMA_PARAM, _DB_SCHEMA_PARAM] + func_params + new_params = func_params[:] if dry_run: - new_params.append(_DRY_RUN_PARAM) + 
new_params.append( + inspect.Parameter( + "dry_run", + inspect.Parameter.POSITIONAL_OR_KEYWORD, + default=typer.Option( + False, + "--dry-run", + help="Preview planned actions without applying any changes to the database.", + ), + annotation=bool, + ) + ) wrapper.__signature__ = inspect.signature(func).replace(parameters=new_params) # type: ignore[attr-defined] return wrapper # type: ignore[return-value] @@ -164,4 +136,3 @@ def resolve_selection( if selected is not None: return None, selected return scope or default_scope, None - diff --git a/omop_alchemy/maintenance/cli.py b/omop_alchemy/maintenance/cli.py index b62baba..0e5cffd 100644 --- a/omop_alchemy/maintenance/cli.py +++ b/omop_alchemy/maintenance/cli.py @@ -2,11 +2,12 @@ from __future__ import annotations +from typing import Annotated + import typer from . import ( cli_backup as backup, - cli_config as config, cli_foreign_keys as foreign_keys, cli_fulltext as fulltext, cli_indexes as indexes, @@ -28,7 +29,6 @@ ) # Subgroups -app.add_typer(config.app, name="config") app.add_typer(foreign_keys.app, name="foreign-keys") app.add_typer(indexes.app, name="indexes") app.add_typer(fulltext.app, name="fulltext") @@ -40,8 +40,13 @@ @app.callback() -def app_callback() -> None: - configure_logging() +def app_callback( + verbose: Annotated[ + int, + typer.Option("--verbose", "-v", count=True, help="Increase verbosity: -v=INFO, -vv=DEBUG"), + ] = 0, +) -> None: + configure_logging(verbose) def main() -> None: diff --git a/omop_alchemy/maintenance/cli_config.py b/omop_alchemy/maintenance/cli_config.py index 5c56e01..7e7056b 100644 --- a/omop_alchemy/maintenance/cli_config.py +++ b/omop_alchemy/maintenance/cli_config.py @@ -1,64 +1,4 @@ -from __future__ import annotations -import typer -from typing import Optional +"""Connection config management is now handled by omop-config (oa_configurator). 
-from ..db import ( - ConnectionDefaults, - defaults_path, -) -from .ui import console, render_connection_defaults - - -DEFAULTS_FILENAME = ".omop-maint.toml" -DEFAULTS_ENV_VAR = "OMOP_MAINT_DEFAULTS_FILE" -DEFAULTS_SECTION = "defaults" -LEGACY_DEFAULTS_SECTION = "connection" -PROJECT_MARKER = "pyproject.toml" - - -app = typer.Typer( - help="Manage persisted maintenance CLI connection overrides stored in .omop-maint.toml.", - rich_markup_mode="rich", -) - - -@app.command("show") -def config_show_command() -> None: - """Display current saved connection defaults from the nearest .omop-maint.toml file.""" - defaults = ConnectionDefaults.load() - console.print(render_connection_defaults(defaults, path=str(defaults_path()))) - - -@app.command("override") -def config_override_command( - dotenv: Optional[str] = typer.Option( - None, - help=( - "Path to a .env file to load before resolving the connection. " - "Saved as a path relative to .omop-maint.toml and resolved back to absolute on load." - ), - ), - engine_schema: Optional[str] = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: Optional[str] = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", - ), - athena_source: Optional[str] = typer.Option( - None, - help=( - "Path to the unzipped Athena vocabulary CSV directory. " - "Saved relative to .omop-maint.toml; used by load-vocab-source when --athena-source is omitted." 
- ), - ), -) -> None: - """Persist one or more connection overrides to .omop-maint.toml for future CLI invocations.""" - updated, path = ConnectionDefaults.update_and_save_defaults( - dotenv=dotenv, - engine_schema=engine_schema, - db_schema=db_schema, - athena_source=athena_source, - ) - console.print(render_connection_defaults(updated, path=str(path), title="Saved Overrides")) +Use `omop-config configure omop_alchemy` to set package-specific options. +""" diff --git a/omop_alchemy/maintenance/cli_foreign_keys.py b/omop_alchemy/maintenance/cli_foreign_keys.py index 70a06f8..5c67631 100644 --- a/omop_alchemy/maintenance/cli_foreign_keys.py +++ b/omop_alchemy/maintenance/cli_foreign_keys.py @@ -7,7 +7,8 @@ import sqlalchemy as sa import typer -from ..db import build_engine, resolve_connection +from oa_configurator import Resolver, load_stack_config +from ._cli_utils import _ConnContext from ..backends import Backend, resolve_backend, require_backend_support, backend_support_note from ._cli_utils import handle_error, omop_command from .tables import ( @@ -474,18 +475,6 @@ def disable_foreign_keys_command( @app.command("enable") def enable_foreign_keys_command( - dotenv: str | None = typer.Option( - None, - help="Path to a .env file to load before resolving the connection. Overrides the saved DOTENV default.", - ), - engine_schema: str | None = typer.Option( - None, - help="Named engine configuration to use (e.g. 'cdm', 'results'). Resolves to the ENGINE_ environment variable group.", - ), - db_schema: str | None = typer.Option( - None, - help="Database schema to target (e.g. 'cdm5', 'vocab'). Sets search_path on PostgreSQL; not supported on SQLite.", - ), vocabulary_included: bool = typer.Option( False, "--vocab/--no-vocab", @@ -503,7 +492,14 @@ def enable_foreign_keys_command( ), ) -> None: """Re-enable PostgreSQL RI trigger enforcement. 
Use --strict to abort if any violations exist first.""" - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) + try: + resolver = Resolver(load_stack_config()) + resolved = resolver.resolve_resource("default") + conn = _ConnContext(db_schema=resolved.cdm_schema) + engine = resolved.create_engine() + except Exception as exc: + handle_error(exc) + return console.print( render_command_header( command_name="foreign-keys enable --strict" if strict else "foreign-keys enable", @@ -514,7 +510,6 @@ def enable_foreign_keys_command( ) ) try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) status_msg = ( "Validating and enabling PostgreSQL foreign key trigger enforcement..." if strict diff --git a/omop_alchemy/maintenance/cli_schema.py b/omop_alchemy/maintenance/cli_schema.py index 4631235..7dbc7e4 100644 --- a/omop_alchemy/maintenance/cli_schema.py +++ b/omop_alchemy/maintenance/cli_schema.py @@ -4,8 +4,6 @@ import typer -from omop_alchemy import load_environment - from ._cli_utils import omop_command from .cli_schema_doctor import ( DoctorCheck as DoctorCheck, @@ -74,14 +72,8 @@ def info_command( ), ) -> None: """Inspect maintenance CLI readiness, backend compatibility, and current installation state.""" - load_environment(conn.dotenv or "") with console.status("Inspecting maintenance environment..."): - info = collect_maintenance_info( - dotenv=conn.dotenv, - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, - vocabulary_included=vocabulary_included, - ) + info = collect_maintenance_info(vocabulary_included=vocabulary_included) console.print(render_info_environment(info)) console.print(render_info_database(info)) console.print(render_info_dependencies(info)) @@ -106,15 +98,8 @@ def doctor_command( ), ) -> None: """Run a read-only maintenance health check across connection readiness, schema drift, and FK state.""" - load_environment(conn.dotenv or "") with console.status("Running maintenance 
doctor checks..."): - report = collect_doctor_report( - dotenv=conn.dotenv, - engine_schema=conn.engine_schema, - db_schema=conn.db_schema, - vocabulary_included=vocabulary_included, - deep=deep, - ) + report = collect_doctor_report(vocabulary_included=vocabulary_included, deep=deep) console.print(render_info_environment(report.info)) console.print(render_info_database(report.info)) console.print(render_doctor_checks(report.checks)) diff --git a/omop_alchemy/maintenance/cli_schema_doctor.py b/omop_alchemy/maintenance/cli_schema_doctor.py index 95d10f0..887f8de 100644 --- a/omop_alchemy/maintenance/cli_schema_doctor.py +++ b/omop_alchemy/maintenance/cli_schema_doctor.py @@ -4,9 +4,9 @@ from dataclasses import dataclass -from omop_alchemy import create_engine_with_dependencies, load_environment +from oa_configurator import Resolver, load_stack_config from omop_alchemy.backends.resolve import SupportedDialect -from omop_alchemy.db import get_engine_name +from omop_alchemy.db import create_engine_with_dependencies from .cli_foreign_keys import ( ForeignKeyStatusResult, @@ -155,20 +155,11 @@ def _build_recommendations( def collect_doctor_report( *, - engine_schema: str | None = None, - db_schema: str | None = None, - dotenv: str | None = None, vocabulary_included: bool = True, deep: bool = False, ) -> DoctorReport: """Run all maintenance health checks and return a prioritised report with recommendations.""" - load_environment(dotenv or "") - info = collect_maintenance_info( - engine_schema=engine_schema, - db_schema=db_schema, - dotenv=dotenv, - vocabulary_included=vocabulary_included, - ) + info = collect_maintenance_info(vocabulary_included=vocabulary_included) checks = [ DoctorCheck( @@ -187,7 +178,10 @@ def collect_doctor_report( foreign_key_validation: ForeignKeyValidationReport | None = None if info.connection_ready: - engine = create_engine_with_dependencies(get_engine_name(engine_schema), future=True) + resolver = Resolver(load_stack_config()) + resolved = 
resolver.resolve_resource("default") + engine = resolved.create_engine() + db_schema = resolved.cdm_schema try: missing_table_count = info.missing_table_count or 0 checks.append( diff --git a/omop_alchemy/maintenance/cli_schema_info.py b/omop_alchemy/maintenance/cli_schema_info.py index 55f568e..143a5de 100644 --- a/omop_alchemy/maintenance/cli_schema_info.py +++ b/omop_alchemy/maintenance/cli_schema_info.py @@ -5,17 +5,16 @@ from dataclasses import dataclass import importlib.metadata import importlib.util -import os import shutil import sqlalchemy as sa from sqlalchemy.exc import SQLAlchemyError -from omop_alchemy import create_engine_with_dependencies, load_environment +from oa_configurator import Resolver, load_stack_config +from oa_configurator.loader import DEFAULT_CONFIG_PATH from omop_alchemy.backends.resolve import SupportedDialect -from omop_alchemy.db import get_engine_name +from omop_alchemy.db import create_engine_with_dependencies -from .cli_config import defaults_path from .cli_schema_tables import collect_missing_tables from .tables import ( TableCategory, @@ -63,11 +62,9 @@ class MaintenanceInfo: pg_dump_path: str | None pg_restore_path: str | None psql_path: str | None - defaults_file: str - defaults_exists: bool - dotenv_path: str | None - dotenv_exists: bool | None - engine_schema: str | None + config_file: str + config_exists: bool + resource_name: str db_schema: str | None engine_url: str | None backend: str | None @@ -306,17 +303,13 @@ def _command_support_for_backend( def collect_maintenance_info( *, - engine_schema: str | None = None, - db_schema: str | None = None, - dotenv: str | None = None, vocabulary_included: bool = True, ) -> MaintenanceInfo: """Probe the current environment: resolve config, attempt a connection, and assess per-command readiness.""" - load_environment(dotenv or "") pg_dump_path = shutil.which("pg_dump") pg_restore_path = shutil.which("pg_restore") psql_path = shutil.which("psql") - defaults_file = defaults_path() + 
config_file = DEFAULT_CONFIG_PATH dependencies = ( _dependency_status("sqlalchemy", "sqlalchemy"), _dependency_status("typer", "typer"), @@ -331,9 +324,9 @@ def collect_maintenance_info( exclude_categories=(() if vocabulary_included else (TableCategory.VOCABULARY,)) ) cli_path = shutil.which("omop-alchemy") - dotenv_exists = None if dotenv is None else os.path.exists(dotenv) - engine_name: str | None = None + resource_name = "default" + db_schema: str | None = None engine_url: str | None = None backend: str | None = None engine_created = False @@ -344,41 +337,37 @@ def collect_maintenance_info( missing_table_count: int | None = None try: - engine_name = get_engine_name(engine_schema) - url = sa.engine.make_url(engine_name) - engine_url = url.render_as_string(hide_password=True) - backend = url.get_backend_name() + resolver = Resolver(load_stack_config()) + resolved = resolver.resolve_resource(resource_name) + db_schema = resolved.cdm_schema + raw_url = sa.engine.make_url(resolved.primary_db.url) + engine_url = raw_url.render_as_string(hide_password=True) + backend = raw_url.get_backend_name() + engine = resolved.create_engine() + engine_created = True except RuntimeError as exc: engine_error = str(exc) except Exception as exc: engine_error = f"Could not resolve engine configuration: {exc}" - if engine_name is not None: + if engine_created: try: - engine = create_engine_with_dependencies(engine_name, future=True) - engine_created = True - except RuntimeError as exc: - engine_error = str(exc) + with engine.connect() as connection: + connection.exec_driver_sql("SELECT 1") + connection_ready = True + missing_tables = collect_missing_tables( + engine, + db_schema=db_schema, + vocabulary_included=vocabulary_included, + ) + missing_table_count = len(missing_tables) + existing_table_count = len(managed_tables) - missing_table_count + except SQLAlchemyError as exc: + connection_error = f"{exc.__class__.__name__}: {exc}" except Exception as exc: - engine_error = f"Could not 
create engine: {exc}" - else: - try: - with engine.connect() as connection: - connection.exec_driver_sql("SELECT 1") - connection_ready = True - missing_tables = collect_missing_tables( - engine, - db_schema=db_schema, - vocabulary_included=vocabulary_included, - ) - missing_table_count = len(missing_tables) - existing_table_count = len(managed_tables) - missing_table_count - except SQLAlchemyError as exc: - connection_error = f"{exc.__class__.__name__}: {exc}" - except Exception as exc: - connection_error = str(exc) - finally: - engine.dispose() + connection_error = str(exc) + finally: + engine.dispose() if backend is None: command_support = _command_support_for_unavailable_engine( @@ -402,11 +391,9 @@ def collect_maintenance_info( pg_dump_path=pg_dump_path, pg_restore_path=pg_restore_path, psql_path=psql_path, - defaults_file=str(defaults_file), - defaults_exists=defaults_file.exists(), - dotenv_path=dotenv, - dotenv_exists=dotenv_exists, - engine_schema=engine_schema, + config_file=str(config_file), + config_exists=config_file.exists(), + resource_name=resource_name, db_schema=db_schema, engine_url=engine_url, backend=backend, diff --git a/omop_alchemy/maintenance/cli_vocab.py b/omop_alchemy/maintenance/cli_vocab.py index 8418806..2249f3b 100644 --- a/omop_alchemy/maintenance/cli_vocab.py +++ b/omop_alchemy/maintenance/cli_vocab.py @@ -579,15 +579,16 @@ def load_vocab_source_command( dry_run: bool = False, ) -> None: """Load all Athena vocabulary CSVs from the configured source path, optionally toggling indexes and FK triggers for speed.""" - if conn.athena_source is None: - console.print( - render_error( - "No Athena vocabulary source path is configured. " - "Set it with `omop-alchemy config override --athena-source ` " - "or pass `--athena-source`." - ) + effective_athena_source = athena_source or conn.athena_source + if effective_athena_source is None: + console.print( + render_error( + "No Athena vocabulary source path is configured. 
" + "Set it with `omop-config configure omop_alchemy` " + "or pass `--athena-source`." ) - raise typer.Exit(code=1) + ) + raise typer.Exit(code=1) with Progress( SpinnerColumn(), @@ -618,7 +619,7 @@ def _update_progress(event: VocabularyLoadProgress) -> None: report = load_vocab_source( engine, - source_path=conn.athena_source, + source_path=effective_athena_source, db_schema=conn.db_schema, dry_run=dry_run, merge_strategy=merge_strategy, diff --git a/omop_alchemy/maintenance/ui.py b/omop_alchemy/maintenance/ui.py index 0be28e5..4f67cc8 100644 --- a/omop_alchemy/maintenance/ui.py +++ b/omop_alchemy/maintenance/ui.py @@ -18,7 +18,6 @@ if TYPE_CHECKING: from .cli_backup import BackupResult - from .cli_config import ConnectionDefaults from .cli_foreign_keys import ( ForeignKeyConstraintViolation, ForeignKeyManagementResult, @@ -140,20 +139,6 @@ def render_error(message: str, *, title: str = "Error") -> Panel: ) -def render_connection_defaults( - defaults: ConnectionDefaults, - *, - path: str, - title: str = "Connection Defaults", -) -> Panel: - grid = Table.grid(padding=(0, 2)) - grid.add_column(style="bold cyan") - grid.add_column() - grid.add_row("File", path) - for param, value in defaults.to_dict().items(): - grid.add_row(param.replace("_", " ").title(), value or "-") - return Panel.fit(grid, title=f"[bold]{title}[/bold]", border_style="blue") - def _status_text(status: str) -> Text: return Text(status.upper(), style=STATUS_STYLES.get(status, "white")) @@ -187,16 +172,14 @@ def render_info_environment(info: MaintenanceInfo) -> Panel: grid.add_row("pg_restore", info.pg_restore_path or "not on PATH") grid.add_row("psql", info.psql_path or "not on PATH") grid.add_row( - "Defaults file", + "Config file", Text( - info.defaults_file, - style="green" if info.defaults_exists else "yellow", + info.config_file, + style="green" if info.config_exists else "yellow", ), ) - grid.add_row("dotenv", info.dotenv_path or "-") - grid.add_row("dotenv exists", 
_optional_bool_label(info.dotenv_exists)) - grid.add_row("engine_schema", info.engine_schema or "-") - grid.add_row("db_schema", info.db_schema or "default search_path") + grid.add_row("Resource", info.resource_name) + grid.add_row("CDM schema", info.db_schema or "default search_path") return Panel.fit(grid, title="[bold]Environment[/bold]", border_style="magenta") diff --git a/pyproject.toml b/pyproject.toml index 965ad27..32a79b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ dependencies = [ "sqlalchemy>=2.0.45", "pandas>=2.0", "pyyaml>=6.0", - "python-dotenv>=1.2.2", + "oa-configurator>=0.2.0", "typer>=0.12", "rich>=13.0", "orm-loader>=0.4.1", @@ -70,6 +70,9 @@ Issues = "https://github.com/AustralianCancerDataNetwork/OMOP_Alchemy/issues" [project.scripts] omop-alchemy = "omop_alchemy.maintenance.cli:main" +[project.entry-points."omop.config"] +omop_alchemy = "omop_alchemy.config:OmopAlchemyConfig" + [build-system] requires = ["setuptools>=68", "wheel"] diff --git a/tests/test_cli_config.py b/tests/test_cli_config.py index 2f84928..469bed2 100644 --- a/tests/test_cli_config.py +++ b/tests/test_cli_config.py @@ -1,132 +1,19 @@ +"""Tests for omop_alchemy CLI — entry point smoke tests.""" + from typer.testing import CliRunner from omop_alchemy.maintenance.cli import app -from omop_alchemy.maintenance.cli_config import defaults_path, ConnectionDefaults -from omop_alchemy.maintenance.cli_indexes import IndexManagementResult - runner = CliRunner() -def test_config_set_overrides_and_show(): - """Config override persists values and config show surfaces them.""" - with runner.isolated_filesystem(): - result = runner.invoke( - app, - [ - "config", - "override", - "--dotenv", - ".env.test", - "--engine-schema", - "cdm", - "--db-schema", - "public", - "--athena-source", - "athena_source", - ], - ) - - assert result.exit_code == 0 - assert defaults_path().exists() - loaded_defaults = ConnectionDefaults.load() - assert loaded_defaults.dotenv == 
str((defaults_path().parent / ".env.test").resolve()) - assert loaded_defaults.engine_schema == "cdm" - assert loaded_defaults.db_schema == "public" - assert loaded_defaults.athena_source == str((defaults_path().parent / "athena_source").resolve()) - - show_result = runner.invoke(app, ["config", "show"]) - assert show_result.exit_code == 0 - assert "cdm" in show_result.stdout - assert "public" in show_result.stdout - assert "athena_source" in show_result.stdout - - -def test_cli_uses_saved_connection_defaults(monkeypatch): - """CLI commands consume persisted connection defaults when flags are omitted.""" - calls: dict[str, object] = {} - - def fake_load_environment(dotenv: str) -> None: - calls["dotenv"] = dotenv - - def fake_get_engine_name(schema: str | None = None) -> str: - calls["engine_schema"] = schema - return "postgresql+psycopg://example" - - def fake_create_engine(url: str, *, future: bool) -> str: - calls["engine_url"] = url - calls["future"] = future - return "ENGINE" - - def fake_manage_indexes( - engine: object, - *, - enable: bool, - db_schema: str | None = None, - vocabulary_included: bool = False, - dry_run: bool = False, - ) -> list[IndexManagementResult]: - calls["engine"] = engine - calls["enable"] = enable - calls["db_schema"] = db_schema - calls["vocabulary_included"] = vocabulary_included - calls["dry_run"] = dry_run - return [] - - monkeypatch.setattr( - "omop_alchemy.db.load_environment", - fake_load_environment, - ) - monkeypatch.setattr( - "omop_alchemy.db.get_engine_name", - fake_get_engine_name, - ) - monkeypatch.setattr( - "omop_alchemy.db.create_engine_with_dependencies", - fake_create_engine, - ) - monkeypatch.setattr( - "omop_alchemy.maintenance.cli_indexes.manage_indexes", - fake_manage_indexes, - ) - - with runner.isolated_filesystem(): - set_result = runner.invoke( - app, - [ - "config", - "override", - "--dotenv", - ".env.saved", - "--engine-schema", - "cdm", - "--db-schema", - "public", - ], - ) - assert set_result.exit_code 
== 0 - expected_dotenv = str((defaults_path().parent / ".env.saved").resolve()) - - result = runner.invoke(app, ["indexes", "disable", "--dry-run"]) - +def test_help_exits_cleanly(): + """--help exits 0 without raising.""" + result = runner.invoke(app, ["--help"]) assert result.exit_code == 0 - assert calls["dotenv"] == expected_dotenv - assert calls["engine_schema"] == "cdm" - assert calls["db_schema"] == "public" - - -def test_config_show_surfaces_manual_logging_setting() -> None: - """Config show surfaces manually configured logging mode from defaults file.""" - with runner.isolated_filesystem(): - defaults_path().write_text( - "[defaults]\nlogging = \"off\"\n", - encoding="utf-8", - ) - loaded_defaults = ConnectionDefaults.load() - assert loaded_defaults.logging == "off" - show_result = runner.invoke(app, ["config", "show"]) - assert show_result.exit_code == 0 - assert "Logging" in show_result.stdout - assert "off" in show_result.stdout +def test_no_config_subcommand(): + """The old 'config' subcommand no longer exists; config is managed via omop-config.""" + result = runner.invoke(app, ["config", "--help"]) + assert result.exit_code != 0 diff --git a/tests/test_foreign_keys.py b/tests/test_foreign_keys.py index d97691b..368a736 100644 --- a/tests/test_foreign_keys.py +++ b/tests/test_foreign_keys.py @@ -72,26 +72,19 @@ def test_validate_foreign_key_constraints_is_safe_on_sqlite(tmp_path): def test_disable_foreign_keys_cli_fails_gracefully_for_sqlite(monkeypatch): """Test disable foreign keys cli fails gracefully for sqlite.""" - def fake_load_environment(dotenv: str) -> None: - return None + from oa_configurator import StackConfig - def fake_get_engine_name(schema: str | None = None) -> str: - return "sqlite:///:memory:" - - def fake_create_engine(url: str, *, future: bool) -> sa.Engine: - return sa.create_engine(url, future=future) - - monkeypatch.setattr( - "omop_alchemy.db.load_environment", - fake_load_environment, + cfg = StackConfig.for_session( + 
connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, + resources={"default": {"primary_db": "db", "cdm_schema": "main"}}, ) monkeypatch.setattr( - "omop_alchemy.db.get_engine_name", - fake_get_engine_name, + "omop_alchemy.maintenance._cli_utils.load_stack_config", + lambda: cfg, ) monkeypatch.setattr( - "omop_alchemy.db.create_engine_with_dependencies", - fake_create_engine, + "omop_alchemy.config.load_stack_config", + lambda: cfg, ) result = runner.invoke( @@ -252,19 +245,22 @@ def begin(self): def test_enable_foreign_keys_strict_cli_invokes_strict_management(monkeypatch): """Test enable foreign keys strict cli invokes strict management.""" - calls: dict[str, object] = {} + from oa_configurator import StackConfig - def fake_load_environment(dotenv: str) -> None: - calls["dotenv"] = dotenv - - def fake_get_engine_name(schema: str | None = None) -> str: - calls["engine_schema"] = schema - return "postgresql+psycopg://example" + calls: dict[str, object] = {} - def fake_create_engine(url: str, *, future: bool) -> str: - calls["engine_url"] = url - calls["future"] = future - return "ENGINE" + cfg = StackConfig.for_session( + connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, + resources={"default": {"primary_db": "db", "cdm_schema": "main"}}, + ) + monkeypatch.setattr( + "omop_alchemy.maintenance.cli_foreign_keys.load_stack_config", + lambda: cfg, + ) + monkeypatch.setattr( + "omop_alchemy.config.load_stack_config", + lambda: cfg, + ) def fake_manage_foreign_key_triggers( engine: object, @@ -283,18 +279,6 @@ def fake_manage_foreign_key_triggers( calls["strict"] = strict return [] - monkeypatch.setattr( - "omop_alchemy.db.load_environment", - fake_load_environment, - ) - monkeypatch.setattr( - "omop_alchemy.db.get_engine_name", - fake_get_engine_name, - ) - monkeypatch.setattr( - "omop_alchemy.db.create_engine_with_dependencies", - fake_create_engine, - ) monkeypatch.setattr( 
"omop_alchemy.maintenance.cli_foreign_keys.manage_foreign_key_triggers", fake_manage_foreign_key_triggers, @@ -374,19 +358,22 @@ def connect(self): def test_foreign_keys_validate_cli_invokes_validation(monkeypatch): """Test foreign keys validate cli invokes validation.""" - calls: dict[str, object] = {} + from oa_configurator import StackConfig - def fake_load_environment(dotenv: str) -> None: - calls["dotenv"] = dotenv - - def fake_get_engine_name(schema: str | None = None) -> str: - calls["engine_schema"] = schema - return "postgresql+psycopg://example" + calls: dict[str, object] = {} - def fake_create_engine(url: str, *, future: bool) -> str: - calls["engine_url"] = url - calls["future"] = future - return "ENGINE" + cfg = StackConfig.for_session( + connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, + resources={"default": {"primary_db": "db", "cdm_schema": "main"}}, + ) + monkeypatch.setattr( + "omop_alchemy.maintenance._cli_utils.load_stack_config", + lambda: cfg, + ) + monkeypatch.setattr( + "omop_alchemy.config.load_stack_config", + lambda: cfg, + ) def fake_validate_foreign_key_constraints( engine: object, @@ -429,18 +416,6 @@ def fake_validate_foreign_key_constraints( ), ) - monkeypatch.setattr( - "omop_alchemy.db.load_environment", - fake_load_environment, - ) - monkeypatch.setattr( - "omop_alchemy.db.get_engine_name", - fake_get_engine_name, - ) - monkeypatch.setattr( - "omop_alchemy.db.create_engine_with_dependencies", - fake_create_engine, - ) monkeypatch.setattr( "omop_alchemy.maintenance.cli_foreign_keys.validate_foreign_key_constraints", fake_validate_foreign_key_constraints, @@ -452,7 +427,6 @@ def fake_validate_foreign_key_constraints( ) assert result.exit_code == 0 - assert calls["engine"] == "ENGINE" assert "foreign-keys validate" in result.stdout assert "Violations" in result.stdout assert "visit_occurrence" in result.stdout diff --git a/tests/test_fulltext.py b/tests/test_fulltext.py index c22bc9d..c13f53f 100644 --- 
a/tests/test_fulltext.py +++ b/tests/test_fulltext.py @@ -198,12 +198,22 @@ def test_fulltext_management_requires_postgresql(tmp_path, fn_name): def test_fulltext_install_cli_passes_options(monkeypatch): """CLI forwards install options to the fulltext handler implementation.""" + from oa_configurator import StackConfig + calls: dict[str, object] = {} - def fake_build_engine(*, dotenv: str | None, engine_schema: str | None): - calls["dotenv"] = dotenv - calls["engine_schema"] = engine_schema - return "ENGINE" + cfg = StackConfig.for_session( + connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, + resources={"default": {"primary_db": "db", "cdm_schema": "public"}}, + ) + monkeypatch.setattr( + "omop_alchemy.maintenance._cli_utils.load_stack_config", + lambda: cfg, + ) + monkeypatch.setattr( + "omop_alchemy.config.load_stack_config", + lambda: cfg, + ) def fake_install_fulltext_columns( engine: object, @@ -231,10 +241,6 @@ def fake_install_fulltext_columns( ), ) - monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.build_engine", - fake_build_engine, - ) monkeypatch.setattr( "omop_alchemy.maintenance.cli_fulltext.install_fulltext_columns", fake_install_fulltext_columns, @@ -246,14 +252,11 @@ def fake_install_fulltext_columns( "fulltext", "install", "--dry-run", - "--db-schema", - "public", "--fastupdate", ], ) assert result.exit_code == 0 - assert calls["engine"] == "ENGINE" assert calls["db_schema"] == "public" assert calls["fastupdate"] is True assert calls["dry_run"] is True diff --git a/tests/test_indexes.py b/tests/test_indexes.py index 4187dd2..06a74d9 100644 --- a/tests/test_indexes.py +++ b/tests/test_indexes.py @@ -116,19 +116,22 @@ def test_manage_indexes_disable_and_enable_on_sqlite(tmp_path): def test_disable_indexes_cli_invokes_management(monkeypatch): """Test disable indexes cli invokes management.""" - calls: dict[str, object] = {} - - def fake_load_environment(dotenv: str) -> None: - calls["dotenv"] = dotenv + from 
oa_configurator import StackConfig - def fake_get_engine_name(schema: str | None = None) -> str: - calls["engine_schema"] = schema - return "postgresql+psycopg://example" + calls: dict[str, object] = {} - def fake_create_engine(url: str, *, future: bool) -> str: - calls["engine_url"] = url - calls["future"] = future - return "ENGINE" + cfg = StackConfig.for_session( + connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, + resources={"default": {"primary_db": "db", "cdm_schema": "main"}}, + ) + monkeypatch.setattr( + "omop_alchemy.maintenance._cli_utils.load_stack_config", + lambda: cfg, + ) + monkeypatch.setattr( + "omop_alchemy.config.load_stack_config", + lambda: cfg, + ) def fake_manage_indexes( engine: object, @@ -160,18 +163,6 @@ def fake_manage_indexes( ) ] - monkeypatch.setattr( - "omop_alchemy.db.load_environment", - fake_load_environment, - ) - monkeypatch.setattr( - "omop_alchemy.db.get_engine_name", - fake_get_engine_name, - ) - monkeypatch.setattr( - "omop_alchemy.db.create_engine_with_dependencies", - fake_create_engine, - ) monkeypatch.setattr( "omop_alchemy.maintenance.cli_indexes.manage_indexes", fake_manage_indexes, @@ -182,10 +173,6 @@ def fake_manage_indexes( [ "indexes", "disable", - "--dotenv", - ".env.test", - "--engine-schema", - "cdm", "--dry-run", ], ) diff --git a/tests/test_load_vocab_source.py b/tests/test_load_vocab_source.py index 4e44175..fa6cc56 100644 --- a/tests/test_load_vocab_source.py +++ b/tests/test_load_vocab_source.py @@ -6,7 +6,6 @@ from typer.testing import CliRunner from omop_alchemy.maintenance.cli import app -from omop_alchemy.maintenance.cli_config import defaults_path from omop_alchemy.maintenance.cli_vocab import ( OPTIONAL_VOCAB_MODELS, REQUIRED_VOCAB_MODELS, @@ -149,21 +148,29 @@ def test_load_vocab_source_dry_run_does_not_create_tables(tmp_path): assert not inspector.has_table("concept") -def test_load_vocab_source_cli_uses_saved_athena_source(monkeypatch): - """Test load vocab source cli uses saved 
athena source.""" - calls: dict[str, object] = {} +def test_load_vocab_source_cli_uses_configured_athena_source(monkeypatch, tmp_path): + """load-vocab-source CLI uses athena_source_path from OmopAlchemyConfig when --athena-source is omitted.""" + from oa_configurator import StackConfig, Resolver + from omop_alchemy.maintenance.cli_vocab import VocabularyLoadReport, VocabularyLoadResult - def fake_load_environment(dotenv: str) -> None: - calls["dotenv"] = dotenv + calls: dict[str, object] = {} + athena_dir = tmp_path / "athena_source" + athena_dir.mkdir() - def fake_get_engine_name(schema: str | None = None) -> str: - calls["engine_schema"] = schema - return "sqlite:///:memory:" + cfg = StackConfig.for_session( + connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, + resources={"default": {"primary_db": "db", "cdm_schema": "main"}}, + tools={"omop_alchemy": {"extra": {"athena_source_path": str(athena_dir)}}}, + ) - def fake_create_engine(url: str, *, future: bool) -> str: - calls["engine_url"] = url - calls["future"] = future - return "ENGINE" + monkeypatch.setattr( + "omop_alchemy.maintenance._cli_utils.load_stack_config", + lambda: cfg, + ) + monkeypatch.setattr( + "omop_alchemy.config.load_stack_config", + lambda: cfg, + ) def fake_load_vocab_source( engine: object, @@ -177,13 +184,8 @@ def fake_load_vocab_source( merge_batch_size: int = 1_000_000, progress_callback=None, ): - from omop_alchemy.maintenance.cli_vocab import VocabularyLoadReport, VocabularyLoadResult - - calls["engine"] = engine calls["source_path"] = str(source_path) - calls["db_schema"] = db_schema calls["dry_run"] = dry_run - calls["merge_strategy"] = merge_strategy return VocabularyLoadReport( source_path=str(source_path), backend="sqlite", @@ -203,51 +205,17 @@ def fake_load_vocab_source( ), ) - monkeypatch.setattr( - "omop_alchemy.db.load_environment", - fake_load_environment, - ) - monkeypatch.setattr( - "omop_alchemy.db.get_engine_name", - fake_get_engine_name, - ) - 
monkeypatch.setattr( - "omop_alchemy.db.create_engine_with_dependencies", - fake_create_engine, - ) monkeypatch.setattr( "omop_alchemy.maintenance.cli_vocab.load_vocab_source", fake_load_vocab_source, ) - with runner.isolated_filesystem(): - athena_dir = Path("athena_source") - athena_dir.mkdir() - set_result = runner.invoke( - app, - [ - "config", - "override", - "--athena-source", - str(athena_dir), - "--engine-schema", - "cdm", - ], - ) - assert set_result.exit_code == 0 + result = runner.invoke(app, ["load-vocab-source", "--dry-run"]) - result = runner.invoke( - app, - ["load-vocab-source", "--dry-run"], - ) - - expected_source_path = str((defaults_path().parent / "athena_source").resolve()) - assert result.exit_code == 0 - assert calls["engine"] == "ENGINE" - assert calls["source_path"] == expected_source_path - assert calls["merge_strategy"] == "replace" - assert "load-vocab-source" in result.stdout - assert "concept" in result.stdout + assert result.exit_code == 0, result.output + assert calls["source_path"] == str(athena_dir) + assert calls["dry_run"] is True + assert "load-vocab-source" in result.stdout def test_load_vocab_model_csv_passes_quote_mode(monkeypatch, tmp_path): @@ -457,8 +425,20 @@ def fake_create_staging_table(session): def test_load_vocab_source_cli_surfaces_database_error_detail(monkeypatch): """Test load vocab source cli surfaces database error detail.""" - def fake_build_engine(*, dotenv: str | None, engine_schema: str | None): - return "ENGINE" + from oa_configurator import StackConfig + + cfg = StackConfig.for_session( + connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, + resources={"default": {"primary_db": "db", "cdm_schema": "main"}}, + ) + monkeypatch.setattr( + "omop_alchemy.maintenance._cli_utils.load_stack_config", + lambda: cfg, + ) + monkeypatch.setattr( + "omop_alchemy.config.load_stack_config", + lambda: cfg, + ) def fail_load_vocab_source(*args, **kwargs): raise sa.exc.ProgrammingError( @@ -467,10 +447,6 @@ 
def fail_load_vocab_source(*args, **kwargs): Exception("value too long for type character varying(255)"), ) - monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.build_engine", - fake_build_engine, - ) monkeypatch.setattr( "omop_alchemy.maintenance.cli_vocab.load_vocab_source", fail_load_vocab_source, diff --git a/tests/test_truncate_tables.py b/tests/test_truncate_tables.py index ab9a29c..24dc0d5 100644 --- a/tests/test_truncate_tables.py +++ b/tests/test_truncate_tables.py @@ -40,13 +40,19 @@ def test_truncate_tables_reports_blocking_foreign_key_references(monkeypatch, tm def test_truncate_tables_cli_requires_confirmation(monkeypatch): """Test truncate tables cli requires confirmation.""" + from oa_configurator import StackConfig + + cfg = StackConfig.for_session( + connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, + resources={"default": {"primary_db": "db", "cdm_schema": "main"}}, + ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.build_engine", - lambda *, dotenv, engine_schema: "ENGINE", + "omop_alchemy.maintenance._cli_utils.load_stack_config", + lambda: cfg, ) monkeypatch.setattr( - "omop_alchemy.db.resolve_connection", - lambda **kwargs: type("C", (), {"dotenv": None, "engine_schema": None, "db_schema": None, "athena_source": None})(), + "omop_alchemy.config.load_stack_config", + lambda: cfg, ) result = runner.invoke(app, ["truncate-tables", "--scope", "clinical"]) @@ -56,19 +62,22 @@ def test_truncate_tables_cli_requires_confirmation(monkeypatch): def test_truncate_tables_cli_invokes_management(monkeypatch): """Test truncate tables cli invokes management.""" - calls: dict[str, object] = {} - - def fake_load_environment(dotenv: str) -> None: - calls["dotenv"] = dotenv + from oa_configurator import StackConfig - def fake_get_engine_name(schema: str | None = None) -> str: - calls["engine_schema"] = schema - return "postgresql+psycopg://example" + calls: dict[str, object] = {} - def fake_create_engine(url: str, *, future: 
bool) -> str: - calls["engine_url"] = url - calls["future"] = future - return "ENGINE" + cfg = StackConfig.for_session( + connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, + resources={"default": {"primary_db": "db", "cdm_schema": "main"}}, + ) + monkeypatch.setattr( + "omop_alchemy.maintenance._cli_utils.load_stack_config", + lambda: cfg, + ) + monkeypatch.setattr( + "omop_alchemy.config.load_stack_config", + lambda: cfg, + ) def fake_truncate_tables( engine: object, @@ -99,18 +108,6 @@ def fake_truncate_tables( ) ] - monkeypatch.setattr( - "omop_alchemy.db.load_environment", - fake_load_environment, - ) - monkeypatch.setattr( - "omop_alchemy.db.get_engine_name", - fake_get_engine_name, - ) - monkeypatch.setattr( - "omop_alchemy.db.create_engine_with_dependencies", - fake_create_engine, - ) monkeypatch.setattr( "omop_alchemy.maintenance.cli_tables.truncate_tables", fake_truncate_tables, From 91cfd158f972b2372f4c7cd3001698cc55e896fb Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Thu, 28 May 2026 00:27:55 +0000 Subject: [PATCH 13/25] Absorb DB connection config, update docs for config --- docs/cli/index.md | 63 +++++++------------ docs/cli/reference.md | 21 +------ docs/getting-started/configuration.md | 17 ++++- docs/getting-started/maintenance.md | 47 +------------- mkdocs.yml | 5 -- omop_alchemy/__init__.py | 3 +- omop_alchemy/config.py | 15 ++++- omop_alchemy/maintenance/_cli_utils.py | 11 ++-- omop_alchemy/maintenance/cli_foreign_keys.py | 8 ++- omop_alchemy/maintenance/cli_schema_doctor.py | 8 ++- tests/test_foreign_keys.py | 6 +- tests/test_fulltext.py | 2 +- tests/test_indexes.py | 2 +- tests/test_load_vocab_source.py | 4 +- tests/test_truncate_tables.py | 4 +- 15 files changed, 83 insertions(+), 133 deletions(-) diff --git a/docs/cli/index.md b/docs/cli/index.md index 56b0db5..376b0c5 100644 --- a/docs/cli/index.md +++ b/docs/cli/index.md @@ -25,7 +25,6 @@ omop-alchemy --help | `fulltext install` / `populate` / `drop` | Manage tsvector 
sidecar columns on vocabulary tables | | `backup-database` | Create a pg_dump backup artifact | | `restore-database` | Restore a pg_dump or psql backup artifact | -| `config show` / `override` | View and persist saved connection defaults | See the [Command Reference](reference.md) for full parameter details. @@ -37,42 +36,41 @@ Most commands are decorated with `@omop_command`. This decorator handles all con ### What it injects -Every decorated command receives three additional CLI flags, wired to identical Typer `Option` definitions across all commands: +Every decorated command receives: -| Flag | Type | Description | -|---|---|---| -| `--dotenv` | `str` (optional) | Path to a `.env` file loaded before connection resolution. Overrides the saved `DOTENV` default. | -| `--engine-schema` | `str` (optional) | Named engine configuration (e.g. `cdm`, `results`). Resolves to the `ENGINE_` environment variable group. | -| `--db-schema` | `str` (optional) | Database schema to target (e.g. `cdm5`, `vocab`). Sets `search_path` on PostgreSQL. Not supported on SQLite. | +- `conn` — a `_ConnContext` dataclass (see below) +- `engine` — a SQLAlchemy `Engine` ready to use +- `--dry-run` — injected on commands that support preview mode -Commands that support preview mode also receive `--dry-run` via the decorator. +No connection flags are injected; all configuration comes from oa_configurator. ### What it does behind the scenes When a decorated command is invoked: -1. The decorator pops `dotenv`, `engine_schema`, and `db_schema` from the Typer kwargs. -2. It calls `resolve_connection(...)` to produce a `conn` object carrying those values merged with any saved defaults. -3. It prints a header showing the command name, engine schema, database schema, and mode label (apply / dry-run / inspect). -4. It calls `build_engine(...)` to create a SQLAlchemy `Engine`. -5. It calls the original function body with `(conn, engine, **remaining_kwargs)`. -6. 
Any `RuntimeError`, `SQLAlchemyError`, or `BackendNotSupportedError` raised by the body is caught and rendered as a formatted error, then exits with code 1. +1. Loads `~/.config/omop/config.toml` via `load_stack_config()`. +2. Calls `OmopAlchemyConfig.from_stack(config)` to read package-specific settings and validate that the required `cdm_db` resource (or the `[tools.omop_alchemy] default_resource` override) is present. Raises `ConfigurationError` with a helpful message if it is missing. +3. Resolves the resource: `Resolver(config).resolve_resource("cdm_db")`. +4. Calls `.create_engine()` to build a SQLAlchemy engine with `schema_translate_map` applied. +5. Prints a command header showing the resource name, CDM schema, and run mode. +6. Calls the original function body with `(conn, engine, ...)`. +7. Catches `RuntimeError`, `SQLAlchemyError`, and `BackendNotSupportedError`; renders them as formatted errors and exits with code 1. ### Before and after Without the decorator, every command would need this boilerplate: ```python -def my_command( - dotenv: str | None = typer.Option(None, help="..."), - engine_schema: str | None = typer.Option(None, help="..."), - db_schema: str | None = typer.Option(None, help="..."), -) -> None: - conn = resolve_connection(dotenv=dotenv, engine_schema=engine_schema, db_schema=db_schema) - console.print(render_command_header(...)) +def my_command() -> None: + stack = load_stack_config() + tool = stack.tools.get("omop_alchemy") + resource_name = (tool.default_resource if tool else None) or "cdm_db" + resolved = Resolver(stack).resolve_resource(resource_name) + engine = resolved.create_engine() try: - engine = build_engine(dotenv=conn.dotenv, engine_schema=conn.engine_schema) # actual work here + results = do_work(engine, db_schema=resolved.cdm_schema) + console.print(render_results(results)) except Exception as exc: handle_error(exc) ``` @@ -83,7 +81,6 @@ With the decorator, the function body is all that matters: 
@app.command("my-command") @omop_command("my-command") def my_command(conn, engine) -> None: - # conn and engine are ready to use results = do_work(engine, db_schema=conn.db_schema) console.print(render_results(results)) ``` @@ -92,23 +89,9 @@ def my_command(conn, engine) -> None: ## The `conn` object -`conn` is a `ConnectionDefaults` instance. It exposes: +`conn` is a `_ConnContext` dataclass. It exposes: | Attribute | Description | |---|---| -| `conn.dotenv` | Resolved dotenv path (from CLI flag or saved default) | -| `conn.engine_schema` | Resolved engine schema name | -| `conn.db_schema` | Resolved database schema name | -| `conn.athena_source` | Resolved Athena vocabulary CSV directory path | - ---- - -## Connection resolution order - -When the CLI resolves a connection parameter, it uses this precedence (highest to lowest): - -1. Explicit CLI flag (e.g. `--db-schema cdm5`) -2. Saved default in the nearest `.omop-maint.toml` file -3. Command default (e.g. `vocabulary_included` defaults to `False` on most commands) - -Use `omop-alchemy config override` to persist defaults so you do not need to repeat connection flags on every invocation. +| `conn.db_schema` | CDM schema name from the resolved resource (e.g. `"omop"`) | +| `conn.athena_source` | Athena vocabulary CSV directory from `[tools.omop_alchemy.extra]`; `None` if not configured | diff --git a/docs/cli/reference.md b/docs/cli/reference.md index 0557e72..88f381b 100644 --- a/docs/cli/reference.md +++ b/docs/cli/reference.md @@ -1,6 +1,6 @@ # Command Reference -Every command listed here also accepts `--dotenv`, `--engine-schema`, and `--db-schema`. These are injected by the `@omop_command` decorator and control connection resolution. See the [CLI Overview](index.md) for a full description of those flags. +Connection and schema configuration comes from `~/.config/omop/config.toml` — no per-command connection flags are needed. 
See the [CLI Overview](index.md) for how the `@omop_command` decorator resolves the connection, and [Configuration](../getting-started/configuration.md) for setup. --- @@ -250,21 +250,4 @@ Restore a database backup that was created with `backup-database`. ## Configuration -### `config show` - -Display current saved connection defaults from the nearest `.omop-maint.toml` file. - -No additional options beyond the connection flags. - ---- - -### `config override` - -Persist one or more connection overrides to `.omop-maint.toml` for future CLI invocations. - -| Flag | Type / Choices | Default | Description | -|---|---|---|---| -| `--dotenv` | str (optional) | (none) | Path to a `.env` file. Saved relative to `.omop-maint.toml` and resolved back to absolute on load. | -| `--engine-schema` | str (optional) | (none) | Named engine configuration to use (e.g. `cdm`, `results`). | -| `--db-schema` | str (optional) | (none) | Database schema to target (e.g. `cdm5`, `vocab`). | -| `--athena-source` | str (optional) | (none) | Path to the unzipped Athena vocabulary CSV directory. Used by `load-vocab-source` when `--athena-source` is omitted. | +Connection and schema settings are managed via `omop-config` (oa_configurator). Use `omop-config init` to create `~/.config/omop/config.toml` and `omop-config configure omop_alchemy` to set package-specific options such as `athena_source_path`. See [Configuration](../getting-started/configuration.md). 
diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md index 2843030..74c09dd 100644 --- a/docs/getting-started/configuration.md +++ b/docs/getting-started/configuration.md @@ -6,7 +6,18 @@ OMOP_Alchemy reads all database connection and schema settings from ## Minimal config -Create `~/.config/omop/config.toml` with at least one connection and one resource: +Run the interactive configure command to set up the CDM database connection and write +`~/.config/omop/config.toml`: + +```bash +omop-config configure omop_alchemy +``` + +This prompts for connection details (host, dialect, credentials) and schema names, then +saves them under the canonical resource name `cdm_db` that all OMOP stack packages +recognise. + +The resulting TOML looks like: ```toml [connections.cdm] @@ -17,12 +28,12 @@ user = "omop" password = "changeme" database = "omop_cdm" -[resources.default] +[resources.cdm_db] primary_db = "cdm" cdm_schema = "omop" ``` -Run `omop-config init` to create this file interactively, or write it manually. +You can also write or edit this file manually. ## Vocabulary loading diff --git a/docs/getting-started/maintenance.md b/docs/getting-started/maintenance.md index 86d3e5d..b0888e5 100644 --- a/docs/getting-started/maintenance.md +++ b/docs/getting-started/maintenance.md @@ -13,52 +13,7 @@ use for the ORM. ## Connection setup -Every command accepts three connection flags: - -| Flag | Purpose | -| --- | --- | -| `--dotenv ` | Load a `.env` file before building the engine | -| `--engine-schema ` | Select the engine by name (see below) | -| `--db-schema ` | Override the target schema inside the database | - -**Engine schema selection.** OMOP Alchemy supports multiple named engine configurations. -The `--engine-schema` value maps to an environment variable `ENGINE_`. -For example, `--engine-schema cdm` looks for `ENGINE_CDM`. With no `--engine-schema`, -it falls back to the bare `ENGINE` variable. 
- -**Database schema (`--db-schema`).** On PostgreSQL this sets the `search_path` for ORM -CSV loading and qualifies table references for schema-aware operations. On SQLite it -is ignored by most commands. - -### Saving defaults - -Instead of typing the same flags on every command, save your defaults once: - -```bash -omop-alchemy config set-overrides \ - --dotenv .env \ - --engine-schema cdm \ - --db-schema public \ - --athena-source ./athena_files -``` - -This writes `.omop-alchemy.toml` into your project root (the nearest ancestor directory -containing `pyproject.toml`). If no project root is found, it writes to the current -directory. You can override the location with `OMOP_MAINT_DEFAULTS_FILE`. - -Inspect or clear saved defaults: - -```bash -omop-alchemy config show -omop-alchemy config clear-overrides # clears everything -omop-alchemy config clear-overrides --db-schema # clears one field -``` - -**Resolution order for each flag:** - -1. Explicit CLI flag (highest priority) -2. Saved `.omop-alchemy.toml` default -3. Command-level fallback (lowest priority) +Database connection and CDM schema come from [oa_configurator](../getting-started/configuration.md) — no per-command flags needed. Run `omop-config init` once to create `~/.config/omop/config.toml`, then every `omop-alchemy` command picks it up automatically. 
--- diff --git a/mkdocs.yml b/mkdocs.yml index c4ce4b7..5025737 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -66,9 +66,7 @@ nav: - Architecture: api/architecture.md - CDM Base: api/base.md - Columns: api/columns.md - - Configuration: api/configuration.md - Relationships: api/relationships.md - - Typing: api/typing.md - Object-Relational Mappings: - Overview: models/index.md @@ -126,7 +124,4 @@ nav: - Advanced: - Overview: advanced/index.md - Backends: advanced/backends.md - - Views & Analytics: advanced/views.md - - Event Timelines: advanced/timelines.md - PostgreSQL Full-Text Search: advanced/fulltext.md - - Querying Tips: advanced/query_patterns.md diff --git a/omop_alchemy/__init__.py b/omop_alchemy/__init__.py index fd3bad5..947e793 100644 --- a/omop_alchemy/__init__.py +++ b/omop_alchemy/__init__.py @@ -1,8 +1,9 @@ -from .config import ROOT_PATH, TEST_PATH, OmopAlchemyConfig, get_resolver, get_config +from .config import CDM_DB_RESOURCE, ROOT_PATH, TEST_PATH, OmopAlchemyConfig, get_resolver, get_config from .db import create_engine_with_dependencies __all__ = [ + "CDM_DB_RESOURCE", "OmopAlchemyConfig", "create_engine_with_dependencies", "get_config", diff --git a/omop_alchemy/config.py b/omop_alchemy/config.py index d8946a0..11a59cb 100644 --- a/omop_alchemy/config.py +++ b/omop_alchemy/config.py @@ -1,17 +1,28 @@ from __future__ import annotations from pathlib import Path -from typing import ClassVar +from typing import ClassVar, Final from pydantic import Field -from oa_configurator import PackageConfigBase, Resolver, load_stack_config +from oa_configurator import PackageConfigBase, ResourceSpec, Resolver, load_stack_config ROOT_PATH = Path(__file__).parent TEST_PATH = Path(__file__).parent.parent / "tests" +CDM_DB_RESOURCE: Final[str] = "cdm_db" + class OmopAlchemyConfig(PackageConfigBase): tool_name: ClassVar[str] = "omop_alchemy" + required_resources: ClassVar[tuple[str, ...]] = (CDM_DB_RESOURCE,) + owned_resources: ClassVar[tuple[ResourceSpec, ...]] = ( + 
ResourceSpec( + semantic_name=CDM_DB_RESOURCE, + display_name="OMOP CDM Database", + description="Database containing the OMOP CDM tables and vocabulary.", + connection_name_hint="cdm", + ), + ) athena_source_path: str | None = Field( default=None, diff --git a/omop_alchemy/maintenance/_cli_utils.py b/omop_alchemy/maintenance/_cli_utils.py index 925d9df..9ab354f 100644 --- a/omop_alchemy/maintenance/_cli_utils.py +++ b/omop_alchemy/maintenance/_cli_utils.py @@ -53,10 +53,13 @@ def wrapper(**kwargs: Any) -> Any: _vocab = kwargs.get("vocabulary_included", vocabulary_included) _mode = mode_label if mode_label is not None else ("dry-run" if _dry_run else "apply") try: - from ..config import get_config - resolver = Resolver(load_stack_config()) - resolved = resolver.resolve_resource("default") - pkg_config = get_config() + from ..config import OmopAlchemyConfig, get_config + stack = load_stack_config() + pkg_config = get_config() # validates required_resources — raises ConfigurationError if missing + tool = stack.tools.get("omop_alchemy") + resource_name = (tool.default_resource if tool else None) or OmopAlchemyConfig.required_resources[0] + resolver = Resolver(stack) + resolved = resolver.resolve_resource(resource_name) engine = resolved.create_engine() conn = _ConnContext( db_schema=resolved.cdm_schema, diff --git a/omop_alchemy/maintenance/cli_foreign_keys.py b/omop_alchemy/maintenance/cli_foreign_keys.py index 5c67631..e696e12 100644 --- a/omop_alchemy/maintenance/cli_foreign_keys.py +++ b/omop_alchemy/maintenance/cli_foreign_keys.py @@ -8,6 +8,7 @@ import typer from oa_configurator import Resolver, load_stack_config +from omop_alchemy.config import OmopAlchemyConfig from ._cli_utils import _ConnContext from ..backends import Backend, resolve_backend, require_backend_support, backend_support_note from ._cli_utils import handle_error, omop_command @@ -493,8 +494,11 @@ def enable_foreign_keys_command( ) -> None: """Re-enable PostgreSQL RI trigger enforcement. 
Use --strict to abort if any violations exist first.""" try: - resolver = Resolver(load_stack_config()) - resolved = resolver.resolve_resource("default") + stack = load_stack_config() + tool = stack.tools.get("omop_alchemy") + resource_name = (tool.default_resource if tool else None) or OmopAlchemyConfig.required_resources[0] + resolver = Resolver(stack) + resolved = resolver.resolve_resource(resource_name) conn = _ConnContext(db_schema=resolved.cdm_schema) engine = resolved.create_engine() except Exception as exc: diff --git a/omop_alchemy/maintenance/cli_schema_doctor.py b/omop_alchemy/maintenance/cli_schema_doctor.py index 887f8de..0044476 100644 --- a/omop_alchemy/maintenance/cli_schema_doctor.py +++ b/omop_alchemy/maintenance/cli_schema_doctor.py @@ -5,6 +5,7 @@ from dataclasses import dataclass from oa_configurator import Resolver, load_stack_config +from omop_alchemy.config import OmopAlchemyConfig from omop_alchemy.backends.resolve import SupportedDialect from omop_alchemy.db import create_engine_with_dependencies @@ -178,8 +179,11 @@ def collect_doctor_report( foreign_key_validation: ForeignKeyValidationReport | None = None if info.connection_ready: - resolver = Resolver(load_stack_config()) - resolved = resolver.resolve_resource("default") + stack = load_stack_config() + tool = stack.tools.get("omop_alchemy") + resource_name = (tool.default_resource if tool else None) or OmopAlchemyConfig.required_resources[0] + resolver = Resolver(stack) + resolved = resolver.resolve_resource(resource_name) engine = resolved.create_engine() db_schema = resolved.cdm_schema try: diff --git a/tests/test_foreign_keys.py b/tests/test_foreign_keys.py index 368a736..7b289d9 100644 --- a/tests/test_foreign_keys.py +++ b/tests/test_foreign_keys.py @@ -76,7 +76,7 @@ def test_disable_foreign_keys_cli_fails_gracefully_for_sqlite(monkeypatch): cfg = StackConfig.for_session( connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, - resources={"default": {"primary_db": "db", 
"cdm_schema": "main"}}, + resources={"cdm_db": {"primary_db": "db", "cdm_schema": "main"}}, ) monkeypatch.setattr( "omop_alchemy.maintenance._cli_utils.load_stack_config", @@ -251,7 +251,7 @@ def test_enable_foreign_keys_strict_cli_invokes_strict_management(monkeypatch): cfg = StackConfig.for_session( connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, - resources={"default": {"primary_db": "db", "cdm_schema": "main"}}, + resources={"cdm_db": {"primary_db": "db", "cdm_schema": "main"}}, ) monkeypatch.setattr( "omop_alchemy.maintenance.cli_foreign_keys.load_stack_config", @@ -364,7 +364,7 @@ def test_foreign_keys_validate_cli_invokes_validation(monkeypatch): cfg = StackConfig.for_session( connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, - resources={"default": {"primary_db": "db", "cdm_schema": "main"}}, + resources={"cdm_db": {"primary_db": "db", "cdm_schema": "main"}}, ) monkeypatch.setattr( "omop_alchemy.maintenance._cli_utils.load_stack_config", diff --git a/tests/test_fulltext.py b/tests/test_fulltext.py index c13f53f..da66a7f 100644 --- a/tests/test_fulltext.py +++ b/tests/test_fulltext.py @@ -204,7 +204,7 @@ def test_fulltext_install_cli_passes_options(monkeypatch): cfg = StackConfig.for_session( connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, - resources={"default": {"primary_db": "db", "cdm_schema": "public"}}, + resources={"cdm_db": {"primary_db": "db", "cdm_schema": "public"}}, ) monkeypatch.setattr( "omop_alchemy.maintenance._cli_utils.load_stack_config", diff --git a/tests/test_indexes.py b/tests/test_indexes.py index 06a74d9..e7f5736 100644 --- a/tests/test_indexes.py +++ b/tests/test_indexes.py @@ -122,7 +122,7 @@ def test_disable_indexes_cli_invokes_management(monkeypatch): cfg = StackConfig.for_session( connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, - resources={"default": {"primary_db": "db", "cdm_schema": "main"}}, + resources={"cdm_db": {"primary_db": "db", "cdm_schema": 
"main"}}, ) monkeypatch.setattr( "omop_alchemy.maintenance._cli_utils.load_stack_config", diff --git a/tests/test_load_vocab_source.py b/tests/test_load_vocab_source.py index fa6cc56..8afe0f2 100644 --- a/tests/test_load_vocab_source.py +++ b/tests/test_load_vocab_source.py @@ -159,7 +159,7 @@ def test_load_vocab_source_cli_uses_configured_athena_source(monkeypatch, tmp_pa cfg = StackConfig.for_session( connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, - resources={"default": {"primary_db": "db", "cdm_schema": "main"}}, + resources={"cdm_db": {"primary_db": "db", "cdm_schema": "main"}}, tools={"omop_alchemy": {"extra": {"athena_source_path": str(athena_dir)}}}, ) @@ -429,7 +429,7 @@ def test_load_vocab_source_cli_surfaces_database_error_detail(monkeypatch): cfg = StackConfig.for_session( connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, - resources={"default": {"primary_db": "db", "cdm_schema": "main"}}, + resources={"cdm_db": {"primary_db": "db", "cdm_schema": "main"}}, ) monkeypatch.setattr( "omop_alchemy.maintenance._cli_utils.load_stack_config", diff --git a/tests/test_truncate_tables.py b/tests/test_truncate_tables.py index 24dc0d5..53ee2a2 100644 --- a/tests/test_truncate_tables.py +++ b/tests/test_truncate_tables.py @@ -44,7 +44,7 @@ def test_truncate_tables_cli_requires_confirmation(monkeypatch): cfg = StackConfig.for_session( connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, - resources={"default": {"primary_db": "db", "cdm_schema": "main"}}, + resources={"cdm_db": {"primary_db": "db", "cdm_schema": "main"}}, ) monkeypatch.setattr( "omop_alchemy.maintenance._cli_utils.load_stack_config", @@ -68,7 +68,7 @@ def test_truncate_tables_cli_invokes_management(monkeypatch): cfg = StackConfig.for_session( connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, - resources={"default": {"primary_db": "db", "cdm_schema": "main"}}, + resources={"cdm_db": {"primary_db": "db", "cdm_schema": "main"}}, ) 
monkeypatch.setattr( "omop_alchemy.maintenance._cli_utils.load_stack_config", From dd4d8c35fdfee379bca4da1685880407d1d9e0d0 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Thu, 28 May 2026 03:11:57 +0000 Subject: [PATCH 14/25] Updated docs to reflect new state --- docs/advanced/index.md | 7 -- docs/advanced/query_patterns.md | 0 docs/advanced/timelines.md | 108 ++++++++++++++++++++++++++++ docs/advanced/views.md | 0 docs/api/configuration.md | 0 docs/api/typing.md | 81 +++++++++++++++++++++ docs/getting-started/maintenance.md | 19 ++--- docs/index.md | 3 +- mkdocs.yml | 2 + 9 files changed, 199 insertions(+), 21 deletions(-) delete mode 100644 docs/advanced/query_patterns.md delete mode 100644 docs/advanced/views.md delete mode 100644 docs/api/configuration.md diff --git a/docs/advanced/index.md b/docs/advanced/index.md index ae3c33f..c2f08d5 100644 --- a/docs/advanced/index.md +++ b/docs/advanced/index.md @@ -7,12 +7,6 @@ the immutability and interpretability of the underlying CDM tables. --- -## Read-Only Views - -- [Views](views.md) - ---- - ## Timelines & Longitudinal Analysis - [Event Timelines](timelines.md) @@ -23,4 +17,3 @@ the immutability and interpretability of the underlying CDM tables. - [Backend Compatibility](backends.md) - [PostgreSQL Full-Text Search](fulltext.md) -- [Query Patterns](query_patterns.md) diff --git a/docs/advanced/query_patterns.md b/docs/advanced/query_patterns.md deleted file mode 100644 index e69de29..0000000 diff --git a/docs/advanced/timelines.md b/docs/advanced/timelines.md index e69de29..2956a6e 100644 --- a/docs/advanced/timelines.md +++ b/docs/advanced/timelines.md @@ -0,0 +1,108 @@ +# Patient Timelines + +OMOP Alchemy includes a lightweight timeline layer that projects OMOP CDM ORM objects +into a **unified, time-ordered event stream** per patient. + +It is primarily intended for feature construction and exploratory analysis — not for +production query pipelines where raw SQLAlchemy queries are more appropriate. 
+ +--- + +## Core concepts + +### `EventTime` + +A canonical temporal representation. Every clinical event has a start datetime; an end +datetime is optional. The `kind` property returns `"point"` or `"interval"`. + +::: omop_alchemy.cdm.handlers.timeline.event_timeline.EventTime + +--- + +### `EventValue` + +The value associated with a clinical event — numeric, concept, string, or none. + +::: omop_alchemy.cdm.handlers.timeline.event_timeline.EventValue + +--- + +### `EventMapping` + +Declares which ORM fields supply the concept, start/end datetimes, and value for a +particular CDM table. Subclasses of `ClinicalEvent` set `_mapping` to an `EventMapping` +instance at class level. + +::: omop_alchemy.cdm.handlers.timeline.event_timeline.EventMapping + +--- + +## The `ClinicalEvent` mixin + +`ClinicalEvent` is a mixin that adds timeline behaviour to any CDM ORM class. It reads +`_mapping` to implement `event_time`, `event_value`, `event_metadata`, `to_dict`, and +`to_json`. + +::: omop_alchemy.cdm.handlers.timeline.event_timeline.ClinicalEvent + +--- + +## Concrete event classes + +Three CDM tables are pre-wired with `EventMapping`s: + +| Class | CDM table | Concept field | Value fields | +|-------|-----------|---------------|--------------| +| `Condition_Event` | `condition_occurrence` | `condition_concept_id` | — | +| `Measurement_Event` | `measurement` | `measurement_concept_id` | `value_as_number`, `value_as_concept_id`, `value_as_string` | +| `Drug_Exposure_Event` | `drug_exposure` | `drug_concept_id` | `quantity` | + +::: omop_alchemy.cdm.handlers.timeline.event_timeline.Condition_Event + +::: omop_alchemy.cdm.handlers.timeline.event_timeline.Measurement_Event + +::: omop_alchemy.cdm.handlers.timeline.event_timeline.Drug_Exposure_Event + +--- + +## `Person_Timeline` + +Extends the `Person` ORM class with `.events` and `.timeline` properties. Requires an +active SQLAlchemy session (i.e. 
the object must have been loaded from a session, not +constructed in memory). + +::: omop_alchemy.cdm.handlers.timeline.event_timeline.Person_Timeline + +--- + +## Usage example + +```python +from sqlalchemy.orm import Session +from omop_alchemy.cdm.handlers.timeline import Person_Timeline + +with Session(engine) as session: + person = session.get(Person_Timeline, 42) + for event in person.timeline: # sorted by event_time.start + print(event) + print(event.to_dict()) +``` + +--- + +## Extending to new tables + +To add a new CDM table to the timeline, subclass both `ClinicalEvent` and the target ORM +class and set `_mapping`: + +```python +from omop_alchemy.cdm.handlers.timeline.event_timeline import ClinicalEvent, EventMapping +from omop_alchemy.cdm.model.clinical import Procedure_Occurrence + +class Procedure_Event(Procedure_Occurrence, ClinicalEvent): + _mapping = EventMapping( + concept_field="procedure_concept_id", + start_date_field="procedure_date", + start_datetime_field="procedure_datetime", + ) +``` diff --git a/docs/advanced/views.md b/docs/advanced/views.md deleted file mode 100644 index e69de29..0000000 diff --git a/docs/api/configuration.md b/docs/api/configuration.md deleted file mode 100644 index e69de29..0000000 diff --git a/docs/api/typing.md b/docs/api/typing.md index e69de29..dca5a23 100644 --- a/docs/api/typing.md +++ b/docs/api/typing.md @@ -0,0 +1,81 @@ +# Typing + +OMOP Alchemy exposes a set of **Protocols and typed containers** for code that needs to +interact with CDM classes without coupling to specific ORM implementations. + +These live in two modules: + +| Module | Contents | +|--------|---------| +| `omop_alchemy.cdm.base.typing` | Runtime-checkable Protocols for structural checking | +| `omop_alchemy.cdm.model.typing` | Typed row containers | + +--- + +## Protocols (`cdm.base.typing`) + +### `HasConceptId` + +Satisfied by any object with an integer `concept_id` attribute. 
+ +::: omop_alchemy.cdm.base.typing.HasConceptId + +--- + +### `HasPersonId` + +Satisfied by any object with an integer `person_id` attribute. + +::: omop_alchemy.cdm.base.typing.HasPersonId + +--- + +### `HasEpisodeId` + +Satisfied by any object with an integer `episode_id` attribute. + +::: omop_alchemy.cdm.base.typing.HasEpisodeId + +--- + +### `DomainSemanticTable` + +Structural protocol for CDM ORM classes that participate in domain validation. A class +satisfies this protocol if it has `__tablename__`, `__mapper__`, `__expected_domains__`, +and a `collect_domain_rules()` classmethod. + +::: omop_alchemy.cdm.base.typing.DomainSemanticTable + +--- + +### `ClinicalEvent` (Protocol) + +Minimal protocol for ORM rows that represent a clinical event — a concept, a person, a +start date, and an optional end date. Used as the structural contract for domain-level +utilities that operate across multiple CDM tables. + +!!! note + The concrete mixin of the same name lives in + `omop_alchemy.cdm.handlers.timeline.event_timeline`. The Protocol here is the + structural interface; the mixin there is the implementation. + +::: omop_alchemy.cdm.base.typing.ClinicalEvent + +--- + +### `ConceptResolver` + +Protocol for objects that can look up whether a set of concept IDs are standard. + +::: omop_alchemy.cdm.base.typing.ConceptResolver + +--- + +## Typed row containers (`cdm.model.typing`) + +### `ConceptRow` + +A frozen dataclass representing the core fields of a concept lookup row. Used where a +lightweight, hashable concept record is preferable to a full ORM object. + +::: omop_alchemy.cdm.model.typing.ConceptRow diff --git a/docs/getting-started/maintenance.md b/docs/getting-started/maintenance.md index b0888e5..1ebfe31 100644 --- a/docs/getting-started/maintenance.md +++ b/docs/getting-started/maintenance.md @@ -13,7 +13,7 @@ use for the ORM. 
## Connection setup -Database connection and CDM schema come from [oa_configurator](../getting-started/configuration.md) — no per-command flags needed. Run `omop-config init` once to create `~/.config/omop/config.toml`, then every `omop-alchemy` command picks it up automatically. +Database connection and CDM schema come from [oa_configurator](../getting-started/configuration.md) — no per-command flags needed. Run `omop-config configure omop_alchemy` once to create `~/.config/omop/config.toml`, then every `omop-alchemy` command picks it up automatically. --- @@ -239,14 +239,12 @@ be installed and on `PATH`. ```bash # Create a backup (custom format is recommended — smaller and restorable in parallel) omop-alchemy backup-database \ - --engine-schema source \ --output-path ./cdm-backup.dump \ --format custom # Restore into a target database (the DB must already exist and be empty) omop-alchemy restore-database ./cdm-backup.dump \ - --format custom \ - --engine-schema target + --format custom ``` **Format comparison:** @@ -258,8 +256,8 @@ omop-alchemy restore-database ./cdm-backup.dump \ **Restore caveats:** - The target database must already exist. The CLI does not create or drop databases. -- For `plain` format, `--db-schema` has no effect; the schema is embedded in the SQL. -- For `custom` format, `--db-schema` restricts the restore to the named schema only. +- For `plain` format, the schema is embedded in the SQL dump; no selective schema restore is possible. +- For `custom` format, `pg_restore` can be invoked manually with `-n <schema>` for selective schema restore. Use `--dry-run` on `backup-database` to see the `pg_dump` command that would be run without executing it. @@ -319,7 +317,7 @@ it touches and the old/new sequence positions.
| Command | Purpose | Key options | Backend | | --- | --- | --- | --- | -| `info` | Inspect CLI readiness, backend, and dependency state | `--engine-schema` | All | +| `info` | Inspect CLI readiness, backend, and dependency state | `--vocab` | All | | `doctor` | Read-only health check: connection, schema, FK state | `--deep`, `--vocab` | All (`--deep` PostgreSQL-focused) | | `data-summary` | Show managed tables and row counts | `--vocab`, `--include-missing` | All | | `reconcile-schema` | Compare ORM metadata vs live schema | `--vocab`, `--dry-run` | All | @@ -337,8 +335,5 @@ it touches and the old/new sequence positions. | `fulltext install` | Add tsvector sidecar columns to vocabulary tables | `--regconfig`, `--no-create-indexes` | PostgreSQL | | `fulltext populate` | Populate sidecar tsvector vectors | `--regconfig` | PostgreSQL | | `fulltext drop` | Remove tsvector sidecar columns and indexes | | PostgreSQL | -| `backup-database` | Create a `pg_dump` backup artifact | `--output-path`, `--format`, `--db-schema`, `--dry-run` | PostgreSQL | -| `restore-database` | Restore a backup artifact into the target DB | `--format` (required), `--db-schema`, `--dry-run` | PostgreSQL | -| `config show` | Print current saved defaults | | All | -| `config set-overrides` | Save connection defaults | `--dotenv`, `--engine-schema`, `--db-schema`, `--athena-source` | All | -| `config clear-overrides` | Remove saved defaults | per-field flags | All | +| `backup-database` | Create a `pg_dump` backup artifact | `--output-path`, `--format`, `--dry-run` | PostgreSQL | +| `restore-database` | Restore a backup artifact into the target DB | `--format` (required), `--dry-run` | PostgreSQL | diff --git a/docs/index.md b/docs/index.md index 62a4118..4192d15 100644 --- a/docs/index.md +++ b/docs/index.md @@ -61,10 +61,9 @@ Object- and model-level validation utilities that help maintain semantic clarity Patterns and techniques for more complex analytical work. 
-- [Views](advanced/views.md) - [Timelines & longitudinal analysis](advanced/timelines.md) -- [Query patterns](advanced/query_patterns.md) - [Backend considerations](advanced/backends.md) +- [PostgreSQL Full-Text Search](advanced/fulltext.md) --- diff --git a/mkdocs.yml b/mkdocs.yml index 5025737..62ed7af 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -67,6 +67,7 @@ nav: - CDM Base: api/base.md - Columns: api/columns.md - Relationships: api/relationships.md + - Typing: api/typing.md - Object-Relational Mappings: - Overview: models/index.md @@ -124,4 +125,5 @@ nav: - Advanced: - Overview: advanced/index.md - Backends: advanced/backends.md + - Patient Timelines: advanced/timelines.md - PostgreSQL Full-Text Search: advanced/fulltext.md From 5765561c793187dfe7687def51b3cbd8d63481a6 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Thu, 28 May 2026 05:02:23 +0000 Subject: [PATCH 15/25] Test configuration inclusion --- docs/getting-started/quickstart.md | 41 +++ omop_alchemy/maintenance/cli.py | 3 +- omop_alchemy/maintenance/cli_config.py | 270 +++++++++++++++++++- omop_alchemy/maintenance/cli_schema_info.py | 12 +- omop_alchemy/maintenance/cli_vocab.py | 12 + pyproject.toml | 2 +- tests/conftest.py | 83 +++++- tests/test_load_vocab_postgres.py | 2 +- uv.lock | 23 -- 9 files changed, 402 insertions(+), 46 deletions(-) diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index 03a8036..2e9984b 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -76,3 +76,44 @@ docker compose --profile pgadmin up -d ``` docker compose --profile jupyter up -d ``` + +--- + +## Running PostgreSQL tests locally + +The test suite includes PostgreSQL-specific tests that skip by default unless a test database is configured. + +> **This test database is destructive.** The test suite drops and recreates the entire `public` +> schema on every run. 
`test_cdm_db` must point to a **dedicated, empty test database** — never +> to a database that contains real data. `configure-test-db` will refuse to save if the connection +> details match any existing resource in your config. + +**Step 1 — Register a test database connection:** + +```bash +omop-alchemy configure-test-db +``` + +Prompts for connection details (defaults: `localhost:55432`, `postgresql+psycopg`, `test/test/test_db`). +Press Enter to accept or supply your own values. + +If the test user and database don't exist yet on your Postgres instance, add `--provision`: + +```bash +omop-alchemy configure-test-db --provision +``` + +`--provision` uses your existing `cdm_db` admin connection to run `CREATE USER ... SUPERUSER` and +`CREATE DATABASE`. Both operations are idempotent — safe to re-run. You need a local PostgreSQL +instance with a superuser admin account. + +> **Note on SUPERUSER**: the test user is created as a PostgreSQL superuser. This is required +> because the test suite disables FK constraint triggers during bulk vocabulary loads — +> an operation PostgreSQL restricts to superusers. This matches CI behaviour exactly. +> On a shared or production Postgres instance, provision the user manually instead. + +**Step 2 — Run the tests:** + +```bash +pytest -v tests/test_load_vocab_postgres.py +``` diff --git a/omop_alchemy/maintenance/cli.py b/omop_alchemy/maintenance/cli.py index 0e5cffd..d8aaa13 100644 --- a/omop_alchemy/maintenance/cli.py +++ b/omop_alchemy/maintenance/cli.py @@ -8,6 +8,7 @@ from . 
import ( cli_backup as backup, + cli_config as config, cli_foreign_keys as foreign_keys, cli_fulltext as fulltext, cli_indexes as indexes, @@ -34,7 +35,7 @@ app.add_typer(fulltext.app, name="fulltext") # Flat root-level commands lifted from each domain module -for _sub in (schema.app, vocab.app, tables.app, backup.app): +for _sub in (schema.app, vocab.app, tables.app, backup.app, config.app): for _cmd in _sub.registered_commands: app.registered_commands.append(_cmd) diff --git a/omop_alchemy/maintenance/cli_config.py b/omop_alchemy/maintenance/cli_config.py index 7e7056b..c33697e 100644 --- a/omop_alchemy/maintenance/cli_config.py +++ b/omop_alchemy/maintenance/cli_config.py @@ -1,4 +1,268 @@ -"""Connection config management is now handled by omop-config (oa_configurator). +"""Configure a local test database resource for running PostgreSQL tests.""" -Use `omop-config configure omop_alchemy` to set package-specific options. -""" +from __future__ import annotations + +from typing import Annotated + +import sqlalchemy as sa +import typer +from rich.console import Console +from sqlalchemy import text +from sqlalchemy.exc import SQLAlchemyError + +console = Console() +err_console = Console(stderr=True) + +_TEST_RESOURCE = "test_cdm_db" +_CONN_NAME = "pg_test" + +_DEFAULTS = dict( + dialect="postgresql+psycopg", + host="localhost", + port=55432, + user="test", + password="test", + database="test_db", + cdm_schema="public", +) + +app = typer.Typer() + + +def _quote_id(name: str) -> str: + """Double-quote a PostgreSQL identifier, escaping embedded double-quotes.""" + return '"' + name.replace('"', '""') + '"' + + +def _quote_literal(value: str) -> str: + """Single-quote a PostgreSQL string literal, escaping embedded single quotes.""" + return "'" + value.replace("'", "''") + "'" + + +def _get_admin_engine(*, yes: bool) -> sa.engine.Engine: + """Resolve admin connection: suggest cdm_db resource first, then prompt.""" + from oa_configurator import Resolver, load_stack_config 
+ + suggested_url: str | None = None + try: + stack = load_stack_config() + resolved = Resolver(stack).resolve_resource("cdm_db") + suggested_url = resolved.primary_db.url + except Exception: + pass + + if yes: + if suggested_url is None: + err_console.print( + "[red]--provision --yes requires a configured 'cdm_db' resource" + " as the admin connection.[/red]" + ) + raise typer.Exit(1) + return sa.create_engine(suggested_url) + + console.print("\n[bold]Admin connection[/bold] (needs CREATEDB + CREATEROLE):") + if suggested_url: + display = sa.engine.make_url(suggested_url).render_as_string(hide_password=True) + console.print(f" Suggested from [dim]cdm_db[/dim]: [cyan]{display}[/cyan]") + if typer.confirm("Use this connection?", default=True): + return sa.create_engine(suggested_url) + else: + console.print( + " [dim]No PostgreSQL connection found in your config.[/dim]\n" + " Enter a superuser URL, e.g.:" + " [dim]postgresql+psycopg://postgres:@localhost/postgres[/dim]" + ) + + admin_url = typer.prompt("Admin connection URL") + return sa.create_engine(admin_url.strip()) + + +def _provision_test_db( + *, + admin_engine: sa.engine.Engine, + user: str, + password: str, + database: str, + yes: bool, +) -> None: + """Create test user and database on the target Postgres instance if they don't exist.""" + admin_url = sa.engine.make_url(admin_engine.url) + if not admin_url.drivername.startswith("postgresql"): + err_console.print( + f"[red]--provision requires a PostgreSQL admin connection." + f" Got dialect: {admin_url.drivername}[/red]" + ) + raise typer.Exit(1) + + console.print( + f"\n[bold]Provision:[/bold] user=[cyan]{user}[/cyan] database=[cyan]{database}[/cyan]" + ) + if not yes: + typer.confirm("Create these objects now?", default=True, abort=True) + + _superuser_note = ( + "[yellow]Note:[/yellow] SUPERUSER is required to disable FK constraint triggers" + " during bulk loads. This grants full access to all databases on this instance." 
+ ) + with admin_engine.connect() as conn: + role_row = conn.execute( + text("SELECT rolsuper FROM pg_roles WHERE rolname = :n"), {"n": user} + ).fetchone() + if role_row is None: + conn.execute( + text(f"CREATE USER {_quote_id(user)} WITH PASSWORD {_quote_literal(password)} SUPERUSER") + ) + conn.commit() + console.print(f"[green]✓[/green] User [bold]{user!r}[/bold] created (SUPERUSER).") + console.print(_superuser_note) + elif not role_row[0]: + conn.execute(text(f"ALTER USER {_quote_id(user)} SUPERUSER")) + conn.commit() + console.print(f"[green]✓[/green] User [bold]{user!r}[/bold] upgraded to SUPERUSER.") + console.print(_superuser_note) + else: + console.print(f"[dim]User {user!r} already exists with SUPERUSER — skipped.[/dim]") + + with admin_engine.connect() as conn: + db_exists = conn.execute( + text("SELECT 1 FROM pg_database WHERE datname = :n"), {"n": database} + ).fetchone() + + if db_exists: + console.print(f"[dim]Database {database!r} already exists — skipped.[/dim]") + else: + with admin_engine.connect().execution_options(isolation_level="AUTOCOMMIT") as conn: + conn.execute(text(f"CREATE DATABASE {_quote_id(database)} OWNER {_quote_id(user)}")) + console.print( + f"[green]✓[/green] Database [bold]{database!r}[/bold] created (owner: {user!r})." + ) + + +@app.command(name="configure-test-db") +def configure_test_db( + yes: Annotated[ + bool, + typer.Option("--yes", "-y", help="Accept all defaults without prompting."), + ] = False, + provision: Annotated[ + bool, + typer.Option( + "--provision", + help=( + "Also create the PostgreSQL user and database via an admin connection. " + "Uses cdm_db as admin if configured, otherwise prompts for a superuser URL." + ), + ), + ] = False, +) -> None: + """Register a dedicated test database in ~/.config/omop/config.toml. + + Writes a 'test_cdm_db' resource for running local PostgreSQL tests. + Defaults: localhost:55432, postgresql+psycopg, user/password/database = test. 
+ + WARNING: the test suite runs DROP SCHEMA public CASCADE on every run. + 'test_cdm_db' must point to a dedicated, empty database — never to real data. + + Use --provision to also bootstrap the user and database on an existing Postgres instance. + """ + from oa_configurator.io import save_stack_config + from oa_configurator.loader import DEFAULT_CONFIG_PATH, load_stack_config + from oa_configurator.models import ConnectionConfig, ResourceConfig, StackConfig + + try: + config = load_stack_config() + except FileNotFoundError: + config = StackConfig() + console.print(f"[dim]No config found — will create {DEFAULT_CONFIG_PATH}[/dim]") + + if _TEST_RESOURCE in config.resources: + if not yes: + overwrite = typer.confirm( + f"Resource '{_TEST_RESOURCE}' already exists. Overwrite?", + default=False, + ) + if not overwrite: + console.print("[yellow]Aborted.[/yellow]") + raise typer.Exit(0) + else: + console.print(f"[yellow]Overwriting existing '{_TEST_RESOURCE}' resource.[/yellow]") + + if yes: + dialect = _DEFAULTS["dialect"] + host = _DEFAULTS["host"] + port = _DEFAULTS["port"] + user = _DEFAULTS["user"] + password = _DEFAULTS["password"] + database = _DEFAULTS["database"] + cdm_schema = _DEFAULTS["cdm_schema"] + else: + console.print("\n[bold]Test PostgreSQL connection[/bold]") + console.print("[dim]Press Enter to accept each default.[/dim]\n") + dialect = typer.prompt("Dialect", default=_DEFAULTS["dialect"]) + host = typer.prompt("Host", default=_DEFAULTS["host"]) + port = int(typer.prompt("Port", default=str(_DEFAULTS["port"]))) + user = typer.prompt("User", default=_DEFAULTS["user"]) + password = typer.prompt("Password", default=_DEFAULTS["password"], hide_input=True) + database = typer.prompt("Database", default=_DEFAULTS["database"]) + cdm_schema = typer.prompt("CDM schema", default=_DEFAULTS["cdm_schema"]) + + # Safety guard: refuse if the connection details collide with any existing non-test resource + for res_name, existing_res in config.resources.items(): + if 
res_name == _TEST_RESOURCE: + continue + existing_conn = config.connections.get(existing_res.primary_db) + if ( + existing_conn is not None + and existing_conn.host == host + and existing_conn.port == port + and existing_conn.database == database + ): + err_console.print( + f"\n[red bold]DANGER[/red bold]: these connection details match the" + f" [bold]{res_name!r}[/bold] resource.\n" + f"Tests run DROP SCHEMA public CASCADE — this would destroy your database.\n" + f"Configure a dedicated test database with a different host, port, or database name." + ) + raise typer.Exit(1) + + if provision: + admin_engine = _get_admin_engine(yes=yes) + try: + _provision_test_db( + admin_engine=admin_engine, + user=user, + password=password, + database=database, + yes=yes, + ) + except SQLAlchemyError as exc: + err_console.print(f"[red]Provision failed:[/red] {exc.__class__.__name__}: {exc}") + err_console.print("Check that the admin user has CREATEDB and CREATEROLE privileges.") + raise typer.Exit(1) + except Exception as exc: + err_console.print(f"[red]Provision failed:[/red] {exc}") + raise typer.Exit(1) + finally: + admin_engine.dispose() + + conn = ConnectionConfig( + dialect=dialect, + host=host, + port=port, + user=user, + password=password, + database=database, + ) + resource = ResourceConfig(primary_db=_CONN_NAME, cdm_schema=cdm_schema) + + config.connections[_CONN_NAME] = conn + config.resources[_TEST_RESOURCE] = resource + save_stack_config(config) + + console.print( + f"\n[green]✓[/green] Resource [bold]{_TEST_RESOURCE!r}[/bold] written to" + f" [dim]{DEFAULT_CONFIG_PATH}[/dim]" + ) + console.print(f"[green]✓[/green] Connection: [dim]{conn.safe_url()}[/dim]") + console.print("\nRun PostgreSQL tests:") + console.print(" [bold]pytest -v tests/test_load_vocab_postgres.py[/bold]") diff --git a/omop_alchemy/maintenance/cli_schema_info.py b/omop_alchemy/maintenance/cli_schema_info.py index 143a5de..0a8cd5b 100644 --- a/omop_alchemy/maintenance/cli_schema_info.py +++ 
b/omop_alchemy/maintenance/cli_schema_info.py @@ -13,7 +13,7 @@ from oa_configurator import Resolver, load_stack_config from oa_configurator.loader import DEFAULT_CONFIG_PATH from omop_alchemy.backends.resolve import SupportedDialect -from omop_alchemy.db import create_engine_with_dependencies +from omop_alchemy.config import OmopAlchemyConfig from .cli_schema_tables import collect_missing_tables from .tables import ( @@ -325,10 +325,10 @@ def collect_maintenance_info( ) cli_path = shutil.which("omop-alchemy") - resource_name = "default" db_schema: str | None = None engine_url: str | None = None backend: str | None = None + engine: sa.engine.Engine | None = None engine_created = False engine_error: str | None = None connection_ready = False @@ -336,8 +336,12 @@ def collect_maintenance_info( existing_table_count: int | None = None missing_table_count: int | None = None + resource_name = OmopAlchemyConfig.required_resources[0] try: - resolver = Resolver(load_stack_config()) + stack = load_stack_config() + tool = stack.tools.get(OmopAlchemyConfig.tool_name) + resource_name = (tool.default_resource if tool else None) or resource_name + resolver = Resolver(stack) resolved = resolver.resolve_resource(resource_name) db_schema = resolved.cdm_schema raw_url = sa.engine.make_url(resolved.primary_db.url) @@ -350,7 +354,7 @@ def collect_maintenance_info( except Exception as exc: engine_error = f"Could not resolve engine configuration: {exc}" - if engine_created: + if engine is not None: try: with engine.connect() as connection: connection.exec_driver_sql("SELECT 1") diff --git a/omop_alchemy/maintenance/cli_vocab.py b/omop_alchemy/maintenance/cli_vocab.py index 2249f3b..99e2489 100644 --- a/omop_alchemy/maintenance/cli_vocab.py +++ b/omop_alchemy/maintenance/cli_vocab.py @@ -287,6 +287,18 @@ def load_vocab_source( # use. No stale pooled connections survive between tables, which prevents # "connection in recovery mode" failures on subsequent tables after a heavy load. 
load_engine = sa.create_engine(engine.url, poolclass=NullPool) + if db_schema is not None: + # NullPool discards the DBAPI connection on every commit, so a one-time + # SET search_path on the first checkout doesn't survive into the next + # checkout (e.g. after create_staging_table's commit). Re-apply on every + # new connection via an engine-level connect event so COPY and raw-cursor + # operations always target the right schema. + _quoted_schema = '"' + db_schema.replace('"', '""') + '"' + @sa.event.listens_for(load_engine, "connect") + def _set_search_path(dbapi_conn, _record): + cur = dbapi_conn.cursor() + cur.execute(f"SET search_path TO {_quoted_schema}") + cur.close() all_models = REQUIRED_VOCAB_MODELS + OPTIONAL_VOCAB_MODELS table_count = sum( diff --git a/pyproject.toml b/pyproject.toml index 32a79b8..974e25c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ dependencies = [ "sqlalchemy>=2.0.45", "pandas>=2.0", "pyyaml>=6.0", - "oa-configurator>=0.2.0", + #"oa-configurator>=0.2.0", "typer>=0.12", "rich>=13.0", "orm-loader>=0.4.1", diff --git a/tests/conftest.py b/tests/conftest.py index 26427f7..2f709e3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -326,20 +326,73 @@ def engine(tmp_path_factory: pytest.TempPathFactory): engine.dispose() +_TEST_RESOURCE = "test_cdm_db" + + @pytest.fixture(scope="session") def pg_engine(): """ - Session-scoped engine connecting to a local PostgreSQL container. + Session-scoped engine connecting to a PostgreSQL database. - Start the container with: - docker compose -f tests/docker-compose.yaml up -d - - The fixture retries for up to 20 seconds to allow the container to become ready. + Configuration is resolved in order: + 1. ENGINE_CDM environment variable (used by CI) + 2. oa_configurator resource 'test_cdm_db' in ~/.config/omop/config.toml + (local dev — run: omop-alchemy configure-test-db) """ - _PG_URL = os.getenv("ENGINE_CDM") - if not _PG_URL: - pytest.skip("No PostgreSQL engine configured. 
Set ENGINE_CDM environment variable.") - engine = sa.create_engine(_PG_URL, future=True) + from oa_configurator import Resolver, load_stack_config + from oa_configurator.package_base import ConfigurationError + + engine: sa.engine.Engine | None = None + _from_env = False + _stack = None + _resolved = None + + url = os.getenv("ENGINE_CDM") + if url: + engine = sa.create_engine(url, future=True) + _from_env = True + + if engine is None: + try: + _stack = load_stack_config() + _resolved = Resolver(_stack).resolve_resource(_TEST_RESOURCE) + engine = _resolved.create_engine() + except FileNotFoundError: + pass + except (KeyError, ConfigurationError): + pass + + if engine is None: + pytest.skip( + f"No PostgreSQL test database configured.\n" + f" Option 1: set ENGINE_CDM to a connection URL.\n" + f" Option 2: run 'omop-alchemy configure-test-db' to register a dedicated test DB." + ) + + # Safety guard: refuse to run if test_cdm_db resolves to the same DB as any other resource. + # DROP SCHEMA public CASCADE would destroy production data. + if _stack is not None and _resolved is not None: + try: + test_url = sa.engine.make_url(_resolved.primary_db.url) + for res_name in _stack.resources: + if res_name == _TEST_RESOURCE: + continue + try: + other = Resolver(_stack).resolve_resource(res_name) + other_url = sa.engine.make_url(other.primary_db.url) + if (test_url.host, test_url.port, test_url.database) == ( + other_url.host, other_url.port, other_url.database + ): + pytest.fail( + f"SAFETY ABORT: test_cdm_db points to the same database as" + f" '{res_name}'. Tests would DROP SCHEMA public CASCADE on your" + f" production database." + ) + except Exception: + pass + except Exception: + pass + for attempt in range(20): try: with engine.connect() as conn: @@ -348,10 +401,14 @@ def pg_engine(): except Exception: if attempt == 19: engine.dispose() - pytest.fail( - "PostgreSQL container not available after 20 attempts. 
" - "Run: docker compose -f tests/docker-compose.yaml up -d" - ) + if _from_env: + pytest.fail("PostgreSQL not reachable after 20 attempts. Check CI configuration.") + else: + db_url = engine.url.render_as_string(hide_password=True) + pytest.skip( + f"PostgreSQL test database not reachable: {db_url}\n" + f"Ensure the database is running or re-run 'omop-alchemy configure-test-db'." + ) time.sleep(1) try: yield engine diff --git a/tests/test_load_vocab_postgres.py b/tests/test_load_vocab_postgres.py index 7740a3d..02394e2 100644 --- a/tests/test_load_vocab_postgres.py +++ b/tests/test_load_vocab_postgres.py @@ -121,7 +121,7 @@ def test_load_vocab_model_csv_on_postgres(pg_session, tmp_path): row_count = _load_vocab_model_csv( pg_session, - model=Concept, + model=Concept, # type: ignore[arg-type] csv_path=csv_path, merge_strategy="replace", ) diff --git a/uv.lock b/uv.lock index c95b2cb..1b69cd9 100644 --- a/uv.lock +++ b/uv.lock @@ -390,9 +390,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c4/37/4549f149c9797c21b32c2683c33522af22522099de128b2406672526d005/greenlet-3.5.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:fa4f98af3a528f0c3fd592a26df7f376f93329c8f4d987f6bb979057af8bf5e2", size = 286220, upload-time = "2026-05-20T13:07:28.463Z" }, { url = "https://files.pythonhosted.org/packages/38/ff/a4f436709716965eaab9f36ea7b906c8a927fbe32fb1372a2071d964f6b1/greenlet-3.5.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ffea73584b216150eab159b6d12348fb253e68757974de1e2c40d8a318ac89ed", size = 601585, upload-time = "2026-05-20T14:00:06.141Z" }, { url = "https://files.pythonhosted.org/packages/65/ad/54bc3fcee3ad368a61b19b67d88117f7a8c29727bf71fffdeda81fbd946e/greenlet-3.5.1-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1072b4f9edcc1e192d9283a66a3e68d6b84c561de33a83d7858beb9ba1effe10", size = 614215, upload-time = "2026-05-20T14:05:42.675Z" }, - { url = 
"https://files.pythonhosted.org/packages/7c/6c/de5b1b388cd2d9fbdfeab324863daba37d54e6e233ddbefd70b385a8c591/greenlet-3.5.1-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:89101bfd5011e069be974903cb3a4e4523845e4ece2d62dcd8d358933c0ef249", size = 620094, upload-time = "2026-05-20T14:09:09.18Z" }, { url = "https://files.pythonhosted.org/packages/40/69/b91cda0647df839483201545913514c2827ebea5e5ccdf931842763bc127/greenlet-3.5.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:add5217d68b31130f0beca584d7fef4878327d2e31642b66618a14eef312b63b", size = 611358, upload-time = "2026-05-20T13:14:26.37Z" }, - { url = "https://files.pythonhosted.org/packages/4a/43/1204baffab8a6476464795a7ccf394a3248d4f22c9f87173a15b36b6d971/greenlet-3.5.1-cp312-cp312-manylinux_2_39_riscv64.whl", hash = "sha256:e6cd99ea59dd5d89f0c956606571d79bfe6f68c9eb7f4a4083a41a7f1587edee", size = 422782, upload-time = "2026-05-20T14:01:39.597Z" }, { url = "https://files.pythonhosted.org/packages/59/90/3cf77e080350cd02fa307bb2abf05df48f4482c240275bbd2c203ba8bb1c/greenlet-3.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a5ea42a752d47a145eae922b605cd1634665ac3d5ec1e72402d5048e8d60d207", size = 1570475, upload-time = "2026-05-20T14:02:25.29Z" }, { url = "https://files.pythonhosted.org/packages/65/2c/18cece62045e74598c3c393f70dce4a63f56222015ba29a5d4eeb04f764c/greenlet-3.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c5551170cf4f5ff5623e9af81323751979fee2c731e2287b61f73cd27257b823", size = 1635625, upload-time = "2026-05-20T13:14:34.027Z" }, { url = "https://files.pythonhosted.org/packages/30/f5/310d104ddf41eb5a70f4c268d22508dfb0c3c8e86fec152be34d0d2ed819/greenlet-3.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:3c8bb982ad117d29478ef8f5533e97df21f1e2befd17a299257b0c96d1371c0b", size = 238791, upload-time = "2026-05-20T13:10:39.018Z" }, @@ -400,9 +398,7 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/27/69/7f7e5372d998b81001899b1c0823c957aa413ba0f2662e65821611cc31e4/greenlet-3.5.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:51518ff74664078fc51bffcc6fc529b0df5ae58da192691cee765d45ce944a2b", size = 285060, upload-time = "2026-05-20T13:08:51.899Z" }, { url = "https://files.pythonhosted.org/packages/b1/bf/387f9b6b865fd2ae0d0be09e0004827295a01b71be76ed350dd1e28a91a4/greenlet-3.5.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ffdb3c0bb002c99cd8f298957e046c3dbf6006b5b7cdf11a4e19194624a0a0a", size = 604370, upload-time = "2026-05-20T14:00:07.492Z" }, { url = "https://files.pythonhosted.org/packages/32/f5/169ce3d4e4c67291bd18f8cbe0299c9f3e45102c7f1fb3c14780c93e4532/greenlet-3.5.1-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7715a5a2c3378ba602c3a440558261e13a820bb53a82693aacd7b7f6d964e283", size = 616987, upload-time = "2026-05-20T14:05:44.237Z" }, - { url = "https://files.pythonhosted.org/packages/19/ba/c24110c55dffa55aa6e1d98b45310da33801aeba7686ff0190fe5d46fd32/greenlet-3.5.1-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d40a890035c0058cadbdc4af7569800fd28a0e527a0fdbb7b5f9418f176846ce", size = 622911, upload-time = "2026-05-20T14:09:10.598Z" }, { url = "https://files.pythonhosted.org/packages/ee/e5/7f2e41d5273be07e77560d61ea4e56485b4d6c316d2a84518c62d1364061/greenlet-3.5.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc71ff466927a201b08305acac451ebe1aedfcea002f62f1f2f2ac2ac1e6a135", size = 613911, upload-time = "2026-05-20T13:14:27.539Z" }, - { url = "https://files.pythonhosted.org/packages/ec/7b/d20db2e8a5ad6c038702f3179b136f93f0a3d1a21a0c0777f3e470cdf4b2/greenlet-3.5.1-cp313-cp313-manylinux_2_39_riscv64.whl", hash = "sha256:67821bb03e4e98664490edb787ff6af501194c29bbee0f5c1dfdcf1dc3d9d436", size = 425228, upload-time = "2026-05-20T14:01:40.837Z" }, { url = 
"https://files.pythonhosted.org/packages/c5/a4/fbdc67579b73615a1f91615e814303cc71e06128f7baaba87be79b8fb90c/greenlet-3.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cd443683db272ebaaca03af98c0b063ab30db70ea8a31a1559f35e3f7b744ccd", size = 1570689, upload-time = "2026-05-20T14:02:27.225Z" }, { url = "https://files.pythonhosted.org/packages/e6/b4/77abbe35078be39718a46cd49caf16bceb35662f97a34101dca28aa98e47/greenlet-3.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:089fff7a6ce8d9316d1f65ebc00273a56be258c1725b32b94de90a3a979557e1", size = 1635602, upload-time = "2026-05-20T13:14:36.344Z" }, { url = "https://files.pythonhosted.org/packages/37/f7/129f27ca700845b8ee8ca88ce7f43435a1239c2eddb7677fc938822762cf/greenlet-3.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:110a1ca7b49b014b097f6078272c3f4ed31af45b254de5228b79adba879f6af9", size = 238683, upload-time = "2026-05-20T13:11:50.57Z" }, @@ -410,9 +406,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/cb/c62454606daf5640369c94d8a9dd540599b1bfc090e2d2180cb77f4038d2/greenlet-3.5.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:d8ab31c9de8651a2facdd5c5bb0011f2380dd1a7af78ce2adf4b56095294fc07", size = 285579, upload-time = "2026-05-20T13:08:56.396Z" }, { url = "https://files.pythonhosted.org/packages/ec/71/c4270398c2eba968a6071af1dfbdcaeee6ec1c24bc8b435b8cc452700da6/greenlet-3.5.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e300185139abc337ade480c327183adf42a875ac7181bfe66d7d4efea31fbea", size = 651106, upload-time = "2026-05-20T14:00:09.448Z" }, { url = "https://files.pythonhosted.org/packages/1a/ab/71e34b78a44ec271fb5f550c17bc46d301ddc5953890d935f270b0dcdb5a/greenlet-3.5.1-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7ffdb990dcaa0234cf9845aead5df2e3c3a8b6507d409274dd87e0d5ab05ffc2", size = 663478, upload-time = "2026-05-20T14:05:45.88Z" }, - { url = 
"https://files.pythonhosted.org/packages/c6/2d/2d80842910da44f78c286532d084b8a5c3717c844ae80ceb3858738ae89a/greenlet-3.5.1-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c09df69dc1712d131332054a858a3e5cca400967fa3a672e2324fbb0971448c", size = 667767, upload-time = "2026-05-20T14:09:12.15Z" }, { url = "https://files.pythonhosted.org/packages/77/96/4efd6fa5c62c85426a0c19077a586258ebc3a2a146ff2493e4312a697a22/greenlet-3.5.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f82b3597e9d83b63408affed0b48fd0f54935edac4302237b9a837be0dae33c", size = 660800, upload-time = "2026-05-20T13:14:29.129Z" }, - { url = "https://files.pythonhosted.org/packages/e9/d3/dad2eecedfbb1ed7050a20dcfae40c1442b74bc7423608be2c7e03ee7133/greenlet-3.5.1-cp314-cp314-manylinux_2_39_riscv64.whl", hash = "sha256:a4764e0bfc6a4d114c865b32520805c16a990ef5f286a514413b05d5ecd6a23d", size = 470786, upload-time = "2026-05-20T14:01:42.064Z" }, { url = "https://files.pythonhosted.org/packages/7a/e0/6c71401a25cac7000261304e866a2f2cc04dc74810d40e2f118aa4799495/greenlet-3.5.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c0141e37414c10164e702b8fb1473304221ad98f71600850c6ef7ff4880feba0", size = 1617518, upload-time = "2026-05-20T14:02:28.662Z" }, { url = "https://files.pythonhosted.org/packages/41/26/c5c06643e8c0af9e7bf18e16cb51d0ab7625155f0392e1c9015d66d556cd/greenlet-3.5.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:50ae25a67bea74ea41fb14b960bc532df73eb713417b2d61892dced82fe8d3bc", size = 1681593, upload-time = "2026-05-20T13:14:39.417Z" }, { url = "https://files.pythonhosted.org/packages/8a/bd/e11a108317485075e68af9d23039619b86b28130c3b50d227d42edece64b/greenlet-3.5.1-cp314-cp314-win_amd64.whl", hash = "sha256:8a17c42330e261299766b75ac1ea32caa437a9453c8f65d16a13140db378ecd3", size = 239800, upload-time = "2026-05-20T13:09:30.128Z" }, @@ -420,18 +414,14 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/90/12/41bf27fde4d3605d3773ae57751eda182b8be2f5398011c041173b1d9534/greenlet-3.5.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:ea8da1e900d758d078810d4255d8c6aa572181896a31ec79d779eb79c3adc9ad", size = 293637, upload-time = "2026-05-20T13:12:35.529Z" }, { url = "https://files.pythonhosted.org/packages/44/44/ba14b23e9757707050c2f397d305bbcae62e5d7cad122f8b6baec5ae4a1f/greenlet-3.5.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a19570c52a21420dcbc94e661994bc325c0b5b11304540fed514586da5dc8f2e", size = 650840, upload-time = "2026-05-20T14:00:11.079Z" }, { url = "https://files.pythonhosted.org/packages/a8/37/5ddc2b686a6844f91abecef43411842426da2e1573f60b49ecf2547f4ae1/greenlet-3.5.1-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3d955c89b75eeca4723d7cc14135f393cd47c32e2a6cb4a8e4c6e760a26b0986", size = 656416, upload-time = "2026-05-20T14:05:47.118Z" }, - { url = "https://files.pythonhosted.org/packages/8c/46/5987dcd1a2570ba84f3b187536b2ca3ae97613387e57f5cfa99df068fe5e/greenlet-3.5.1-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ea37d5a157eb9493820d3792ac4ece28619a394391d2b9f2f78057d396ff0f0f", size = 656607, upload-time = "2026-05-20T14:09:13.949Z" }, { url = "https://files.pythonhosted.org/packages/e1/f0/d17510297c35a2992712f0bf84de3779749999f7d3d63aa1f09db7c62dbe/greenlet-3.5.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de2daaaebd1a5aa88c49045b6baf9310b3263796bd88db713edf37cf53e7bb4e", size = 654397, upload-time = "2026-05-20T13:14:30.696Z" }, - { url = "https://files.pythonhosted.org/packages/2c/c1/6da0a9ddcc29d7e51ef14883fa3dc1e53b3f4ffba00582106c7bf55da1d8/greenlet-3.5.1-cp314-cp314t-manylinux_2_39_riscv64.whl", hash = "sha256:8d8a23250ea3ec7b36de8fa4b541e9e2db3ee82915cc060ab0631609ad8b28de", size = 488287, upload-time = "2026-05-20T14:01:43.143Z" }, { url = 
"https://files.pythonhosted.org/packages/37/eb/147387705bb89092645b012586e7273cb5ed3c90ef7eaf3a69173eaf0209/greenlet-3.5.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3bfbd69cc349e43bf3a8ae1c85548ff0718efc887615c2db16c3833d7b0b072d", size = 1614469, upload-time = "2026-05-20T14:02:30.192Z" }, { url = "https://files.pythonhosted.org/packages/a6/4e/37ee0da7732b7aa9896f17e15579a9df34b9fcb9dd494f0adfa749af6623/greenlet-3.5.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4378720dd888136c27215a0214d32a4d37c3852765d45bc37aad0623423cfd78", size = 1675115, upload-time = "2026-05-20T13:14:40.972Z" }, { url = "https://files.pythonhosted.org/packages/57/f3/97dfcf4a6eb5077f8a672234216fb5923eb89f2cab7081cb10b2cf75b605/greenlet-3.5.1-cp314-cp314t-win_amd64.whl", hash = "sha256:45718441607f9325d948db98cbc691276059316d0358c188c246da4e1d4d23d2", size = 245246, upload-time = "2026-05-20T13:12:22.646Z" }, { url = "https://files.pythonhosted.org/packages/5d/73/d7f72e34b582f694f4a9b248162db7b09cc458a259ba8f0c0bfa1a34ea7d/greenlet-3.5.1-cp315-cp315-macosx_11_0_universal2.whl", hash = "sha256:2baee5ca02031757ffe8cc3d69f0cc0aec7065ce362622da74f32d3bcab1c541", size = 285575, upload-time = "2026-05-20T13:12:07.043Z" }, { url = "https://files.pythonhosted.org/packages/df/59/fa9c6e87dc8ad27a95dabe2f29f372b733d05a8a67470f6c901ed9975655/greenlet-3.5.1-cp315-cp315-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9b1ec3274918a81d3ea778b9e75b56b72b33f300edb6cf7f3a7fe1dae56683de", size = 656428, upload-time = "2026-05-20T14:00:12.556Z" }, { url = "https://files.pythonhosted.org/packages/f6/f9/e753408871eaa61dfe35e619cfc67512b036fde99893685d50eea9e07146/greenlet-3.5.1-cp315-cp315-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:111e2390ffffc47d5840b01711dd7fac07d4c09283d0283e7f3264b14e284c64", size = 667064, upload-time = "2026-05-20T14:05:48.662Z" }, - { url = 
"https://files.pythonhosted.org/packages/dc/74/807a047255bf1e09303627c46dc043dca596b6958a354d904f32ab382005/greenlet-3.5.1-cp315-cp315-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:10a9a1c0bfbc93d41156ffcb90c75fbc05544054faf15dcc1fdf9765f8b607f0", size = 672962, upload-time = "2026-05-20T14:09:15.532Z" }, { url = "https://files.pythonhosted.org/packages/96/27/5565b5b40389f1c7753003a07e21892fda8660926787036d5bc0308b8113/greenlet-3.5.1-cp315-cp315-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e630136e905fe5ff43e86945ae41220b6d1470956a39220e708110ac48d01ea5", size = 665697, upload-time = "2026-05-20T13:14:32.943Z" }, - { url = "https://files.pythonhosted.org/packages/76/32/19d4e13225193c29b13e308015223f7d75fd3d8623d49dd19040d2ce8ec1/greenlet-3.5.1-cp315-cp315-manylinux_2_39_riscv64.whl", hash = "sha256:ef08c1567c78074b22d1a200183d52d04a14df447bf70bcbb6a3507a48e776fc", size = 476047, upload-time = "2026-05-20T14:01:44.39Z" }, { url = "https://files.pythonhosted.org/packages/cf/82/e7de4178c0c2d1c9a5a3be3cc0b33e46a85b3ee4a77c071bf7ad8600e079/greenlet-3.5.1-cp315-cp315-musllinux_1_2_aarch64.whl", hash = "sha256:975eac34b44a7077ca4d421348455b94f0f518246a7f14bc6d2fdcfe5b584368", size = 1621256, upload-time = "2026-05-20T14:02:31.91Z" }, { url = "https://files.pythonhosted.org/packages/00/10/f2dddcf7dacac17dfc68691809589adad06135eb28930429cf58a6467a2f/greenlet-3.5.1-cp315-cp315-musllinux_1_2_x86_64.whl", hash = "sha256:9ab3c3a0b2ae6198e67c898dad5215a49f9ae0d0081b3c3ec59f333e39eeca26", size = 1685956, upload-time = "2026-05-20T13:14:42.55Z" }, { url = "https://files.pythonhosted.org/packages/22/17/4a232b32133230ada52f70e9d7f5b65b0caef8772f01849bd8d149e7e4ca/greenlet-3.5.1-cp315-cp315-win_amd64.whl", hash = "sha256:cbfc69be86e10dcfef5b1e6269d1d6926552aa89ee39e1de3353360c1b6989ab", size = 239802, upload-time = "2026-05-20T13:13:15.481Z" }, @@ -439,9 +429,7 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/7a/57/816d9cff29119da3505b3d6a5e14a8af89006ac36f47f891ff293ee05af1/greenlet-3.5.1-cp315-cp315t-macosx_11_0_universal2.whl", hash = "sha256:a6fdf2433a5441ef9a95464f7c3e674775da1c8c1177fff311cee1acad4626ed", size = 293877, upload-time = "2026-05-20T13:10:19.078Z" }, { url = "https://files.pythonhosted.org/packages/23/a1/59b0a7c7d140ff1a75626680b9a9899b79a9176cab298b394968fb023295/greenlet-3.5.1-cp315-cp315t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7546556f0d649f99f6a361098a55f761181bb2ea12ff150bb16d26092ad88244", size = 655333, upload-time = "2026-05-20T14:00:14.758Z" }, { url = "https://files.pythonhosted.org/packages/72/1b/5efe127597625042218939d01855109f352779050768b670b52edcc16a6c/greenlet-3.5.1-cp315-cp315t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d5ee3ea898009fa898f85f9982255d35278c477bebe185beca249cab42d4526c", size = 659443, upload-time = "2026-05-20T14:05:50.159Z" }, - { url = "https://files.pythonhosted.org/packages/c9/9d/1dcdf7b95ab3cf8c7b6d7277c18a5e167312f2b362ddfcc5d5e6d8d84b43/greenlet-3.5.1-cp315-cp315t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a57b0d05a0448eed231d59c0ceb287dde984551e54cbc51ac2d4865712838e9c", size = 659998, upload-time = "2026-05-20T14:09:16.912Z" }, { url = "https://files.pythonhosted.org/packages/6c/6d/c404246ea4d22d097a7426d0efb5b781bd7eb67715f09e79001bd552ab18/greenlet-3.5.1-cp315-cp315t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5c81f74d204d3edd136ebfd50dce53acbb776995d721a0fe801626cfc93b8cd", size = 658356, upload-time = "2026-05-20T13:14:35.091Z" }, - { url = "https://files.pythonhosted.org/packages/05/7e/c4959664fc231d587d66d8e81f2095e98056ba1954beafdcbe635e251052/greenlet-3.5.1-cp315-cp315t-manylinux_2_39_riscv64.whl", hash = "sha256:b0703c2cef53e01baec47f7a3868009913ad71ec678bbecb42a6f40895e4ce62", size = 494470, upload-time = "2026-05-20T14:01:45.611Z" }, { url = 
"https://files.pythonhosted.org/packages/51/02/f8ee37fb6d2219329f350af241c27fcf12df57e723d11f6fc6d3bacdadaa/greenlet-3.5.1-cp315-cp315t-musllinux_1_2_aarch64.whl", hash = "sha256:2c18ef16bf6d4dd410e4dd52996888ea1497be26892fe5bbc73580aba4287b8e", size = 1619216, upload-time = "2026-05-20T14:02:33.403Z" }, { url = "https://files.pythonhosted.org/packages/93/c5/3dc9475ace2c7a3680da12372cddd7f1ac874eb410a1ac48d3e9dab83782/greenlet-3.5.1-cp315-cp315t-musllinux_1_2_x86_64.whl", hash = "sha256:17d86354f0ae6b61bf9be5148d0dd34e06c3cb7c602c671f79f29ac3b150e659", size = 1678427, upload-time = "2026-05-20T13:14:43.71Z" }, { url = "https://files.pythonhosted.org/packages/df/4e/750c15c317a41ffb36f0bf40b933e3d744a7dede61889f74443ea69690cf/greenlet-3.5.1-cp315-cp315t-win_amd64.whl", hash = "sha256:e7516cf6ae6b8a582c2770a0caed47b8a48373ed732c33d69a72913ae6ac923e", size = 245225, upload-time = "2026-05-20T13:13:59.366Z" }, @@ -1010,7 +998,6 @@ source = { editable = "." } dependencies = [ { name = "orm-loader" }, { name = "pandas" }, - { name = "python-dotenv" }, { name = "pyyaml" }, { name = "rich" }, { name = "sqlalchemy" }, @@ -1052,7 +1039,6 @@ requires-dist = [ { name = "psycopg", extras = ["binary"], marker = "extra == 'postgres'", specifier = ">=3.2" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.3" }, { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0" }, - { name = "python-dotenv", specifier = ">=1.2.2" }, { name = "pyyaml", specifier = ">=6.0" }, { name = "requests", marker = "extra == 'dev'", specifier = ">=2.33.0" }, { name = "rich", specifier = ">=13.0" }, @@ -1419,15 +1405,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] -[[package]] -name = "python-dotenv" 
-version = "1.2.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" }, -] - [[package]] name = "pyyaml" version = "6.0.3" From 88050bddbcb4c42f2c171cc09c69fe24d2a6c8a6 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Thu, 28 May 2026 05:51:26 +0000 Subject: [PATCH 16/25] Have correct resource that this package provides --- omop_alchemy/maintenance/cli_config.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/omop_alchemy/maintenance/cli_config.py b/omop_alchemy/maintenance/cli_config.py index c33697e..8a36409 100644 --- a/omop_alchemy/maintenance/cli_config.py +++ b/omop_alchemy/maintenance/cli_config.py @@ -10,6 +10,8 @@ from sqlalchemy import text from sqlalchemy.exc import SQLAlchemyError +from omop_alchemy.config import CDM_DB_RESOURCE + console = Console() err_console = Console(stderr=True) @@ -46,7 +48,7 @@ def _get_admin_engine(*, yes: bool) -> sa.engine.Engine: suggested_url: str | None = None try: stack = load_stack_config() - resolved = Resolver(stack).resolve_resource("cdm_db") + resolved = Resolver(stack).resolve_resource(CDM_DB_RESOURCE) suggested_url = resolved.primary_db.url except Exception: pass From b6fb46e81788bf040011a9d55ead9932b4884322 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Thu, 28 May 2026 06:47:35 +0000 Subject: [PATCH 17/25] Remove dotenv references, have CDM utils for building connections for CLIs, alter docs slightly 
--- docs/api/typing.md | 27 ++++++++++++++++++- mkdocs.yml | 14 ++++++++++ omop_alchemy/config.py | 12 ++++++++- omop_alchemy/maintenance/_cli_utils.py | 17 ++++-------- omop_alchemy/maintenance/cli_foreign_keys.py | 16 +++++------ omop_alchemy/maintenance/cli_schema_doctor.py | 10 ++----- omop_alchemy/maintenance/cli_vocab.py | 3 ++- omop_alchemy/maintenance/ui.py | 4 +-- tests/test_foreign_keys.py | 6 ++--- tests/test_fulltext.py | 2 +- tests/test_indexes.py | 2 +- tests/test_load_vocab_source.py | 4 +-- tests/test_truncate_tables.py | 4 +-- 13 files changed, 78 insertions(+), 43 deletions(-) diff --git a/docs/api/typing.md b/docs/api/typing.md index dca5a23..40e00dc 100644 --- a/docs/api/typing.md +++ b/docs/api/typing.md @@ -19,6 +19,10 @@ These live in two modules: Satisfied by any object with an integer `concept_id` attribute. ::: omop_alchemy.cdm.base.typing.HasConceptId + options: + heading_level: 4 + show_root_heading: false + members: true --- @@ -27,6 +31,10 @@ Satisfied by any object with an integer `concept_id` attribute. Satisfied by any object with an integer `person_id` attribute. ::: omop_alchemy.cdm.base.typing.HasPersonId + options: + heading_level: 4 + show_root_heading: false + members: true --- @@ -35,6 +43,10 @@ Satisfied by any object with an integer `person_id` attribute. Satisfied by any object with an integer `episode_id` attribute. ::: omop_alchemy.cdm.base.typing.HasEpisodeId + options: + heading_level: 4 + show_root_heading: false + members: true --- @@ -45,6 +57,10 @@ satisfies this protocol if it has `__tablename__`, `__mapper__`, `__expected_dom and a `collect_domain_rules()` classmethod. ::: omop_alchemy.cdm.base.typing.DomainSemanticTable + options: + heading_level: 4 + show_root_heading: false + members: true --- @@ -60,6 +76,10 @@ utilities that operate across multiple CDM tables. structural interface; the mixin there is the implementation. 
::: omop_alchemy.cdm.base.typing.ClinicalEvent + options: + heading_level: 4 + show_root_heading: false + members: true --- @@ -68,7 +88,8 @@ utilities that operate across multiple CDM tables. Protocol for objects that can look up whether a set of concept IDs are standard. ::: omop_alchemy.cdm.base.typing.ConceptResolver - + options: + heading_level: 4 --- ## Typed row containers (`cdm.model.typing`) @@ -79,3 +100,7 @@ A frozen dataclass representing the core fields of a concept lookup row. Used wh lightweight, hashable concept record is preferable to a full ORM object. ::: omop_alchemy.cdm.model.typing.ConceptRow + options: + heading_level: 4 + show_root_heading: false + members: true diff --git a/mkdocs.yml b/mkdocs.yml index 62ed7af..1b71f81 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -13,6 +13,19 @@ theme: - navigation.top - content.code.copy - content.code.annotate + palette: + - scheme: default + primary: indigo + accent: blue + toggle: + icon: material/brightness-7 + name: Switch to dark mode + - scheme: slate + primary: indigo + accent: light-blue + toggle: + icon: material/brightness-4 + name: Switch to light mode markdown_extensions: - admonition @@ -28,6 +41,7 @@ markdown_extensions: format: !!python/name:mermaid2.fence_mermaid - toc: permalink: true + toc_depth: 3 extra_javascript: - https://unpkg.com/mermaid@10/dist/mermaid.min.js diff --git a/omop_alchemy/config.py b/omop_alchemy/config.py index 11a59cb..805d2f3 100644 --- a/omop_alchemy/config.py +++ b/omop_alchemy/config.py @@ -4,7 +4,7 @@ from typing import ClassVar, Final from pydantic import Field -from oa_configurator import PackageConfigBase, ResourceSpec, Resolver, load_stack_config +from oa_configurator import PackageConfigBase, ResourceSpec, Resolver, ResolvedResource, load_stack_config ROOT_PATH = Path(__file__).parent TEST_PATH = Path(__file__).parent.parent / "tests" @@ -36,3 +36,13 @@ def get_resolver() -> Resolver: def get_config() -> OmopAlchemyConfig: return 
OmopAlchemyConfig.from_stack(load_stack_config()) + + +def get_cdm_context() -> tuple[OmopAlchemyConfig, ResolvedResource]: + """Load config once and return (pkg_config, resolved_cdm_resource).""" + stack = load_stack_config() + pkg_config = OmopAlchemyConfig.from_stack(stack) + tool = stack.tools.get(OmopAlchemyConfig.tool_name) + resource_name = (tool.default_resource if tool else None) or CDM_DB_RESOURCE + resolved = Resolver(stack).resolve_resource(resource_name) + return pkg_config, resolved diff --git a/omop_alchemy/maintenance/_cli_utils.py b/omop_alchemy/maintenance/_cli_utils.py index 9ab354f..6e323fd 100644 --- a/omop_alchemy/maintenance/_cli_utils.py +++ b/omop_alchemy/maintenance/_cli_utils.py @@ -10,8 +10,6 @@ import typer from sqlalchemy.exc import SQLAlchemyError -from oa_configurator import Resolver, load_stack_config - from .tables import TableScope from .ui import console, render_error, render_command_header from ..backends import BackendNotSupportedError @@ -24,8 +22,7 @@ class _ConnContext: """Connection context derived from the oa_configurator resolved resource.""" db_schema: str | None - engine_schema: str = "default" - dotenv: None = None # kept so existing conn.dotenv references compile + engine_url: str = "" athena_source: str | None = None # from OmopAlchemyConfig.athena_source_path @@ -53,22 +50,18 @@ def wrapper(**kwargs: Any) -> Any: _vocab = kwargs.get("vocabulary_included", vocabulary_included) _mode = mode_label if mode_label is not None else ("dry-run" if _dry_run else "apply") try: - from ..config import OmopAlchemyConfig, get_config - stack = load_stack_config() - pkg_config = get_config() # validates required_resources — raises ConfigurationError if missing - tool = stack.tools.get("omop_alchemy") - resource_name = (tool.default_resource if tool else None) or OmopAlchemyConfig.required_resources[0] - resolver = Resolver(stack) - resolved = resolver.resolve_resource(resource_name) + from ..config import get_cdm_context + 
pkg_config, resolved = get_cdm_context() engine = resolved.create_engine() conn = _ConnContext( db_schema=resolved.cdm_schema, + engine_url=engine.url.render_as_string(hide_password=True), athena_source=pkg_config.athena_source_path, ) console.print( render_command_header( command_name=command_name, - engine_schema=conn.engine_schema, + engine_url=conn.engine_url, db_schema=conn.db_schema, vocabulary_included=_vocab, mode_label=_mode, diff --git a/omop_alchemy/maintenance/cli_foreign_keys.py b/omop_alchemy/maintenance/cli_foreign_keys.py index e696e12..d245a5a 100644 --- a/omop_alchemy/maintenance/cli_foreign_keys.py +++ b/omop_alchemy/maintenance/cli_foreign_keys.py @@ -7,8 +7,6 @@ import sqlalchemy as sa import typer -from oa_configurator import Resolver, load_stack_config -from omop_alchemy.config import OmopAlchemyConfig from ._cli_utils import _ConnContext from ..backends import Backend, resolve_backend, require_backend_support, backend_support_note from ._cli_utils import handle_error, omop_command @@ -494,20 +492,20 @@ def enable_foreign_keys_command( ) -> None: """Re-enable PostgreSQL RI trigger enforcement. 
Use --strict to abort if any violations exist first.""" try: - stack = load_stack_config() - tool = stack.tools.get("omop_alchemy") - resource_name = (tool.default_resource if tool else None) or OmopAlchemyConfig.required_resources[0] - resolver = Resolver(stack) - resolved = resolver.resolve_resource(resource_name) - conn = _ConnContext(db_schema=resolved.cdm_schema) + from omop_alchemy.config import get_cdm_context + pkg_config, resolved = get_cdm_context() engine = resolved.create_engine() + conn = _ConnContext( + db_schema=resolved.cdm_schema, + engine_url=engine.url.render_as_string(hide_password=True), + ) except Exception as exc: handle_error(exc) return console.print( render_command_header( command_name="foreign-keys enable --strict" if strict else "foreign-keys enable", - engine_schema=conn.engine_schema, + engine_url=conn.engine_url, db_schema=conn.db_schema, vocabulary_included=vocabulary_included, mode_label="dry-run" if dry_run else "apply", diff --git a/omop_alchemy/maintenance/cli_schema_doctor.py b/omop_alchemy/maintenance/cli_schema_doctor.py index 0044476..55d8f20 100644 --- a/omop_alchemy/maintenance/cli_schema_doctor.py +++ b/omop_alchemy/maintenance/cli_schema_doctor.py @@ -4,10 +4,7 @@ from dataclasses import dataclass -from oa_configurator import Resolver, load_stack_config -from omop_alchemy.config import OmopAlchemyConfig from omop_alchemy.backends.resolve import SupportedDialect -from omop_alchemy.db import create_engine_with_dependencies from .cli_foreign_keys import ( ForeignKeyStatusResult, @@ -179,11 +176,8 @@ def collect_doctor_report( foreign_key_validation: ForeignKeyValidationReport | None = None if info.connection_ready: - stack = load_stack_config() - tool = stack.tools.get("omop_alchemy") - resource_name = (tool.default_resource if tool else None) or OmopAlchemyConfig.required_resources[0] - resolver = Resolver(stack) - resolved = resolver.resolve_resource(resource_name) + from omop_alchemy.config import get_cdm_context + _, 
resolved = get_cdm_context() engine = resolved.create_engine() db_schema = resolved.cdm_schema try: diff --git a/omop_alchemy/maintenance/cli_vocab.py b/omop_alchemy/maintenance/cli_vocab.py index 99e2489..6c9d85f 100644 --- a/omop_alchemy/maintenance/cli_vocab.py +++ b/omop_alchemy/maintenance/cli_vocab.py @@ -11,6 +11,7 @@ from enum import StrEnum import sqlalchemy as sa import sqlalchemy.orm as so +import sqlalchemy.event as sae from sqlalchemy.exc import OperationalError import typer from sqlalchemy.pool import NullPool @@ -294,7 +295,7 @@ def load_vocab_source( # new connection via an engine-level connect event so COPY and raw-cursor # operations always target the right schema. _quoted_schema = '"' + db_schema.replace('"', '""') + '"' - @sa.event.listens_for(load_engine, "connect") + @sae.listens_for(load_engine, "connect") def _set_search_path(dbapi_conn, _record): cur = dbapi_conn.cursor() cur.execute(f"SET search_path TO {_quoted_schema}") diff --git a/omop_alchemy/maintenance/ui.py b/omop_alchemy/maintenance/ui.py index 4f67cc8..1e6380b 100644 --- a/omop_alchemy/maintenance/ui.py +++ b/omop_alchemy/maintenance/ui.py @@ -96,7 +96,7 @@ def _category_label(category: TableCategory) -> Text: def render_command_header( *, command_name: str, - engine_schema: str | None, + engine_url: str | None, db_schema: str | None, vocabulary_included: bool | None, mode_label: str, @@ -109,7 +109,7 @@ def render_command_header( grid.add_column(style="bold cyan") grid.add_column() grid.add_row("Command", command_name) - grid.add_row("Engine", engine_schema or "default ENGINE") + grid.add_row("Connection", engine_url or "(none)") grid.add_row("DB schema", db_schema or "default search_path") if vocabulary_included is not None: grid.add_row("Vocabulary", _bool_label(vocabulary_included)) diff --git a/tests/test_foreign_keys.py b/tests/test_foreign_keys.py index 7b289d9..affe23c 100644 --- a/tests/test_foreign_keys.py +++ b/tests/test_foreign_keys.py @@ -79,7 +79,7 @@ def 
test_disable_foreign_keys_cli_fails_gracefully_for_sqlite(monkeypatch): resources={"cdm_db": {"primary_db": "db", "cdm_schema": "main"}}, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.load_stack_config", + "omop_alchemy.config.load_stack_config", lambda: cfg, ) monkeypatch.setattr( @@ -254,7 +254,7 @@ def test_enable_foreign_keys_strict_cli_invokes_strict_management(monkeypatch): resources={"cdm_db": {"primary_db": "db", "cdm_schema": "main"}}, ) monkeypatch.setattr( - "omop_alchemy.maintenance.cli_foreign_keys.load_stack_config", + "omop_alchemy.config.load_stack_config", lambda: cfg, ) monkeypatch.setattr( @@ -367,7 +367,7 @@ def test_foreign_keys_validate_cli_invokes_validation(monkeypatch): resources={"cdm_db": {"primary_db": "db", "cdm_schema": "main"}}, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.load_stack_config", + "omop_alchemy.config.load_stack_config", lambda: cfg, ) monkeypatch.setattr( diff --git a/tests/test_fulltext.py b/tests/test_fulltext.py index da66a7f..344c2d0 100644 --- a/tests/test_fulltext.py +++ b/tests/test_fulltext.py @@ -207,7 +207,7 @@ def test_fulltext_install_cli_passes_options(monkeypatch): resources={"cdm_db": {"primary_db": "db", "cdm_schema": "public"}}, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.load_stack_config", + "omop_alchemy.config.load_stack_config", lambda: cfg, ) monkeypatch.setattr( diff --git a/tests/test_indexes.py b/tests/test_indexes.py index e7f5736..1e32419 100644 --- a/tests/test_indexes.py +++ b/tests/test_indexes.py @@ -125,7 +125,7 @@ def test_disable_indexes_cli_invokes_management(monkeypatch): resources={"cdm_db": {"primary_db": "db", "cdm_schema": "main"}}, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.load_stack_config", + "omop_alchemy.config.load_stack_config", lambda: cfg, ) monkeypatch.setattr( diff --git a/tests/test_load_vocab_source.py b/tests/test_load_vocab_source.py index 8afe0f2..8284025 100644 --- 
a/tests/test_load_vocab_source.py +++ b/tests/test_load_vocab_source.py @@ -164,7 +164,7 @@ def test_load_vocab_source_cli_uses_configured_athena_source(monkeypatch, tmp_pa ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.load_stack_config", + "omop_alchemy.config.load_stack_config", lambda: cfg, ) monkeypatch.setattr( @@ -432,7 +432,7 @@ def test_load_vocab_source_cli_surfaces_database_error_detail(monkeypatch): resources={"cdm_db": {"primary_db": "db", "cdm_schema": "main"}}, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.load_stack_config", + "omop_alchemy.config.load_stack_config", lambda: cfg, ) monkeypatch.setattr( diff --git a/tests/test_truncate_tables.py b/tests/test_truncate_tables.py index 53ee2a2..ded3d1c 100644 --- a/tests/test_truncate_tables.py +++ b/tests/test_truncate_tables.py @@ -47,7 +47,7 @@ def test_truncate_tables_cli_requires_confirmation(monkeypatch): resources={"cdm_db": {"primary_db": "db", "cdm_schema": "main"}}, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.load_stack_config", + "omop_alchemy.config.load_stack_config", lambda: cfg, ) monkeypatch.setattr( @@ -71,7 +71,7 @@ def test_truncate_tables_cli_invokes_management(monkeypatch): resources={"cdm_db": {"primary_db": "db", "cdm_schema": "main"}}, ) monkeypatch.setattr( - "omop_alchemy.maintenance._cli_utils.load_stack_config", + "omop_alchemy.config.load_stack_config", lambda: cfg, ) monkeypatch.setattr( From e0f3837d0fbcbb7c58989d1b7e1276271c4f42f8 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Thu, 28 May 2026 06:51:44 +0000 Subject: [PATCH 18/25] Global TOOL_NAME param for re-usability --- docs/cli/index.md | 3 ++- omop_alchemy/__init__.py | 4 +--- omop_alchemy/config.py | 12 +++++++----- omop_alchemy/logger_config.py | 5 ++--- omop_alchemy/maintenance/cli.py | 2 +- tests/test_load_vocab_source.py | 3 ++- 6 files changed, 15 insertions(+), 14 deletions(-) diff --git a/docs/cli/index.md b/docs/cli/index.md index 376b0c5..510e2c4 100644 
--- a/docs/cli/index.md +++ b/docs/cli/index.md @@ -61,9 +61,10 @@ When a decorated command is invoked: Without the decorator, every command would need this boilerplate: ```python +from omop_alchemy.config import TOOL_NAME def my_command() -> None: stack = load_stack_config() - tool = stack.tools.get("omop_alchemy") + tool = stack.tools.get(TOOL_NAME) resource_name = (tool.default_resource if tool else None) or "cdm_db" resolved = Resolver(stack).resolve_resource(resource_name) engine = resolved.create_engine() diff --git a/omop_alchemy/__init__.py b/omop_alchemy/__init__.py index 947e793..26f2212 100644 --- a/omop_alchemy/__init__.py +++ b/omop_alchemy/__init__.py @@ -1,4 +1,4 @@ -from .config import CDM_DB_RESOURCE, ROOT_PATH, TEST_PATH, OmopAlchemyConfig, get_resolver, get_config +from .config import CDM_DB_RESOURCE, OmopAlchemyConfig, get_resolver, get_config from .db import create_engine_with_dependencies @@ -8,6 +8,4 @@ "create_engine_with_dependencies", "get_config", "get_resolver", - "ROOT_PATH", - "TEST_PATH", ] diff --git a/omop_alchemy/config.py b/omop_alchemy/config.py index 805d2f3..e642031 100644 --- a/omop_alchemy/config.py +++ b/omop_alchemy/config.py @@ -1,19 +1,17 @@ from __future__ import annotations -from pathlib import Path from typing import ClassVar, Final from pydantic import Field from oa_configurator import PackageConfigBase, ResourceSpec, Resolver, ResolvedResource, load_stack_config - -ROOT_PATH = Path(__file__).parent -TEST_PATH = Path(__file__).parent.parent / "tests" +from oa_configurator import configure_logging as _configure_logging CDM_DB_RESOURCE: Final[str] = "cdm_db" +TOOL_NAME: Final[str] = "omop_alchemy" class OmopAlchemyConfig(PackageConfigBase): - tool_name: ClassVar[str] = "omop_alchemy" + tool_name: ClassVar[str] = TOOL_NAME required_resources: ClassVar[tuple[str, ...]] = (CDM_DB_RESOURCE,) owned_resources: ClassVar[tuple[ResourceSpec, ...]] = ( ResourceSpec( @@ -46,3 +44,7 @@ def get_cdm_context() -> 
tuple[OmopAlchemyConfig, ResolvedResource]: resource_name = (tool.default_resource if tool else None) or CDM_DB_RESOURCE resolved = Resolver(stack).resolve_resource(resource_name) return pkg_config, resolved + + +def configure_logging(verbosity: int = 0) -> None: + _configure_logging(verbosity=verbosity, extra_namespaces=[TOOL_NAME]) \ No newline at end of file diff --git a/omop_alchemy/logger_config.py b/omop_alchemy/logger_config.py index 36870f7..fd40910 100644 --- a/omop_alchemy/logger_config.py +++ b/omop_alchemy/logger_config.py @@ -1,5 +1,4 @@ -from oa_configurator import configure_logging as _configure_logging -def configure_logging(verbosity: int = 0) -> None: - _configure_logging(verbosity=verbosity, extra_namespaces=["omop_alchemy"]) + + diff --git a/omop_alchemy/maintenance/cli.py b/omop_alchemy/maintenance/cli.py index d8aaa13..bfc3887 100644 --- a/omop_alchemy/maintenance/cli.py +++ b/omop_alchemy/maintenance/cli.py @@ -16,7 +16,7 @@ cli_tables as tables, cli_vocab as vocab, ) -from ..logger_config import configure_logging +from ..config import configure_logging from .help import install_help_customizations install_help_customizations() diff --git a/tests/test_load_vocab_source.py b/tests/test_load_vocab_source.py index 8284025..529f37d 100644 --- a/tests/test_load_vocab_source.py +++ b/tests/test_load_vocab_source.py @@ -14,6 +14,7 @@ load_vocab_source, ) from omop_alchemy.cdm.model.vocabulary import Drug_Strength +from omop_alchemy.config import TOOL_NAME runner = CliRunner() @@ -160,7 +161,7 @@ def test_load_vocab_source_cli_uses_configured_athena_source(monkeypatch, tmp_pa cfg = StackConfig.for_session( connections={"db": {"dialect": "sqlite", "database": ":memory:"}}, resources={"cdm_db": {"primary_db": "db", "cdm_schema": "main"}}, - tools={"omop_alchemy": {"extra": {"athena_source_path": str(athena_dir)}}}, + tools={TOOL_NAME: {"extra": {"athena_source_path": str(athena_dir)}}}, ) monkeypatch.setattr( From 
d8394c4163876f3e142157e382665b2e9483a47c Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Thu, 28 May 2026 06:55:49 +0000 Subject: [PATCH 19/25] Remove logger_config.py since it has been absorbed by the respective config.py --- omop_alchemy/logger_config.py | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 omop_alchemy/logger_config.py diff --git a/omop_alchemy/logger_config.py b/omop_alchemy/logger_config.py deleted file mode 100644 index fd40910..0000000 --- a/omop_alchemy/logger_config.py +++ /dev/null @@ -1,4 +0,0 @@ - - - - From 8c7eb1ef203df0ec4a5eca8a7b25629c363268ca Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Thu, 28 May 2026 07:52:41 +0000 Subject: [PATCH 20/25] Absorb the db into the config to overwrite the engine configuration --- omop_alchemy/__init__.py | 2 - omop_alchemy/config.py | 40 ++++++++++++++ omop_alchemy/db.py | 55 ------------------- omop_alchemy/maintenance/_cli_utils.py | 4 +- omop_alchemy/maintenance/cli_foreign_keys.py | 4 +- omop_alchemy/maintenance/cli_schema_doctor.py | 4 +- omop_alchemy/maintenance/cli_schema_info.py | 3 +- tests/test_config_driver.py | 34 +++++++----- 8 files changed, 67 insertions(+), 79 deletions(-) delete mode 100644 omop_alchemy/db.py diff --git a/omop_alchemy/__init__.py b/omop_alchemy/__init__.py index 26f2212..d4b9854 100644 --- a/omop_alchemy/__init__.py +++ b/omop_alchemy/__init__.py @@ -1,11 +1,9 @@ from .config import CDM_DB_RESOURCE, OmopAlchemyConfig, get_resolver, get_config -from .db import create_engine_with_dependencies __all__ = [ "CDM_DB_RESOURCE", "OmopAlchemyConfig", - "create_engine_with_dependencies", "get_config", "get_resolver", ] diff --git a/omop_alchemy/config.py b/omop_alchemy/config.py index e642031..a5abdac 100644 --- a/omop_alchemy/config.py +++ b/omop_alchemy/config.py @@ -2,6 +2,7 @@ from typing import ClassVar, Final +import sqlalchemy as sa from pydantic import Field from oa_configurator import PackageConfigBase, ResourceSpec, Resolver, ResolvedResource, 
load_stack_config from oa_configurator import configure_logging as _configure_logging @@ -9,6 +10,34 @@ CDM_DB_RESOURCE: Final[str] = "cdm_db" TOOL_NAME: Final[str] = "omop_alchemy" +# Mapping of PostgreSQL SQLAlchemy drivernames to the Python module they require. +# Kept here (not in oa_configurator) because the driver choice and install instructions +# are OMOP_Alchemy-specific — orm-loader ≥ 0.4.0 dropped the implicit psycopg2 dependency. +_POSTGRES_DRIVER_MODULES: dict[str, str] = { + "postgresql": "psycopg", + "postgresql+psycopg": "psycopg", + "postgresql+psycopg2": "psycopg2", +} + + +def _missing_driver_message(url: str, exc: ModuleNotFoundError) -> str | None: + """Return an install hint if exc is a missing PostgreSQL driver, else None.""" + drivername = sa.engine.make_url(url).drivername + expected = _POSTGRES_DRIVER_MODULES.get(drivername) + if expected is None: + return None + missing = exc.name + if missing is None and expected in str(exc): + missing = expected + if missing != expected: + return None + return ( + f"Database driver '{expected}' is required for dialect '{drivername}' " + "but is not installed. " + "Install PostgreSQL support with " + "`uv sync --extra postgres` or `pip install -e '.[postgres]'`." 
+ ) + class OmopAlchemyConfig(PackageConfigBase): tool_name: ClassVar[str] = TOOL_NAME @@ -46,5 +75,16 @@ def get_cdm_context() -> tuple[OmopAlchemyConfig, ResolvedResource]: return pkg_config, resolved +def create_cdm_engine(resolved: ResolvedResource) -> sa.Engine: + """Create the CDM SQLAlchemy engine with helpful PostgreSQL driver error messages.""" + try: + return resolved.create_engine() + except ModuleNotFoundError as exc: + msg = _missing_driver_message(resolved.primary_db.url, exc) + if msg is not None: + raise RuntimeError(msg) from exc + raise + + def configure_logging(verbosity: int = 0) -> None: _configure_logging(verbosity=verbosity, extra_namespaces=[TOOL_NAME]) \ No newline at end of file diff --git a/omop_alchemy/db.py b/omop_alchemy/db.py deleted file mode 100644 index dc97d1e..0000000 --- a/omop_alchemy/db.py +++ /dev/null @@ -1,55 +0,0 @@ -from __future__ import annotations - -from collections.abc import Mapping - -import sqlalchemy as sa -from sqlalchemy.engine import Engine - - -def _missing_driver_message( - engine_name: str, - exc: ModuleNotFoundError, -) -> str | None: - drivername = sa.engine.make_url(engine_name).drivername - expected_module = POSTGRES_DRIVER_MODULES.get(drivername) - if expected_module is None: - return None - - missing_module = exc.name - if missing_module is None and expected_module in str(exc): - missing_module = expected_module - - if missing_module != expected_module: - return None - - return ( - f"Database driver '{expected_module}' is required for engine " - f"'{drivername}' but is not installed. " - "Install PostgreSQL support with " - "`uv sync --extra postgres` " - "or " - "`pip install -e '.[postgres]'`." 
- ) - - -def create_engine_with_dependencies( - engine_name: str, - **engine_kwargs, -) -> sa.Engine: - """Create a SQLAlchemy engine with clearer dependency errors for postgres.""" - try: - return sa.create_engine(engine_name, **engine_kwargs) - except ModuleNotFoundError as exc: - message = _missing_driver_message(engine_name, exc) - if message is not None: - raise RuntimeError(message) from exc - raise - - -# from orm-loader 0.4.0 onwards, implicit psycopg2 dependency has been removed in favor of explicit driver modules. -# This mapping is used to provide clearer error messages when a required driver is missing. -POSTGRES_DRIVER_MODULES: Mapping[str, str] = { - "postgresql": "psycopg", # bare URL aliased to psycopg - "postgresql+psycopg": "psycopg", - "postgresql+psycopg2": "psycopg2", # retained so missing-driver message is clear -} diff --git a/omop_alchemy/maintenance/_cli_utils.py b/omop_alchemy/maintenance/_cli_utils.py index 6e323fd..1d0d353 100644 --- a/omop_alchemy/maintenance/_cli_utils.py +++ b/omop_alchemy/maintenance/_cli_utils.py @@ -50,9 +50,9 @@ def wrapper(**kwargs: Any) -> Any: _vocab = kwargs.get("vocabulary_included", vocabulary_included) _mode = mode_label if mode_label is not None else ("dry-run" if _dry_run else "apply") try: - from ..config import get_cdm_context + from ..config import create_cdm_engine, get_cdm_context pkg_config, resolved = get_cdm_context() - engine = resolved.create_engine() + engine = create_cdm_engine(resolved) conn = _ConnContext( db_schema=resolved.cdm_schema, engine_url=engine.url.render_as_string(hide_password=True), diff --git a/omop_alchemy/maintenance/cli_foreign_keys.py b/omop_alchemy/maintenance/cli_foreign_keys.py index d245a5a..5f735c7 100644 --- a/omop_alchemy/maintenance/cli_foreign_keys.py +++ b/omop_alchemy/maintenance/cli_foreign_keys.py @@ -492,9 +492,9 @@ def enable_foreign_keys_command( ) -> None: """Re-enable PostgreSQL RI trigger enforcement. 
Use --strict to abort if any violations exist first.""" try: - from omop_alchemy.config import get_cdm_context + from omop_alchemy.config import create_cdm_engine, get_cdm_context pkg_config, resolved = get_cdm_context() - engine = resolved.create_engine() + engine = create_cdm_engine(resolved) conn = _ConnContext( db_schema=resolved.cdm_schema, engine_url=engine.url.render_as_string(hide_password=True), diff --git a/omop_alchemy/maintenance/cli_schema_doctor.py b/omop_alchemy/maintenance/cli_schema_doctor.py index 55d8f20..278062e 100644 --- a/omop_alchemy/maintenance/cli_schema_doctor.py +++ b/omop_alchemy/maintenance/cli_schema_doctor.py @@ -176,9 +176,9 @@ def collect_doctor_report( foreign_key_validation: ForeignKeyValidationReport | None = None if info.connection_ready: - from omop_alchemy.config import get_cdm_context + from omop_alchemy.config import create_cdm_engine, get_cdm_context _, resolved = get_cdm_context() - engine = resolved.create_engine() + engine = create_cdm_engine(resolved) db_schema = resolved.cdm_schema try: missing_table_count = info.missing_table_count or 0 diff --git a/omop_alchemy/maintenance/cli_schema_info.py b/omop_alchemy/maintenance/cli_schema_info.py index 0a8cd5b..eb6e5f9 100644 --- a/omop_alchemy/maintenance/cli_schema_info.py +++ b/omop_alchemy/maintenance/cli_schema_info.py @@ -347,7 +347,8 @@ def collect_maintenance_info( raw_url = sa.engine.make_url(resolved.primary_db.url) engine_url = raw_url.render_as_string(hide_password=True) backend = raw_url.get_backend_name() - engine = resolved.create_engine() + from omop_alchemy.config import create_cdm_engine + engine = create_cdm_engine(resolved) engine_created = True except RuntimeError as exc: engine_error = str(exc) diff --git a/tests/test_config_driver.py b/tests/test_config_driver.py index 7b3d517..5cf7a0a 100644 --- a/tests/test_config_driver.py +++ b/tests/test_config_driver.py @@ -1,16 +1,16 @@ """ -Tests for omop_alchemy.config driver-selection logic. 
+Tests for driver-selection logic in omop_alchemy.config. These tests do not require a database; they exercise the driver-mapping -constants, _missing_driver_message(), and create_engine_with_dependencies() +constants, _missing_driver_message(), and create_cdm_engine() using mock exceptions to simulate missing packages. """ import pytest -from omop_alchemy.db import ( - POSTGRES_DRIVER_MODULES, +from omop_alchemy.config import ( + _POSTGRES_DRIVER_MODULES as POSTGRES_DRIVER_MODULES, _missing_driver_message, - create_engine_with_dependencies, + create_cdm_engine, ) @@ -88,21 +88,25 @@ def test_missing_driver_message_returns_none_for_sqlite_url(): # --------------------------------------------------------------------------- def test_sqlite_url_not_intercepted(): - """create_engine_with_dependencies should work for sqlite without wrapping errors.""" - engine = create_engine_with_dependencies("sqlite:///:memory:", future=True) + """create_cdm_engine should work for sqlite without wrapping errors.""" + from oa_configurator.resolver import ResolvedDatabaseTarget + target = ResolvedDatabaseTarget(name="test", url="sqlite:///:memory:", safe_url="sqlite:///:memory:") + from unittest.mock import MagicMock + resolved = MagicMock() + resolved.create_engine.return_value = target.create_engine() + resolved.primary_db.url = "sqlite:///:memory:" + engine = create_cdm_engine(resolved) engine.dispose() def test_create_engine_raises_runtime_for_missing_postgres_driver(monkeypatch): - """When psycopg is missing, create_engine_with_dependencies raises RuntimeError with install hint.""" - import sqlalchemy as sa - + """When psycopg is missing, create_cdm_engine raises RuntimeError with install hint.""" + from unittest.mock import MagicMock exc = _make_module_not_found("psycopg") - def raising_create_engine(url, **kwargs): - raise exc - - monkeypatch.setattr(sa, "create_engine", raising_create_engine) + resolved = MagicMock() + resolved.create_engine.side_effect = exc + 
resolved.primary_db.url = "postgresql+psycopg://host/db" with pytest.raises(RuntimeError, match="psycopg"): - create_engine_with_dependencies("postgresql+psycopg://host/db") + create_cdm_engine(resolved) From 97c5ae739ee146c4b901d92d3d501199f7243ff7 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Wed, 3 Jun 2026 00:09:18 +0000 Subject: [PATCH 21/25] Remove legacy artifact --- .omop-maint.toml | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 .omop-maint.toml diff --git a/.omop-maint.toml b/.omop-maint.toml deleted file mode 100644 index 6c68aad..0000000 --- a/.omop-maint.toml +++ /dev/null @@ -1,5 +0,0 @@ -[defaults] -dotenv = ".env" -engine_schema = "cdm" -athena_source = "docker/data" -logging = "file" From 47de848c95de1e11c0a0c4ed7f052afcb92d061c Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Wed, 3 Jun 2026 00:10:05 +0000 Subject: [PATCH 22/25] Additional docs for CDM context fn --- omop_alchemy/config.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/omop_alchemy/config.py b/omop_alchemy/config.py index a5abdac..2651e0d 100644 --- a/omop_alchemy/config.py +++ b/omop_alchemy/config.py @@ -66,7 +66,11 @@ def get_config() -> OmopAlchemyConfig: def get_cdm_context() -> tuple[OmopAlchemyConfig, ResolvedResource]: - """Load config once and return (pkg_config, resolved_cdm_resource).""" + """Return (pkg_config, resolved_cdm_resource), loading config once. + + The resource is taken from tools.omop_alchemy.default_resource when set; + otherwise falls back to the canonical CDM_DB_RESOURCE alias ("cdm_db"). 
+ """ stack = load_stack_config() pkg_config = OmopAlchemyConfig.from_stack(stack) tool = stack.tools.get(OmopAlchemyConfig.tool_name) From b7ddad9bfe066b0aec24ab10392ddbdad6ab2d58 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Thu, 4 Jun 2026 00:14:14 +0000 Subject: [PATCH 23/25] Updated with docker-compose --- .env.example | 5 +++ Dockerfile | 3 ++ README.md | 41 ++++++++++++++++++++++- docker-compose.yaml | 48 +++++++++++++++++++++++++++ docs/getting-started/configuration.md | 36 ++++++++++++++++++++ 5 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 .env.example create mode 100644 Dockerfile create mode 100644 docker-compose.yaml diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..e222298 --- /dev/null +++ b/.env.example @@ -0,0 +1,5 @@ +# Copy to .env and edit to override defaults. All vars below are optional; +# docker-compose uses the shown defaults when .env is absent. +OMOP_CDM_DB_USER=omop +OMOP_CDM_DB_PASSWORD=omop +OMOP_CDM_DB_NAME=omop_cdm diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..d20e252 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,3 @@ +FROM python:3.12-slim +RUN pip install --no-cache-dir ".[postgres]" +WORKDIR /workspace diff --git a/README.md b/README.md index f36b866..8ce21db 100644 --- a/README.md +++ b/README.md @@ -76,4 +76,43 @@ The API is stabilising, but some modules may change as real-world use cases expa ### Some additional background This work builds on earlier research and tooling presented at the 2023 OHDSI APAC Symposium -> see [background paper](https://github.com/AustralianCancerDataNetwork/OMOP_Alchemy/blob/main/notebooks/ORMforResearchReadyData_APAC2023.pdf). \ No newline at end of file +> see [background paper](https://github.com/AustralianCancerDataNetwork/OMOP_Alchemy/blob/main/notebooks/ORMforResearchReadyData_APAC2023.pdf). 
+ +--- + +## Configuration + +OMOP Alchemy reads all database connection and schema settings from +[oa-configurator](https://github.com/AustralianCancerDataNetwork/oa-configurator). +No `.env` files or `ENGINE` environment variables are needed. + +Run once after installation: + +```bash +omop-config init +omop-config configure omop_alchemy +``` + +See [Configuration](docs/getting-started/configuration.md) for full details. + +--- + +## Docker Compose + +The included `docker-compose.yaml` provides a PostgreSQL database and a Python +container with the `[postgres]` extra pre-installed. Default credentials work out of the box: + +```bash +docker compose up +``` + +The `python-alchemy` service runs `omop-config configure` at startup and writes +`~/.config/omop/config.toml` on the host on first start; subsequent starts skip +configuration automatically. + +To override credentials, copy `.env.example` to `.env` and edit before starting: + +```bash +cp .env.example .env +docker compose up +``` \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..a8e8267 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,48 @@ +services: + omop-cdm-db: + image: postgres:16-alpine + restart: always + environment: + POSTGRES_USER: ${OMOP_CDM_DB_USER:-omop} + POSTGRES_PASSWORD: ${OMOP_CDM_DB_PASSWORD:-omop} + POSTGRES_DB: ${OMOP_CDM_DB_NAME:-omop_cdm} + PGDATA: /var/lib/postgresql/data/pgdata + volumes: + - db_data:/var/lib/postgresql/data + ports: + - "5432:5432" + networks: + - omop-net + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${OMOP_CDM_DB_USER:-omop}"] + interval: 5s + timeout: 5s + retries: 5 + + python-alchemy: + build: . 
+ restart: unless-stopped + depends_on: + omop-cdm-db: + condition: service_healthy + volumes: + - ${HOME}/.config/omop:/root/.config/omop + networks: + - omop-net + command: > + bash -c " + omop-config configure omop_alchemy --skip-if-configured + --conn-name cdm --dialect postgresql+psycopg + --host omop-cdm-db --port 5432 + --user ${OMOP_CDM_DB_USER:-omop} + --password ${OMOP_CDM_DB_PASSWORD:-omop} + --database ${OMOP_CDM_DB_NAME:-omop_cdm} --cdm-schema omop && + sleep infinity + " + +networks: + omop-net: + name: omop-net + +volumes: + db_data: diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md index 74c09dd..611b61e 100644 --- a/docs/getting-started/configuration.md +++ b/docs/getting-started/configuration.md @@ -60,6 +60,42 @@ omop-alchemy info This prints the resolved config file path, connection details, and schema. A successful run confirms that OMOP_Alchemy can reach your database. +## Docker Compose + +The included `docker-compose.yaml` spins up a PostgreSQL database and a `python-alchemy` +container. Default credentials work out of the box — no additional setup needed: + +```bash +docker compose up +``` + +The `python-alchemy` container runs `omop-config configure omop_alchemy` automatically at +startup. Your `~/.config/omop/config.toml` on the host is written on first start and +skipped on subsequent starts (`--skip-if-configured` flag makes this idempotent). + +### Overriding default values + +The compose file uses built-in defaults for all database credentials. 
To use different +values, create a `.env` file in the same directory as `docker-compose.yaml` with any of the following variables: + +| Variable | Default | Description | +|---|---|---| +| `OMOP_CDM_DB_USER` | `omop` | CDM database username | +| `OMOP_CDM_DB_PASSWORD` | `omop` | CDM database password | +| `OMOP_CDM_DB_NAME` | `omop_cdm` | CDM database name | + +Copy the example and edit as needed: + +```bash +cp .env.example .env +# edit .env +docker compose up +``` + +The `.env` file is only read by Docker Compose for variable substitution — it is not +loaded by OMOP_Alchemy at runtime. + ## Further reading - [oa_configurator quickstart](https://AustralianCancerDataNetwork.github.io/oa-configurator/) — full config reference, multiple profiles, env var export +- [oa_configurator integration guide](https://AustralianCancerDataNetwork.github.io/oa-configurator/integration/) — Docker Compose details and multi-package setups From bb60d561d16f7a31e37311718e068a5828d71a94 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Thu, 4 Jun 2026 05:22:22 +0000 Subject: [PATCH 24/25] Add schema for CDM tables --- docs/getting-started/configuration.md | 17 +++++++++++++++++ omop_alchemy/maintenance/_cli_utils.py | 14 ++++++++++++++ omop_alchemy/maintenance/cli_schema_tables.py | 3 +++ omop_alchemy/maintenance/cli_vocab.py | 5 ++++- 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md index 611b61e..e3e1efd 100644 --- a/docs/getting-started/configuration.md +++ b/docs/getting-started/configuration.md @@ -95,6 +95,23 @@ docker compose up The `.env` file is only read by Docker Compose for variable substitution — it is not loaded by OMOP_Alchemy at runtime. +## Multiple instances + +To configure a second CDM database (e.g. for production), use `--resource-name`: + +```bash +omop-config configure omop_alchemy --resource-name cdm_db_prod +``` + +This creates `cdm_db_prod` without touching the existing `cdm_db`.
Re-run without +`--resource-name` afterwards to select which one omop_alchemy uses by default: + +```bash +omop-config configure omop_alchemy # prompts for default_resource +``` + +See the [oa-configurator integration guide](https://AustralianCancerDataNetwork.github.io/oa-configurator/integration/#multiple-environments) for the full multi-environment guide. + ## Further reading - [oa_configurator quickstart](https://AustralianCancerDataNetwork.github.io/oa-configurator/) — full config reference, multiple profiles, env var export diff --git a/omop_alchemy/maintenance/_cli_utils.py b/omop_alchemy/maintenance/_cli_utils.py index 1d0d353..a1b8f3f 100644 --- a/omop_alchemy/maintenance/_cli_utils.py +++ b/omop_alchemy/maintenance/_cli_utils.py @@ -7,6 +7,7 @@ from dataclasses import dataclass from typing import Any, Callable, TypeVar +import sqlalchemy as sa import typer from sqlalchemy.exc import SQLAlchemyError @@ -103,6 +104,19 @@ def wrapper(**kwargs: Any) -> Any: # ── Helpers ─────────────────────────────────────────────────────────────────── +def ensure_schema(engine: sa.Engine, schema: str | None) -> None: + """Create the schema in the database if it does not already exist. + + No-op when schema is None or ``"public"`` (PostgreSQL's default schema + always exists and cannot be created via ``CREATE SCHEMA``). 
+ """ + if not schema or schema == "public": + return + with engine.connect() as conn: + conn.execute(sa.text(f'CREATE SCHEMA IF NOT EXISTS "{schema}"')) + conn.commit() + + def handle_error(exc: Exception) -> None: if isinstance(exc, BackendNotSupportedError): console.print(render_error(f"Not supported: {exc}")) diff --git a/omop_alchemy/maintenance/cli_schema_tables.py b/omop_alchemy/maintenance/cli_schema_tables.py index 4aa0504..960fda7 100644 --- a/omop_alchemy/maintenance/cli_schema_tables.py +++ b/omop_alchemy/maintenance/cli_schema_tables.py @@ -6,6 +6,7 @@ import sqlalchemy as sa +from ._cli_utils import ensure_schema from .tables import ( MaintenanceTable, TableCategory, @@ -62,6 +63,8 @@ def create_missing_tables( dry_run: bool = False, ) -> list[TableCreationResult]: """Create any ORM-managed tables missing from the target database. Skips tables with unresolved FK dependencies.""" + if not dry_run: + ensure_schema(engine, db_schema) inspector = sa.inspect(engine) missing_tables = collect_missing_tables( engine, diff --git a/omop_alchemy/maintenance/cli_vocab.py b/omop_alchemy/maintenance/cli_vocab.py index 6c9d85f..3a84a55 100644 --- a/omop_alchemy/maintenance/cli_vocab.py +++ b/omop_alchemy/maintenance/cli_vocab.py @@ -33,7 +33,7 @@ ) from ..backends import resolve_backend -from ._cli_utils import omop_command +from ._cli_utils import ensure_schema, omop_command from .cli_foreign_keys import manage_foreign_key_triggers from .cli_indexes import manage_indexes from .cli_tables import reset_model_sequences @@ -284,6 +284,9 @@ def load_vocab_source( + ", ".join(sorted(missing_required)) ) + if not dry_run: + ensure_schema(engine, db_schema) + # NullPool: each session/connection is opened fresh and closed immediately after # use. No stale pooled connections survive between tables, which prevents # "connection in recovery mode" failures on subsequent tables after a heavy load. 
From 8473af79d18006ea37f2c1521dd3dd50a0412a44 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Thu, 4 Jun 2026 05:36:29 +0000 Subject: [PATCH 25/25] Updated CFG to not reconfigure --- docs/getting-started/configuration.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md index e3e1efd..3dddaeb 100644 --- a/docs/getting-started/configuration.md +++ b/docs/getting-started/configuration.md @@ -103,11 +103,15 @@ To configure a second CDM database (e.g. for production), use `--resource-name`: omop-config configure omop_alchemy --resource-name cdm_db_prod ``` -This creates `cdm_db_prod` without touching the existing `cdm_db`. Re-run without -`--resource-name` afterwards to select which one omop_alchemy uses by default: +This creates `cdm_db_prod` without touching the existing `cdm_db`. Because two +resources now exist, configure automatically prompts you to choose the default at +the end of the same run — no second invocation needed. -```bash -omop-config configure omop_alchemy # prompts for default_resource +To change the default later, set `default_resource` directly in `config.toml`: + +```toml +[tools.omop_alchemy] +default_resource = "cdm_db_prod" ``` See the [oa-configurator integration guide](https://AustralianCancerDataNetwork.github.io/oa-configurator/integration/#multiple-environments) for the full multi-environment guide.