diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..e222298 --- /dev/null +++ b/.env.example @@ -0,0 +1,5 @@ +# Copy to .env and edit to override defaults. All vars below are optional; +# docker-compose uses the shown defaults when .env is absent. +OMOP_CDM_DB_USER=omop +OMOP_CDM_DB_PASSWORD=omop +OMOP_CDM_DB_NAME=omop_cdm diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..208b49a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,3 @@ +FROM python:3.12-slim +RUN pip install --no-cache-dir ".[postgres,emb,pgvector,faiss-cpu]" +WORKDIR /workspace diff --git a/README.md b/README.md index a6d16a3..428c27a 100644 --- a/README.md +++ b/README.md @@ -114,3 +114,41 @@ omop_graph/ ├── api.py # stable public API surface └── db/ # session helpers ``` + +--- + +## Configuration + +omop-graph reads database connection settings from +[oa-configurator](https://github.com/AustralianCancerDataNetwork/oa-configurator). +It requires the CDM database configured by omop-alchemy. + +Run once after installation: + +```bash +omop-config init +omop-config configure omop_alchemy +omop-config configure omop_graph +``` + +See [Configuration](docs/getting-started/configuration.md) for full details. + +--- + +## Docker Compose + +The included `docker-compose.yaml` provides a PostgreSQL CDM database and a Python +container with all optional backends pre-installed (`[postgres,emb,pgvector,faiss-cpu]`). +Default credentials work out of the box: + +```bash +docker compose up +``` + +The `python-graph` service runs `omop-config configure` at startup. 
To override +credentials: + +```bash +cp .env.example .env +docker compose up +``` diff --git a/docker-compose.yaml b/docker-compose.yaml index 9035ad9..ab13e57 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -2,23 +2,62 @@ services: omop-cdm-db: image: postgres:16-alpine restart: always - env_file: .env environment: - - POSTGRES_USER=${OMOP_CDM_DB_USER:-omop} - - POSTGRES_PASSWORD=${OMOP_CDM_DB_PASSWORD:-omop} - - POSTGRES_DB=${OMOP_CDM_DB_NAME:-omop} - - PGDATA=/var/lib/postgresql/data/pgdata + POSTGRES_USER: ${OMOP_CDM_DB_USER:-omop} + POSTGRES_PASSWORD: ${OMOP_CDM_DB_PASSWORD:-omop} + POSTGRES_DB: ${OMOP_CDM_DB_NAME:-omop_cdm} + PGDATA: /var/lib/postgresql/data/pgdata volumes: - db_data:/var/lib/postgresql/data + ports: + - "5432:5432" networks: - omop-net + command: > + postgres + -c shared_buffers=512MB + -c effective_cache_size=1GB + -c work_mem=128MB + -c maintenance_work_mem=512MB + -c max_wal_size=4GB + -c min_wal_size=512MB + -c wal_buffers=16MB + -c wal_compression=zstd + -c full_page_writes=off + -c checkpoint_timeout=30min + -c synchronous_commit=off + -c max_parallel_workers_per_gather=2 + -c max_worker_processes=4 + -c max_parallel_maintenance_workers=2 healthcheck: - test: ["CMD-SHELL", "pg_isready -U ${OMOP_CDM_DB_USER:-omop} -d ${OMOP_CDM_DB_NAME:-omop}"] + test: ["CMD-SHELL", "pg_isready -U ${OMOP_CDM_DB_USER:-omop}"] interval: 5s timeout: 5s retries: 5 - ports: - - "5432:5432" + + python-graph: + build: . 
+ restart: unless-stopped + depends_on: + omop-cdm-db: + condition: service_healthy + volumes: + - ${HOME}/.config/omop:/root/.config/omop + networks: + - omop-net + command: > + bash -c " + omop-config configure omop_alchemy + --database cdm --dialect postgresql+psycopg + --host omop-cdm-db --port 5432 + --user ${OMOP_CDM_DB_USER:-omop} + --password ${OMOP_CDM_DB_PASSWORD:-omop} + --database-name ${OMOP_CDM_DB_NAME:-omop_cdm} + --cdm-schema omop && + omop-config configure orm_loader && + omop-config configure omop_graph && + sleep infinity + " networks: omop-net: diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md new file mode 100644 index 0000000..6a1de22 --- /dev/null +++ b/docs/getting-started/configuration.md @@ -0,0 +1,82 @@ +# Configuration + +omop-graph reads all database connection and schema settings from +[oa-configurator](https://github.com/AustralianCancerDataNetwork/oa-configurator). +No environment variables are needed for the Python package itself. + +## Quick start + +omop-graph requires the CDM database configured by omop-alchemy. If you have not +already done so, configure omop-alchemy first: + +```bash +omop-config init # creates ~/.config/omop/config.toml if absent +omop-config configure omop_alchemy +omop-config configure omop_graph +``` + +## What gets configured + +omop-graph does not own any database resources. It reads from the `cdm_db` resource +configured by omop-alchemy and stores any package-specific settings (traversal depth, +path limits) under `[tools.omop_graph]` in `config.toml`. + +## Verify + +```bash +omop-config verify +``` + +## Docker Compose + +The included `docker-compose.yaml` spins up a PostgreSQL CDM database and a +`python-graph` container. Default credentials work out of the box: + +```bash +docker compose up +``` + +The `python-graph` container runs `omop-config configure` for both `omop_alchemy` and +`omop_graph` at startup. 
Your `~/.config/omop/config.toml` on the host is written on +startup. The configure step is safe to re-run on subsequent starts: connection flags always apply, and any values already stored in `config.toml` are preserved for fields not explicitly provided. + +### Overriding default values + +The compose file uses built-in defaults for all database credentials. To use different +values, create a `.env` file in this directory with any of the following variables: + +| Variable | Default | Description | +|---|---|---| +| `OMOP_CDM_DB_USER` | `omop` | CDM database username | +| `OMOP_CDM_DB_PASSWORD` | `omop` | CDM database password | +| `OMOP_CDM_DB_NAME` | `omop_cdm` | CDM database name | + +Copy the example and edit as needed: + +```bash +cp .env.example .env +# edit .env +docker compose up +``` + +The `.env` file is only read by Docker Compose for variable substitution — it is not +loaded by omop-graph at runtime. + +## Multiple instances + +omop-graph reads from the `cdm_db` resource owned by omop-alchemy. To point +it at a second CDM database (e.g. for production), configure omop-alchemy with +a second resource: + +```bash +omop-config configure omop_alchemy --resource-name cdm_db_prod +``` + +Configure automatically prompts you to choose the default at the end of the same +run — no second invocation needed. + +See the [oa-configurator integration guide](https://AustralianCancerDataNetwork.github.io/oa-configurator/integration/#multiple-environments) for the full multi-environment guide. + +## Further reading + +- [oa-configurator integration guide](https://AustralianCancerDataNetwork.github.io/oa-configurator/integration/) — full config reference, profiles, multi-package setups diff --git a/docs/usage/cli.md b/docs/usage/cli.md index 1bf5d59..ead2de4 100644 --- a/docs/usage/cli.md +++ b/docs/usage/cli.md @@ -2,6 +2,17 @@ The OMOP CDM instantiation tool provides a streamlined way to bootstrap a local OHDSI Common Data Model (CDM) database using Athena vocabulary files and synthetic test data. 
+!!! note "Verbosity flag placement" + The `--verbose` / `-v` flag is a **global option** and must appear **before** the + subcommand name, not after it: + + ``` + omop-graph -v relationship-classification ... # ✓ correct + omop-graph relationship-classification -v ... # ✗ flag is ignored + ``` + + Use `-v` for INFO level and `-vv` for DEBUG level. + --- ## `omop-cdm` diff --git a/mkdocs.yml b/mkdocs.yml index 3caabec..9c7a39e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -31,6 +31,7 @@ nav: - Home: index.md - Getting Started: - Installation: usage/installation.md + - Configuration: getting-started/configuration.md - "CLI Reference": usage/cli.md - Testing: usage/testing.md - Core Components: diff --git a/notebooks/01_quickstart.ipynb b/notebooks/01_quickstart.ipynb index 7206a56..6e6f6c4 100644 --- a/notebooks/01_quickstart.ipynb +++ b/notebooks/01_quickstart.ipynb @@ -21,24 +21,20 @@ "metadata": {}, "outputs": [], "source": [ - "from sqlalchemy import create_engine\n", "from sqlalchemy.orm import sessionmaker\n", "from dotenv import load_dotenv\n", "\n", - "from omop_graph.graph.scoring import find_shortest_paths, rank_paths, explain_path\n", - "from omop_graph.graph.traverse import traverse\n", + "from omop_graph.graph.scoring import rank_paths, explain_path\n", "from omop_graph.graph.paths import find_shortest_paths\n", "from omop_graph.graph.kg import KnowledgeGraph\n", "from omop_graph.graph.edges import PredicateKind\n", "from omop_graph.render import (\n", - " render_subgraph,\n", " render_trace,\n", " render_path,\n", - " render_explained_path,\n", " bind_default_renderers,\n", ")\n", "from orm_loader.helpers import configure_logging\n", - "from omop_alchemy import get_engine_name, TEST_PATH, ROOT_PATH\n", + "from omop_alchemy import get_engine_name\n", "import sqlalchemy as sa\n" ] }, @@ -157,7 +153,7 @@ "source": [ "## Note on domain scope:\n", "\n", - "By default, traversal is restricted to relationships within the same OMOP domain (e.g. 
Drug -> Drug, Condition -> Condition). This avoids misleading “shortcut” paths through terminology metadata. \n", + "By default, traversal is restricted to relationships within the same OMOP domain (e.g. Drug -> Drug, Condition -> Condition). This avoids misleading \u201cshortcut\u201d paths through terminology metadata. \n", "\n", "Cross-domain reasoning (e.g. Drug -> Condition) requires explicit biomedical relationships and is intentionally out of scope for the default - it also frequently defaults to structural or metadata relationsips such as SNOMED 'has module' which becomes rapidly non-specific if all terms resolve in 1 or 2 steps to an extreme high level parent.\n" ] @@ -218,142 +214,142 @@ " max-width:420px;\n", " \">\n", "
\n", - " 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet ✅\n", + " 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet \u2705\n", "
\n", "
\n", " RxNorm:860975\n", - " · Drug\n", - " · Quant Clinical Drug\n", + " \u00b7 Drug\n", + " \u00b7 Quant Clinical Drug\n", "
\n", " \n", " \n", " \n", "
\n", - " └─ Standard to Non-standard map (OMOP)\n", + " \u2514\u2500 Standard to Non-standard map (OMOP)\n", "
\n", " \n", "
\n", - " → 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n", + " \u2192 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n", "
\n", " \n", "
\n", - " → 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n", + " \u2192 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n", "
\n", " \n", "
\n", - " → 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n", + " \u2192 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n", "
\n", " \n", "
\n", - " → 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n", + " \u2192 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n", "
\n", " \n", "
\n", - " → 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n", + " \u2192 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n", "
\n", " \n", "
\n", - " … 920 more\n", + " \u2026 920 more\n", "
\n", " \n", "
\n", - " └─ RxNorm to SPL (NLM)\n", + " \u2514\u2500 RxNorm to SPL (NLM)\n", "
\n", " \n", "
\n", - " → METFORMIN ER 500 MG - metformin er 500 mg tablet\n", + " \u2192 METFORMIN ER 500 MG - metformin er 500 mg tablet\n", "
\n", " \n", "
\n", - " → METFORMIN HYDROCHLORIDE - metformin hydrochloride tablet, extended release\n", + " \u2192 METFORMIN HYDROCHLORIDE - metformin hydrochloride tablet, extended release\n", "
\n", " \n", "
\n", - " → METFORMIN HYDROCHLORIDE - metformin hydrochloride tablet, extended release\n", + " \u2192 METFORMIN HYDROCHLORIDE - metformin hydrochloride tablet, extended release\n", "
\n", " \n", "
\n", - " → METFORMIN HYDROCHLORIDE - metformin hydrochloride tablet, extended release\n", + " \u2192 METFORMIN HYDROCHLORIDE - metformin hydrochloride tablet, extended release\n", "
\n", " \n", "
\n", - " → METFORMIN HYDROCHLORIDE - metformin hydrochloride tablet, extended release\n", + " \u2192 METFORMIN HYDROCHLORIDE - metformin hydrochloride tablet, extended release\n", "
\n", " \n", "
\n", - " … 237 more\n", + " \u2026 237 more\n", "
\n", " \n", "
\n", - " └─ Concept replaces\n", + " \u2514\u2500 Concept replaces\n", "
\n", " \n", "
\n", - " → Metformin 500 MG 24 Hour Extended Release Tablet\n", + " \u2192 Metformin 500 MG 24 Hour Extended Release Tablet\n", "
\n", " \n", "
\n", - " └─ Has dose form (RxNorm)\n", + " \u2514\u2500 Has dose form (RxNorm)\n", "
\n", " \n", "
\n", - " → Extended Release Oral Tablet\n", + " \u2192 Extended Release Oral Tablet\n", "
\n", " \n", "
\n", - " └─ RxNorm to SNOMED equivalent (RxNorm)\n", + " \u2514\u2500 RxNorm to SNOMED equivalent (RxNorm)\n", "
\n", " \n", "
\n", - " → Metformin hydrochloride 500 mg prolonged-release oral tablet\n", + " \u2192 Metformin hydrochloride 500 mg prolonged-release oral tablet\n", "
\n", " \n", "
\n", - " └─ Is a (RxNorm)\n", + " \u2514\u2500 Is a (RxNorm)\n", "
\n", " \n", "
\n", - " → metformin Oral Product\n", + " \u2192 metformin Oral Product\n", "
\n", " \n", "
\n", - " → metformin Pill\n", + " \u2192 metformin Pill\n", "
\n", " \n", "
\n", - " └─ Non-standard to Standard map (OMOP)\n", + " \u2514\u2500 Non-standard to Standard map (OMOP)\n", "
\n", " \n", "
\n", - " → 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n", + " \u2192 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n", "
\n", " \n", "
\n", - " └─ Has tradename (RxNorm)\n", + " \u2514\u2500 Has tradename (RxNorm)\n", "
\n", " \n", "
\n", - " → 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet [Glucophage]\n", + " \u2192 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet [Glucophage]\n", "
\n", " \n", "
\n", - " └─ Quantified form of (RxNorm)\n", + " \u2514\u2500 Quantified form of (RxNorm)\n", "
\n", " \n", "
\n", - " → metformin hydrochloride 500 MG Extended Release Oral Tablet\n", + " \u2192 metformin hydrochloride 500 MG Extended Release Oral Tablet\n", "
\n", " \n", "
\n", - " └─ RxNorm to VA Product equivalent (NDF-RT)\n", + " \u2514\u2500 RxNorm to VA Product equivalent (NDF-RT)\n", "
\n", " \n", "
\n", - " → METFORMIN HCL 500MG TAB,SA\n", + " \u2192 METFORMIN HCL 500MG TAB,SA\n", "
\n", " \n", "
\n", - " → METFORMIN HCL 500MG 24HR TAB,SA\n", + " \u2192 METFORMIN HCL 500MG 24HR TAB,SA\n", "
\n", " \n", "
\n", @@ -370,202 +366,202 @@ " max-width:420px;\n", " \">\n", "
\n", - " metformin ✅\n", + " metformin \u2705\n", "
\n", "
\n", " RxNorm:6809\n", - " · Drug\n", - " · Ingredient\n", + " \u00b7 Drug\n", + " \u00b7 Ingredient\n", "
\n", "
\n", " \n", " \n", "
\n", - " └─ Brand name of (OMOP)\n", + " \u2514\u2500 Brand name of (OMOP)\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " … 521 more\n", + " \u2026 521 more\n", "
\n", " \n", "
\n", - " └─ Has ingredient (RxNorm)\n", + " \u2514\u2500 Has ingredient (RxNorm)\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " … 113 more\n", + " \u2026 113 more\n", "
\n", " \n", "
\n", - " └─ Non-standard to Standard map (OMOP)\n", + " \u2514\u2500 Non-standard to Standard map (OMOP)\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " … 91 more\n", + " \u2026 91 more\n", "
\n", " \n", "
\n", - " └─ Drug Source to RxNorm equivalent (OMOP)\n", + " \u2514\u2500 Drug Source to RxNorm equivalent (OMOP)\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " … 10 more\n", + " \u2026 10 more\n", "
\n", " \n", "
\n", - " └─ ATC to RxNorm/Extension primary lateral (OMOP)\n", + " \u2514\u2500 ATC to RxNorm/Extension primary lateral (OMOP)\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " … 16 more\n", + " \u2026 16 more\n", "
\n", " \n", "
\n", - " └─ NDF-RT to RxNorm equivalent (RxNorm)\n", + " \u2514\u2500 NDF-RT to RxNorm equivalent (RxNorm)\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " └─ SNOMED to RxNorm equivalent (RxNorm)\n", + " \u2514\u2500 SNOMED to RxNorm equivalent (RxNorm)\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " └─ Standard to Non-standard map (OMOP)\n", + " \u2514\u2500 Standard to Non-standard map (OMOP)\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " └─ Form of (RxNorm)\n", + " \u2514\u2500 Form of (RxNorm)\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", " \n", "
\n", - " └─ ATC to RxNorm/Extension secondary upwards (OMOP)\n", + " \u2514\u2500 ATC to RxNorm/Extension secondary upwards (OMOP)\n", "
\n", " \n", "
\n", - " → metformin\n", + " \u2192 metformin\n", "
\n", "
[terminated: shortest_paths_found]
\n", " \n", @@ -717,4 +713,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/notebooks/02_reasoner.ipynb b/notebooks/02_reasoner.ipynb index 9751697..21b5248 100644 --- a/notebooks/02_reasoner.ipynb +++ b/notebooks/02_reasoner.ipynb @@ -29,27 +29,16 @@ "metadata": {}, "outputs": [], "source": [ - "from sqlalchemy import create_engine\n", "from sqlalchemy.orm import sessionmaker\n", "from dotenv import load_dotenv\n", "\n", - "from omop_graph.graph.scoring import find_shortest_paths, rank_paths, explain_path\n", - "from omop_graph.graph.traverse import traverse\n", - "from omop_graph.graph.paths import find_shortest_paths\n", "from omop_graph.graph.kg import KnowledgeGraph\n", - "from omop_graph.graph.edges import PredicateKind\n", "from omop_graph.render import (\n", - " render_subgraph,\n", - " render_trace,\n", - " render_path,\n", - " render_explained_path,\n", " bind_default_renderers,\n", ")\n", "from orm_loader.helpers import configure_logging\n", - "from omop_alchemy import get_engine_name, TEST_PATH, ROOT_PATH\n", - "import sqlalchemy as sa\n", - "from omop_alchemy.cdm.model.vocabulary import Concept, Concept_Ancestor, Concept_Relationship, Concept_Synonym\n", - "import pandas as pd" + "from omop_alchemy import get_engine_name\n", + "import sqlalchemy as sa" ] }, { diff --git a/notebooks/03_phenotype_helper.ipynb b/notebooks/03_phenotype_helper.ipynb index e5e2dcb..67f913b 100644 --- a/notebooks/03_phenotype_helper.ipynb +++ b/notebooks/03_phenotype_helper.ipynb @@ -14,15 +14,11 @@ "from dataclasses import asdict\n", "from dotenv import load_dotenv\n", "from orm_loader.helpers import configure_logging\n", - "from omop_alchemy import get_engine_name, TEST_PATH, ROOT_PATH\n", - "from omop_alchemy.cdm.model.vocabulary import Concept, Concept_Ancestor, Concept_Relationship, Concept_Synonym\n", + "from omop_alchemy import get_engine_name\n", + "from omop_alchemy.cdm.model.vocabulary import Concept\n", "\n", "from 
omop_graph.graph.kg import KnowledgeGraph\n", "from omop_graph.render import (\n", - " render_subgraph,\n", - " render_trace,\n", - " render_path,\n", - " render_explained_path,\n", " bind_default_renderers,\n", ")\n", "from omop_graph.reasoning.phenotypes import (\n", diff --git a/notebooks/04_mapping_support.ipynb b/notebooks/04_mapping_support.ipynb index e69de29..1543f24 100644 --- a/notebooks/04_mapping_support.ipynb +++ b/notebooks/04_mapping_support.ipynb @@ -0,0 +1,16 @@ +{ + "nbformat": 4, + "nbformat_minor": 5, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.12.0" + } + }, + "cells": [] +} \ No newline at end of file diff --git a/notebooks/99_figures.ipynb b/notebooks/99_figures.ipynb index d389936..85911f0 100644 --- a/notebooks/99_figures.ipynb +++ b/notebooks/99_figures.ipynb @@ -29,27 +29,16 @@ "metadata": {}, "outputs": [], "source": [ - "from sqlalchemy import create_engine\n", "from sqlalchemy.orm import sessionmaker\n", "from dotenv import load_dotenv\n", "\n", - "from omop_graph.graph.scoring import find_shortest_paths, rank_paths, explain_path\n", - "from omop_graph.graph.traverse import traverse\n", - "from omop_graph.graph.paths import find_shortest_paths\n", "from omop_graph.graph.kg import KnowledgeGraph\n", - "from omop_graph.graph.edges import PredicateKind\n", "from omop_graph.render import (\n", - " render_subgraph,\n", - " render_trace,\n", - " render_path,\n", - " render_explained_path,\n", " bind_default_renderers,\n", ")\n", "from orm_loader.helpers import configure_logging\n", - "from omop_alchemy import get_engine_name, TEST_PATH, ROOT_PATH\n", - "import sqlalchemy as sa\n", - "from omop_alchemy.cdm.model.vocabulary import Concept, Concept_Ancestor, Concept_Relationship, Concept_Synonym\n", - "import pandas as pd" + "from omop_alchemy import get_engine_name\n", + "import sqlalchemy as sa" ] }, { diff --git 
a/pyproject.toml b/pyproject.toml index fded00b..73c4074 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ dependencies = [ "typing-extensions>=4.15.0", "typer", "oaklib", + #"oa-configurator>=0.0.1", # uncomment once published to PyPI ] [project.optional-dependencies] @@ -74,6 +75,9 @@ dev = [ [project.scripts] omop-graph = "omop_graph.cli:app" +[project.entry-points."omop.config"] +omop_graph = "omop_graph.config:OmopGraphConfig" + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" diff --git a/scripts/benchmarks/benchmark.py b/scripts/benchmarks/benchmark.py index f726928..fd7adba 100644 --- a/scripts/benchmarks/benchmark.py +++ b/scripts/benchmarks/benchmark.py @@ -32,7 +32,7 @@ EmbeddingRole ) from omop_emb.backends.index_config import index_config_from_index_type -from omop_graph.cli import configure_logging_level +from omop_graph.config import OmopGraphConfig from omop_graph.extensions.emb import get_embedding_writer_interface, MissingExtensionError from omop_graph.extensions.omop_alchemy import PredicateKind from omop_graph.graph.constraints import SearchConstraintConcept @@ -555,7 +555,7 @@ def run_benchmark( ] = None, verbosity: Annotated[int, typer.Option("--verbose", "-v", count=True, help="Increase verbosity (up to two levels)")] = 0, ): - configure_logging_level(verbosity) + OmopGraphConfig.configure_logging(verbosity=verbosity) cases = load_cases(Path(cases_file)) if allowed_domains: diff --git a/src/omop_graph/cli.py b/src/omop_graph/cli.py index 0bd0c48..3a77ced 100644 --- a/src/omop_graph/cli.py +++ b/src/omop_graph/cli.py @@ -1,81 +1,49 @@ -import sqlalchemy as sa -from sqlalchemy.orm import sessionmaker -from typing import Annotated, Optional -import pandas as pd +import logging +import tempfile from pathlib import Path +from typing import Annotated -from dotenv import load_dotenv +import pandas as pd +import sqlalchemy as sa import typer -import tempfile -import logging +from sqlalchemy.orm import 
sessionmaker from orm_loader.helpers import bulk_load_context -from orm_loader.loaders.loader_interface import PandasLoader from orm_loader.helpers.metadata import Base +from orm_loader.loaders.loader_interface import PandasLoader +from omop_graph.config import OmopGraphConfig +from omop_graph.db.session import get_engine from omop_graph.extensions.omop_alchemy import RelationshipClass, RelationshipMapping -from omop_graph.oaklib_interface.omop_factory import build_engine_string from .cli_utils.cli_add_test_data import populate_test_data app = typer.Typer() logger = logging.getLogger(__name__) -def configure_logging_level(verbosity: int, reduce_logging: bool = False) -> None: - """Configure global logging.""" - level_map = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG} - log_level = level_map.get(min(verbosity, 2), logging.DEBUG) - - logging.basicConfig( - level=log_level, - format="%(asctime)s | %(name)s | %(levelname)s | %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", - force=True, - ) - - if reduce_logging: - exempt_loggers = ("omop_graph", "omop_emb") - - class _NamespaceAllowlistFilter(logging.Filter): - def filter(self, record: logging.LogRecord) -> bool: - return record.name.startswith(exempt_loggers) - - allowlist_filter = _NamespaceAllowlistFilter() - - root_logger = logging.getLogger() - for handler in root_logger.handlers: - handler.addFilter(allowlist_filter) - - existing_loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict] - for logger_instance in existing_loggers: - if logger_instance.name.startswith(exempt_loggers): - continue - logger_instance.setLevel(logging.CRITICAL + 1) - logger_instance.propagate = False +@app.callback() +def _main( + verbose: Annotated[ + int, + typer.Option("--verbose", "-v", count=True, help="Increase log verbosity (-v INFO, -vv DEBUG). 
Must come before the subcommand name."), + ] = 0, +) -> None: + OmopGraphConfig.configure_logging(verbosity=verbose) @app.command() def populate_with_test_data(): - """ - Method to populate the database withsynthetic test data - """ - engine_string = build_engine_string() - engine = sa.create_engine(engine_string, future=True, echo=False) + """Populate the database with synthetic test data.""" + engine = get_engine() Session = sessionmaker(bind=engine, future=True) populate_test_data(Session()) + @app.command() def relationship_classification( pred_class_dir: Annotated[str, typer.Option(help="Path to the directory containing `predicate_classification.csv` and `predicate_mapping.csv`.")], - env_file: Annotated[Optional[str], typer.Option("--env-file", "-e", help="Path to the .env file containing database connection variables. If not provided, will look for .env in the current working directory.")] = None, - verbosity: Annotated[int, typer.Option("--verbose", "-v", count=True, help="Increase verbosity (up to two levels)")] = 0, ): - """ - Method to get the pre-classified predicates into the database. - """ - configure_logging_level(verbosity) - load_dotenv(env_file) - + """Load pre-classified predicates into the database.""" pred_class_dir_pl = Path(pred_class_dir) if not pred_class_dir_pl.is_dir(): raise NotADirectoryError(f"{pred_class_dir} is not a valid directory.") @@ -89,24 +57,21 @@ def relationship_classification( df_class = pd.read_csv(pred_class_file) df_mapping = pd.read_csv(pred_mapping_file) - # 1. 
RelationshipClass df_rel_cls = df_class.rename(columns={"class": "predicate_kind", "subclass": "predicate_subkind"}) - # Only allow that a predicate_subkind maps exactly to one semantic and inference description check = df_rel_cls.groupby(["predicate_kind", "predicate_subkind"])[["description", "semantics", "inference"]].nunique(dropna=True) violations = check[(check > 1).any(axis=1)] - if not violations.empty: - conflicting_data = df_rel_cls[df_rel_cls["predicate_subkind"].isin(violations.index)].sort_values("predicate_subkind") - logger.error(f"Validation Failed! {len(violations)} predicate_subkinds have conflicting definitions: {conflicting_data}") + if not violations.empty: # type: ignore[union-attr] + conflicting_data = df_rel_cls[df_rel_cls["predicate_subkind"].isin(violations.index)].sort_values("predicate_subkind") # type: ignore[union-attr, arg-type, call-overload] + logger.error(f"Validation Failed! {len(violations)} predicate_subkinds have conflicting definitions: {conflicting_data}") raise AttributeError("Validation not passed") df_rel_cls_to_export = df_rel_cls.groupby(["predicate_kind", "predicate_subkind"], as_index=False).first() # 2. 
RelationshipMapping df_rel_mapping = df_mapping.rename(columns={"class": "predicate_kind", "subclass": "predicate_subkind", "r_id": "relationship_id"}) - # Same order as relationship_class.py - df_rel_mapping = df_rel_mapping[["relationship_id", "predicate_kind", "predicate_subkind"]].dropna(subset=['predicate_kind', 'predicate_subkind'], how='all') + df_rel_mapping = df_rel_mapping[["relationship_id", "predicate_kind", "predicate_subkind"]].dropna(subset=['predicate_kind', 'predicate_subkind'], how='all') # type: ignore[call-overload] invalid_mask = df_rel_mapping[['predicate_kind', 'predicate_subkind']].isna().any(axis=1) dropped_ids = df_rel_mapping.loc[invalid_mask, 'relationship_id'].unique().tolist() @@ -115,25 +80,22 @@ def relationship_classification( df_rel_mapping = df_rel_mapping.dropna(subset=['predicate_kind', 'predicate_subkind'], how='any') df_rel_mapping_to_export = df_rel_mapping.drop_duplicates(subset=["relationship_id", "predicate_kind", "predicate_subkind"]) - engine_string = build_engine_string() - engine = sa.create_engine(engine_string, future=True, echo=False) + engine = get_engine() Session = sessionmaker(bind=engine, future=True) session = Session() - # Drop the tables with engine.begin() as conn: conn.execute(sa.text(f"DROP TABLE IF EXISTS {RelationshipMapping.staging_tablename()} CASCADE")) # type: ignore conn.execute(sa.text(f"DROP TABLE IF EXISTS {RelationshipClass.staging_tablename()} CASCADE")) # type: ignore conn.execute(sa.text("DROP TYPE IF EXISTS predicatekindenum CASCADE;")) tables_to_drop = [ - RelationshipMapping.__table__, - RelationshipClass.__table__ + RelationshipMapping.__table__, + RelationshipClass.__table__, ] Base.metadata.drop_all(bind=engine, tables=tables_to_drop, checkfirst=True) # type: ignore Base.metadata.create_all(bind=engine, tables=tables_to_drop) # type: ignore - # Save to temporary files named after the table (required by load_csv) and reload from there with tempfile.TemporaryDirectory() as tmp_dir: for 
model, df in zip([RelationshipClass, RelationshipMapping], [df_rel_cls_to_export, df_rel_mapping_to_export]): csv_path = Path(tmp_dir) / f"{model.__tablename__}.csv" @@ -150,5 +112,6 @@ def relationship_classification( ) session.commit() + if __name__ == "__main__": - app() \ No newline at end of file + app() diff --git a/src/omop_graph/cli_utils/__init__.py b/src/omop_graph/cli_utils/__init__.py index 46056ea..8b12c2e 100644 --- a/src/omop_graph/cli_utils/__init__.py +++ b/src/omop_graph/cli_utils/__init__.py @@ -1 +1,3 @@ -from .cli_add_test_data import populate_test_data \ No newline at end of file +from .cli_add_test_data import populate_test_data + +__all__ = ["populate_test_data"] diff --git a/src/omop_graph/config.py b/src/omop_graph/config.py index 6d36bf2..4245b64 100644 --- a/src/omop_graph/config.py +++ b/src/omop_graph/config.py @@ -1,13 +1,35 @@ -"""General configuration for the omop graph, including envrionment variables.""" - -# DB connection for OMOP CDM database -ENV_OMOP_CDM_DB_URL = "OMOP_CDM_DB_URL" -ENV_OMOP_CDM_DB_USER = "OMOP_CDM_DB_USER" -ENV_OMOP_CDM_DB_PASSWORD = "OMOP_CDM_DB_PASSWORD" -ENV_OMOP_CDM_DB_HOST = "OMOP_CDM_DB_HOST" -ENV_OMOP_CDM_DB_PORT = "OMOP_CDM_DB_PORT" -ENV_OMOP_CDM_DB_NAME = "OMOP_CDM_DB_NAME" -ENV_OMOP_CDM_DB_DRIVER = "OMOP_CDM_DB_DRIVER" - -# Ingestion -ENV_OMOP_VOCABULARY_DIR = "OMOP_VOCABULARY_DIR" \ No newline at end of file +"""Configuration for omop-graph via oa-configurator.""" + +from __future__ import annotations + +from typing import ClassVar, Final + +from pydantic import Field +from oa_configurator import PackageConfigBase, ResourceSpec +from omop_alchemy.config import OmopAlchemyConfig + +TOOL_NAME: Final[str] = "omop_graph" + + +class OmopGraphConfig(PackageConfigBase): + """oa-configurator config class for omop-graph. + + omop-graph does not own any database resources. It requires the CDM + database configured by omop-alchemy. 
+ """ + + tool_name: ClassVar[str] = TOOL_NAME + extra_logging_namespaces: ClassVar[tuple[str, ...]] = ("orm_loader", "omop_alchemy", "omop_emb") + required_resources: ClassVar[tuple[str, ...]] = (OmopAlchemyConfig.CDM_DB.semantic_name,) + owned_resources: ClassVar[tuple[ResourceSpec, ...]] = () + + max_depth: int = Field( + default=6, + description="Maximum graph traversal depth for pathfinding and grounding.", + ) + max_paths: int = Field( + default=20, + description="Maximum number of shortest paths returned per query.", + ) + + diff --git a/src/omop_graph/db/session.py b/src/omop_graph/db/session.py index 5e18c9a..bf504af 100644 --- a/src/omop_graph/db/session.py +++ b/src/omop_graph/db/session.py @@ -1,18 +1,19 @@ +"""SQLAlchemy engine helper for the OMOP CDM database.""" + from __future__ import annotations -import os + from typing import Optional, Union -from sqlalchemy import create_engine, URL, make_url + +from sqlalchemy import create_engine, URL from sqlalchemy.orm import sessionmaker, Session -from omop_graph.config import ( - ENV_OMOP_CDM_DB_DRIVER, - ENV_OMOP_CDM_DB_HOST, - ENV_OMOP_CDM_DB_NAME, - ENV_OMOP_CDM_DB_PASSWORD, - ENV_OMOP_CDM_DB_PORT, - ENV_OMOP_CDM_DB_URL, - ENV_OMOP_CDM_DB_USER -) +from oa_configurator import Resolver +from omop_alchemy.config import OmopAlchemyConfig + + +def get_engine(): + """Return a SQLAlchemy engine for the CDM database via oa-configurator.""" + return Resolver.from_active_config().resolve_resource(OmopAlchemyConfig.CDM_DB.semantic_name).create_engine() def make_engine( @@ -21,64 +22,21 @@ def make_engine( echo: bool = False, connect_timeout: int = 10, ): - url = url or build_engine_string() - if isinstance(url, str): - url = URL.create(url) + """Return a SQLAlchemy engine. + When url is omitted, reads connection details from the active oa-configurator + stack config. Pass url explicitly to override. 
+ """ + if url is None: + return get_engine() + from sqlalchemy import make_url as _make_url + if isinstance(url, str): + url = _make_url(url) kwargs = {} if not url.drivername.startswith("sqlite"): kwargs["connect_args"] = {"connect_timeout": connect_timeout} - return create_engine(url, echo=echo, **kwargs) -def build_engine_string() -> "URL": - """Compose a SQLAlchemy ``URL`` for the given backend at runtime. - - Returns - ------- - sqlalchemy.URL - - Notes - ----- - If ``OMOP_CDM_DB_URL`` is set it is directly used to create the URL, and all other environment variables are ignored. - Otherwise, the following environment variables are read to compose the URL for a relational database backend: - - ``OMOP_CDM_DB_DRIVER`` (required): the SQLAlchemy driver name (e.g. 'postgresql', 'mysql', 'sqlite'). - - ``OMOP_CDM_DB_USER`` (required): the username for database authentication. - - ``OMOP_CDM_DB_PASSWORD`` (required): the password for database authentication. - - ``OMOP_CDM_DB_HOST`` (required): the hostname or IP address of the database server. - - ``OMOP_CDM_DB_NAME`` (required): the name of the database to connect to. - - ``OMOP_CDM_DB_PORT`` (optional, default 5432): the port number on which the database server is listening. - - Raises - ------ - RuntimeError - If a required environment variable is missing. - ValueError - If ``backend`` does not support URL composition from environment - variables (e.g. ``FAISS``). 
- """ - - - optional_url = os.getenv(ENV_OMOP_CDM_DB_URL) - if optional_url: - return make_url(optional_url) - - driver = _get_required_env_variable(ENV_OMOP_CDM_DB_DRIVER) - user = _get_required_env_variable(ENV_OMOP_CDM_DB_USER) - password = _get_required_env_variable(ENV_OMOP_CDM_DB_PASSWORD) - host = _get_required_env_variable(ENV_OMOP_CDM_DB_HOST) - database = _get_required_env_variable(ENV_OMOP_CDM_DB_NAME) - port_str = os.getenv(ENV_OMOP_CDM_DB_PORT, "5432") - port = int(port_str) if port_str else None - return URL.create( - drivername=driver, - username=user, - password=password, - host=host, - port=port, - database=database, - ) - def make_session( url: str, @@ -88,27 +46,3 @@ def make_session( engine = make_engine(url, echo=echo) SessionLocal = sessionmaker(bind=engine) return SessionLocal() - - -def _get_required_env_variable(name: str) -> str: - """Get the value of a required environment variable. - - Parameters - ---------- - name : str - Environment variable name. - - Returns - ------- - str - Environment variable value. - - Raises - ------ - RuntimeError - If the environment variable is not set. 
- """ - value = os.getenv(name) - if value is None: - raise RuntimeError(f"Required environment variable {name!r} is not set.") - return value \ No newline at end of file diff --git a/src/omop_graph/extensions/emb.py b/src/omop_graph/extensions/emb.py index c7f8090..822085d 100644 --- a/src/omop_graph/extensions/emb.py +++ b/src/omop_graph/extensions/emb.py @@ -151,12 +151,17 @@ def semantic_similarity( if embedding_writer is not None: missing_concept_ids = tuple(missing_sc_embeddings.keys()) - missing_concept_texts = tuple(missing_sc_embeddings.values()) + missing_concept_texts = tuple(row.concept_name for row in missing_sc_embeddings.values()) + + from omop_emb.utils.cdm import fetch_cdm_concepts_for_filter + from omop_emb.utils.embedding_utils import EmbeddingConceptFilter as _ECF + missing_filter = _ECF(concept_ids=missing_concept_ids, limit=len(missing_concept_ids)) + concept_meta = fetch_cdm_concepts_for_filter(missing_filter, cdm_engine=kg.cdm_engine) embedding_writer.embed_and_upsert_concepts( - omop_cdm_engine=kg.cdm_engine, concept_ids=missing_concept_ids, concept_texts=missing_concept_texts, + concept_meta=concept_meta, ) logger.debug(f"Computed and stored embeddings for missing concepts: {missing_concept_ids}") else: diff --git a/src/omop_graph/graph/kg.py b/src/omop_graph/graph/kg.py index fe7f900..e7209dc 100644 --- a/src/omop_graph/graph/kg.py +++ b/src/omop_graph/graph/kg.py @@ -17,14 +17,13 @@ import functools import logging import re -import os from datetime import date from typing import Dict, Optional, Tuple, Literal, Generator, TYPE_CHECKING from dataclasses import dataclass from sqlalchemy import Engine from sqlalchemy.orm import Session, sessionmaker -from omop_alchemy.cdm.handlers.fulltext import FullTextError +from omop_alchemy.backends import FullTextError if TYPE_CHECKING: from omop_emb import EmbeddingWriterInterface, EmbeddingReaderInterface, EmbeddingClient @@ -161,15 +160,13 @@ def emb(self) -> "EmbeddingWriterInterface | 
EmbeddingReaderInterface": try: from omop_emb.interface import EmbeddingWriterInterface, EmbeddingReaderInterface - from omop_emb.config import ENV_OMOP_EMB_BACKEND + from omop_emb.config import OmopEmbConfig from omop_emb.backends.base_backend import resolve_backend if self._emb_config is None: raise ValueError("Embedding configuration is not set. Please provide an EmbeddingConfiguration when initializing the KnowledgeGraph to use embedding features.") - - backend_type = self._emb_config.backend_type or os.getenv(ENV_OMOP_EMB_BACKEND, None) - if backend_type is None: - raise ValueError(f"Embedding backend type must be specified either in the configuration or via the {ENV_OMOP_EMB_BACKEND} environment variable.") + + backend_type = self._emb_config.backend_type or OmopEmbConfig.get_config().backend backend = resolve_backend(backend_type) diff --git a/src/omop_graph/graph/paths.py b/src/omop_graph/graph/paths.py index 20a472d..c795aff 100644 --- a/src/omop_graph/graph/paths.py +++ b/src/omop_graph/graph/paths.py @@ -29,6 +29,7 @@ ) # Local Application Imports +from omop_graph.config import OmopGraphConfig from omop_graph.extensions.omop_alchemy import PredicateKind from omop_graph.graph.edges import EdgeView from omop_graph.graph.traverse import GraphTrace, TraceStep @@ -220,9 +221,9 @@ def find_shortest_paths( source: int, target: int, predicate_kinds: Optional[frozenset[PredicateKind]] = None, - max_depth: int = 6, + max_depth: Optional[int] = None, on: Optional[Any] = None, - max_paths: int = 20, + max_paths: Optional[int] = None, traced: bool = False, within_domain: bool = True, ) -> Tuple[List[GraphPath], Optional[GraphTrace]]: @@ -257,6 +258,12 @@ def find_shortest_paths( tuple[list[GraphPath], GraphTrace | None] A list of paths and optionally the trace object. 
""" + cfg = OmopGraphConfig.get_config() + if max_depth is None: + max_depth = cfg.max_depth + if max_paths is None: + max_paths = cfg.max_paths + if source == target: path = GraphPath(steps=()) trace = ( @@ -422,9 +429,9 @@ def find_shortest_paths_batch( source: int, target: int, predicate_kinds: Union[Set[PredicateKind], frozenset[PredicateKind], None] = None, - max_depth: int = 6, + max_depth: Optional[int] = None, on: Optional[Any] = None, - max_paths: int = 20, + max_paths: Optional[int] = None, within_domain: bool = True, ) -> List[GraphPath]: """ @@ -462,6 +469,12 @@ def find_shortest_paths_batch( if source == target: return [GraphPath(steps=())] + cfg = OmopGraphConfig.get_config() + if max_depth is None: + max_depth = cfg.max_depth + if max_paths is None: + max_paths = cfg.max_paths + # Frontiers: The set of nodes we are currently expanding fwd_frontier = {source} bwd_frontier = {target} @@ -641,7 +654,7 @@ def find_standard_paths( target: int, candidate: CandidateHit, predicate_kinds: Optional[frozenset[Any]] = None, - max_depth: int = 6, + max_depth: Optional[int] = None, max_concepts: Optional[int] = None, within_domain: bool = True, *args, @@ -683,6 +696,9 @@ def find_standard_paths( list[StandardConcept] The resolved standard concepts that satisfy the ancestor constraint. 
""" + if max_depth is None: + max_depth = OmopGraphConfig.get_config().max_depth + source_view = kg.concept_view(candidate.concept_id) source_is_std = source_view.standard_concept if source_view else False diff --git a/src/omop_graph/graph/queries.py b/src/omop_graph/graph/queries.py index 04fc789..c76b2a7 100644 --- a/src/omop_graph/graph/queries.py +++ b/src/omop_graph/graph/queries.py @@ -21,7 +21,7 @@ from sqlalchemy.orm import aliased from sqlalchemy.sql import Select -from omop_alchemy.cdm.handlers.fulltext import ( +from omop_alchemy.backends import ( CONCEPT_NAME_TSVECTOR_COLUMN, CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN, FullTextError diff --git a/src/omop_graph/oaklib_interface/omop_factory.py b/src/omop_graph/oaklib_interface/omop_factory.py index d52d6e0..db1ca7a 100644 --- a/src/omop_graph/oaklib_interface/omop_factory.py +++ b/src/omop_graph/oaklib_interface/omop_factory.py @@ -1,83 +1,14 @@ -import os +"""Factory for creating OAK OMOP ontology resources.""" + +from __future__ import annotations + from typing import Optional, Union from sqlalchemy.engine import URL from .omop_resource import OMOPOntologyResource -from omop_graph.config import ( - ENV_OMOP_CDM_DB_URL, - ENV_OMOP_CDM_DB_HOST, - ENV_OMOP_CDM_DB_NAME, - ENV_OMOP_CDM_DB_PASSWORD, - ENV_OMOP_CDM_DB_PORT, - ENV_OMOP_CDM_DB_USER, - ENV_OMOP_CDM_DB_DRIVER, -) - - -def build_engine_string() -> URL: - """Compose a SQLAlchemy ``URL`` for the OMOP CDM database from environment variables. - - Returns - ------- - sqlalchemy.URL - - Notes - ----- - If ``OMOP_CDM_DB_URL`` is set it is used as-is for any backend, allowing - callers to supply a fully-qualified connection string without setting the - individual component variables. - - Raises - ------ - RuntimeError - If a required environment variable is missing. 
- """ - from sqlalchemy import URL - from sqlalchemy.engine import make_url - - optional_url = os.getenv(ENV_OMOP_CDM_DB_URL) - if optional_url: - return make_url(optional_url) - - # Required variables for composing the URL - driver = _get_required_env_variable(ENV_OMOP_CDM_DB_DRIVER) - user = _get_required_env_variable(ENV_OMOP_CDM_DB_USER) - password = _get_required_env_variable(ENV_OMOP_CDM_DB_PASSWORD) - host = _get_required_env_variable(ENV_OMOP_CDM_DB_HOST) - database = _get_required_env_variable(ENV_OMOP_CDM_DB_NAME) - port = int(_get_required_env_variable(ENV_OMOP_CDM_DB_PORT)) - return URL.create( - drivername=driver, - username=user, - password=password, - host=host, - port=port, - database=database, - ) - -def _get_required_env_variable(name: str) -> str: - """Get the value of a required environment variable. - - Parameters - ---------- - name : str - Environment variable name. - - Returns - ------- - str - Environment variable value. - - Raises - ------ - RuntimeError - If the environment variable is not set. - """ - value = os.getenv(name) - if value is None: - raise RuntimeError(f"Required environment variable {name!r} is not set.") - return value +from oa_configurator import Resolver +from omop_alchemy.config import OmopAlchemyConfig def omop_resource( @@ -85,41 +16,28 @@ def omop_resource( url: Optional[Union[str, URL]] = None, slug: Optional[str] = "omop", ) -> OMOPOntologyResource: - """ - Create an OMOP DatabaseOntologyResource. + """Create an OMOP DatabaseOntologyResource. - This factory function resolves the database connection string by prioritizing - an explicit URL argument. If no URL is provided, it attempts to read from - the specified environment variable. + When url is omitted, reads connection details from the active oa-configurator + stack config. Pass url explicitly to override. Parameters ---------- url : str | URL, optional - The explicit database connection URL (highest priority). 
- env_var : str, optional - The name of the environment variable to check if `url` is None. - Defaults to 'OMOP_CDM_DB_URL'. + Explicit database connection URL. When omitted the active oa-configurator + config is used. slug : str, optional - A slug identifier for the resource. Defaults to 'omop'. + Slug identifier for the resource. Defaults to 'omop'. Returns ------- OMOPOntologyResource - The configured resource object. - - Raises - ------ - ValueError - If neither `url` is provided nor the `env_var` is set. """ - resolved = url or build_engine_string() - - if not resolved: - raise ValueError( - f"No database URL provided and required environment variables not set" - ) + if url is None: + resource = Resolver.from_active_config().resolve_resource(OmopAlchemyConfig.CDM_DB.semantic_name) + url = resource.database.url return OMOPOntologyResource( slug=slug, - url=resolved, - ) \ No newline at end of file + url=url, + ) diff --git a/src/omop_graph/oaklib_interface/omop_implementation.py b/src/omop_graph/oaklib_interface/omop_implementation.py index 0987880..7007300 100644 --- a/src/omop_graph/oaklib_interface/omop_implementation.py +++ b/src/omop_graph/oaklib_interface/omop_implementation.py @@ -4,7 +4,7 @@ from typing import Dict, Iterable, Iterator, List, Optional, Tuple import numpy as np -from dotenv import load_dotenv + from linkml_runtime.linkml_model.annotations import Annotation from oaklib.datamodels.search import ( SearchConfiguration, @@ -324,7 +324,6 @@ def split_annotations(ann): vocabularies=vocabs, require_standard=parent_ids is None, ), - max_depth=6, predicate_kinds=frozenset([PredicateKind.IDENTITY]), ) @@ -874,7 +873,6 @@ def __init__( self.engine_string = engine_string self.resource = resource or omop_resource(url=self.engine_string) else: - load_dotenv() self.resource = resource or omop_resource() self.engine_string = self.resource.url diff --git a/src/omop_graph/reasoning/grounding.py b/src/omop_graph/reasoning/grounding.py index b1618c0..7d983d0 
100644 --- a/src/omop_graph/reasoning/grounding.py +++ b/src/omop_graph/reasoning/grounding.py @@ -14,11 +14,12 @@ from __future__ import annotations import logging -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import List, Optional, Tuple import numpy as np +from omop_graph.config import OmopGraphConfig from omop_graph.extensions.omop_alchemy import PredicateKind from omop_graph.graph.constraints import SearchConstraintConcept from omop_graph.graph.kg import KnowledgeGraph @@ -59,8 +60,8 @@ class GroundingConstraints: parent_ids: Optional[Tuple[int, ...]] search_constraint: Optional[SearchConstraintConcept] - max_depth: int = 6 - predicate_kinds: frozenset[PredicateKind] = frozenset({PredicateKind.IDENTITY,}) + max_depth: int = field(default_factory=lambda: OmopGraphConfig.get_config().max_depth) + predicate_kinds: frozenset[PredicateKind] = frozenset({PredicateKind.IDENTITY}) def ground_term( diff --git a/src/omop_graph/reasoning/phenotypes/phenotype_simplifier.py b/src/omop_graph/reasoning/phenotypes/phenotype_simplifier.py index dd9da2b..e46fabc 100644 --- a/src/omop_graph/reasoning/phenotypes/phenotype_simplifier.py +++ b/src/omop_graph/reasoning/phenotypes/phenotype_simplifier.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass, field, asdict +from dataclasses import dataclass, field from collections import defaultdict from typing import Dict, Set, List from omop_graph.graph.kg import KnowledgeGraph diff --git a/src/omop_graph/reasoning/resolvers/__init__.py b/src/omop_graph/reasoning/resolvers/__init__.py index 20997dc..9359f30 100644 --- a/src/omop_graph/reasoning/resolvers/__init__.py +++ b/src/omop_graph/reasoning/resolvers/__init__.py @@ -1,2 +1,11 @@ from .resolvers import CandidateHit, ExactLabelResolver, ExactSynonymResolver, PartialLabelResolver, CandidateResolver -from .resolver_pipeline import ResolverPipeline \ No newline at end of file +from .resolver_pipeline import ResolverPipeline + +__all__ = [ + 
"CandidateHit", + "ExactLabelResolver", + "ExactSynonymResolver", + "PartialLabelResolver", + "CandidateResolver", + "ResolverPipeline", +] diff --git a/src/omop_graph/render/auto.py b/src/omop_graph/render/auto.py index a21aa79..4219acb 100644 --- a/src/omop_graph/render/auto.py +++ b/src/omop_graph/render/auto.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Literal, Optional +from typing import Literal from omop_graph.graph.paths import GraphPath, PathExplanation from omop_graph.graph.traverse import Subgraph, GraphTrace @@ -28,7 +28,7 @@ def _in_notebook() -> bool: try: - from IPython.core.getipython import get_ipython + from IPython.core.getipython import get_ipython # type: ignore[import-unresolved] ip = get_ipython() return ip is not None and "IPKernelApp" in ip.config except Exception: diff --git a/src/omop_graph/render/text.py b/src/omop_graph/render/text.py index d008b16..ca8f138 100644 --- a/src/omop_graph/render/text.py +++ b/src/omop_graph/render/text.py @@ -6,7 +6,7 @@ def subgraph_text(kg, sg: Subgraph) -> str: lines = [ - f"Subgraph:", + "Subgraph:", f" Nodes: {len(sg.nodes)}", f" Edges: {len(sg.edges)}", "", diff --git a/src/omop_graph/utils/text_utils.py b/src/omop_graph/utils/text_utils.py index 848f550..0ff6be1 100644 --- a/src/omop_graph/utils/text_utils.py +++ b/src/omop_graph/utils/text_utils.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import Protocol, Iterable, Tuple, TYPE_CHECKING +from typing import Protocol, Iterable, Tuple try: from cava_nlp import CaVaLang # type: ignore diff --git a/tests/test_embedding_optional.py b/tests/test_embedding_optional.py index a9a4b9f..658f65d 100644 --- a/tests/test_embedding_optional.py +++ b/tests/test_embedding_optional.py @@ -3,15 +3,12 @@ from __future__ import annotations import builtins -import contextlib import logging -from types import SimpleNamespace from typing import cast from unittest.mock import Mock import numpy as np import pytest -from 
sqlalchemy import Engine from omop_graph.extensions import emb as emb_ext from omop_graph.extensions.emb import MissingExtensionError diff --git a/tests/test_fulltext_optional.py b/tests/test_fulltext_optional.py index e70b730..0dcd2cf 100644 --- a/tests/test_fulltext_optional.py +++ b/tests/test_fulltext_optional.py @@ -1,29 +1,17 @@ import pytest from sqlalchemy import Engine -from omop_alchemy.cdm.handlers.fulltext import ( - CONCEPT_NAME_TSVECTOR_COLUMN, - CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN, - register_optional_fulltext_columns, - unregister_optional_fulltext_columns, -) -from omop_alchemy.cdm.model.vocabulary import Concept, Concept_Synonym -from omop_alchemy.cdm.handlers.fulltext import FullTextError - +from omop_alchemy.backends import FullTextError from omop_graph.graph.queries import q_concept_name_fulltext @pytest.mark.parametrize("synonym", [False, True]) -def test_fulltext_query_requires_registered_tsvector_columns(synonym: bool, mock_cdm_engine: Engine): - """Full-text queries fail cleanly when optional tsvector metadata is absent.""" - had_name_column = CONCEPT_NAME_TSVECTOR_COLUMN in Concept.__table__.c - had_synonym_column = CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN in Concept_Synonym.__table__.c +def test_fulltext_query_requires_tsvector_columns(synonym: bool, mock_cdm_engine: Engine): + """Full-text query raises FullTextError when tsvector columns are absent from the database. - unregister_optional_fulltext_columns() - try: - with pytest.raises(FullTextError): - q_concept_name_fulltext("kidney cancer", synonym=synonym, engine=mock_cdm_engine) - finally: - if had_name_column or had_synonym_column: - register_optional_fulltext_columns() \ No newline at end of file + The mock CDM engine is SQLite and never has tsvector columns, so the guard in + q_concept_name_fulltext (which inspects the live DB schema) always fires here. + """ + with pytest.raises(FullTextError): + q_concept_name_fulltext("kidney cancer", synonym=synonym, engine=mock_cdm_engine)