diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..e222298
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,5 @@
+# Copy to .env and edit to override defaults. All vars below are optional;
+# docker-compose uses the shown defaults when .env is absent.
+OMOP_CDM_DB_USER=omop
+OMOP_CDM_DB_PASSWORD=omop
+OMOP_CDM_DB_NAME=omop_cdm
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..208b49a
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,6 @@
+FROM python:3.12-slim
+WORKDIR /workspace
+# Copy the project into the image first: `pip install ".[extras]"` installs the
+# package found in the current directory, which is empty in a bare base image.
+COPY . .
+RUN pip install --no-cache-dir ".[postgres,emb,pgvector,faiss-cpu]"
diff --git a/README.md b/README.md
index a6d16a3..428c27a 100644
--- a/README.md
+++ b/README.md
@@ -114,3 +114,41 @@ omop_graph/
├── api.py # stable public API surface
└── db/ # session helpers
```
+
+---
+
+## Configuration
+
+omop-graph reads database connection settings from
+[oa-configurator](https://github.com/AustralianCancerDataNetwork/oa-configurator).
+It requires the CDM database configured by omop-alchemy.
+
+Run once after installation:
+
+```bash
+omop-config init
+omop-config configure omop_alchemy
+omop-config configure omop_graph
+```
+
+See [Configuration](docs/getting-started/configuration.md) for full details.
+
+---
+
+## Docker Compose
+
+The included `docker-compose.yaml` provides a PostgreSQL CDM database and a Python
+container with all optional backends pre-installed (`[postgres,emb,pgvector,faiss-cpu]`).
+Default credentials work out of the box:
+
+```bash
+docker compose up
+```
+
+The `python-graph` service runs `omop-config configure` at startup. To override
+credentials:
+
+```bash
+cp .env.example .env
+docker compose up
+```
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 9035ad9..ab13e57 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -2,23 +2,62 @@ services:
omop-cdm-db:
image: postgres:16-alpine
restart: always
- env_file: .env
environment:
- - POSTGRES_USER=${OMOP_CDM_DB_USER:-omop}
- - POSTGRES_PASSWORD=${OMOP_CDM_DB_PASSWORD:-omop}
- - POSTGRES_DB=${OMOP_CDM_DB_NAME:-omop}
- - PGDATA=/var/lib/postgresql/data/pgdata
+ POSTGRES_USER: ${OMOP_CDM_DB_USER:-omop}
+ POSTGRES_PASSWORD: ${OMOP_CDM_DB_PASSWORD:-omop}
+ POSTGRES_DB: ${OMOP_CDM_DB_NAME:-omop_cdm}
+ PGDATA: /var/lib/postgresql/data/pgdata
volumes:
- db_data:/var/lib/postgresql/data
+ ports:
+ - "5432:5432"
networks:
- omop-net
+ command: >
+ postgres
+ -c shared_buffers=512MB
+ -c effective_cache_size=1GB
+ -c work_mem=128MB
+ -c maintenance_work_mem=512MB
+ -c max_wal_size=4GB
+ -c min_wal_size=512MB
+ -c wal_buffers=16MB
+ -c wal_compression=zstd
+ -c full_page_writes=off
+ -c checkpoint_timeout=30min
+ -c synchronous_commit=off
+ -c max_parallel_workers_per_gather=2
+ -c max_worker_processes=4
+ -c max_parallel_maintenance_workers=2
healthcheck:
- test: ["CMD-SHELL", "pg_isready -U ${OMOP_CDM_DB_USER:-omop} -d ${OMOP_CDM_DB_NAME:-omop}"]
+ test: ["CMD-SHELL", "pg_isready -U ${OMOP_CDM_DB_USER:-omop} -d ${OMOP_CDM_DB_NAME:-omop_cdm}"]
interval: 5s
timeout: 5s
retries: 5
- ports:
- - "5432:5432"
+
+ python-graph:
+ build: .
+ restart: unless-stopped
+ depends_on:
+ omop-cdm-db:
+ condition: service_healthy
+ volumes:
+ - ${HOME}/.config/omop:/root/.config/omop
+ networks:
+ - omop-net
+ command: >
+ bash -c "
+ omop-config configure omop_alchemy
+ --database cdm --dialect postgresql+psycopg
+ --host omop-cdm-db --port 5432
+ --user '${OMOP_CDM_DB_USER:-omop}'
+ --password '${OMOP_CDM_DB_PASSWORD:-omop}'
+ --database-name '${OMOP_CDM_DB_NAME:-omop_cdm}'
+ --cdm-schema omop &&
+ omop-config configure orm_loader &&
+ omop-config configure omop_graph &&
+ sleep infinity
+ "
networks:
omop-net:
diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md
new file mode 100644
index 0000000..6a1de22
--- /dev/null
+++ b/docs/getting-started/configuration.md
@@ -0,0 +1,82 @@
+# Configuration
+
+omop-graph reads all database connection and schema settings from
+[oa-configurator](https://github.com/AustralianCancerDataNetwork/oa-configurator).
+No environment variables are needed for the Python package itself.
+
+## Quick start
+
+omop-graph requires the CDM database configured by omop-alchemy. If you have not
+already done so, configure omop-alchemy first:
+
+```bash
+omop-config init # creates ~/.config/omop/config.toml if absent
+omop-config configure omop_alchemy
+omop-config configure omop_graph
+```
+
+## What gets configured
+
+omop-graph does not own any database resources. It reads from the `cdm_db` resource
+configured by omop-alchemy and stores any package-specific settings (traversal depth,
+path limits) under `[tools.omop_graph]` in `config.toml`.
+
+## Verify
+
+```bash
+omop-config verify
+```
+
+## Docker Compose
+
+The included `docker-compose.yaml` spins up a PostgreSQL CDM database and a
+`python-graph` container. Default credentials work out of the box:
+
+```bash
+docker compose up
+```
+
+The `python-graph` container runs `omop-config configure` for `omop_alchemy`, `orm_loader`,
+and `omop_graph` at startup. The host's `~/.config/omop` directory is bind-mounted into the container, so the resulting settings are written to your `~/.config/omop/config.toml`.
+This is safe to re-run on subsequent starts: connection flags always apply, and any values already stored in `config.toml` are preserved for fields not explicitly provided.
+
+### Overriding default values
+
+The compose file uses built-in defaults for all database credentials. To use different
+values, create a `.env` file in the repository root (next to `docker-compose.yaml`) with any of the following variables:
+
+| Variable | Default | Description |
+|---|---|---|
+| `OMOP_CDM_DB_USER` | `omop` | CDM database username |
+| `OMOP_CDM_DB_PASSWORD` | `omop` | CDM database password |
+| `OMOP_CDM_DB_NAME` | `omop_cdm` | CDM database name |
+
+Copy the example and edit as needed:
+
+```bash
+cp .env.example .env
+# edit .env
+docker compose up
+```
+
+The `.env` file is only read by Docker Compose for variable substitution — it is not
+loaded by omop-graph at runtime.
+
+## Multiple instances
+
+omop-graph reads from the `cdm_db` resource owned by omop-alchemy. To point
+it at a second CDM database (e.g. for production), configure omop-alchemy with
+a second resource:
+
+```bash
+omop-config configure omop_alchemy --resource-name cdm_db_prod
+```
+
+Configure automatically prompts you to choose the default at the end of the same
+run — no second invocation needed.
+
+See the [oa-configurator integration guide](https://AustralianCancerDataNetwork.github.io/oa-configurator/integration/#multiple-environments) for the full multi-environment guide.
+
+## Further reading
+
+- [oa-configurator integration guide](https://AustralianCancerDataNetwork.github.io/oa-configurator/integration/) — full config reference, profiles, multi-package setups
diff --git a/docs/usage/cli.md b/docs/usage/cli.md
index 1bf5d59..ead2de4 100644
--- a/docs/usage/cli.md
+++ b/docs/usage/cli.md
@@ -2,6 +2,17 @@
The OMOP CDM instantiation tool provides a streamlined way to bootstrap a local OHDSI Common Data Model (CDM) database using Athena vocabulary files and synthetic test data.
+!!! note "Verbosity flag placement"
+ The `--verbose` / `-v` flag is a **global option** and must appear **before** the
+ subcommand name, not after it:
+
+ ```
+ omop-graph -v relationship-classification ... # ✓ correct
+ omop-graph relationship-classification -v ... # ✗ rejected: no such option
+ ```
+
+ Use `-v` for INFO level and `-vv` for DEBUG level.
+
---
## `omop-cdm`
diff --git a/mkdocs.yml b/mkdocs.yml
index 3caabec..9c7a39e 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -31,6 +31,7 @@ nav:
- Home: index.md
- Getting Started:
- Installation: usage/installation.md
+ - Configuration: getting-started/configuration.md
- "CLI Reference": usage/cli.md
- Testing: usage/testing.md
- Core Components:
diff --git a/notebooks/01_quickstart.ipynb b/notebooks/01_quickstart.ipynb
index 7206a56..6e6f6c4 100644
--- a/notebooks/01_quickstart.ipynb
+++ b/notebooks/01_quickstart.ipynb
@@ -21,24 +21,20 @@
"metadata": {},
"outputs": [],
"source": [
- "from sqlalchemy import create_engine\n",
"from sqlalchemy.orm import sessionmaker\n",
"from dotenv import load_dotenv\n",
"\n",
- "from omop_graph.graph.scoring import find_shortest_paths, rank_paths, explain_path\n",
- "from omop_graph.graph.traverse import traverse\n",
+ "from omop_graph.graph.scoring import rank_paths, explain_path\n",
"from omop_graph.graph.paths import find_shortest_paths\n",
"from omop_graph.graph.kg import KnowledgeGraph\n",
"from omop_graph.graph.edges import PredicateKind\n",
"from omop_graph.render import (\n",
- " render_subgraph,\n",
" render_trace,\n",
" render_path,\n",
- " render_explained_path,\n",
" bind_default_renderers,\n",
")\n",
"from orm_loader.helpers import configure_logging\n",
- "from omop_alchemy import get_engine_name, TEST_PATH, ROOT_PATH\n",
+ "from omop_alchemy import get_engine_name\n",
"import sqlalchemy as sa\n"
]
},
@@ -157,7 +153,7 @@
"source": [
"## Note on domain scope:\n",
"\n",
- "By default, traversal is restricted to relationships within the same OMOP domain (e.g. Drug -> Drug, Condition -> Condition). This avoids misleading “shortcut” paths through terminology metadata. \n",
+ "By default, traversal is restricted to relationships within the same OMOP domain (e.g. Drug -> Drug, Condition -> Condition). This avoids misleading \u201cshortcut\u201d paths through terminology metadata. \n",
"\n",
"Cross-domain reasoning (e.g. Drug -> Condition) requires explicit biomedical relationships and is intentionally out of scope for the default - it also frequently defaults to structural or metadata relationsips such as SNOMED 'has module' which becomes rapidly non-specific if all terms resolve in 1 or 2 steps to an extreme high level parent.\n"
]
@@ -218,142 +214,142 @@
" max-width:420px;\n",
" \">\n",
"
\n",
- " 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet ✅\n",
+ " 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet \u2705\n",
"
\n",
" \n",
" RxNorm:860975\n",
- " · Drug\n",
- " · Quant Clinical Drug\n",
+ " \u00b7 Drug\n",
+ " \u00b7 Quant Clinical Drug\n",
"
\n",
" \n",
" \n",
" \n",
" \n",
- " └─ Standard to Non-standard map (OMOP)\n",
+ " \u2514\u2500 Standard to Non-standard map (OMOP)\n",
"
\n",
" \n",
" \n",
- " → 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n",
+ " \u2192 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n",
"
\n",
" \n",
" \n",
- " → 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n",
+ " \u2192 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n",
"
\n",
" \n",
" \n",
- " → 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n",
+ " \u2192 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n",
"
\n",
" \n",
" \n",
- " → 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n",
+ " \u2192 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n",
"
\n",
" \n",
" \n",
- " → 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n",
+ " \u2192 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n",
"
\n",
" \n",
" \n",
- " … 920 more\n",
+ " \u2026 920 more\n",
"
\n",
" \n",
" \n",
- " └─ RxNorm to SPL (NLM)\n",
+ " \u2514\u2500 RxNorm to SPL (NLM)\n",
"
\n",
" \n",
" \n",
- " → METFORMIN ER 500 MG - metformin er 500 mg tablet\n",
+ " \u2192 METFORMIN ER 500 MG - metformin er 500 mg tablet\n",
"
\n",
" \n",
" \n",
- " → METFORMIN HYDROCHLORIDE - metformin hydrochloride tablet, extended release\n",
+ " \u2192 METFORMIN HYDROCHLORIDE - metformin hydrochloride tablet, extended release\n",
"
\n",
" \n",
" \n",
- " → METFORMIN HYDROCHLORIDE - metformin hydrochloride tablet, extended release\n",
+ " \u2192 METFORMIN HYDROCHLORIDE - metformin hydrochloride tablet, extended release\n",
"
\n",
" \n",
" \n",
- " → METFORMIN HYDROCHLORIDE - metformin hydrochloride tablet, extended release\n",
+ " \u2192 METFORMIN HYDROCHLORIDE - metformin hydrochloride tablet, extended release\n",
"
\n",
" \n",
" \n",
- " → METFORMIN HYDROCHLORIDE - metformin hydrochloride tablet, extended release\n",
+ " \u2192 METFORMIN HYDROCHLORIDE - metformin hydrochloride tablet, extended release\n",
"
\n",
" \n",
" \n",
- " … 237 more\n",
+ " \u2026 237 more\n",
"
\n",
" \n",
" \n",
- " └─ Concept replaces\n",
+ " \u2514\u2500 Concept replaces\n",
"
\n",
" \n",
" \n",
- " → Metformin 500 MG 24 Hour Extended Release Tablet\n",
+ " \u2192 Metformin 500 MG 24 Hour Extended Release Tablet\n",
"
\n",
" \n",
" \n",
- " └─ Has dose form (RxNorm)\n",
+ " \u2514\u2500 Has dose form (RxNorm)\n",
"
\n",
" \n",
" \n",
- " → Extended Release Oral Tablet\n",
+ " \u2192 Extended Release Oral Tablet\n",
"
\n",
" \n",
" \n",
- " └─ RxNorm to SNOMED equivalent (RxNorm)\n",
+ " \u2514\u2500 RxNorm to SNOMED equivalent (RxNorm)\n",
"
\n",
" \n",
" \n",
- " → Metformin hydrochloride 500 mg prolonged-release oral tablet\n",
+ " \u2192 Metformin hydrochloride 500 mg prolonged-release oral tablet\n",
"
\n",
" \n",
" \n",
- " └─ Is a (RxNorm)\n",
+ " \u2514\u2500 Is a (RxNorm)\n",
"
\n",
" \n",
" \n",
- " → metformin Oral Product\n",
+ " \u2192 metformin Oral Product\n",
"
\n",
" \n",
" \n",
- " → metformin Pill\n",
+ " \u2192 metformin Pill\n",
"
\n",
" \n",
" \n",
- " └─ Non-standard to Standard map (OMOP)\n",
+ " \u2514\u2500 Non-standard to Standard map (OMOP)\n",
"
\n",
" \n",
" \n",
- " → 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n",
+ " \u2192 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet\n",
"
\n",
" \n",
" \n",
- " └─ Has tradename (RxNorm)\n",
+ " \u2514\u2500 Has tradename (RxNorm)\n",
"
\n",
" \n",
" \n",
- " → 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet [Glucophage]\n",
+ " \u2192 24 HR metformin hydrochloride 500 MG Extended Release Oral Tablet [Glucophage]\n",
"
\n",
" \n",
" \n",
- " └─ Quantified form of (RxNorm)\n",
+ " \u2514\u2500 Quantified form of (RxNorm)\n",
"
\n",
" \n",
" \n",
- " → metformin hydrochloride 500 MG Extended Release Oral Tablet\n",
+ " \u2192 metformin hydrochloride 500 MG Extended Release Oral Tablet\n",
"
\n",
" \n",
" \n",
- " └─ RxNorm to VA Product equivalent (NDF-RT)\n",
+ " \u2514\u2500 RxNorm to VA Product equivalent (NDF-RT)\n",
"
\n",
" \n",
" \n",
- " → METFORMIN HCL 500MG TAB,SA\n",
+ " \u2192 METFORMIN HCL 500MG TAB,SA\n",
"
\n",
" \n",
" \n",
- " → METFORMIN HCL 500MG 24HR TAB,SA\n",
+ " \u2192 METFORMIN HCL 500MG 24HR TAB,SA\n",
"
\n",
" \n",
" \n",
@@ -370,202 +366,202 @@
" max-width:420px;\n",
" \">\n",
"
\n",
- " metformin ✅\n",
+ " metformin \u2705\n",
"
\n",
"
\n",
" RxNorm:6809\n",
- " · Drug\n",
- " · Ingredient\n",
+ " \u00b7 Drug\n",
+ " \u00b7 Ingredient\n",
"
\n",
"
\n",
" \n",
" \n",
" \n",
- " └─ Brand name of (OMOP)\n",
+ " \u2514\u2500 Brand name of (OMOP)\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " … 521 more\n",
+ " \u2026 521 more\n",
"
\n",
" \n",
" \n",
- " └─ Has ingredient (RxNorm)\n",
+ " \u2514\u2500 Has ingredient (RxNorm)\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " … 113 more\n",
+ " \u2026 113 more\n",
"
\n",
" \n",
" \n",
- " └─ Non-standard to Standard map (OMOP)\n",
+ " \u2514\u2500 Non-standard to Standard map (OMOP)\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " … 91 more\n",
+ " \u2026 91 more\n",
"
\n",
" \n",
" \n",
- " └─ Drug Source to RxNorm equivalent (OMOP)\n",
+ " \u2514\u2500 Drug Source to RxNorm equivalent (OMOP)\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " … 10 more\n",
+ " \u2026 10 more\n",
"
\n",
" \n",
" \n",
- " └─ ATC to RxNorm/Extension primary lateral (OMOP)\n",
+ " \u2514\u2500 ATC to RxNorm/Extension primary lateral (OMOP)\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " … 16 more\n",
+ " \u2026 16 more\n",
"
\n",
" \n",
" \n",
- " └─ NDF-RT to RxNorm equivalent (RxNorm)\n",
+ " \u2514\u2500 NDF-RT to RxNorm equivalent (RxNorm)\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " └─ SNOMED to RxNorm equivalent (RxNorm)\n",
+ " \u2514\u2500 SNOMED to RxNorm equivalent (RxNorm)\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " └─ Standard to Non-standard map (OMOP)\n",
+ " \u2514\u2500 Standard to Non-standard map (OMOP)\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " └─ Form of (RxNorm)\n",
+ " \u2514\u2500 Form of (RxNorm)\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" \n",
" \n",
- " └─ ATC to RxNorm/Extension secondary upwards (OMOP)\n",
+ " \u2514\u2500 ATC to RxNorm/Extension secondary upwards (OMOP)\n",
"
\n",
" \n",
" \n",
- " → metformin\n",
+ " \u2192 metformin\n",
"
\n",
" [terminated: shortest_paths_found]
\n",
" \n",
@@ -717,4 +713,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
-}
+}
\ No newline at end of file
diff --git a/notebooks/02_reasoner.ipynb b/notebooks/02_reasoner.ipynb
index 9751697..21b5248 100644
--- a/notebooks/02_reasoner.ipynb
+++ b/notebooks/02_reasoner.ipynb
@@ -29,27 +29,16 @@
"metadata": {},
"outputs": [],
"source": [
- "from sqlalchemy import create_engine\n",
"from sqlalchemy.orm import sessionmaker\n",
"from dotenv import load_dotenv\n",
"\n",
- "from omop_graph.graph.scoring import find_shortest_paths, rank_paths, explain_path\n",
- "from omop_graph.graph.traverse import traverse\n",
- "from omop_graph.graph.paths import find_shortest_paths\n",
"from omop_graph.graph.kg import KnowledgeGraph\n",
- "from omop_graph.graph.edges import PredicateKind\n",
"from omop_graph.render import (\n",
- " render_subgraph,\n",
- " render_trace,\n",
- " render_path,\n",
- " render_explained_path,\n",
" bind_default_renderers,\n",
")\n",
"from orm_loader.helpers import configure_logging\n",
- "from omop_alchemy import get_engine_name, TEST_PATH, ROOT_PATH\n",
- "import sqlalchemy as sa\n",
- "from omop_alchemy.cdm.model.vocabulary import Concept, Concept_Ancestor, Concept_Relationship, Concept_Synonym\n",
- "import pandas as pd"
+ "from omop_alchemy import get_engine_name\n",
+ "import sqlalchemy as sa"
]
},
{
diff --git a/notebooks/03_phenotype_helper.ipynb b/notebooks/03_phenotype_helper.ipynb
index e5e2dcb..67f913b 100644
--- a/notebooks/03_phenotype_helper.ipynb
+++ b/notebooks/03_phenotype_helper.ipynb
@@ -14,15 +14,11 @@
"from dataclasses import asdict\n",
"from dotenv import load_dotenv\n",
"from orm_loader.helpers import configure_logging\n",
- "from omop_alchemy import get_engine_name, TEST_PATH, ROOT_PATH\n",
- "from omop_alchemy.cdm.model.vocabulary import Concept, Concept_Ancestor, Concept_Relationship, Concept_Synonym\n",
+ "from omop_alchemy import get_engine_name\n",
+ "from omop_alchemy.cdm.model.vocabulary import Concept\n",
"\n",
"from omop_graph.graph.kg import KnowledgeGraph\n",
"from omop_graph.render import (\n",
- " render_subgraph,\n",
- " render_trace,\n",
- " render_path,\n",
- " render_explained_path,\n",
" bind_default_renderers,\n",
")\n",
"from omop_graph.reasoning.phenotypes import (\n",
diff --git a/notebooks/04_mapping_support.ipynb b/notebooks/04_mapping_support.ipynb
index e69de29..1543f24 100644
--- a/notebooks/04_mapping_support.ipynb
+++ b/notebooks/04_mapping_support.ipynb
@@ -0,0 +1,16 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 5,
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python",
+ "version": "3.12.0"
+ }
+ },
+ "cells": []
+}
\ No newline at end of file
diff --git a/notebooks/99_figures.ipynb b/notebooks/99_figures.ipynb
index d389936..85911f0 100644
--- a/notebooks/99_figures.ipynb
+++ b/notebooks/99_figures.ipynb
@@ -29,27 +29,16 @@
"metadata": {},
"outputs": [],
"source": [
- "from sqlalchemy import create_engine\n",
"from sqlalchemy.orm import sessionmaker\n",
"from dotenv import load_dotenv\n",
"\n",
- "from omop_graph.graph.scoring import find_shortest_paths, rank_paths, explain_path\n",
- "from omop_graph.graph.traverse import traverse\n",
- "from omop_graph.graph.paths import find_shortest_paths\n",
"from omop_graph.graph.kg import KnowledgeGraph\n",
- "from omop_graph.graph.edges import PredicateKind\n",
"from omop_graph.render import (\n",
- " render_subgraph,\n",
- " render_trace,\n",
- " render_path,\n",
- " render_explained_path,\n",
" bind_default_renderers,\n",
")\n",
"from orm_loader.helpers import configure_logging\n",
- "from omop_alchemy import get_engine_name, TEST_PATH, ROOT_PATH\n",
- "import sqlalchemy as sa\n",
- "from omop_alchemy.cdm.model.vocabulary import Concept, Concept_Ancestor, Concept_Relationship, Concept_Synonym\n",
- "import pandas as pd"
+ "from omop_alchemy import get_engine_name\n",
+ "import sqlalchemy as sa"
]
},
{
diff --git a/pyproject.toml b/pyproject.toml
index fded00b..73c4074 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,6 +37,7 @@ dependencies = [
"typing-extensions>=4.15.0",
"typer",
"oaklib",
+ #"oa-configurator>=0.0.1", # uncomment once published to PyPI
]
[project.optional-dependencies]
@@ -74,6 +75,9 @@ dev = [
[project.scripts]
omop-graph = "omop_graph.cli:app"
+[project.entry-points."omop.config"]
+omop_graph = "omop_graph.config:OmopGraphConfig"
+
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
diff --git a/scripts/benchmarks/benchmark.py b/scripts/benchmarks/benchmark.py
index f726928..fd7adba 100644
--- a/scripts/benchmarks/benchmark.py
+++ b/scripts/benchmarks/benchmark.py
@@ -32,7 +32,7 @@
EmbeddingRole
)
from omop_emb.backends.index_config import index_config_from_index_type
-from omop_graph.cli import configure_logging_level
+from omop_graph.config import OmopGraphConfig
from omop_graph.extensions.emb import get_embedding_writer_interface, MissingExtensionError
from omop_graph.extensions.omop_alchemy import PredicateKind
from omop_graph.graph.constraints import SearchConstraintConcept
@@ -555,7 +555,7 @@ def run_benchmark(
] = None,
verbosity: Annotated[int, typer.Option("--verbose", "-v", count=True, help="Increase verbosity (up to two levels)")] = 0,
):
- configure_logging_level(verbosity)
+ OmopGraphConfig.configure_logging(verbosity=verbosity)
cases = load_cases(Path(cases_file))
if allowed_domains:
diff --git a/src/omop_graph/cli.py b/src/omop_graph/cli.py
index 0bd0c48..3a77ced 100644
--- a/src/omop_graph/cli.py
+++ b/src/omop_graph/cli.py
@@ -1,81 +1,49 @@
-import sqlalchemy as sa
-from sqlalchemy.orm import sessionmaker
-from typing import Annotated, Optional
-import pandas as pd
+import logging
+import tempfile
from pathlib import Path
+from typing import Annotated
-from dotenv import load_dotenv
+import pandas as pd
+import sqlalchemy as sa
import typer
-import tempfile
-import logging
+from sqlalchemy.orm import sessionmaker
from orm_loader.helpers import bulk_load_context
-from orm_loader.loaders.loader_interface import PandasLoader
from orm_loader.helpers.metadata import Base
+from orm_loader.loaders.loader_interface import PandasLoader
+from omop_graph.config import OmopGraphConfig
+from omop_graph.db.session import get_engine
from omop_graph.extensions.omop_alchemy import RelationshipClass, RelationshipMapping
-from omop_graph.oaklib_interface.omop_factory import build_engine_string
from .cli_utils.cli_add_test_data import populate_test_data
app = typer.Typer()
logger = logging.getLogger(__name__)
-def configure_logging_level(verbosity: int, reduce_logging: bool = False) -> None:
- """Configure global logging."""
- level_map = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG}
- log_level = level_map.get(min(verbosity, 2), logging.DEBUG)
-
- logging.basicConfig(
- level=log_level,
- format="%(asctime)s | %(name)s | %(levelname)s | %(message)s",
- datefmt="%Y-%m-%d %H:%M:%S",
- force=True,
- )
-
- if reduce_logging:
- exempt_loggers = ("omop_graph", "omop_emb")
-
- class _NamespaceAllowlistFilter(logging.Filter):
- def filter(self, record: logging.LogRecord) -> bool:
- return record.name.startswith(exempt_loggers)
-
- allowlist_filter = _NamespaceAllowlistFilter()
-
- root_logger = logging.getLogger()
- for handler in root_logger.handlers:
- handler.addFilter(allowlist_filter)
-
- existing_loggers = [logging.getLogger(name) for name in logging.root.manager.loggerDict]
- for logger_instance in existing_loggers:
- if logger_instance.name.startswith(exempt_loggers):
- continue
- logger_instance.setLevel(logging.CRITICAL + 1)
- logger_instance.propagate = False
+@app.callback()
+def _main(
+ verbose: Annotated[
+ int,
+ typer.Option("--verbose", "-v", count=True, help="Increase log verbosity (-v INFO, -vv DEBUG). Must come before the subcommand name."),
+ ] = 0,
+) -> None:
+ OmopGraphConfig.configure_logging(verbosity=verbose)
@app.command()
def populate_with_test_data():
- """
- Method to populate the database withsynthetic test data
- """
- engine_string = build_engine_string()
- engine = sa.create_engine(engine_string, future=True, echo=False)
+ """Populate the database with synthetic test data."""
+ engine = get_engine()
Session = sessionmaker(bind=engine, future=True)
populate_test_data(Session())
+
@app.command()
def relationship_classification(
pred_class_dir: Annotated[str, typer.Option(help="Path to the directory containing `predicate_classification.csv` and `predicate_mapping.csv`.")],
- env_file: Annotated[Optional[str], typer.Option("--env-file", "-e", help="Path to the .env file containing database connection variables. If not provided, will look for .env in the current working directory.")] = None,
- verbosity: Annotated[int, typer.Option("--verbose", "-v", count=True, help="Increase verbosity (up to two levels)")] = 0,
):
- """
- Method to get the pre-classified predicates into the database.
- """
- configure_logging_level(verbosity)
- load_dotenv(env_file)
-
+ """Load pre-classified predicates into the database."""
pred_class_dir_pl = Path(pred_class_dir)
if not pred_class_dir_pl.is_dir():
raise NotADirectoryError(f"{pred_class_dir} is not a valid directory.")
@@ -89,24 +57,21 @@ def relationship_classification(
df_class = pd.read_csv(pred_class_file)
df_mapping = pd.read_csv(pred_mapping_file)
-
# 1. RelationshipClass
df_rel_cls = df_class.rename(columns={"class": "predicate_kind", "subclass": "predicate_subkind"})
- # Only allow that a predicate_subkind maps exactly to one semantic and inference description
check = df_rel_cls.groupby(["predicate_kind", "predicate_subkind"])[["description", "semantics", "inference"]].nunique(dropna=True)
violations = check[(check > 1).any(axis=1)]
- if not violations.empty:
- conflicting_data = df_rel_cls[df_rel_cls["predicate_subkind"].isin(violations.index)].sort_values("predicate_subkind")
- logger.error(f"Validation Failed! {len(violations)} predicate_subkinds have conflicting definitions: {conflicting_data}")
+ if not violations.empty: # type: ignore[union-attr]
+ conflicting_data = df_rel_cls[df_rel_cls["predicate_subkind"].isin(violations.index)].sort_values("predicate_subkind") # type: ignore[union-attr, arg-type, call-overload]
+ logger.error(f"Validation Failed! {len(violations)} predicate_subkinds have conflicting definitions: {conflicting_data}")
raise AttributeError("Validation not passed")
df_rel_cls_to_export = df_rel_cls.groupby(["predicate_kind", "predicate_subkind"], as_index=False).first()
# 2. RelationshipMapping
df_rel_mapping = df_mapping.rename(columns={"class": "predicate_kind", "subclass": "predicate_subkind", "r_id": "relationship_id"})
- # Same order as relationship_class.py
- df_rel_mapping = df_rel_mapping[["relationship_id", "predicate_kind", "predicate_subkind"]].dropna(subset=['predicate_kind', 'predicate_subkind'], how='all')
+ df_rel_mapping = df_rel_mapping[["relationship_id", "predicate_kind", "predicate_subkind"]].dropna(subset=['predicate_kind', 'predicate_subkind'], how='all') # type: ignore[call-overload]
invalid_mask = df_rel_mapping[['predicate_kind', 'predicate_subkind']].isna().any(axis=1)
dropped_ids = df_rel_mapping.loc[invalid_mask, 'relationship_id'].unique().tolist()
@@ -115,25 +80,22 @@ def relationship_classification(
df_rel_mapping = df_rel_mapping.dropna(subset=['predicate_kind', 'predicate_subkind'], how='any')
df_rel_mapping_to_export = df_rel_mapping.drop_duplicates(subset=["relationship_id", "predicate_kind", "predicate_subkind"])
- engine_string = build_engine_string()
- engine = sa.create_engine(engine_string, future=True, echo=False)
+ engine = get_engine()
Session = sessionmaker(bind=engine, future=True)
session = Session()
- # Drop the tables
with engine.begin() as conn:
conn.execute(sa.text(f"DROP TABLE IF EXISTS {RelationshipMapping.staging_tablename()} CASCADE")) # type: ignore
conn.execute(sa.text(f"DROP TABLE IF EXISTS {RelationshipClass.staging_tablename()} CASCADE")) # type: ignore
conn.execute(sa.text("DROP TYPE IF EXISTS predicatekindenum CASCADE;"))
tables_to_drop = [
- RelationshipMapping.__table__,
- RelationshipClass.__table__
+ RelationshipMapping.__table__,
+ RelationshipClass.__table__,
]
Base.metadata.drop_all(bind=engine, tables=tables_to_drop, checkfirst=True) # type: ignore
Base.metadata.create_all(bind=engine, tables=tables_to_drop) # type: ignore
- # Save to temporary files named after the table (required by load_csv) and reload from there
with tempfile.TemporaryDirectory() as tmp_dir:
for model, df in zip([RelationshipClass, RelationshipMapping], [df_rel_cls_to_export, df_rel_mapping_to_export]):
csv_path = Path(tmp_dir) / f"{model.__tablename__}.csv"
@@ -150,5 +112,6 @@ def relationship_classification(
)
session.commit()
+
if __name__ == "__main__":
- app()
\ No newline at end of file
+ app()
diff --git a/src/omop_graph/cli_utils/__init__.py b/src/omop_graph/cli_utils/__init__.py
index 46056ea..8b12c2e 100644
--- a/src/omop_graph/cli_utils/__init__.py
+++ b/src/omop_graph/cli_utils/__init__.py
@@ -1 +1,3 @@
-from .cli_add_test_data import populate_test_data
\ No newline at end of file
+from .cli_add_test_data import populate_test_data
+
+__all__ = ["populate_test_data"]
diff --git a/src/omop_graph/config.py b/src/omop_graph/config.py
index 6d36bf2..4245b64 100644
--- a/src/omop_graph/config.py
+++ b/src/omop_graph/config.py
@@ -1,13 +1,35 @@
-"""General configuration for the omop graph, including envrionment variables."""
-
-# DB connection for OMOP CDM database
-ENV_OMOP_CDM_DB_URL = "OMOP_CDM_DB_URL"
-ENV_OMOP_CDM_DB_USER = "OMOP_CDM_DB_USER"
-ENV_OMOP_CDM_DB_PASSWORD = "OMOP_CDM_DB_PASSWORD"
-ENV_OMOP_CDM_DB_HOST = "OMOP_CDM_DB_HOST"
-ENV_OMOP_CDM_DB_PORT = "OMOP_CDM_DB_PORT"
-ENV_OMOP_CDM_DB_NAME = "OMOP_CDM_DB_NAME"
-ENV_OMOP_CDM_DB_DRIVER = "OMOP_CDM_DB_DRIVER"
-
-# Ingestion
-ENV_OMOP_VOCABULARY_DIR = "OMOP_VOCABULARY_DIR"
\ No newline at end of file
+"""Configuration for omop-graph via oa-configurator."""
+
+from __future__ import annotations
+
+from typing import ClassVar, Final
+
+from pydantic import Field
+from oa_configurator import PackageConfigBase, ResourceSpec
+from omop_alchemy.config import OmopAlchemyConfig
+
+TOOL_NAME: Final[str] = "omop_graph"
+
+
+class OmopGraphConfig(PackageConfigBase):
+ """oa-configurator config class for omop-graph.
+
+ omop-graph does not own any database resources. It requires the CDM
+ database configured by omop-alchemy.
+ """
+
+ tool_name: ClassVar[str] = TOOL_NAME  # package identifier, "omop_graph"
+ extra_logging_namespaces: ClassVar[tuple[str, ...]] = ("orm_loader", "omop_alchemy", "omop_emb")  # presumably logger namespaces configured alongside this tool; confirm against oa-configurator
+ required_resources: ClassVar[tuple[str, ...]] = (OmopAlchemyConfig.CDM_DB.semantic_name,)  # the CDM database resource declared by omop-alchemy
+ owned_resources: ClassVar[tuple[ResourceSpec, ...]] = ()  # empty: this package declares no resources of its own
+
+ max_depth: int = Field(  # fallback when callers leave max_depth unset (graph/paths.py, reasoning/grounding.py)
+ default=6,
+ description="Maximum graph traversal depth for pathfinding and grounding.",
+ )
+ max_paths: int = Field(  # fallback when the find_shortest_paths* functions receive max_paths=None
+ default=20,
+ description="Maximum number of shortest paths returned per query.",
+ )
+
+
diff --git a/src/omop_graph/db/session.py b/src/omop_graph/db/session.py
index 5e18c9a..bf504af 100644
--- a/src/omop_graph/db/session.py
+++ b/src/omop_graph/db/session.py
@@ -1,18 +1,19 @@
+"""SQLAlchemy engine helper for the OMOP CDM database."""
+
from __future__ import annotations
-import os
+
from typing import Optional, Union
-from sqlalchemy import create_engine, URL, make_url
+
+from sqlalchemy import create_engine, URL
from sqlalchemy.orm import sessionmaker, Session
-from omop_graph.config import (
- ENV_OMOP_CDM_DB_DRIVER,
- ENV_OMOP_CDM_DB_HOST,
- ENV_OMOP_CDM_DB_NAME,
- ENV_OMOP_CDM_DB_PASSWORD,
- ENV_OMOP_CDM_DB_PORT,
- ENV_OMOP_CDM_DB_URL,
- ENV_OMOP_CDM_DB_USER
-)
+from oa_configurator import Resolver
+from omop_alchemy.config import OmopAlchemyConfig
+
+
+def get_engine():
+ """Return a SQLAlchemy engine for the CDM database via oa-configurator."""
+ return Resolver.from_active_config().resolve_resource(OmopAlchemyConfig.CDM_DB.semantic_name).create_engine()  # NOTE(review): the active config is resolved on every call; whether the engine is cached is not visible here
def make_engine(
@@ -21,64 +22,21 @@ def make_engine(
echo: bool = False,
connect_timeout: int = 10,
):
- url = url or build_engine_string()
- if isinstance(url, str):
- url = URL.create(url)
+ """Return a SQLAlchemy engine.
+ When url is omitted, reads connection details from the active oa-configurator
+ stack config. Pass url explicitly to override.
+ """
+ if url is None:  # NOTE(review): the old `url or ...` also fell back for ""; an empty string now reaches make_url
+ return get_engine()  # NOTE(review): echo and connect_timeout are not applied on this path
+ from sqlalchemy import make_url as _make_url  # local import: make_url is no longer imported at module level
+ if isinstance(url, str):
+ url = _make_url(url)
kwargs = {}
if not url.drivername.startswith("sqlite"):
kwargs["connect_args"] = {"connect_timeout": connect_timeout}
-
return create_engine(url, echo=echo, **kwargs)
-def build_engine_string() -> "URL":
- """Compose a SQLAlchemy ``URL`` for the given backend at runtime.
-
- Returns
- -------
- sqlalchemy.URL
-
- Notes
- -----
- If ``OMOP_CDM_DB_URL`` is set it is directly used to create the URL, and all other environment variables are ignored.
- Otherwise, the following environment variables are read to compose the URL for a relational database backend:
- - ``OMOP_CDM_DB_DRIVER`` (required): the SQLAlchemy driver name (e.g. 'postgresql', 'mysql', 'sqlite').
- - ``OMOP_CDM_DB_USER`` (required): the username for database authentication.
- - ``OMOP_CDM_DB_PASSWORD`` (required): the password for database authentication.
- - ``OMOP_CDM_DB_HOST`` (required): the hostname or IP address of the database server.
- - ``OMOP_CDM_DB_NAME`` (required): the name of the database to connect to.
- - ``OMOP_CDM_DB_PORT`` (optional, default 5432): the port number on which the database server is listening.
-
- Raises
- ------
- RuntimeError
- If a required environment variable is missing.
- ValueError
- If ``backend`` does not support URL composition from environment
- variables (e.g. ``FAISS``).
- """
-
-
- optional_url = os.getenv(ENV_OMOP_CDM_DB_URL)
- if optional_url:
- return make_url(optional_url)
-
- driver = _get_required_env_variable(ENV_OMOP_CDM_DB_DRIVER)
- user = _get_required_env_variable(ENV_OMOP_CDM_DB_USER)
- password = _get_required_env_variable(ENV_OMOP_CDM_DB_PASSWORD)
- host = _get_required_env_variable(ENV_OMOP_CDM_DB_HOST)
- database = _get_required_env_variable(ENV_OMOP_CDM_DB_NAME)
- port_str = os.getenv(ENV_OMOP_CDM_DB_PORT, "5432")
- port = int(port_str) if port_str else None
- return URL.create(
- drivername=driver,
- username=user,
- password=password,
- host=host,
- port=port,
- database=database,
- )
-
def make_session(
url: str,
@@ -88,27 +46,3 @@ def make_session(
engine = make_engine(url, echo=echo)
SessionLocal = sessionmaker(bind=engine)
return SessionLocal()
-
-
-def _get_required_env_variable(name: str) -> str:
- """Get the value of a required environment variable.
-
- Parameters
- ----------
- name : str
- Environment variable name.
-
- Returns
- -------
- str
- Environment variable value.
-
- Raises
- ------
- RuntimeError
- If the environment variable is not set.
- """
- value = os.getenv(name)
- if value is None:
- raise RuntimeError(f"Required environment variable {name!r} is not set.")
- return value
\ No newline at end of file
diff --git a/src/omop_graph/extensions/emb.py b/src/omop_graph/extensions/emb.py
index c7f8090..822085d 100644
--- a/src/omop_graph/extensions/emb.py
+++ b/src/omop_graph/extensions/emb.py
@@ -151,12 +151,17 @@ def semantic_similarity(
if embedding_writer is not None:
missing_concept_ids = tuple(missing_sc_embeddings.keys())
- missing_concept_texts = tuple(missing_sc_embeddings.values())
+ missing_concept_texts = tuple(row.concept_name for row in missing_sc_embeddings.values())
+
+ from omop_emb.utils.cdm import fetch_cdm_concepts_for_filter
+ from omop_emb.utils.embedding_utils import EmbeddingConceptFilter as _ECF
+ missing_filter = _ECF(concept_ids=missing_concept_ids, limit=len(missing_concept_ids))
+ concept_meta = fetch_cdm_concepts_for_filter(missing_filter, cdm_engine=kg.cdm_engine)
embedding_writer.embed_and_upsert_concepts(
- omop_cdm_engine=kg.cdm_engine,
concept_ids=missing_concept_ids,
concept_texts=missing_concept_texts,
+ concept_meta=concept_meta,
)
logger.debug(f"Computed and stored embeddings for missing concepts: {missing_concept_ids}")
else:
diff --git a/src/omop_graph/graph/kg.py b/src/omop_graph/graph/kg.py
index fe7f900..e7209dc 100644
--- a/src/omop_graph/graph/kg.py
+++ b/src/omop_graph/graph/kg.py
@@ -17,14 +17,13 @@
import functools
import logging
import re
-import os
from datetime import date
from typing import Dict, Optional, Tuple, Literal, Generator, TYPE_CHECKING
from dataclasses import dataclass
from sqlalchemy import Engine
from sqlalchemy.orm import Session, sessionmaker
-from omop_alchemy.cdm.handlers.fulltext import FullTextError
+from omop_alchemy.backends import FullTextError
if TYPE_CHECKING:
from omop_emb import EmbeddingWriterInterface, EmbeddingReaderInterface, EmbeddingClient
@@ -161,15 +160,13 @@ def emb(self) -> "EmbeddingWriterInterface | EmbeddingReaderInterface":
try:
from omop_emb.interface import EmbeddingWriterInterface, EmbeddingReaderInterface
- from omop_emb.config import ENV_OMOP_EMB_BACKEND
+ from omop_emb.config import OmopEmbConfig
from omop_emb.backends.base_backend import resolve_backend
if self._emb_config is None:
raise ValueError("Embedding configuration is not set. Please provide an EmbeddingConfiguration when initializing the KnowledgeGraph to use embedding features.")
-
- backend_type = self._emb_config.backend_type or os.getenv(ENV_OMOP_EMB_BACKEND, None)
- if backend_type is None:
- raise ValueError(f"Embedding backend type must be specified either in the configuration or via the {ENV_OMOP_EMB_BACKEND} environment variable.")
+
+ backend_type = self._emb_config.backend_type or OmopEmbConfig.get_config().backend
backend = resolve_backend(backend_type)
diff --git a/src/omop_graph/graph/paths.py b/src/omop_graph/graph/paths.py
index 20a472d..c795aff 100644
--- a/src/omop_graph/graph/paths.py
+++ b/src/omop_graph/graph/paths.py
@@ -29,6 +29,7 @@
)
# Local Application Imports
+from omop_graph.config import OmopGraphConfig
from omop_graph.extensions.omop_alchemy import PredicateKind
from omop_graph.graph.edges import EdgeView
from omop_graph.graph.traverse import GraphTrace, TraceStep
@@ -220,9 +221,9 @@ def find_shortest_paths(
source: int,
target: int,
predicate_kinds: Optional[frozenset[PredicateKind]] = None,
- max_depth: int = 6,
+ max_depth: Optional[int] = None,
on: Optional[Any] = None,
- max_paths: int = 20,
+ max_paths: Optional[int] = None,
traced: bool = False,
within_domain: bool = True,
) -> Tuple[List[GraphPath], Optional[GraphTrace]]:
@@ -257,6 +258,12 @@ def find_shortest_paths(
tuple[list[GraphPath], GraphTrace | None]
A list of paths and optionally the trace object.
"""
+ cfg = OmopGraphConfig.get_config()  # NOTE(review): loaded even when both limits are passed explicitly, and before the source == target shortcut; find_standard_paths only loads it on demand
+ if max_depth is None:
+ max_depth = cfg.max_depth
+ if max_paths is None:
+ max_paths = cfg.max_paths
+
if source == target:
path = GraphPath(steps=())
trace = (
@@ -422,9 +429,9 @@ def find_shortest_paths_batch(
source: int,
target: int,
predicate_kinds: Union[Set[PredicateKind], frozenset[PredicateKind], None] = None,
- max_depth: int = 6,
+ max_depth: Optional[int] = None,
on: Optional[Any] = None,
- max_paths: int = 20,
+ max_paths: Optional[int] = None,
within_domain: bool = True,
) -> List[GraphPath]:
"""
@@ -462,6 +469,12 @@ def find_shortest_paths_batch(
if source == target:
return [GraphPath(steps=())]
+ cfg = OmopGraphConfig.get_config()  # NOTE(review): fetched even when both limits were passed explicitly; find_standard_paths only calls get_config() when max_depth is None
+ if max_depth is None:
+ max_depth = cfg.max_depth
+ if max_paths is None:
+ max_paths = cfg.max_paths
+
# Frontiers: The set of nodes we are currently expanding
fwd_frontier = {source}
bwd_frontier = {target}
@@ -641,7 +654,7 @@ def find_standard_paths(
target: int,
candidate: CandidateHit,
predicate_kinds: Optional[frozenset[Any]] = None,
- max_depth: int = 6,
+ max_depth: Optional[int] = None,
max_concepts: Optional[int] = None,
within_domain: bool = True,
*args,
@@ -683,6 +696,9 @@ def find_standard_paths(
list[StandardConcept]
The resolved standard concepts that satisfy the ancestor constraint.
"""
+ if max_depth is None:
+ max_depth = OmopGraphConfig.get_config().max_depth
+
source_view = kg.concept_view(candidate.concept_id)
source_is_std = source_view.standard_concept if source_view else False
diff --git a/src/omop_graph/graph/queries.py b/src/omop_graph/graph/queries.py
index 04fc789..c76b2a7 100644
--- a/src/omop_graph/graph/queries.py
+++ b/src/omop_graph/graph/queries.py
@@ -21,7 +21,7 @@
from sqlalchemy.orm import aliased
from sqlalchemy.sql import Select
-from omop_alchemy.cdm.handlers.fulltext import (
+from omop_alchemy.backends import (
CONCEPT_NAME_TSVECTOR_COLUMN,
CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN,
FullTextError
diff --git a/src/omop_graph/oaklib_interface/omop_factory.py b/src/omop_graph/oaklib_interface/omop_factory.py
index d52d6e0..db1ca7a 100644
--- a/src/omop_graph/oaklib_interface/omop_factory.py
+++ b/src/omop_graph/oaklib_interface/omop_factory.py
@@ -1,83 +1,14 @@
-import os
+"""Factory for creating OAK OMOP ontology resources."""
+
+from __future__ import annotations
+
from typing import Optional, Union
from sqlalchemy.engine import URL
from .omop_resource import OMOPOntologyResource
-from omop_graph.config import (
- ENV_OMOP_CDM_DB_URL,
- ENV_OMOP_CDM_DB_HOST,
- ENV_OMOP_CDM_DB_NAME,
- ENV_OMOP_CDM_DB_PASSWORD,
- ENV_OMOP_CDM_DB_PORT,
- ENV_OMOP_CDM_DB_USER,
- ENV_OMOP_CDM_DB_DRIVER,
-)
-
-
-def build_engine_string() -> URL:
- """Compose a SQLAlchemy ``URL`` for the OMOP CDM database from environment variables.
-
- Returns
- -------
- sqlalchemy.URL
-
- Notes
- -----
- If ``OMOP_CDM_DB_URL`` is set it is used as-is for any backend, allowing
- callers to supply a fully-qualified connection string without setting the
- individual component variables.
-
- Raises
- ------
- RuntimeError
- If a required environment variable is missing.
- """
- from sqlalchemy import URL
- from sqlalchemy.engine import make_url
-
- optional_url = os.getenv(ENV_OMOP_CDM_DB_URL)
- if optional_url:
- return make_url(optional_url)
-
- # Required variables for composing the URL
- driver = _get_required_env_variable(ENV_OMOP_CDM_DB_DRIVER)
- user = _get_required_env_variable(ENV_OMOP_CDM_DB_USER)
- password = _get_required_env_variable(ENV_OMOP_CDM_DB_PASSWORD)
- host = _get_required_env_variable(ENV_OMOP_CDM_DB_HOST)
- database = _get_required_env_variable(ENV_OMOP_CDM_DB_NAME)
- port = int(_get_required_env_variable(ENV_OMOP_CDM_DB_PORT))
- return URL.create(
- drivername=driver,
- username=user,
- password=password,
- host=host,
- port=port,
- database=database,
- )
-
-def _get_required_env_variable(name: str) -> str:
- """Get the value of a required environment variable.
-
- Parameters
- ----------
- name : str
- Environment variable name.
-
- Returns
- -------
- str
- Environment variable value.
-
- Raises
- ------
- RuntimeError
- If the environment variable is not set.
- """
- value = os.getenv(name)
- if value is None:
- raise RuntimeError(f"Required environment variable {name!r} is not set.")
- return value
+from oa_configurator import Resolver
+from omop_alchemy.config import OmopAlchemyConfig
def omop_resource(
@@ -85,41 +16,28 @@ def omop_resource(
url: Optional[Union[str, URL]] = None,
slug: Optional[str] = "omop",
) -> OMOPOntologyResource:
- """
- Create an OMOP DatabaseOntologyResource.
+ """Create an OMOP DatabaseOntologyResource.
- This factory function resolves the database connection string by prioritizing
- an explicit URL argument. If no URL is provided, it attempts to read from
- the specified environment variable.
+ When url is omitted, reads connection details from the active oa-configurator
+ stack config. Pass url explicitly to override.
Parameters
----------
url : str | URL, optional
- The explicit database connection URL (highest priority).
- env_var : str, optional
- The name of the environment variable to check if `url` is None.
- Defaults to 'OMOP_CDM_DB_URL'.
+ Explicit database connection URL. When omitted the active oa-configurator
+ config is used.
slug : str, optional
- A slug identifier for the resource. Defaults to 'omop'.
+ Slug identifier for the resource. Defaults to 'omop'.
Returns
-------
OMOPOntologyResource
- The configured resource object.
-
- Raises
- ------
- ValueError
- If neither `url` is provided nor the `env_var` is set.
"""
- resolved = url or build_engine_string()
-
- if not resolved:
- raise ValueError(
- f"No database URL provided and required environment variables not set"
- )
+ if url is None:  # NOTE(review): the old `url or ...` also treated "" as missing; an empty string is now passed through unchanged
+ resource = Resolver.from_active_config().resolve_resource(OmopAlchemyConfig.CDM_DB.semantic_name)
+ url = resource.database.url
return OMOPOntologyResource(
slug=slug,
- url=resolved,
- )
\ No newline at end of file
+ url=url,
+ )
diff --git a/src/omop_graph/oaklib_interface/omop_implementation.py b/src/omop_graph/oaklib_interface/omop_implementation.py
index 0987880..7007300 100644
--- a/src/omop_graph/oaklib_interface/omop_implementation.py
+++ b/src/omop_graph/oaklib_interface/omop_implementation.py
@@ -4,7 +4,7 @@
from typing import Dict, Iterable, Iterator, List, Optional, Tuple
import numpy as np
-from dotenv import load_dotenv
+
from linkml_runtime.linkml_model.annotations import Annotation
from oaklib.datamodels.search import (
SearchConfiguration,
@@ -324,7 +324,6 @@ def split_annotations(ann):
vocabularies=vocabs,
require_standard=parent_ids is None,
),
- max_depth=6,
predicate_kinds=frozenset([PredicateKind.IDENTITY]),
)
@@ -874,7 +873,6 @@ def __init__(
self.engine_string = engine_string
self.resource = resource or omop_resource(url=self.engine_string)
else:
- load_dotenv()
self.resource = resource or omop_resource()
self.engine_string = self.resource.url
diff --git a/src/omop_graph/reasoning/grounding.py b/src/omop_graph/reasoning/grounding.py
index b1618c0..7d983d0 100644
--- a/src/omop_graph/reasoning/grounding.py
+++ b/src/omop_graph/reasoning/grounding.py
@@ -14,11 +14,12 @@
from __future__ import annotations
import logging
-from dataclasses import dataclass
+from dataclasses import dataclass, field
from typing import List, Optional, Tuple
import numpy as np
+from omop_graph.config import OmopGraphConfig
from omop_graph.extensions.omop_alchemy import PredicateKind
from omop_graph.graph.constraints import SearchConstraintConcept
from omop_graph.graph.kg import KnowledgeGraph
@@ -59,8 +60,8 @@ class GroundingConstraints:
parent_ids: Optional[Tuple[int, ...]]
search_constraint: Optional[SearchConstraintConcept]
- max_depth: int = 6
- predicate_kinds: frozenset[PredicateKind] = frozenset({PredicateKind.IDENTITY,})
+ max_depth: int = field(default_factory=lambda: OmopGraphConfig.get_config().max_depth)
+ predicate_kinds: frozenset[PredicateKind] = frozenset({PredicateKind.IDENTITY})
def ground_term(
diff --git a/src/omop_graph/reasoning/phenotypes/phenotype_simplifier.py b/src/omop_graph/reasoning/phenotypes/phenotype_simplifier.py
index dd9da2b..e46fabc 100644
--- a/src/omop_graph/reasoning/phenotypes/phenotype_simplifier.py
+++ b/src/omop_graph/reasoning/phenotypes/phenotype_simplifier.py
@@ -1,4 +1,4 @@
-from dataclasses import dataclass, field, asdict
+from dataclasses import dataclass, field
from collections import defaultdict
from typing import Dict, Set, List
from omop_graph.graph.kg import KnowledgeGraph
diff --git a/src/omop_graph/reasoning/resolvers/__init__.py b/src/omop_graph/reasoning/resolvers/__init__.py
index 20997dc..9359f30 100644
--- a/src/omop_graph/reasoning/resolvers/__init__.py
+++ b/src/omop_graph/reasoning/resolvers/__init__.py
@@ -1,2 +1,11 @@
from .resolvers import CandidateHit, ExactLabelResolver, ExactSynonymResolver, PartialLabelResolver, CandidateResolver
-from .resolver_pipeline import ResolverPipeline
\ No newline at end of file
+from .resolver_pipeline import ResolverPipeline
+
+__all__ = [
+ "CandidateHit",
+ "ExactLabelResolver",
+ "ExactSynonymResolver",
+ "PartialLabelResolver",
+ "CandidateResolver",
+ "ResolverPipeline",
+]
diff --git a/src/omop_graph/render/auto.py b/src/omop_graph/render/auto.py
index a21aa79..4219acb 100644
--- a/src/omop_graph/render/auto.py
+++ b/src/omop_graph/render/auto.py
@@ -1,6 +1,6 @@
from __future__ import annotations
-from typing import Literal, Optional
+from typing import Literal
from omop_graph.graph.paths import GraphPath, PathExplanation
from omop_graph.graph.traverse import Subgraph, GraphTrace
@@ -28,7 +28,7 @@
def _in_notebook() -> bool:
try:
- from IPython.core.getipython import get_ipython
+ from IPython.core.getipython import get_ipython # type: ignore[import-unresolved]
ip = get_ipython()
return ip is not None and "IPKernelApp" in ip.config
except Exception:
diff --git a/src/omop_graph/render/text.py b/src/omop_graph/render/text.py
index d008b16..ca8f138 100644
--- a/src/omop_graph/render/text.py
+++ b/src/omop_graph/render/text.py
@@ -6,7 +6,7 @@
def subgraph_text(kg, sg: Subgraph) -> str:
lines = [
- f"Subgraph:",
+ "Subgraph:",
f" Nodes: {len(sg.nodes)}",
f" Edges: {len(sg.edges)}",
"",
diff --git a/src/omop_graph/utils/text_utils.py b/src/omop_graph/utils/text_utils.py
index 848f550..0ff6be1 100644
--- a/src/omop_graph/utils/text_utils.py
+++ b/src/omop_graph/utils/text_utils.py
@@ -1,5 +1,5 @@
from __future__ import annotations
-from typing import Protocol, Iterable, Tuple, TYPE_CHECKING
+from typing import Protocol, Iterable, Tuple
try:
from cava_nlp import CaVaLang # type: ignore
diff --git a/tests/test_embedding_optional.py b/tests/test_embedding_optional.py
index a9a4b9f..658f65d 100644
--- a/tests/test_embedding_optional.py
+++ b/tests/test_embedding_optional.py
@@ -3,15 +3,12 @@
from __future__ import annotations
import builtins
-import contextlib
import logging
-from types import SimpleNamespace
from typing import cast
from unittest.mock import Mock
import numpy as np
import pytest
-from sqlalchemy import Engine
from omop_graph.extensions import emb as emb_ext
from omop_graph.extensions.emb import MissingExtensionError
diff --git a/tests/test_fulltext_optional.py b/tests/test_fulltext_optional.py
index e70b730..0dcd2cf 100644
--- a/tests/test_fulltext_optional.py
+++ b/tests/test_fulltext_optional.py
@@ -1,29 +1,17 @@
import pytest
from sqlalchemy import Engine
-from omop_alchemy.cdm.handlers.fulltext import (
- CONCEPT_NAME_TSVECTOR_COLUMN,
- CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN,
- register_optional_fulltext_columns,
- unregister_optional_fulltext_columns,
-)
-from omop_alchemy.cdm.model.vocabulary import Concept, Concept_Synonym
-from omop_alchemy.cdm.handlers.fulltext import FullTextError
-
+from omop_alchemy.backends import FullTextError
from omop_graph.graph.queries import q_concept_name_fulltext
@pytest.mark.parametrize("synonym", [False, True])
-def test_fulltext_query_requires_registered_tsvector_columns(synonym: bool, mock_cdm_engine: Engine):
- """Full-text queries fail cleanly when optional tsvector metadata is absent."""
- had_name_column = CONCEPT_NAME_TSVECTOR_COLUMN in Concept.__table__.c
- had_synonym_column = CONCEPT_SYNONYM_NAME_TSVECTOR_COLUMN in Concept_Synonym.__table__.c
+def test_fulltext_query_requires_tsvector_columns(synonym: bool, mock_cdm_engine: Engine):
+ """Full-text query raises FullTextError when tsvector columns are absent from the database.
- unregister_optional_fulltext_columns()
- try:
- with pytest.raises(FullTextError):
- q_concept_name_fulltext("kidney cancer", synonym=synonym, engine=mock_cdm_engine)
- finally:
- if had_name_column or had_synonym_column:
- register_optional_fulltext_columns()
\ No newline at end of file
+ The mock CDM engine is SQLite and never has tsvector columns, so the guard in
+ q_concept_name_fulltext (which inspects the live DB schema) always fires here.
+ """
+ with pytest.raises(FullTextError):
+ q_concept_name_fulltext("kidney cancer", synonym=synonym, engine=mock_cdm_engine)