Skip to content

Commit eca6e86

Browse files
committed
refactored condition modifier queries and factories
1 parent f3aaeb8 commit eca6e86

18 files changed

Lines changed: 1032 additions & 133 deletions

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,7 @@
22
- created services layer for functions that execute queries and return data / session-bound
33

44
## 0.1.2
5-
- condition modifier queries and factories
5+
- condition modifier queries and factories
6+
7+
## 0.1.3
8+
- refactored condition modifier queries and factories

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
[project]
22
name = "omop-constructs"
3-
version = "0.1.2"
3+
version = "0.1.3"
44
description = "For building complex constructs on top of the omop-alchemy library."
55
readme = "README.md"
66
authors = [
77
{ name = "Georgie Kennedy", email = "georgina.kennedy@unsw.edu.au" }
88
]
99
requires-python = ">=3.12"
1010
dependencies = [
11-
"omop-alchemy==0.5.7",
12-
"omop-semantics==0.1.7",
11+
"omop-alchemy==0.5.11",
12+
"omop-semantics==0.1.8",
1313
"sqlalchemy>=2.0.45",
1414
"urllib3>=2.6.3"
1515
]

src/notebooks/staging_worked_example.ipynb

Lines changed: 324 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from .modifier_mappers import TStage, NStage, MStage, GroupStage, GradeModifier, LatModifier, SizeModifier
2+
3+
__all__ = [
4+
"TStage",
5+
"NStage",
6+
"MStage",
7+
"GroupStage",
8+
"GradeModifier",
9+
"LatModifier",
10+
"SizeModifier",
11+
]
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
2+
from omop_alchemy.cdm.model import Condition_Occurrence, Episode_Event, Concept
3+
import sqlalchemy as sa
4+
import sqlalchemy.orm as so
5+
6+
from omop_semantics.runtime.default_valuesets import runtime
7+
8+
from .modifier_mappers import (
9+
TStage,
10+
NStage,
11+
MStage,
12+
GroupStage,
13+
GradeModifier,
14+
SizeModifier,
15+
LatModifier,
16+
)
17+
18+
condition_concept = so.aliased(Concept, name='condition_concept')
19+
20+
modified_conditions_join = (
21+
sa.select(
22+
sa.func.row_number().over().label('mv_id'),
23+
Condition_Occurrence.person_id,
24+
Condition_Occurrence.condition_start_date,
25+
Condition_Occurrence.condition_occurrence_id,
26+
Condition_Occurrence.condition_source_value,
27+
Condition_Occurrence.condition_concept_id,
28+
condition_concept.concept_name.label('condition_concept'),
29+
Episode_Event.episode_id.label('condition_episode'),
30+
TStage.stage_id.label('t_stage_id'),
31+
TStage.stage_date.label('t_stage_date'),
32+
TStage.stage_concept_id.label('t_stage_concept_id'),
33+
TStage.stage_label.label('t_stage_label'),
34+
NStage.stage_id.label('n_stage_id'),
35+
NStage.stage_date.label('n_stage_date'),
36+
NStage.stage_concept_id.label('n_stage_concept_id'),
37+
NStage.stage_label.label('n_stage_label'),
38+
MStage.stage_id.label('m_stage_id'),
39+
MStage.stage_date.label('m_stage_date'),
40+
MStage.stage_concept_id.label('m_stage_concept_id'),
41+
MStage.stage_label.label('m_stage_label'),
42+
GroupStage.stage_id.label('group_stage_id'),
43+
GroupStage.stage_date.label('group_stage_date'),
44+
GroupStage.stage_concept_id.label('group_stage_concept_id'),
45+
GroupStage.stage_label.label('group_stage_label'),
46+
GradeModifier.measurement_id.label('grade_id'),
47+
GradeModifier.measurement_date.label('grade_date'),
48+
GradeModifier.concept_name.label('grade_concept'),
49+
SizeModifier.measurement_id.label('size_id'),
50+
SizeModifier.measurement_date.label('size_date'),
51+
SizeModifier.value_as_number.label('size_value'),
52+
SizeModifier.concept_name.label('size_concept'),
53+
LatModifier.measurement_id.label('laterality_id'),
54+
LatModifier.measurement_date.label('laterality_date'),
55+
LatModifier.concept_name.label('laterality_concept'),
56+
)
57+
.join(
58+
Episode_Event,
59+
sa.and_(
60+
Episode_Event.event_id==Condition_Occurrence.condition_occurrence_id,
61+
Episode_Event.episode_event_field_concept_id==runtime.modifiers.modifier_fields.condition_occurrence_id
62+
),
63+
isouter=True
64+
)
65+
.join(
66+
TStage,
67+
sa.and_(
68+
TStage.modifier_of_field_concept_id==runtime.modifiers.modifier_fields.condition_occurrence_id,
69+
Condition_Occurrence.condition_occurrence_id==TStage.modifier_of_event_id
70+
),
71+
isouter=True
72+
)
73+
.join(
74+
NStage,
75+
sa.and_(
76+
NStage.modifier_of_field_concept_id==runtime.modifiers.modifier_fields.condition_occurrence_id,
77+
Condition_Occurrence.condition_occurrence_id==NStage.modifier_of_event_id
78+
),
79+
isouter=True
80+
)
81+
.join(
82+
MStage,
83+
sa.and_(
84+
MStage.modifier_of_field_concept_id==runtime.modifiers.modifier_fields.condition_occurrence_id,
85+
Condition_Occurrence.condition_occurrence_id==MStage.modifier_of_event_id
86+
),
87+
isouter=True
88+
)
89+
.join(
90+
GroupStage,
91+
sa.and_(
92+
GroupStage.modifier_of_field_concept_id==runtime.modifiers.modifier_fields.condition_occurrence_id,
93+
Condition_Occurrence.condition_occurrence_id==GroupStage.modifier_of_event_id
94+
),
95+
isouter=True
96+
)
97+
.join(
98+
GradeModifier,
99+
sa.and_(
100+
GradeModifier.modifier_of_field_concept_id==runtime.modifiers.modifier_fields.condition_occurrence_id,
101+
Condition_Occurrence.condition_occurrence_id==GradeModifier.modifier_of_event_id
102+
),
103+
isouter=True
104+
)
105+
.join(
106+
SizeModifier,
107+
sa.and_(
108+
SizeModifier.modifier_of_field_concept_id==runtime.modifiers.modifier_fields.condition_occurrence_id,
109+
Condition_Occurrence.condition_occurrence_id==SizeModifier.modifier_of_event_id
110+
),
111+
isouter=True
112+
)
113+
.join(
114+
LatModifier,
115+
sa.and_(
116+
LatModifier.modifier_of_field_concept_id==runtime.modifiers.modifier_fields.condition_occurrence_id,
117+
Condition_Occurrence.condition_occurrence_id==LatModifier.modifier_of_event_id
118+
),
119+
isouter=True
120+
)
121+
.join(condition_concept, condition_concept.concept_id==Condition_Occurrence.condition_concept_id)
122+
)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import sqlalchemy as sa
2+
from orm_loader.helpers import Base
3+
from .condition_modifier_join import modified_conditions_join
4+
from ...core.materialized import MaterializedViewMixin
5+
6+
7+
class ModifiedCondition(MaterializedViewMixin, Base):
8+
__mv_name__ = 'modified_conditions_mv'
9+
__mv_select__ = modified_conditions_join.select()
10+
__mv_pk__ = ["mv_id"]
11+
__table_args__ = {"extend_existing": True}
12+
__tablename__ = __mv_name__
13+
14+
mv_id = sa.Column(primary_key=True)
15+
person_id = sa.Column(sa.Integer)
16+
condition_start_date = sa.Column(sa.Date)
17+
condition_occurrence_id = sa.Column(sa.Integer)
18+
condition_source_value = sa.Column(sa.String)
19+
condition_concept_id = sa.Column(sa.Integer)
20+
condition_concept = sa.Column(sa.String)
21+
condition_episode = sa.Column(sa.Integer)
22+
stage_id = sa.Column(sa.Integer)
23+
stage_date = sa.Column(sa.Date)
24+
stage_concept_id = sa.Column(sa.Integer)
25+
stage_label = sa.Column(sa.String)
26+
grade_id = sa.Column(sa.Integer)
27+
grade_date = sa.Column(sa.Date)
28+
grade_concept_id = sa.Column(sa.Integer)
29+
grade_label = sa.Column(sa.String)
30+
size_id = sa.Column(sa.Integer)
31+
size_date = sa.Column(sa.Date)
32+
size_concept_id = sa.Column(sa.Integer)
33+
size_label = sa.Column(sa.String)
34+
laterality_id = sa.Column(sa.Integer)
35+
laterality_date = sa.Column(sa.Date)
36+
laterality_concept_id = sa.Column(sa.Integer)
37+
laterality_label = sa.Column(sa.String)
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
import sqlalchemy as sa
2+
import sqlalchemy.orm as so
3+
from typing import Iterable
4+
5+
from omop_semantics.runtime.default_valuesets import runtime
6+
from omop_alchemy.cdm.model.clinical import Measurement
7+
from omop_alchemy.cdm.model.vocabulary import Concept
8+
9+
modifier_concept = so.aliased(Concept, name='modifier_concept')
10+
11+
def get_eav_modifier_query(
12+
modifier_concept_id: int,
13+
target_cols: Iterable[so.InstrumentedAttribute] = [Measurement.value_as_concept_id],
14+
join_col: so.InstrumentedAttribute = Measurement.value_as_concept_id,
15+
name: str = "eav_modifier"
16+
) -> sa.Subquery:
17+
return (
18+
sa.select(
19+
Measurement.person_id,
20+
Measurement.modifier_of_event_id,
21+
Measurement.modifier_of_field_concept_id,
22+
Measurement.measurement_id,
23+
Measurement.measurement_date,
24+
modifier_concept.concept_name,
25+
*target_cols
26+
)
27+
.join(modifier_concept, modifier_concept.concept_id==join_col, isouter=True)
28+
.filter(Measurement.measurement_concept_id==modifier_concept_id)
29+
.subquery(name = name)
30+
)
31+
32+
def get_direct_modifier_query(
33+
modifier_concept_id: list[int],
34+
name: str = "direct_modifier"
35+
) -> sa.Subquery:
36+
return (
37+
sa.select(
38+
Measurement.person_id,
39+
Measurement.modifier_of_event_id,
40+
Measurement.modifier_of_field_concept_id,
41+
Measurement.measurement_concept_id,
42+
Measurement.measurement_id,
43+
Measurement.measurement_date,
44+
modifier_concept.concept_name,
45+
)
46+
.join(modifier_concept, modifier_concept.concept_id==Measurement.measurement_concept_id, isouter=True)
47+
.filter(Measurement.measurement_concept_id.in_(modifier_concept_id))
48+
.subquery(name = name)
49+
)
50+
51+
def earliest_modifier(
52+
starting_query: sa.Subquery,
53+
name: str = "earliest_modifier"
54+
) -> sa.Subquery:
55+
ranked = (
56+
sa.select(
57+
*starting_query.c,
58+
sa.func.row_number()
59+
.over(
60+
partition_by=starting_query.c.modifier_of_event_id,
61+
order_by=starting_query.c.measurement_date.asc()
62+
)
63+
.label('rn')
64+
).subquery(name=name)
65+
)
66+
return sa.select(*ranked.c).where(ranked.c.rn==1).subquery(name=f"{name}_filtered")
67+
68+
def get_query_per_stage_type(
69+
subset: Iterable[int],
70+
name: str = "stage_modifier"
71+
) -> sa.Subquery:
72+
"""
73+
Build a subquery for a specific stage modifier
74+
75+
Ranks multiple modifiers per diagnosis and selects the most
76+
relevant one based on predefined rules.
77+
78+
T, N, M and Group stage queries preference *earliest pathological
79+
stage* if it exists else fall back to earliest clinical stage.
80+
"""
81+
stage_select = (
82+
83+
sa.select(
84+
Measurement.person_id,
85+
Measurement.measurement_id.label("stage_id"),
86+
Measurement.measurement_date.label("stage_date"),
87+
Measurement.measurement_datetime.label("stage_datetime"),
88+
Measurement.measurement_concept_id.label("stage_concept_id"),
89+
Measurement.modifier_of_event_id,
90+
Measurement.modifier_of_field_concept_id,
91+
modifier_concept.concept_name.label("stage_label"),
92+
sa.case(
93+
(
94+
modifier_concept.concept_code.like("p%"),
95+
sa.literal("aaa_path"),
96+
),
97+
else_=sa.literal("zzz_clin"),
98+
).label("stage_type"),
99+
)
100+
.join(
101+
modifier_concept,
102+
modifier_concept.concept_id == Measurement.measurement_concept_id,
103+
isouter=True,
104+
)
105+
)
106+
i = (
107+
stage_select
108+
.filter(Measurement.measurement_concept_id.in_(subset))
109+
.subquery(name=f"{name}_initial")
110+
)
111+
112+
ranked = (
113+
sa.select(
114+
*i.c,
115+
sa.func.row_number()
116+
.over(
117+
partition_by=i.c.modifier_of_event_id,
118+
order_by=[i.c.stage_type, i.c.stage_date.asc()],
119+
)
120+
.label("rn"),
121+
)
122+
.subquery(name=f"{name}_ranked")
123+
)
124+
125+
return sa.select(*ranked.c).where(ranked.c.rn == 1).subquery(name=f'{name}_filtered')

0 commit comments

Comments
 (0)