Skip to content

Commit

Permalink
build!: update ga4gh.vrs + gene/variation normalizers (#389)
Browse files Browse the repository at this point in the history
close #305

* Update domain entity and entity class names
* Update tests to add `SequenceLocation.sequence`
* Update FastAPI route configs to declare response models and return Pydantic
models instead of dicts
  • Loading branch information
korikuzma authored Jul 22, 2024
1 parent 1563018 commit c9cffe1
Show file tree
Hide file tree
Showing 8 changed files with 28 additions and 19 deletions.
6 changes: 3 additions & 3 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ verify_ssl = true
name = "pypi"

[packages]
"ga4gh.vrs" = "~=2.0.0a8"
gene-normalizer = {version = "~=0.4.0", extras = ["etl"]}
variation-normalizer = "~=0.9.1"
"ga4gh.vrs" = "~=2.0.0a10"
gene-normalizer = {version = "~=0.4.1", extras = ["etl"]}
variation-normalizer = "~=0.10.0"
disease-normalizer = {version = "~=0.5.0", extras = ["etl"]}
thera-py = {version = "~=0.6.0", extras = ["etl"]}
civicpy = "~=3.1"
Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ requires-python = ">=3.10"
description = "A search interface for cancer variant interpretations assembled by aggregating and harmonizing across multiple cancer variant interpretation knowledgebases."
license = {file = "LICENSE"}
dependencies = [
"ga4gh.vrs~=2.0.0a8",
"gene-normalizer[etl]~=0.4.0",
"variation-normalizer~=0.9.1",
"ga4gh.vrs~=2.0.0a10",
"gene-normalizer[etl]~=0.4.1",
"variation-normalizer~=0.10.0",
"disease-normalizer[etl]~=0.5.0",
"thera-py[etl]~=0.6.0",
"civicpy~=3.1",
Expand Down
2 changes: 1 addition & 1 deletion src/metakb/load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def _add_location(tx: ManagedTransaction, location_in: dict) -> None:
loc = location_in.copy()
loc_keys = [
f"loc.{key}=${key}"
for key in ("id", "digest", "start", "end", "type")
for key in ("id", "digest", "start", "end", "sequence", "type")
if loc.get(key) is not None # start could be 0
]
loc["sequence_reference"] = json.dumps(loc["sequenceReference"])
Expand Down
12 changes: 8 additions & 4 deletions src/metakb/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ def custom_openapi() -> dict:
@app.get(
"/api/v2/search/studies",
summary=search_studies_summary,
response_model=SearchStudiesService,
response_model_exclude_none=True,
description=search_studies_descr,
)
async def get_studies(
Expand All @@ -94,7 +96,7 @@ async def get_studies(
study_id: Annotated[str | None, Query(description=s_description)] = None,
start: Annotated[int, Query(description=start_description)] = 0,
limit: Annotated[int | None, Query(description=limit_description)] = None,
) -> dict:
) -> SearchStudiesService:
"""Get nested studies from queried concepts that match all conditions provided.
For example, if `variation` and `therapy` are provided, will return all studies
that have both the provided `variation` and `therapy`.
Expand Down Expand Up @@ -125,7 +127,7 @@ async def get_studies(
service_meta_=ServiceMeta(),
warnings=["`start` and `limit` params must both be nonnegative"],
)
return resp.model_dump(exclude_none=True)
return resp


_batch_descr = {
Expand All @@ -140,6 +142,8 @@ async def get_studies(
@app.get(
"/api/v2/batch_search/studies",
summary=_batch_descr["summary"],
response_model=BatchSearchStudiesService,
response_model_exclude_none=True,
description=_batch_descr["description"],
)
async def batch_get_studies(
Expand All @@ -149,7 +153,7 @@ async def batch_get_studies(
] = None,
start: Annotated[int, Query(description=_batch_descr["arg_start"])] = 0,
limit: Annotated[int | None, Query(description=_batch_descr["arg_limit"])] = None,
) -> dict:
) -> BatchSearchStudiesService:
"""Fetch all studies associated with `any` of the provided variations.
:param variations: variations to match against
Expand All @@ -166,4 +170,4 @@ async def batch_get_studies(
warnings=["`start` and `limit` params must both be nonnegative"],
)

return response.model_dump(exclude_none=True)
return response
12 changes: 6 additions & 6 deletions src/metakb/schemas/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from enum import Enum
from typing import Literal

from ga4gh.core.entity_models import IRI, Coding, _DomainEntity, _Entity
from ga4gh.core.entity_models import IRI, Coding, DomainEntity, Entity
from pydantic import Field, StrictInt, StrictStr, constr, field_validator


Expand All @@ -24,7 +24,7 @@ class Direction(str, Enum):
NONE = "none"


class Document(_DomainEntity):
class Document(DomainEntity):
"""a representation of a physical or digital document"""

type: Literal["Document"] = "Document"
Expand All @@ -42,7 +42,7 @@ class Document(_DomainEntity):
)


class Method(_Entity):
class Method(Entity):
"""A set of instructions that specify how to achieve some objective (e.g.
experimental protocols, curation guidelines, rule sets, etc.)
"""
Expand All @@ -57,7 +57,7 @@ class Method(_Entity):
)


class Agent(_Entity):
class Agent(Entity):
"""An autonomous actor (person, organization, or computational agent) that bears
some form of responsibility for an activity taking place, for the existence of an
entity, or for another agent's activity.
Expand All @@ -68,7 +68,7 @@ class Agent(_Entity):
subtype: AgentSubtype | None = None


class Contribution(_Entity):
class Contribution(Entity):
"""The sum of all actions taken by a single agent in contributing to the creation,
modification, assessment, or deprecation of a particular entity (e.g. a Statement,
EvidenceLine, DataItem, Publication, etc.)
Expand Down Expand Up @@ -99,7 +99,7 @@ def date_format(cls, v: str | None) -> str | None:
return v


class _InformationEntity(_Entity):
class _InformationEntity(Entity):
"""InformationEntities are abstract (non-physical) entities that are about something
(i.e. they carry information about things in the real world).
"""
Expand Down
4 changes: 2 additions & 2 deletions src/metakb/schemas/categorical_variation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from enum import Enum
from typing import Literal

from ga4gh.core.entity_models import IRI, _DomainEntity
from ga4gh.core.entity_models import IRI, DomainEntity
from ga4gh.vrs import models
from pydantic import Field, RootModel, StrictStr

Expand All @@ -28,7 +28,7 @@ class LocationMatchCharacteristic(str, Enum):
SUPERINTERVAL = "superinterval"


class _CategoricalVariationBase(_DomainEntity):
class _CategoricalVariationBase(DomainEntity):
"""Base class for Categorical Variation"""

members: list[models.Variation | IRI] | None = Field(
Expand Down
4 changes: 4 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ def civic_mpid33(civic_vid33):
},
"start": 55191821,
"end": 55191822,
"sequence": "T",
},
"state": {"type": "LiteralSequenceExpression", "sequence": "G"},
}
Expand Down Expand Up @@ -362,6 +363,7 @@ def civic_vid12():
},
"start": 599,
"end": 600,
"sequence": "V",
},
"state": {"sequence": "E", "type": "LiteralSequenceExpression"},
"expressions": [
Expand Down Expand Up @@ -396,6 +398,7 @@ def braf_v600e_genomic():
},
"start": 140753335,
"end": 140753336,
"sequence": "A",
},
"state": {"type": "LiteralSequenceExpression", "sequence": "T"},
}
Expand Down Expand Up @@ -1737,6 +1740,7 @@ def moa_vid66():
},
"start": 133748282,
"end": 133748283,
"sequence": "C",
},
"state": {"type": "LiteralSequenceExpression", "sequence": "T"},
}
Expand Down
1 change: 1 addition & 0 deletions tests/unit/database/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,7 @@ def test_location_rules(
"sequence_reference",
"start",
"end",
"sequence",
"type",
}
assert json.loads(loc["sequence_reference"]) == {
Expand Down

0 comments on commit c9cffe1

Please sign in to comment.