Skip to content

Commit

Permalink
fix: refget response compatibility (accept req hdr + content-type res…
Browse files Browse the repository at this point in the history
… hdr)
  • Loading branch information
davidlougheed committed Aug 26, 2024
1 parent a162435 commit 4d1f826
Show file tree
Hide file tree
Showing 2 changed files with 130 additions and 62 deletions.
130 changes: 80 additions & 50 deletions bento_reference_service/routers/refget.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import io
import math
import orjson
import typing

from bento_lib.service_info.helpers import build_service_type, build_service_info_from_pydantic_config
from bento_lib.service_info.types import GA4GHServiceInfo
from bento_lib.streaming import exceptions as se, range as sr
from fastapi import APIRouter, HTTPException, Request, Response, status
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from typing import Literal

from .. import models, streaming as s, __version__
from ..authz import authz_middleware
Expand All @@ -25,47 +28,73 @@
REFGET_VERSION = "2.0.0"
REFGET_SERVICE_TYPE = build_service_type("org.ga4gh", "refget", REFGET_VERSION)

REFGET_CHARSET = "us-ascii"

REFGET_HEADER_TEXT = f"text/vnd.ga4gh.refget.v{REFGET_VERSION}+plain"
REFGET_HEADER_TEXT_WITH_CHARSET = f"{REFGET_HEADER_TEXT}; charset=us-ascii"
REFGET_HEADER_TEXT_WITH_CHARSET = f"{REFGET_HEADER_TEXT}; charset={REFGET_CHARSET}"
REFGET_HEADER_JSON = f"application/vnd.ga4gh.refget.v{REFGET_VERSION}+json"
REFGET_HEADER_JSON_WITH_CHARSET = f"{REFGET_HEADER_JSON}; charset=us-ascii"
REFGET_HEADER_JSON_WITH_CHARSET = f"{REFGET_HEADER_JSON}; charset={REFGET_CHARSET}"


class RefGetJSONResponse(Response):
media_type = REFGET_HEADER_JSON
charset = REFGET_CHARSET

def render(self, content: typing.Any) -> bytes:
return orjson.dumps(content, option=orjson.OPT_NON_STR_KEYS)


refget_router = APIRouter(prefix="/sequence")


@refget_router.get("/service-info", dependencies=[authz_middleware.dep_public_endpoint()])
async def refget_service_info(
config: ConfigDependency, logger: LoggerDependency, request: Request, response: Response
) -> dict:
accept_header: str | None = request.headers.get("Accept", None)
if accept_header and accept_header not in (
REFGET_HEADER_JSON_WITH_CHARSET,
REFGET_HEADER_JSON,
"application/json",
"application/*",
"*/*",
):
def check_accept_header(accept_header: str | None, mode: Literal["text", "json"]) -> None:
valid_header_values = (
(
REFGET_HEADER_TEXT_WITH_CHARSET,
REFGET_HEADER_TEXT,
"text/plain",
"text/*",
"*/*",
)
if mode == "text"
else (
REFGET_HEADER_JSON_WITH_CHARSET,
REFGET_HEADER_JSON,
"application/json",
"application/*",
"*/*",
)
)

if accept_header and accept_header not in valid_header_values:
raise HTTPException(status_code=status.HTTP_406_NOT_ACCEPTABLE, detail="Not Acceptable")

response.headers["Content-Type"] = REFGET_HEADER_JSON_WITH_CHARSET

@refget_router.get("/service-info", dependencies=[authz_middleware.dep_public_endpoint()])
async def refget_service_info(
config: ConfigDependency, logger: LoggerDependency, request: Request
) -> RefGetJSONResponse:
check_accept_header(request.headers.get("Accept"), mode="json")

genome_service_info: GA4GHServiceInfo = await build_service_info_from_pydantic_config(
config, logger, {}, REFGET_SERVICE_TYPE, __version__
)

del genome_service_info["bento"]

return {
**genome_service_info,
"refget": {
"circular_supported": False,
# I don't like that they used the word 'subsequence' here... that's not what that means exactly.
# It's a substring!
"subsequence_limit": config.response_substring_limit,
"algorithms": ["md5", "ga4gh"],
"identifier_types": [],
},
}
return RefGetJSONResponse(
{
**genome_service_info,
"refget": {
"circular_supported": False,
# I don't like that they used the word 'subsequence' here... that's not what that means exactly.
# It's a substring!
"subsequence_limit": config.response_substring_limit,
"algorithms": ["md5", "ga4gh"],
"identifier_types": [],
},
}
)


REFGET_BAD_REQUEST = Response(status_code=status.HTTP_400_BAD_REQUEST, content=b"Bad Request")
Expand All @@ -87,16 +116,11 @@ async def refget_sequence(
):
headers = {"Content-Type": REFGET_HEADER_TEXT_WITH_CHARSET, "Accept-Ranges": "bytes"}

accept_header: str | None = request.headers.get("Accept", None)
if accept_header and accept_header not in (
REFGET_HEADER_TEXT_WITH_CHARSET,
REFGET_HEADER_TEXT,
"text/plain",
"text/*",
"*/*",
):
logger.error(f"not acceptable: bad Accept header value")
return Response(status_code=status.HTTP_406_NOT_ACCEPTABLE, content=b"Not Acceptable")
try:
check_accept_header(request.headers.get("Accept"), mode="text")
except HTTPException as e:
logger.error(f"not acceptable: bad Accept header value") # don't log actual value to prevent log injection
return Response(status_code=e.status_code, content=e.detail.encode("ascii"))

# Don't use FastAPI's auto-Header tool for the Range header
# 'cause I don't want to shadow Python's range() function
Expand Down Expand Up @@ -219,18 +243,22 @@ class RefGetSequenceMetadataResponse(BaseModel):
metadata: RefGetSequenceMetadata


@refget_router.get("/{sequence_checksum}/metadata", dependencies=[authz_middleware.dep_public_endpoint()])
@refget_router.get(
"/{sequence_checksum}/metadata",
dependencies=[authz_middleware.dep_public_endpoint()],
responses={
status.HTTP_200_OK: {REFGET_HEADER_JSON: {"schema": RefGetSequenceMetadataResponse.model_json_schema()}}
},
)
async def refget_sequence_metadata(
db: DatabaseDependency,
response: Response,
sequence_checksum: str,
) -> RefGetSequenceMetadataResponse:
db: DatabaseDependency, request: Request, sequence_checksum: str
) -> RefGetJSONResponse:
check_accept_header(request.headers.get("Accept"), mode="json")

res: tuple[str, models.ContigWithRefgetURI] | None = await db.get_genome_and_contig_by_checksum_str(
sequence_checksum
)

response.headers["Content-Type"] = REFGET_HEADER_JSON_WITH_CHARSET

if res is None:
# TODO: proper 404 for refget spec
# TODO: proper content type for exception - RefGet error class?
Expand All @@ -240,11 +268,13 @@ async def refget_sequence_metadata(
)

contig = res[1]
return RefGetSequenceMetadataResponse(
metadata=RefGetSequenceMetadata(
md5=contig.md5,
ga4gh=contig.ga4gh,
length=contig.length,
aliases=contig.aliases,
),
return RefGetJSONResponse(
RefGetSequenceMetadataResponse(
metadata=RefGetSequenceMetadata(
md5=contig.md5,
ga4gh=contig.ga4gh,
length=contig.length,
aliases=contig.aliases,
),
).model_dump(mode="json"),
)
62 changes: 50 additions & 12 deletions tests/test_refget.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def test_refget_service_info(test_client: TestClient, db_cleanup):
rd = res.json()

assert res.status_code == status.HTTP_200_OK
assert res.headers["content-type"] == "application/vnd.ga4gh.refget.v2.0.0+json"

assert "id" in rd
assert "name" in rd
Expand All @@ -41,49 +42,49 @@ def test_refget_sequence_invalid_requests(test_client: TestClient, aioresponse:
# TODO: fixture
create_genome_with_permissions(test_client, aioresponse, TEST_GENOME_SARS_COV_2)
test_contig = TEST_GENOME_SARS_COV_2["contigs"][0]
seq_url = f"/sequence/{test_contig['md5']}"

# ------------------------------------------------------------------------------------------------------------------

# cannot return HTML
res = test_client.get(f"/sequence/{test_contig['md5']}", headers={"Accept": "text/html"})
res = test_client.get(seq_url, headers={"Accept": "text/html"})
assert res.status_code == status.HTTP_406_NOT_ACCEPTABLE
assert res.content == b"Not Acceptable"

# cannot have start > end
res = test_client.get(
f"/sequence/{test_contig['md5']}", params={"start": 5, "end": 1}, headers=HEADERS_ACCEPT_PLAIN
)
res = test_client.get(seq_url, params={"start": 5, "end": 1}, headers=HEADERS_ACCEPT_PLAIN)
assert res.status_code == status.HTTP_416_REQUESTED_RANGE_NOT_SATISFIABLE
assert res.content == b"Range Not Satisfiable"

# start > contig length (by 1 base, since it's 0-based)
res = test_client.get(
f"/sequence/{test_contig['md5']}", params={"start": test_contig["length"]}, headers=HEADERS_ACCEPT_PLAIN
)
res = test_client.get(seq_url, params={"start": test_contig["length"]}, headers=HEADERS_ACCEPT_PLAIN)
assert res.status_code == status.HTTP_400_BAD_REQUEST
assert res.content == b"Bad Request"

# end > contig length (by 1 base, since it's 0-based exclusive)
res = test_client.get(
f"/sequence/{test_contig['md5']}", params={"end": test_contig["length"] + 1}, headers=HEADERS_ACCEPT_PLAIN
)
res = test_client.get(seq_url, params={"end": test_contig["length"] + 1}, headers=HEADERS_ACCEPT_PLAIN)
assert res.status_code == status.HTTP_400_BAD_REQUEST
assert res.content == b"Bad Request"

# bad range header
res = test_client.get(f"/sequence/{test_contig['md5']}", headers={"Range": "dajkshfasd", **HEADERS_ACCEPT_PLAIN})
res = test_client.get(seq_url, headers={"Range": "dajkshfasd", **HEADERS_ACCEPT_PLAIN})
assert res.status_code == status.HTTP_400_BAD_REQUEST
assert res.content == b"Bad Request"

# cannot have range header and start/end
res = test_client.get(
f"/sequence/{test_contig['md5']}",
seq_url,
params={"start": "0", "end": "11"},
headers={"Range": "bytes=0-10", **HEADERS_ACCEPT_PLAIN},
)
assert res.status_code == status.HTTP_400_BAD_REQUEST
assert res.content == b"Bad Request"

# cannot have overlaps in range header
res = test_client.get(seq_url, headers={"Range": "bytes=0-10, 5-15", **HEADERS_ACCEPT_PLAIN})
assert res.status_code == status.HTTP_416_REQUESTED_RANGE_NOT_SATISFIABLE
assert res.content == b"Range Not Satisfiable"


def test_refget_sequence_full(test_client: TestClient, aioresponse: aioresponses, db_cleanup):
# TODO: fixture
Expand Down Expand Up @@ -161,3 +162,40 @@ def _check_first_10(r, sc, ar="none"):
res = test_client.get(seq_url, headers={"Range": "bytes=-10", **HEADERS_ACCEPT_PLAIN})
assert res.status_code == status.HTTP_206_PARTIAL_CONTENT
assert res.content == seq[-10:]


def test_refget_metadata(test_client: TestClient, aioresponse: aioresponses, db_cleanup):
# TODO: fixture
create_genome_with_permissions(test_client, aioresponse, TEST_GENOME_SARS_COV_2)
test_contig = TEST_GENOME_SARS_COV_2["contigs"][0]
seq_m_url = f"/sequence/{test_contig['md5']}/metadata"

# ------------------------------------------------------------------------------------------------------------------

res = test_client.get(seq_m_url)
assert res.status_code == status.HTTP_200_OK
assert res.headers["content-type"] == "application/vnd.ga4gh.refget.v2.0.0+json"
assert res.json() == {
"metadata": {
"md5": test_contig["md5"],
"ga4gh": test_contig["ga4gh"],
"length": test_contig["length"],
"aliases": test_contig["aliases"],
}
}


def test_refget_metadata_errors(test_client: TestClient, aioresponse: aioresponses, db_cleanup):
# TODO: fixture
create_genome_with_permissions(test_client, aioresponse, TEST_GENOME_SARS_COV_2)
test_contig = TEST_GENOME_SARS_COV_2["contigs"][0]
seq_m_url = f"/sequence/{test_contig['md5']}/metadata"

# ------------------------------------------------------------------------------------------------------------------

res = test_client.get(seq_m_url, headers=HEADERS_ACCEPT_PLAIN)
assert res.status_code == status.HTTP_406_NOT_ACCEPTABLE

res = test_client.get("/sequence/does-not-exist/metadata")
# TODO: proper content type for exception - RefGet error class?
assert res.status_code == status.HTTP_404_NOT_FOUND

0 comments on commit 4d1f826

Please sign in to comment.