From ba82829a4b37c48d95cefffc3ae878b0986d261a Mon Sep 17 00:00:00 2001 From: Phil Darnowsky Date: Wed, 10 Sep 2025 10:58:37 -0400 Subject: [PATCH 1/9] Build RNU4ATAC gene patches and export to ES --- .../src/data_pipeline/data_types/gene.py | 60 +++++++++++++++++++ .../pipelines/export_to_elasticsearch.py | 10 ++++ .../data_pipeline/pipelines/gene_patches.py | 19 ++++++ 3 files changed, 89 insertions(+) create mode 100644 data-pipeline/src/data_pipeline/pipelines/gene_patches.py diff --git a/data-pipeline/src/data_pipeline/data_types/gene.py b/data-pipeline/src/data_pipeline/data_types/gene.py index cfa469b19..97de00c33 100644 --- a/data-pipeline/src/data_pipeline/data_types/gene.py +++ b/data-pipeline/src/data_pipeline/data_types/gene.py @@ -117,6 +117,66 @@ def reject_par_y_genes(genes_path=None): return genes +def patch_rnu4atac(genes_path=None): + gene_symbol = "RNU4ATAC" + + genes = hl.read_table(genes_path) + genes = genes.filter(genes.symbol == gene_symbol) + + correct_start = 121530880 + correct_stop = 121531009 + correct_start_locus = hl.locus(contig="chr2", pos=correct_start, reference_genome="GRCh38") + correct_stop_locus = hl.locus(contig="chr2", pos=correct_stop, reference_genome="GRCh38") + correct_xstart = x_position(correct_start_locus) + correct_xstop = x_position(correct_stop_locus) + + correct_interval = hl.interval(correct_start_locus, correct_stop_locus, includes_start=True, includes_end=True) + + correct_exon = hl.struct( + feature_type="exon", start=correct_start, stop=correct_stop, xstart=correct_xstart, xstop=correct_xstop + ) + + incorrect_transcript = genes.take(1)[0].transcripts[0] + correct_transcript = hl.struct( + interval=correct_interval, + transcript_version="2", + gene_version="2", + start=correct_start, + stop=correct_stop, + xstart=correct_xstart, + xstop=correct_xstop, + exons=hl.array([correct_exon]), + transcript_id=incorrect_transcript.transcript_id, + gene_id=incorrect_transcript.gene_id, + chrom=incorrect_transcript.chrom, + strand=incorrect_transcript.strand, + reference_genome=incorrect_transcript.reference_genome, + gtex_tissue_expression=incorrect_transcript.gtex_tissue_expression, + refseq_id="NR_023343", + refseq_version="3", + ) + + correct_mane_select_transcript = hl.struct( + matched_gene_version="2", + ensembl_id="ENST00000580972", + ensembl_version="2", + refseq_id="NR_023343", + refseq_version="3", + ) + + genes = genes.annotate( + gene_version=2, + start=correct_start, + stop=correct_stop, + xstart=correct_xstart, + xstop=correct_xstop, + exons=[correct_exon], + transcripts=[correct_transcript], + mane_select_transcript=correct_mane_select_transcript, + ) + return genes + + ############################################### # Transcripts # ############################################### diff --git a/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py b/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py index bf074c45b..680a96f41 100644 --- a/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py +++ b/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py @@ -41,6 +41,7 @@ from data_pipeline.pipelines.gnomad_v4_cnvs import pipeline as gnomad_v4_cnvs_pipeline from data_pipeline.pipelines.gnomad_v4_lof_curation_results import pipeline as gnomad_v4_lof_curation_results_pipeline +from data_pipeline.pipelines.gene_patches import pipeline as gnomad_v4_gene_patches logger = logging.getLogger("gnomad_data_pipeline") @@ -88,6 +89,15 @@ def add_liftover_document_id(ds): "block_size": 200, }, }, + "gene_patches": { + "get_table": lambda: hl.read_table(gnomad_v4_gene_patches.get_output("gene_patches").get_output_path()), + "args": { + "index": "genes_grch38_patches", + "index_fields": ["gene_id", "symbol_upper_case", "search_terms", "xstart", "xstop"], + "id_field": "gene_id", + "block_size": 200, + }, + }, ############################################################################################################## # Transcripts ############################################################################################################## diff --git a/data-pipeline/src/data_pipeline/pipelines/gene_patches.py b/data-pipeline/src/data_pipeline/pipelines/gene_patches.py new file mode 100644 index 000000000..8c24ff183 --- /dev/null +++ b/data-pipeline/src/data_pipeline/pipelines/gene_patches.py @@ -0,0 +1,19 @@ +import hail as hl + +from data_pipeline.pipeline import Pipeline, run_pipeline + +from data_pipeline.data_types.gene import patch_rnu4atac + +pipeline = Pipeline() + +pipeline.add_task( + "patch_rnu4atac_grch38", + patch_rnu4atac, + "/genes/genes_grch38_patched.ht", + {"genes_path": "gs://gnomad-v4-data-pipeline/output/genes/genes_grch38_annotated_6.ht"}, +) + +pipeline.set_outputs({"gene_patches": "patch_rnu4atac_grch38"}) + +if __name__ == "__main__": + run_pipeline(pipeline) From 2edf0107875a1592fc486530e64206ce97362975 Mon Sep 17 00:00:00 2001 From: Phil Darnowsky Date: Wed, 10 Sep 2025 10:41:00 -0400 Subject: [PATCH 2/9] Use RNU4ATAC gene patches in API --- .../data_pipeline/pipelines/gene_patches.py | 2 - graphql-api/src/elasticsearch.ts | 24 ++- graphql-api/src/queries/gene-queries.ts | 200 +++++++++++++----- .../queries/helpers/elasticsearch-helpers.ts | 23 +- 4 files changed, 175 insertions(+), 74 deletions(-) diff --git a/data-pipeline/src/data_pipeline/pipelines/gene_patches.py b/data-pipeline/src/data_pipeline/pipelines/gene_patches.py index 8c24ff183..022b18613 100644 --- a/data-pipeline/src/data_pipeline/pipelines/gene_patches.py +++ b/data-pipeline/src/data_pipeline/pipelines/gene_patches.py @@ -1,5 +1,3 @@ -import hail as hl - from data_pipeline.pipeline import Pipeline, run_pipeline from data_pipeline.data_types.gene import patch_rnu4atac diff --git a/graphql-api/src/elasticsearch.ts b/graphql-api/src/elasticsearch.ts index 7638d66c3..6ebd46380 100644 --- a/graphql-api/src/elasticsearch.ts +++ b/graphql-api/src/elasticsearch.ts @@ -82,8 +82,8 @@ const scheduleElasticsearchRequest = (fn: any) => { const limitedElastic = { indices: elastic.indices, clearScroll: elastic.clearScroll.bind(elastic), - search: (...args: Parameters) => - scheduleElasticsearchRequest(() => elastic.search(...args)).then((response) => { + search: (args: elasticsearch.RequestParams.Search) => + scheduleElasticsearchRequest(() => elastic.search(args)).then((response) => { // @ts-expect-error TS(2571) FIXME: Object is of type 'unknown'. if (response.body.timed_out) { throw new Error('Elasticsearch search timed out') @@ -95,8 +95,8 @@ const limitedElastic = { } return response }), - scroll: (...args: Parameters) => - scheduleElasticsearchRequest(() => elastic.scroll(...args)).then((response) => { + scroll: (args: { scroll: string; scrollId?: string }) => + scheduleElasticsearchRequest(() => elastic.scroll(args)).then((response) => { // @ts-expect-error TS(2571) FIXME: Object is of type 'unknown'. if (response.body.timed_out) { throw new Error('Elasticsearch scroll timed out') @@ -117,10 +117,22 @@ const limitedElastic = { } return response }), - get: (...args: Parameters) => - scheduleElasticsearchRequest(() => elastic.get(...args)), + get: (args: { index: string; type: '_doc'; id: string }) => + scheduleElasticsearchRequest(() => elastic.get(args)), mget: (...args: Parameters) => scheduleElasticsearchRequest(() => elastic.mget(...args)), } +export type LimitedElasticClient = typeof limitedElastic + +export type GetResponse = { + body: { _source: { value: Record } } +} + +export type SearchHit = { _id: string; _source: any } + +export type SearchResponse = { + body: { hits: { total: { value: number }; hits: SearchHit[] }; _scroll_id?: string } +} + export { limitedElastic as client } diff --git a/graphql-api/src/queries/gene-queries.ts b/graphql-api/src/queries/gene-queries.ts index ac7858c21..57b05e986 100644 --- a/graphql-api/src/queries/gene-queries.ts +++ b/graphql-api/src/queries/gene-queries.ts @@ -1,43 +1,69 @@ +import elasticsearch from '@elastic/elasticsearch' import { withCache } from '../cache' import { fetchAllSearchResults } from './helpers/elasticsearch-helpers' -const GENE_INDICES = { - GRCh37: 'genes_grch37', - GRCh38: 'genes_grch38', -} +import { ReferenceGenome } from '@gnomad/dataset-metadata/metadata' +import { LimitedElasticClient, GetResponse, SearchResponse, SearchHit } from '../elasticsearch' -const _fetchGeneById = async (esClient: any, geneId: any, referenceGenome: any) => { - try { - const response = await esClient.get({ - // @ts-expect-error TS(7053) FIXME: Element implicitly has an 'any' type because expre... Remove this comment to see the full error message - index: GENE_INDICES[referenceGenome], - type: '_doc', - id: geneId, - }) +type GeneIndex = 'genes_grch37' | 'genes_grch38' | 'genes_grch38_patches-2025-10-23--19-35' + +type GeneSearchRegion = { reference_genome: ReferenceGenome; xstart: number; xstop: number } - return response.body._source.value - } catch (err) { - // meta will not be present if the request times out in the queue before reaching ES - // @ts-expect-error TS(2571) FIXME: Object is of type 'unknown'. - if (err.meta && err.meta.body && err.meta.body.found === false) { - return null +const GENE_INDICES: Record = { + // Order matters here: later indices take precedence over earlier + GRCh37: ['genes_grch37'], + GRCh38: ['genes_grch38', 'genes_grch38_patches-2025-10-23--19-35'], +} + +const _fetchGeneById = async ( + esClient: LimitedElasticClient, + geneId: string, + referenceGenome: ReferenceGenome +) => { + const indices = GENE_INDICES[referenceGenome] + const requests = indices.map( + (index) => + esClient + .get({ + index, + type: '_doc', + id: geneId, + }) + .catch((err) => { + // meta will not be present if the request times out in the queue before reaching ES + if (err.meta && err.meta.body && err.meta.body.found === false) { + return null + } + throw err + }) as Promise + ) + return Promise.all(requests).then( + (responses) => { + const responsesWithValue = responses.filter((response) => response !== null) + return responsesWithValue.length > 0 + ? responsesWithValue[responsesWithValue.length - 1]!.body._source.value + : null + }, + (err) => { + throw err } - throw err - } + ) } export const fetchGeneById = withCache( _fetchGeneById, - (_: any, geneId: any, referenceGenome: any) => `gene:${geneId}:${referenceGenome}`, + (_: any, geneId: string, referenceGenome: ReferenceGenome) => `gene:${geneId}:${referenceGenome}`, { expiration: 86400 } ) -export const fetchGeneBySymbol = async (esClient: any, geneSymbol: any, referenceGenome: any) => { - const response = await esClient.search({ - // @ts-expect-error TS(7053) FIXME: Element implicitly has an 'any' type because expre... Remove this comment to see the full error message - index: GENE_INDICES[referenceGenome], - type: '_doc', +export const fetchGeneBySymbol = async ( + esClient: LimitedElasticClient, + geneSymbol: string, + referenceGenome: ReferenceGenome +) => { + const indices = GENE_INDICES[referenceGenome] + const responses = await searchMultipleIndices(esClient, indices, { body: { query: { bool: { @@ -48,20 +74,22 @@ export const fetchGeneBySymbol = async (esClient: any, geneSymbol: any, referenc size: 1, }) - if (response.body.hits.total.value === 0) { + const responsesWithValue = responses.filter((response) => response.body.hits.total.value > 0) + if (responsesWithValue.length === 0) { return null } - return response.body.hits.hits[0]._source.value + return responsesWithValue[responsesWithValue.length - 1].body.hits.hits[0]._source.value } -export const fetchGenesByRegion = async (esClient: any, region: any) => { - const { reference_genome: referenceGenome, xstart, xstop } = region +export const fetchGenesByRegion = async ( + esClient: LimitedElasticClient, + region: GeneSearchRegion +) => { + const { reference_genome, xstart, xstop } = region + const indices = GENE_INDICES[reference_genome] - const hits = await fetchAllSearchResults(esClient, { - // @ts-expect-error TS(7053) FIXME: Element implicitly has an 'any' type because expre... Remove this comment to see the full error message - index: GENE_INDICES[referenceGenome], - type: '_doc', + const hits = await fetchAllSearchResultsFromMultipleIndices(esClient, indices, { size: 200, _source: [ 'value.exons', @@ -98,28 +126,91 @@ export const fetchGenesByRegion = async (esClient: any, region: any) => { }, }) - return hits.map((hit: any) => hit._source.value) + const mergedHits = mergeHitsById(hits.flat()) + return mergedHits.map((hit) => hit._source.value) +} + +const fetchAllSearchResultsFromMultipleIndices = async ( + esClient: LimitedElasticClient, + indices: string[], + searchParams: elasticsearch.RequestParams.Search +) => { + const requests = indices.map((index) => + fetchAllSearchResults(esClient, { + index, + type: '_doc', + ...searchParams, + }) + ) + return Promise.all(requests) } -export const fetchGenesMatchingText = async (esClient: any, query: any, referenceGenome: any) => { +const searchMultipleIndices = async ( + esClient: LimitedElasticClient, + indices: string[], + searchParams: elasticsearch.RequestParams.Search +): Promise => { + const requests = indices.map( + (index) => + esClient.search({ + index, + type: '_doc', + ...searchParams, + }) as Promise + ) + + return Promise.all(requests) +} + +const mergeHitsById = (hits: SearchHit[]): SearchHit[] => { + const ids: string[] = [] + const idsToHits: Record = {} + hits.forEach((hit) => { + if (idsToHits[hit._id] === undefined) { + ids.push(hit._id) + } + idsToHits[hit._id] = hit + }) + return ids.map((id) => idsToHits[id]) +} + +const mergeResponsesById = (responses: SearchResponse[]) => { + const ids: string[] = [] + const idsToDocs: Record = {} + responses.forEach((response) => + response.body.hits.hits.forEach((hit) => { + if (idsToDocs[hit._id] === undefined) { + ids.push(hit._id) + } + idsToDocs[hit._id] = hit._source + }) + ) + + return ids.map((id) => idsToDocs[id]) +} + +export const fetchGenesMatchingText = async ( + esClient: LimitedElasticClient, + query: string, + referenceGenome: ReferenceGenome +) => { const upperCaseQuery = query.toUpperCase() // Ensembl ID if (/^ENSG\d{11}$/.test(upperCaseQuery)) { const gene = await _fetchGeneById(esClient, upperCaseQuery, referenceGenome) - return [ - { - ensembl_id: gene.gene_id, - symbol: gene.symbol, - }, - ] + return ( + gene && [ + { + ensembl_id: gene.gene_id, + symbol: gene.symbol, + }, + ] + ) } // Symbol - const response = await esClient.search({ - // @ts-expect-error TS(7053) FIXME: Element implicitly has an 'any' type because expre... Remove this comment to see the full error message - index: GENE_INDICES[referenceGenome], - type: '_doc', + const responses = await searchMultipleIndices(esClient, GENE_INDICES[referenceGenome], { _source: ['gene_id', 'value.gene_version', 'value.symbol'], body: { query: { @@ -134,15 +225,16 @@ export const fetchGenesMatchingText = async (esClient: any, query: any, referenc size: 5, }) - if (response.body.hits.total.value === 0) { + const responsesWithValue = responses.filter((response) => response.body.hits.total.value !== 0) + if (responsesWithValue.length === 0) { return [] } - return response.body.hits.hits - .map((hit: any) => hit._source) - .map((doc: any) => ({ - ensembl_id: doc.gene_id, - ensembl_version: doc.value.gene_version, - symbol: doc.value.symbol, - })) + const mergedDocs = mergeResponsesById(responsesWithValue) + + return mergedDocs.map((doc) => ({ + ensembl_id: doc.gene_id, + ensembl_version: doc.value.gene_version, + symbol: doc.value.symbol, + })) } diff --git a/graphql-api/src/queries/helpers/elasticsearch-helpers.ts b/graphql-api/src/queries/helpers/elasticsearch-helpers.ts index 5ec797ddb..ee70cb1fa 100644 --- a/graphql-api/src/queries/helpers/elasticsearch-helpers.ts +++ b/graphql-api/src/queries/helpers/elasticsearch-helpers.ts @@ -1,43 +1,42 @@ +import { LimitedElasticClient, SearchResponse, SearchHit } from '../../elasticsearch' + /** * Search and then scroll to retrieve all pages of search results. * - * @param {elasticsearch.Client} client Elasticsearch client - * @param {Object} searchParams Argument to elasticsearch.Client#search - * @return {Object[]} Combined list of hits from all responses */ -export const fetchAllSearchResults = async (client: any, searchParams: any) => { - const allResults: any = [] - const responseQueue = [] +export const fetchAllSearchResults = async (client: LimitedElasticClient, searchParams: any) => { + const allResults: SearchHit[] = [] + const responseQueue: SearchResponse[] = [] const size = searchParams.size || 1000 const scroll = searchParams.scroll || '30s' responseQueue.push( - await client.search({ + await (client.search({ ...searchParams, scroll, size, - }) + }) as Promise) ) while (responseQueue.length) { - const response = responseQueue.shift() + const response = responseQueue.shift()! allResults.push(...response.body.hits.hits) if (allResults.length === response.body.hits.total.value) { // eslint-disable-next-line no-await-in-loop await client.clearScroll({ - scrollId: response.body._scroll_id, // eslint-disable-line no-underscore-dangle + scroll_id: response.body._scroll_id, // eslint-disable-line no-underscore-dangle }) break } responseQueue.push( // eslint-disable-next-line no-await-in-loop - await client.scroll({ + await (client.scroll({ scroll, scrollId: response.body._scroll_id, // eslint-disable-line no-underscore-dangle - }) + }) as Promise) ) } From 17d59da0c1901049133b82ac599c8a3791d6a891 Mon Sep 17 00:00:00 2001 From: Phil Darnowsky Date: Tue, 30 Sep 2025 15:30:34 -0400 Subject: [PATCH 3/9] Build RNU4ATAC variant patches and export to ES --- .../variant/patch_rnu4atac_variants.py | 48 +++++++++++++++++++ .../annotate_transcript_consequences.py | 48 ++++++++++--------- .../pipelines/export_to_elasticsearch.py | 24 ++++++++++ .../pipelines/variant_patches.py | 20 ++++++++ 4 files changed, 118 insertions(+), 22 deletions(-) create mode 100644 data-pipeline/src/data_pipeline/data_types/variant/patch_rnu4atac_variants.py create mode 100644 data-pipeline/src/data_pipeline/pipelines/variant_patches.py diff --git a/data-pipeline/src/data_pipeline/data_types/variant/patch_rnu4atac_variants.py b/data-pipeline/src/data_pipeline/data_types/variant/patch_rnu4atac_variants.py new file mode 100644 index 000000000..e682c398e --- /dev/null +++ b/data-pipeline/src/data_pipeline/data_types/variant/patch_rnu4atac_variants.py @@ -0,0 +1,48 @@ +import hail as hl + +from data_pipeline.data_types.variant.transcript_consequence.annotate_transcript_consequences import ( + annotate_transcript_consequences_in_table, +) + + +def patch_rnu4atac_variants(vepped_path=None, freq_path=None, transcripts_data={}): + veps = hl.read_table(vepped_path) + freqs = hl.read_table(freq_path) + # Drop all consequences except for gene RNU4ATAC and transcript ENST00000580972 + veps = veps.filter(veps.vep.transcript_consequences.any(lambda tc: tc.gene_symbol == "RNU4ATAC")) + veps = veps.annotate( + vep=veps.vep.annotate( + transcript_consequences=veps.vep.transcript_consequences.filter( + lambda tc: tc.transcript_id == "ENST00000580972" + ) + ) + ) + veps = veps.filter(veps.vep.transcript_consequences.length() > 0) + veps = annotate_transcript_consequences_in_table(veps, transcripts_data=transcripts_data) + + # We filter the data again here because annotate_transcript_consequences_in_table removes consequences with unimportant consequences terms + veps = veps.filter(veps.transcript_consequences.length() > 0) + veps = veps.annotate( + transcript_consequences=veps.transcript_consequences.map( + lambda tc: tc.annotate( + transcript_version="2", + gene_version="2", + is_mane_select=False, + is_mane_select_version=False, + refseq_id=hl.null(hl.tstr), + refseq_version=hl.null(hl.tstr), + ) + ) + ) + veps = veps.annotate( + transcript_consequences=veps.transcript_consequences.map( + lambda tc: tc.drop("polyphen_prediction", "sift_prediction") + ) + ) + + freqs = freqs.drop("transcript_consequences") + veps = veps.join(freqs) + + # Include just consequences and index fields + veps = veps.select(veps.variant_id, veps.rsids, veps.caid, veps.vrs, veps.transcript_consequences) + return veps diff --git a/data-pipeline/src/data_pipeline/data_types/variant/transcript_consequence/annotate_transcript_consequences.py b/data-pipeline/src/data_pipeline/data_types/variant/transcript_consequence/annotate_transcript_consequences.py index 1ec656b03..103d876ce 100644 --- a/data-pipeline/src/data_pipeline/data_types/variant/transcript_consequence/annotate_transcript_consequences.py +++ b/data-pipeline/src/data_pipeline/data_types/variant/transcript_consequence/annotate_transcript_consequences.py @@ -3,13 +3,18 @@ from .hgvs import hgvsp_from_consequence_amino_acids from .vep import consequence_term_rank - OMIT_CONSEQUENCE_TERMS = hl.set(["upstream_gene_variant", "downstream_gene_variant"]) +# ruff doesn't like explicit comparisons to None, but we need them in here, so: +# ruff: noqa: E711 + -def annotate_transcript_consequences(variants_path, transcripts_path, mane_transcripts_path=None): +def annotate_transcript_consequences(variants_path, transcripts_path=None, mane_transcripts_path=None): ds = hl.read_table(variants_path) + return annotate_transcript_consequences_in_table(ds, transcripts_path, mane_transcripts_path) + +def annotate_transcript_consequences_in_table(ds, transcripts_path=None, mane_transcripts_path=None): most_severe_consequence = ds.vep.most_severe_consequence transcript_consequences = ds.vep.transcript_consequences @@ -62,26 +67,25 @@ def annotate_transcript_consequences(variants_path, transcripts_path, mane_trans transcript_consequences = transcript_consequences.map(lambda c: c.select(*consequences)) - transcripts = hl.read_table(transcripts_path) - - # TODO: This can potentially be improved by removing Table.collect - # See https://hail.zulipchat.com/#narrow/stream/123010-Hail-0.2E2.20support/topic/Optimize.20annotation.20with.20small.20dataset - # and https://github.com/Nealelab/ukb_common/blob/ad94d20f8c9f3b711e40a473425925775f0b1f30/utils/generic.py#L18 - transcript_info = hl.dict( - [ - (row.transcript_id, row.transcript_info) - for row in transcripts.select( - transcript_info=hl.struct( - transcript_version=transcripts.transcript_version, - gene_version=transcripts.gene.gene_version, - ) - ).collect() - ] - ) - - transcript_consequences = transcript_consequences.map( - lambda csq: csq.annotate(**transcript_info.get(csq.transcript_id)) - ) + if transcripts_path != None: + transcripts = hl.read_table(transcripts_path) + # TODO: This can potentially be improved by removing Table.collect + # See https://hail.zulipchat.com/#narrow/stream/123010-Hail-0.2E2.20support/topic/Optimize.20annotation.20with.20small.20dataset + # and https://github.com/Nealelab/ukb_common/blob/ad94d20f8c9f3b711e40a473425925775f0b1f30/utils/generic.py#L18 + transcript_info = hl.dict( + [ + (row.transcript_id, row.transcript_info) + for row in transcripts.select( + transcript_info=hl.struct( + transcript_version=transcripts.transcript_version, + gene_version=transcripts.gene.gene_version, + ) + ).collect() + ] + ) + transcript_consequences = transcript_consequences.map( + lambda csq: csq.annotate(**transcript_info.get(csq.transcript_id)) + ) if mane_transcripts_path: mane_transcripts = hl.read_table(mane_transcripts_path) diff --git a/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py b/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py index 680a96f41..acdbe8be2 100644 --- a/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py +++ b/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py @@ -143,6 +143,30 @@ def add_liftover_document_id(ds): "block_size": 1_000, }, }, + "gnomad_v4_variant_patches": { + "get_table": lambda: subset_table( + add_variant_document_id( + hl.read_table( + "gs://gnomad-browser-data-pipeline/phil-scratch/output/gnomad_v4/gnomad_v4_variants_patched.ht" + ) + ) + ), + "args": { + "index": "gnomad_v4_variants_patches", + "index_fields": [ + "document_id", + "variant_id", + "rsids", + "caid", + "locus", + "transcript_consequences.gene_id", + "transcript_consequences.transcript_id", + "vrs.alt.allele_id", + ], + "id_field": "document_id", + "block_size": 1_000, + }, + }, "gnomad_v4_exome_coverage": { "get_table": lambda: subset_table( hl.read_table(gnomad_v4_coverage_pipeline.get_output("exome_coverage").get_output_path()) diff --git a/data-pipeline/src/data_pipeline/pipelines/variant_patches.py b/data-pipeline/src/data_pipeline/pipelines/variant_patches.py new file mode 100644 index 000000000..0762e6063 --- /dev/null +++ b/data-pipeline/src/data_pipeline/pipelines/variant_patches.py @@ -0,0 +1,20 @@ +from data_pipeline.pipeline import Pipeline, run_pipeline + +from data_pipeline.data_types.variant.patch_rnu4atac_variants import patch_rnu4atac_variants + +pipeline = Pipeline() + +pipeline.add_task( + "patch_rnu4atac_variants", + patch_rnu4atac_variants, + "/gnomad_v4/gnomad_v4_variants_patched.ht", + { + "vepped_path": "gs://gnomad-v4-data-pipeline/inputs/secondary-analyses/gnomad_v4.1.RNU4ATAC.vep115.ht", + "freq_path": "gs://gnomad-v4-data-pipeline/output/gnomad_v4/gnomad_v4_variants_annotated_4.ht", + }, +) + +pipeline.set_outputs({"variant_patches": "patch_rnu4atac_variants"}) + +if __name__ == "__main__": + run_pipeline(pipeline) From 517188951144b6e909b7a82eac4e8d0ecae8f8ca Mon Sep 17 00:00:00 2001 From: Phil Darnowsky Date: Wed, 15 Oct 2025 14:31:33 -0400 Subject: [PATCH 4/9] Use RNU4ATAC variant patches in API --- graphql-api/src/queries/gene-queries.ts | 17 +- .../queries/helpers/elasticsearch-helpers.ts | 16 ++ .../gnomad-v4-variant-queries.ts | 247 ++++++++++++------ 3 files changed, 182 insertions(+), 98 deletions(-) diff --git a/graphql-api/src/queries/gene-queries.ts b/graphql-api/src/queries/gene-queries.ts index 57b05e986..51e28bdaf 100644 --- a/graphql-api/src/queries/gene-queries.ts +++ b/graphql-api/src/queries/gene-queries.ts @@ -1,7 +1,7 @@ import elasticsearch from '@elastic/elasticsearch' import { withCache } from '../cache' -import { fetchAllSearchResults } from './helpers/elasticsearch-helpers' +import { fetchAllSearchResultsFromMultipleIndices } from './helpers/elasticsearch-helpers' import { ReferenceGenome } from '@gnomad/dataset-metadata/metadata' import { LimitedElasticClient, GetResponse, SearchResponse, SearchHit } from '../elasticsearch' @@ -130,21 +130,6 @@ export const fetchGenesByRegion = async ( return mergedHits.map((hit) => hit._source.value) } -const fetchAllSearchResultsFromMultipleIndices = async ( - esClient: LimitedElasticClient, - indices: string[], - searchParams: elasticsearch.RequestParams.Search -) => { - const requests = indices.map((index) => - fetchAllSearchResults(esClient, { - index, - type: '_doc', - ...searchParams, - }) - ) - return Promise.all(requests) -} - const searchMultipleIndices = async ( esClient: LimitedElasticClient, indices: string[], diff --git a/graphql-api/src/queries/helpers/elasticsearch-helpers.ts b/graphql-api/src/queries/helpers/elasticsearch-helpers.ts index ee70cb1fa..a18f237cb 100644 --- a/graphql-api/src/queries/helpers/elasticsearch-helpers.ts +++ b/graphql-api/src/queries/helpers/elasticsearch-helpers.ts @@ -1,3 +1,4 @@ +import elasticsearch from '@elastic/elasticsearch' import { LimitedElasticClient, SearchResponse, SearchHit } from '../../elasticsearch' /** @@ -43,6 +44,21 @@ export const fetchAllSearchResults = async (client: LimitedElasticClient, search return allResults } +export const fetchAllSearchResultsFromMultipleIndices = async ( + esClient: LimitedElasticClient, + indices: string[], + searchParams: elasticsearch.RequestParams.Search +) => { + const requests = indices.map((index) => + fetchAllSearchResults(esClient, { + index, + type: '_doc', + ...searchParams, + }) + ) + return Promise.all(requests) +} + // Retrieve index metadata set by data pipeline export const fetchIndexMetadata = async (esClient: any, index: any) => { const response = await esClient.indices.getMapping({ diff --git a/graphql-api/src/queries/variant-datasets/gnomad-v4-variant-queries.ts b/graphql-api/src/queries/variant-datasets/gnomad-v4-variant-queries.ts index 46955fb37..5d96a23a4 100644 --- a/graphql-api/src/queries/variant-datasets/gnomad-v4-variant-queries.ts +++ b/graphql-api/src/queries/variant-datasets/gnomad-v4-variant-queries.ts @@ -5,7 +5,10 @@ import { isRsId } from '@gnomad/identifiers' import { UserVisibleError } from '../../errors' import { fetchLocalAncestryPopulationsByVariant } from '../local-ancestry-queries' -import { fetchAllSearchResults } from '../helpers/elasticsearch-helpers' +import { + fetchAllSearchResults, + fetchAllSearchResultsFromMultipleIndices, +} from '../helpers/elasticsearch-helpers' import { mergeOverlappingRegions } from '../helpers/region-helpers' import { fetchLofCurationResultsByVariant, @@ -16,10 +19,67 @@ import { import { getFlagsForContext } from './shared/flags' import { getConsequenceForContext } from './shared/transcriptConsequence' import largeGenes from '../helpers/large-genes' +import { LimitedElasticClient, SearchResponse } from '../../elasticsearch' const GNOMAD_V4_VARIANT_INDEX = 'gnomad_v4_variants' +const GNOMAD_V4_VARIANT_INDEX_PATCHES = 'gnomad_v4_variants_patches-2025-10-14--20-02' type Subset = 'all' | 'non_ukb' +type ESTranscriptConsequence = { + biotype: string + consequence_terms: string[] + gene_id: string + gene_symbol: string + gene_version: string + is_canonical: boolean + major_consequence: string + transcript_id: string + transcript_version: string +} +type ESPatch = { + variant_id: string + transcript_consequences: ESTranscriptConsequence[] +} + +const mergeTranscriptConsequences = ( + transcriptConsequences: ESTranscriptConsequence[], + patchedTranscriptConsequences?: ESTranscriptConsequence[] | null +) => { + if (!patchedTranscriptConsequences) { + return transcriptConsequences + } + + const result: ESTranscriptConsequence[] = [] + transcriptConsequences.forEach((csq) => { + const patchedConsequence = patchedTranscriptConsequences!.find( + (patchedCsq) => patchedCsq.transcript_id === csq.transcript_id + ) + result.push(patchedConsequence || csq) + }) + return result +} + +const mergeTranscriptConsequencesInVariant = ( + variant: { variant_id: string; transcript_consequences: ESTranscriptConsequence[] }, + patches: ESPatch[] +) => { + const matchingPatch = patches.find((patch) => patch.variant_id === variant.variant_id) + if (matchingPatch === undefined) { + return variant + } + + return { + ...variant, + transcript_consequences: mergeTranscriptConsequences( + variant.transcript_consequences, + matchingPatch.transcript_consequences + ), + } +} + +const hasPositiveAC = (variant: any, subset: string) => + (variant.genome.freq.all && variant.genome.freq.all.ac_raw > 0) || + variant.exome.freq[subset].ac_raw > 0 // ================================================================================================ // Count query @@ -69,30 +129,50 @@ const chooseIdField = (variantId: string) => { return 'variant_id' } -const fetchVariantById = async (esClient: any, variantId: any, subset: Subset) => { +const fetchVariantById = async ( + esClient: LimitedElasticClient, + variantId: string, + subset: Subset +) => { const idField = chooseIdField(variantId) - const response = await esClient.search({ + const query = { + bool: { + filter: { term: { [idField]: variantId } }, + }, + } + + const variantResponsePromise = esClient.search({ index: GNOMAD_V4_VARIANT_INDEX, body: { - query: { - bool: { - filter: { term: { [idField]: variantId } }, - }, - }, + query, }, size: 1, - }) + }) as Promise + const patchResponsePromise = esClient.search({ + index: GNOMAD_V4_VARIANT_INDEX_PATCHES, + body: { query }, + size: 1, + }) as Promise - if (response.body.hits.total.value === 0) { + const variantResponse = await variantResponsePromise + + if (variantResponse.body.hits.total.value === 0) { throw new UserVisibleError('Variant not found') } // An rsID may match multiple variants - if (response.body.hits.total.value > 1) { + if (variantResponse.body.hits.total.value > 1) { throw new UserVisibleError('Multiple variants found, query using variant ID to select one.') } - const variant = response.body.hits.hits[0]._source.value + const patchResponse = await patchResponsePromise + const patchedTranscriptConsequences = + patchResponse.body.hits.total.value > 0 + ? (patchResponse.body.hits.hits[0]._source.value + .transcript_consequences as ESTranscriptConsequence[]) + : null + + const variant = variantResponse.body.hits.hits[0]._source.value const subsetGenomeFreq = variant.genome.freq.all || {} const subsetJointFreq = variant.joint.freq[subset] || {} @@ -244,9 +324,10 @@ const fetchVariantById = async (esClient: any, variantId: any, subset: Subset) = flags: variantFlags, // TODO: Include RefSeq transcripts once the browser supports them. lof_curations: lofCurationResults, - transcript_consequences: (variant.transcript_consequences || []).filter((csq: any) => - csq.gene_id.startsWith('ENSG') - ), + transcript_consequences: mergeTranscriptConsequences( + variant.transcript_consequences, + patchedTranscriptConsequences + ).filter((csq: any) => csq.gene_id.startsWith('ENSG')), in_silico_predictors: inSilicoPredictorsList, } @@ -454,28 +535,30 @@ const fetchVariantsByGene = async (esClient: any, gene: any, subset: Subset) => }, })) - const hits = await fetchAllSearchResults(esClient, { - index: GNOMAD_V4_VARIANT_INDEX, - type: '_doc', - size: pageSize, - _source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset), - body: { - query: { - bool: { - filter: [{ term: { gene_id: gene.gene_id } }, { bool: { should: rangeQueries } }], + const [hits, consequencePatchHits] = await fetchAllSearchResultsFromMultipleIndices( + esClient, + [GNOMAD_V4_VARIANT_INDEX, GNOMAD_V4_VARIANT_INDEX_PATCHES], + { + type: '_doc', + size: pageSize, + _source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset), + body: { + query: { + bool: { + filter: [{ term: { gene_id: gene.gene_id } }, { bool: { should: rangeQueries } }], + }, }, + sort: [{ 'locus.position': { order: 'asc' } }], }, - sort: [{ 'locus.position': { order: 'asc' } }], - }, - }) + } + ) + + const consequencePatches: ESPatch[] = consequencePatchHits.map((hit) => hit._source.value) const shapedHits = hits .map((hit: any) => hit._source.value) - .filter( - (variant: any) => - (variant.genome.freq.all && variant.genome.freq.all.ac_raw > 0) || - variant.exome.freq[subset].ac_raw > 0 - ) + .filter((variant) => hasPositiveAC(variant, subset)) + .map((variant) => mergeTranscriptConsequencesInVariant(variant, consequencePatches)) .map(shapeVariantSummary(subset, { type: 'gene', geneId: gene.gene_id })) const lofCurationResults = await fetchLofCurationResultsByGene(esClient, 'v4', gene) @@ -507,38 +590,40 @@ const fetchVariantsByRegion = async (esClient: any, region: any, subset: Subset) const genomeSubset = 'all' const jointSubset = 'all' - const hits = await fetchAllSearchResults(esClient, { - index: GNOMAD_V4_VARIANT_INDEX, - type: '_doc', - size: 10000, - _source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset), - body: { - query: { - bool: { - filter: [ - { term: { 'locus.contig': `chr${region.chrom}` } }, - { - range: { - 'locus.position': { - gte: region.start, - lte: region.stop, + const [hits, consequencePatchHits] = await fetchAllSearchResultsFromMultipleIndices( + esClient, + [GNOMAD_V4_VARIANT_INDEX, GNOMAD_V4_VARIANT_INDEX_PATCHES], + { + type: '_doc', + size: 10000, + _source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset), + body: { + query: { + bool: { + filter: [ + { term: { 'locus.contig': `chr${region.chrom}` } }, + { + range: { + 'locus.position': { + gte: region.start, + lte: region.stop, + }, }, }, - }, - ], + ], + }, }, + sort: [{ 'locus.position': { order: 'asc' } }], }, - sort: [{ 'locus.position': { order: 'asc' } }], - }, - }) + } + ) + + const consequencePatches: ESPatch[] = consequencePatchHits.map((hit) => hit._source.value) const variants = hits .map((hit: any) => hit._source.value) - .filter( - (variant: any) => - (variant.genome.freq.all && variant.genome.freq.all.ac_raw > 0) || - variant.exome.freq[subset].ac_raw > 0 - ) + .filter((variant) => hasPositiveAC(variant, subset)) + .map((variant) => mergeTranscriptConsequencesInVariant(variant, consequencePatches)) .map(shapeVariantSummary(subset, { type: 'region' })) const lofCurationResults = await fetchLofCurationResultsByRegion(esClient, 'v4', region) @@ -599,31 +684,33 @@ const fetchVariantsByTranscript = async (esClient: any, transcript: any, subset: }, })) - const hits = await fetchAllSearchResults(esClient, { - index: GNOMAD_V4_VARIANT_INDEX, - type: '_doc', - size: 10000, - _source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset), - body: { - query: { - bool: { - filter: [ - { term: { transcript_id: transcript.transcript_id } }, - { bool: { should: rangeQueries } }, - ], + const [hits, consequencePatchHits] = await fetchAllSearchResultsFromMultipleIndices( + esClient, + [GNOMAD_V4_VARIANT_INDEX, GNOMAD_V4_VARIANT_INDEX_PATCHES], + { + type: '_doc', + size: 10000, + _source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset), + body: { + query: { + bool: { + filter: [ + { term: { transcript_id: transcript.transcript_id } }, + { bool: { should: rangeQueries } }, + ], + }, }, + sort: [{ 'locus.position': { order: 'asc' } }], }, - sort: [{ 'locus.position': { order: 'asc' } }], - }, - }) + } + ) + + const consequencePatches: ESPatch[] = consequencePatchHits.map((hit) => hit._source.value) return hits .map((hit: any) => hit._source.value) - .filter( - (variant: any) => - (variant.genome.freq.all && variant.genome.freq.all.ac_raw > 0) || - variant.exome.freq[subset].ac_raw > 0 - ) + .filter((variant) => hasPositiveAC(variant, subset)) + .map((variant) => mergeTranscriptConsequencesInVariant(variant, consequencePatches)) .map( shapeVariantSummary(subset, { type: 'transcript', transcriptId: transcript.transcript_id }) ) @@ -665,11 +752,7 @@ const fetchMatchingVariants = async ( return hits .map((hit: any) => hit._source.value) - .filter( - (variant: any) => - (variant.genome.freq.all && variant.genome.freq.all.ac_raw > 0) || - variant.exome.freq[subset].ac_raw > 0 - ) + .filter((variant) => hasPositiveAC(variant, subset)) .map((variant: any) => ({ variant_id: variant.variant_id, })) From 12a856344cc76ab071b2a5d2fc2716e98ba0311a Mon Sep 17 00:00:00 2001 From: Phil Darnowsky Date: Fri, 17 Oct 2025 14:20:49 -0400 Subject: [PATCH 5/9] Build RNU4ATAC transcript patches and export to ES --- .../pipelines/export_to_elasticsearch.py | 13 +++++++++ .../pipelines/transcript_patches.py | 28 +++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 data-pipeline/src/data_pipeline/pipelines/transcript_patches.py diff --git a/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py b/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py index acdbe8be2..1446aa58e 100644 --- a/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py +++ b/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py @@ -42,6 +42,8 @@ from data_pipeline.pipelines.gnomad_v4_lof_curation_results import pipeline as gnomad_v4_lof_curation_results_pipeline from data_pipeline.pipelines.gene_patches import pipeline as gnomad_v4_gene_patches +from data_pipeline.pipelines.transcript_patches import pipeline as gnomad_v4_transcript_patches + logger = logging.getLogger("gnomad_data_pipeline") @@ -119,6 +121,17 @@ def add_liftover_document_id(ds): "block_size": 1_000, }, }, + "transcripts_grch38_patched": { + "get_table": lambda: hl.read_table( + gnomad_v4_transcript_patches.get_output("transcripts_grch38_patched").get_output_path() + ), + "args": { + "index": "transcripts_grch38_patched", + "index_fields": ["transcript_id"], + "id_field": "transcript_id", + "block_size": 1_000, + }, + }, ############################################################################################################## # gnomAD v4 ############################################################################################################## diff --git a/data-pipeline/src/data_pipeline/pipelines/transcript_patches.py b/data-pipeline/src/data_pipeline/pipelines/transcript_patches.py new file mode 100644 index 000000000..fe332d898 --- /dev/null +++ b/data-pipeline/src/data_pipeline/pipelines/transcript_patches.py @@ -0,0 +1,28 @@ +from data_pipeline.pipeline import Pipeline, run_pipeline + +from data_pipeline.data_types.transcript import extract_transcripts +from data_pipeline.helpers import annotate_table + +pipeline = Pipeline() + +pipeline.add_task( + "extract_patched_transcripts", + extract_transcripts, + "/transcripts/transcripts_grch38_patched_base.ht", + {"genes_path": "gs://gnomad-browser-data-pipeline/phil-scratch/output/genes/genes_grch38_patched.ht"}, +) + +pipeline.add_task( + "annotate_patched_transcripts", + annotate_table, + "/transcripts/transcripts_grch38_annotated_1.ht", + { + "table_path": pipeline.get_task("extract_patched_transcripts"), + "gnomad_constraint": "gs://gnomad-v4-data-pipeline/output/constraint/gnomad_v4_constraint.ht", + }, +) + +pipeline.set_outputs({"transcripts_grch38_patched": "annotate_patched_transcripts"}) + +if __name__ == "__main__": + run_pipeline(pipeline) From 427892df28b88327ade578445c7e0f90d3139b68 Mon Sep 17 00:00:00 2001 From: Phil Darnowsky Date: Fri, 17 Oct 2025 14:44:39 -0400 Subject: [PATCH 6/9] Use RNU4ATAC transcript patches in API --- graphql-api/src/queries/gene-queries.ts | 17 ++---- .../queries/helpers/elasticsearch-helpers.ts | 15 ++++- graphql-api/src/queries/transcript-queries.ts | 55 ++++++++++++------- 3 files changed, 54 insertions(+), 33 deletions(-) diff --git a/graphql-api/src/queries/gene-queries.ts b/graphql-api/src/queries/gene-queries.ts index 51e28bdaf..cc8b85a96 100644 --- a/graphql-api/src/queries/gene-queries.ts +++ b/graphql-api/src/queries/gene-queries.ts @@ -1,7 +1,10 @@ import elasticsearch from '@elastic/elasticsearch' import { withCache } from '../cache' -import { fetchAllSearchResultsFromMultipleIndices } from './helpers/elasticsearch-helpers' +import { + fetchAllSearchResultsFromMultipleIndices, + getFromMultipleIndices, +} from './helpers/elasticsearch-helpers' import { ReferenceGenome } from '@gnomad/dataset-metadata/metadata' import { LimitedElasticClient, GetResponse, SearchResponse, SearchHit } from '../elasticsearch' @@ -38,17 +41,7 @@ const _fetchGeneById = async ( throw err }) as Promise ) - return Promise.all(requests).then( - (responses) => { - const responsesWithValue = responses.filter((response) => response !== null) - return responsesWithValue.length > 0 - ? responsesWithValue[responsesWithValue.length - 1]!.body._source.value - : null - }, - (err) => { - throw err - } - ) + return getFromMultipleIndices(requests) } export const fetchGeneById = withCache( diff --git a/graphql-api/src/queries/helpers/elasticsearch-helpers.ts b/graphql-api/src/queries/helpers/elasticsearch-helpers.ts index a18f237cb..ab946c26c 100644 --- a/graphql-api/src/queries/helpers/elasticsearch-helpers.ts +++ b/graphql-api/src/queries/helpers/elasticsearch-helpers.ts @@ -1,5 +1,5 @@ import elasticsearch from '@elastic/elasticsearch' -import { LimitedElasticClient, SearchResponse, SearchHit } from '../../elasticsearch' +import { LimitedElasticClient, SearchResponse, SearchHit, GetResponse } from '../../elasticsearch' /** * Search and then scroll to retrieve all pages of search results. @@ -69,3 +69,16 @@ export const fetchIndexMetadata = async (esClient: any, index: any) => { // eslint-disable-next-line no-underscore-dangle return Object.values(response.body)[0].mappings._meta } + +export const getFromMultipleIndices = (requests: Promise[]) => + Promise.all(requests).then( + (responses) => { + const responsesWithValue = responses.filter((response) => response !== null) + return responsesWithValue.length > 0 + ? responsesWithValue[responsesWithValue.length - 1]!.body._source.value + : null + }, + (err) => { + throw err + } + ) diff --git a/graphql-api/src/queries/transcript-queries.ts b/graphql-api/src/queries/transcript-queries.ts index e36b22969..e60ffd726 100644 --- a/graphql-api/src/queries/transcript-queries.ts +++ b/graphql-api/src/queries/transcript-queries.ts @@ -1,24 +1,39 @@ -const TRANSCRIPT_INDICES = { - GRCh37: 'transcripts_grch37', - GRCh38: 'transcripts_grch38', +import { ReferenceGenome } from '@gnomad/dataset-metadata/metadata' +import { GetResponse, LimitedElasticClient } from '../elasticsearch' +import { getFromMultipleIndices } from './helpers/elasticsearch-helpers' + +type TranscriptIndex = + | 'transcripts_grch37' + | 'transcripts_grch38' + | 'transcripts_grch38_patched-2025-10-23--19-36' + +const TRANSCRIPT_INDICES: Record = { + GRCh37: ['transcripts_grch37'], + GRCh38: ['transcripts_grch38', 'transcripts_grch38_patched-2025-10-23--19-36'], } -export const fetchTranscriptById = async (es: any, transcriptId: any, referenceGenome: any) => { - try { - const response = await es.get({ - // @ts-expect-error TS(7053) FIXME: Element implicitly has an 'any' type because expre... Remove this comment to see the full error message - index: TRANSCRIPT_INDICES[referenceGenome], - type: '_doc', - id: transcriptId, - }) +export const fetchTranscriptById = async ( + esClient: LimitedElasticClient, + transcriptId: string, + referenceGenome: ReferenceGenome +) => { + const indices = TRANSCRIPT_INDICES[referenceGenome] + const requests = indices.map( + (index) => + esClient + .get({ + index, + type: '_doc', + id: transcriptId, + }) + .catch((err) => { + // meta will not be present if the request times out in the queue before reaching ES + if (err.meta && err.meta.body.found === false) { + return null + } + throw err + }) as Promise + ) - return response.body._source.value - } catch (err) { - // meta will not be present if the request times out in the queue before reaching ES - // @ts-expect-error TS(2571) FIXME: Object is of type 'unknown'. - if (err.meta && err.meta.body.found === false) { - return null - } - throw err - } + return getFromMultipleIndices(requests) } From 555cdd3a72e0fa9fcb6173a9948e689bc5d021df Mon Sep 17 00:00:00 2001 From: Phil Darnowsky Date: Thu, 23 Oct 2025 11:02:16 -0400 Subject: [PATCH 7/9] Add VEP 115 warning to RNU4ATAC --- browser/src/GenePage/GeneFlags.spec.tsx | 8 +++++++ browser/src/GenePage/GeneFlags.tsx | 12 +++++++++++ .../__snapshots__/GeneFlags.spec.tsx.snap | 21 +++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/browser/src/GenePage/GeneFlags.spec.tsx b/browser/src/GenePage/GeneFlags.spec.tsx index 45d29ef28..b10c4400a 100644 --- a/browser/src/GenePage/GeneFlags.spec.tsx +++ b/browser/src/GenePage/GeneFlags.spec.tsx @@ -29,4 +29,12 @@ describe('GeneFlags', () => { expect(tree).toMatchSnapshot() }) + + test('renders VEP 115 warning for RNU4ATAC', () => { + const testGene = geneFactory.build({ symbol: 'RNU4ATAC', reference_genome: 'GRCh38' }) + + const tree = renderer.create() + + expect(tree).toMatchSnapshot() + }) }) diff --git a/browser/src/GenePage/GeneFlags.tsx b/browser/src/GenePage/GeneFlags.tsx index 3150800a8..dcf08e848 100644 --- a/browser/src/GenePage/GeneFlags.tsx +++ b/browser/src/GenePage/GeneFlags.tsx @@ -13,11 +13,15 @@ type Props = { } const allOfUsCMRGGenes = ['CBS', 'KCNE1', 'CRYAA'] +const vep115Genes = ['RNU4ATAC'] const GeneFlags = ({ gene }: Props) => { const shouldDisplayCMRGWarning = gene.reference_genome === 'GRCh38' && allOfUsCMRGGenes.includes(gene.symbol) + const shouldDisplayVEP115Warning = + gene.reference_genome === 'GRCh38' && vep115Genes.includes(gene.symbol) + return ( <> {shouldDisplayCMRGWarning && ( @@ -35,6 +39,14 @@ const GeneFlags = ({ gene }: Props) => { ) callset to remedy this issue in the future.

)} + {shouldDisplayVEP115Warning && ( +

+ Warning MANE Select and variant consequence information in + this gene were annotated using Ensembl VEP version 115 (GENCODE v49). For more + information, see our{' '} + help page. +

+ )} {gene.flags.includes('chip') && (

Note Analysis of allele balance and age data indicates that diff --git a/browser/src/GenePage/__snapshots__/GeneFlags.spec.tsx.snap b/browser/src/GenePage/__snapshots__/GeneFlags.spec.tsx.snap index 72aeb4bab..4d1b9affe 100644 --- a/browser/src/GenePage/__snapshots__/GeneFlags.spec.tsx.snap +++ b/browser/src/GenePage/__snapshots__/GeneFlags.spec.tsx.snap @@ -32,6 +32,27 @@ exports[`GeneFlags renders CMRG flag if one of 3 relevant genes 1`] = `

`; +exports[`GeneFlags renders VEP 115 warning for RNU4ATAC 1`] = ` +

+ + Warning + + MANE Select and variant consequence information in this gene were annotated using Ensembl VEP version 115 (GENCODE v49). For more information, see our + + + help page + + . +

+`; + exports[`GeneFlags renders chip flag if present on gene 1`] = `

Date: Thu, 23 Oct 2025 12:22:37 -0400 Subject: [PATCH 8/9] Correct typo in identifier --- browser/src/GenePage/GeneInfo.tsx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/browser/src/GenePage/GeneInfo.tsx b/browser/src/GenePage/GeneInfo.tsx index ccf461f22..526a9f9f6 100644 --- a/browser/src/GenePage/GeneInfo.tsx +++ b/browser/src/GenePage/GeneInfo.tsx @@ -23,12 +23,12 @@ type ManeSelectTranscriptIdProps = { } const ManeSelectTranscriptId = ({ gene }: ManeSelectTranscriptIdProps) => { - const gencodeVersionOfManeSelectTransript = gene.transcripts.find( + const gencodeVersionOfManeSelectTranscript = gene.transcripts.find( (transcript: any) => transcript.transcript_id === gene.mane_select_transcript.ensembl_id ) const shouldLinkToTranscriptPage = - gencodeVersionOfManeSelectTransript && - gencodeVersionOfManeSelectTransript.transcript_version === + gencodeVersionOfManeSelectTranscript && + gencodeVersionOfManeSelectTranscript.transcript_version === gene.mane_select_transcript.ensembl_version return ( From 4a4e2b44635eb66a45ff690c92207720a8365fe6 Mon Sep 17 00:00:00 2001 From: Phil Darnowsky Date: Thu, 23 Oct 2025 12:27:01 -0400 Subject: [PATCH 9/9] Tighten types around MANE select ID --- browser/src/GenePage/GeneInfo.tsx | 49 ++++++++++++++++++------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/browser/src/GenePage/GeneInfo.tsx b/browser/src/GenePage/GeneInfo.tsx index 526a9f9f6..a3bbaa35e 100644 --- a/browser/src/GenePage/GeneInfo.tsx +++ b/browser/src/GenePage/GeneInfo.tsx @@ -8,39 +8,40 @@ import Link from '../Link' import GeneReferences from './GeneReferences' type ManeSelectTranscriptIdProps = { - gene: { - mane_select_transcript: { - ensembl_id: string - ensembl_version: string - refseq_id: string - refseq_version: string - } - transcripts: { - transcript_id: string - transcript_version: string - }[] + mane_select_transcript: { + ensembl_id: string + ensembl_version: string + refseq_id: string + refseq_version: string } + transcripts: { + transcript_id: string + transcript_version: string + }[] } -const ManeSelectTranscriptId = ({ gene }: ManeSelectTranscriptIdProps) => { - const gencodeVersionOfManeSelectTranscript = gene.transcripts.find( - (transcript: any) => transcript.transcript_id === gene.mane_select_transcript.ensembl_id +const ManeSelectTranscriptId = ({ + mane_select_transcript, + transcripts, +}: ManeSelectTranscriptIdProps) => { + const gencodeVersionOfManeSelectTranscript = transcripts.find( + (transcript) => transcript.transcript_id === mane_select_transcript.ensembl_id ) const shouldLinkToTranscriptPage = gencodeVersionOfManeSelectTranscript && gencodeVersionOfManeSelectTranscript.transcript_version === - gene.mane_select_transcript.ensembl_version + mane_select_transcript.ensembl_version return ( {shouldLinkToTranscriptPage ? ( - - {gene.mane_select_transcript.ensembl_id}.{gene.mane_select_transcript.ensembl_version} + + {mane_select_transcript.ensembl_id}.{mane_select_transcript.ensembl_version} ) : ( - `${gene.mane_select_transcript.ensembl_id}.${gene.mane_select_transcript.ensembl_version}` + `${mane_select_transcript.ensembl_id}.${mane_select_transcript.ensembl_version}` )}{' '} - / {gene.mane_select_transcript.refseq_id}.{gene.mane_select_transcript.refseq_version} + / {mane_select_transcript.refseq_id}.{mane_select_transcript.refseq_version} ) } @@ -109,8 +110,14 @@ const GeneInfo = ({ gene }: GeneInfoProps) => { } > - {/* @ts-expect-error TS(2322) FIXME: Type '{ gene_id: string; gene_version: string; sym... Remove this comment to see the full error message */} - {gene.mane_select_transcript ? : 'Not available'} + {gene.mane_select_transcript ? ( + + ) : ( + 'Not available' + )} )}