From ba82829a4b37c48d95cefffc3ae878b0986d261a Mon Sep 17 00:00:00 2001
From: Phil Darnowsky <pdarnows@broadinstitute.org>
Date: Wed, 10 Sep 2025 10:58:37 -0400
Subject: [PATCH 1/9] Build RNU4ATAC gene patches and export to ES

---
 .../src/data_pipeline/data_types/gene.py      | 60 +++++++++++++++++++
 .../pipelines/export_to_elasticsearch.py      | 10 ++++
 .../data_pipeline/pipelines/gene_patches.py   | 19 ++++++
 3 files changed, 89 insertions(+)
 create mode 100644 data-pipeline/src/data_pipeline/pipelines/gene_patches.py

diff --git a/data-pipeline/src/data_pipeline/data_types/gene.py b/data-pipeline/src/data_pipeline/data_types/gene.py
index cfa469b19..97de00c33 100644
--- a/data-pipeline/src/data_pipeline/data_types/gene.py
+++ b/data-pipeline/src/data_pipeline/data_types/gene.py
@@ -117,6 +117,66 @@ def reject_par_y_genes(genes_path=None):
     return genes
 
 
+def patch_rnu4atac(genes_path=None):
+    """Return the RNU4ATAC row of the genes table with hard-coded replacement coordinates and transcript."""
+    gene_symbol = "RNU4ATAC"
+    genes = hl.read_table(genes_path)
+    genes = genes.filter(genes.symbol == gene_symbol)
+    # Replacement GRCh38 coordinates on chr2; both endpoints are included in the interval built below.
+    correct_start = 121530880
+    correct_stop = 121531009
+    correct_start_locus = hl.locus(contig="chr2", pos=correct_start, reference_genome="GRCh38")
+    correct_stop_locus = hl.locus(contig="chr2", pos=correct_stop, reference_genome="GRCh38")
+    correct_xstart = x_position(correct_start_locus)
+    correct_xstop = x_position(correct_stop_locus)
+
+    correct_interval = hl.interval(correct_start_locus, correct_stop_locus, includes_start=True, includes_end=True)
+
+    correct_exon = hl.struct(
+        feature_type="exon", start=correct_start, stop=correct_stop, xstart=correct_xstart, xstop=correct_xstop
+    )
+    # Fields not overridden below are copied from the row's first transcript (IndexError if there is none).
+    incorrect_transcript = genes.take(1)[0].transcripts[0]
+    correct_transcript = hl.struct(
+        interval=correct_interval,
+        transcript_version="2",
+        gene_version="2",
+        start=correct_start,
+        stop=correct_stop,
+        xstart=correct_xstart,
+        xstop=correct_xstop,
+        exons=hl.array([correct_exon]),
+        transcript_id=incorrect_transcript.transcript_id,
+        gene_id=incorrect_transcript.gene_id,
+        chrom=incorrect_transcript.chrom,
+        strand=incorrect_transcript.strand,
+        reference_genome=incorrect_transcript.reference_genome,
+        gtex_tissue_expression=incorrect_transcript.gtex_tissue_expression,
+        refseq_id="NR_023343",
+        refseq_version="3",
+    )
+
+    correct_mane_select_transcript = hl.struct(
+        matched_gene_version="2",
+        ensembl_id="ENST00000580972",
+        ensembl_version="2",
+        refseq_id="NR_023343",
+        refseq_version="3",
+    )
+    # gene_version is a string, matching gene_version / matched_gene_version in the structs above.
+    genes = genes.annotate(
+        gene_version="2",
+        start=correct_start,
+        stop=correct_stop,
+        xstart=correct_xstart,
+        xstop=correct_xstop,
+        exons=[correct_exon],
+        transcripts=[correct_transcript],
+        mane_select_transcript=correct_mane_select_transcript,
+    )
+    return genes
+
+
 ###############################################
 # Transcripts                                 #
 ###############################################
diff --git a/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py b/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py
index bf074c45b..680a96f41 100644
--- a/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py
+++ b/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py
@@ -41,6 +41,7 @@
 from data_pipeline.pipelines.gnomad_v4_cnvs import pipeline as gnomad_v4_cnvs_pipeline
 from data_pipeline.pipelines.gnomad_v4_lof_curation_results import pipeline as gnomad_v4_lof_curation_results_pipeline
 
+from data_pipeline.pipelines.gene_patches import pipeline as gnomad_v4_gene_patches
 
 logger = logging.getLogger("gnomad_data_pipeline")
 
@@ -88,6 +89,15 @@ def add_liftover_document_id(ds):
             "block_size": 200,
         },
     },
+    "gene_patches": {
+        "get_table": lambda: hl.read_table(gnomad_v4_gene_patches.get_output("gene_patches").get_output_path()),
+        "args": {
+            "index": "genes_grch38_patches",
+            "index_fields": ["gene_id", "symbol_upper_case", "search_terms", "xstart", "xstop"],
+            "id_field": "gene_id",
+            "block_size": 200,
+        },
+    },
     ##############################################################################################################
     # Transcripts
     ##############################################################################################################
diff --git a/data-pipeline/src/data_pipeline/pipelines/gene_patches.py b/data-pipeline/src/data_pipeline/pipelines/gene_patches.py
new file mode 100644
index 000000000..8c24ff183
--- /dev/null
+++ b/data-pipeline/src/data_pipeline/pipelines/gene_patches.py
@@ -0,0 +1,19 @@
+import hail as hl
+
+from data_pipeline.pipeline import Pipeline, run_pipeline
+
+from data_pipeline.data_types.gene import patch_rnu4atac
+
+pipeline = Pipeline()
+
+pipeline.add_task(
+    "patch_rnu4atac_grch38",
+    patch_rnu4atac,
+    "/genes/genes_grch38_patched.ht",
+    {"genes_path": "gs://gnomad-v4-data-pipeline/output/genes/genes_grch38_annotated_6.ht"},
+)
+
+pipeline.set_outputs({"gene_patches": "patch_rnu4atac_grch38"})
+
+if __name__ == "__main__":
+    run_pipeline(pipeline)

From 2edf0107875a1592fc486530e64206ce97362975 Mon Sep 17 00:00:00 2001
From: Phil Darnowsky <pdarnows@broadinstitute.org>
Date: Wed, 10 Sep 2025 10:41:00 -0400
Subject: [PATCH 2/9] Use RNU4ATAC gene patches in API

---
 .../data_pipeline/pipelines/gene_patches.py   |   2 -
 graphql-api/src/elasticsearch.ts              |  24 ++-
 graphql-api/src/queries/gene-queries.ts       | 200 +++++++++++++-----
 .../queries/helpers/elasticsearch-helpers.ts  |  23 +-
 4 files changed, 175 insertions(+), 74 deletions(-)

diff --git a/data-pipeline/src/data_pipeline/pipelines/gene_patches.py b/data-pipeline/src/data_pipeline/pipelines/gene_patches.py
index 8c24ff183..022b18613 100644
--- a/data-pipeline/src/data_pipeline/pipelines/gene_patches.py
+++ b/data-pipeline/src/data_pipeline/pipelines/gene_patches.py
@@ -1,5 +1,3 @@
-import hail as hl
-
 from data_pipeline.pipeline import Pipeline, run_pipeline
 
 from data_pipeline.data_types.gene import patch_rnu4atac
diff --git a/graphql-api/src/elasticsearch.ts b/graphql-api/src/elasticsearch.ts
index 7638d66c3..6ebd46380 100644
--- a/graphql-api/src/elasticsearch.ts
+++ b/graphql-api/src/elasticsearch.ts
@@ -82,8 +82,8 @@ const scheduleElasticsearchRequest = (fn: any) => {
 const limitedElastic = {
   indices: elastic.indices,
   clearScroll: elastic.clearScroll.bind(elastic),
-  search: (...args: Parameters<typeof elastic.search>) =>
-    scheduleElasticsearchRequest(() => elastic.search(...args)).then((response) => {
+  search: (args: elasticsearch.RequestParams.Search<any>) =>
+    scheduleElasticsearchRequest(() => elastic.search(args)).then((response) => {
       // @ts-expect-error TS(2571) FIXME: Object is of type 'unknown'.
       if (response.body.timed_out) {
         throw new Error('Elasticsearch search timed out')
@@ -95,8 +95,8 @@ const limitedElastic = {
       }
       return response
     }),
-  scroll: (...args: Parameters<typeof elastic.scroll>) =>
-    scheduleElasticsearchRequest(() => elastic.scroll(...args)).then((response) => {
+  scroll: (args: { scroll: string; scrollId?: string }) =>
+    scheduleElasticsearchRequest(() => elastic.scroll(args)).then((response) => {
       // @ts-expect-error TS(2571) FIXME: Object is of type 'unknown'.
       if (response.body.timed_out) {
         throw new Error('Elasticsearch scroll timed out')
@@ -117,10 +117,22 @@ const limitedElastic = {
       }
       return response
     }),
-  get: (...args: Parameters<typeof elastic.get>) =>
-    scheduleElasticsearchRequest(() => elastic.get(...args)),
+  get: (args: { index: string; type: '_doc'; id: string }) =>
+    scheduleElasticsearchRequest(() => elastic.get(args)),
   mget: (...args: Parameters<typeof elastic.mget>) =>
     scheduleElasticsearchRequest(() => elastic.mget(...args)),
 }
 
+export type LimitedElasticClient = typeof limitedElastic
+
+export type GetResponse = {
+  body: { _source: { value: Record<string, any> } }
+}
+
+export type SearchHit = { _id: string; _source: any }
+
+export type SearchResponse = {
+  body: { hits: { total: { value: number }; hits: SearchHit[] }; _scroll_id?: string }
+}
+
 export { limitedElastic as client }
diff --git a/graphql-api/src/queries/gene-queries.ts b/graphql-api/src/queries/gene-queries.ts
index ac7858c21..57b05e986 100644
--- a/graphql-api/src/queries/gene-queries.ts
+++ b/graphql-api/src/queries/gene-queries.ts
@@ -1,43 +1,69 @@
+import elasticsearch from '@elastic/elasticsearch'
 import { withCache } from '../cache'
 
 import { fetchAllSearchResults } from './helpers/elasticsearch-helpers'
 
-const GENE_INDICES = {
-  GRCh37: 'genes_grch37',
-  GRCh38: 'genes_grch38',
-}
+import { ReferenceGenome } from '@gnomad/dataset-metadata/metadata'
+import { LimitedElasticClient, GetResponse, SearchResponse, SearchHit } from '../elasticsearch'
 
-const _fetchGeneById = async (esClient: any, geneId: any, referenceGenome: any) => {
-  try {
-    const response = await esClient.get({
-      // @ts-expect-error TS(7053) FIXME: Element implicitly has an 'any' type because expre... Remove this comment to see the full error message
-      index: GENE_INDICES[referenceGenome],
-      type: '_doc',
-      id: geneId,
-    })
+type GeneIndex = 'genes_grch37' | 'genes_grch38' | 'genes_grch38_patches-2025-10-23--19-35'
+
+type GeneSearchRegion = { reference_genome: ReferenceGenome; xstart: number; xstop: number }
 
-    return response.body._source.value
-  } catch (err) {
-    // meta will not be present if the request times out in the queue before reaching ES
-    // @ts-expect-error TS(2571) FIXME: Object is of type 'unknown'.
-    if (err.meta && err.meta.body && err.meta.body.found === false) {
-      return null
+const GENE_INDICES: Record<ReferenceGenome, GeneIndex[]> = {
+  // Order matters here: later indices take precedence over earlier
+  GRCh37: ['genes_grch37'],
+  GRCh38: ['genes_grch38', 'genes_grch38_patches-2025-10-23--19-35'],
+}
+
+const _fetchGeneById = async (
+  esClient: LimitedElasticClient,
+  geneId: string,
+  referenceGenome: ReferenceGenome
+) => {
+  const indices = GENE_INDICES[referenceGenome]
+  const requests = indices.map(
+    (index) =>
+      esClient
+        .get({
+          index,
+          type: '_doc',
+          id: geneId,
+        })
+        .catch((err) => {
+          // meta will not be present if the request times out in the queue before reaching ES
+          if (err.meta && err.meta.body && err.meta.body.found === false) {
+            return null
+          }
+          throw err
+        }) as Promise<GetResponse | null>
+  )
+  return Promise.all(requests).then(
+    (responses) => {
+      const responsesWithValue = responses.filter((response) => response !== null)
+      return responsesWithValue.length > 0
+        ? responsesWithValue[responsesWithValue.length - 1]!.body._source.value
+        : null
+    },
+    (err) => {
+      throw err
     }
-    throw err
-  }
+  )
 }
 
 export const fetchGeneById = withCache(
   _fetchGeneById,
-  (_: any, geneId: any, referenceGenome: any) => `gene:${geneId}:${referenceGenome}`,
+  (_: any, geneId: string, referenceGenome: ReferenceGenome) => `gene:${geneId}:${referenceGenome}`,
   { expiration: 86400 }
 )
 
-export const fetchGeneBySymbol = async (esClient: any, geneSymbol: any, referenceGenome: any) => {
-  const response = await esClient.search({
-    // @ts-expect-error TS(7053) FIXME: Element implicitly has an 'any' type because expre... Remove this comment to see the full error message
-    index: GENE_INDICES[referenceGenome],
-    type: '_doc',
+export const fetchGeneBySymbol = async (
+  esClient: LimitedElasticClient,
+  geneSymbol: string,
+  referenceGenome: ReferenceGenome
+) => {
+  const indices = GENE_INDICES[referenceGenome]
+  const responses = await searchMultipleIndices(esClient, indices, {
     body: {
       query: {
         bool: {
@@ -48,20 +74,22 @@ export const fetchGeneBySymbol = async (esClient: any, geneSymbol: any, referenc
     size: 1,
   })
 
-  if (response.body.hits.total.value === 0) {
+  const responsesWithValue = responses.filter((response) => response.body.hits.total.value > 0)
+  if (responsesWithValue.length === 0) {
     return null
   }
 
-  return response.body.hits.hits[0]._source.value
+  return responsesWithValue[responsesWithValue.length - 1].body.hits.hits[0]._source.value
 }
 
-export const fetchGenesByRegion = async (esClient: any, region: any) => {
-  const { reference_genome: referenceGenome, xstart, xstop } = region
+export const fetchGenesByRegion = async (
+  esClient: LimitedElasticClient,
+  region: GeneSearchRegion
+) => {
+  const { reference_genome, xstart, xstop } = region
+  const indices = GENE_INDICES[reference_genome]
 
-  const hits = await fetchAllSearchResults(esClient, {
-    // @ts-expect-error TS(7053) FIXME: Element implicitly has an 'any' type because expre... Remove this comment to see the full error message
-    index: GENE_INDICES[referenceGenome],
-    type: '_doc',
+  const hits = await fetchAllSearchResultsFromMultipleIndices(esClient, indices, {
     size: 200,
     _source: [
       'value.exons',
@@ -98,28 +126,91 @@ export const fetchGenesByRegion = async (esClient: any, region: any) => {
     },
   })
 
-  return hits.map((hit: any) => hit._source.value)
+  const mergedHits = mergeHitsById(hits.flat())
+  return mergedHits.map((hit) => hit._source.value)
+}
+
+const fetchAllSearchResultsFromMultipleIndices = async (
+  esClient: LimitedElasticClient,
+  indices: string[],
+  searchParams: elasticsearch.RequestParams.Search<any>
+) => {
+  const requests = indices.map((index) =>
+    fetchAllSearchResults(esClient, {
+      index,
+      type: '_doc',
+      ...searchParams,
+    })
+  )
+  return Promise.all(requests)
 }
 
-export const fetchGenesMatchingText = async (esClient: any, query: any, referenceGenome: any) => {
+const searchMultipleIndices = async (
+  esClient: LimitedElasticClient,
+  indices: string[],
+  searchParams: elasticsearch.RequestParams.Search<any>
+): Promise<SearchResponse[]> => {
+  const requests = indices.map(
+    (index) =>
+      esClient.search({
+        index,
+        type: '_doc',
+        ...searchParams,
+      }) as Promise<SearchResponse>
+  )
+
+  return Promise.all(requests)
+}
+
+const mergeHitsById = (hits: SearchHit[]): SearchHit[] => {
+  const ids: string[] = []
+  const idsToHits: Record<string, any> = {}
+  hits.forEach((hit) => {
+    if (idsToHits[hit._id] === undefined) {
+      ids.push(hit._id)
+    }
+    idsToHits[hit._id] = hit
+  })
+  return ids.map((id) => idsToHits[id])
+}
+
+const mergeResponsesById = (responses: SearchResponse[]) => {
+  const ids: string[] = []
+  const idsToDocs: Record<string, any> = {}
+  responses.forEach((response) =>
+    response.body.hits.hits.forEach((hit) => {
+      if (idsToDocs[hit._id] === undefined) {
+        ids.push(hit._id)
+      }
+      idsToDocs[hit._id] = hit._source
+    })
+  )
+
+  return ids.map((id) => idsToDocs[id])
+}
+
+export const fetchGenesMatchingText = async (
+  esClient: LimitedElasticClient,
+  query: string,
+  referenceGenome: ReferenceGenome
+) => {
   const upperCaseQuery = query.toUpperCase()
 
   // Ensembl ID
   if (/^ENSG\d{11}$/.test(upperCaseQuery)) {
     const gene = await _fetchGeneById(esClient, upperCaseQuery, referenceGenome)
-    return [
-      {
-        ensembl_id: gene.gene_id,
-        symbol: gene.symbol,
-      },
-    ]
+    return (
+      gene && [
+        {
+          ensembl_id: gene.gene_id,
+          symbol: gene.symbol,
+        },
+      ]
+    )
   }
 
   // Symbol
-  const response = await esClient.search({
-    // @ts-expect-error TS(7053) FIXME: Element implicitly has an 'any' type because expre... Remove this comment to see the full error message
-    index: GENE_INDICES[referenceGenome],
-    type: '_doc',
+  const responses = await searchMultipleIndices(esClient, GENE_INDICES[referenceGenome], {
     _source: ['gene_id', 'value.gene_version', 'value.symbol'],
     body: {
       query: {
@@ -134,15 +225,16 @@ export const fetchGenesMatchingText = async (esClient: any, query: any, referenc
     size: 5,
   })
 
-  if (response.body.hits.total.value === 0) {
+  const responsesWithValue = responses.filter((response) => response.body.hits.total.value !== 0)
+  if (responsesWithValue.length === 0) {
     return []
   }
 
-  return response.body.hits.hits
-    .map((hit: any) => hit._source)
-    .map((doc: any) => ({
-      ensembl_id: doc.gene_id,
-      ensembl_version: doc.value.gene_version,
-      symbol: doc.value.symbol,
-    }))
+  const mergedDocs = mergeResponsesById(responsesWithValue)
+
+  return mergedDocs.map((doc) => ({
+    ensembl_id: doc.gene_id,
+    ensembl_version: doc.value.gene_version,
+    symbol: doc.value.symbol,
+  }))
 }
diff --git a/graphql-api/src/queries/helpers/elasticsearch-helpers.ts b/graphql-api/src/queries/helpers/elasticsearch-helpers.ts
index 5ec797ddb..ee70cb1fa 100644
--- a/graphql-api/src/queries/helpers/elasticsearch-helpers.ts
+++ b/graphql-api/src/queries/helpers/elasticsearch-helpers.ts
@@ -1,43 +1,42 @@
+import { LimitedElasticClient, SearchResponse, SearchHit } from '../../elasticsearch'
+
 /**
  * Search and then scroll to retrieve all pages of search results.
  *
- * @param {elasticsearch.Client} client Elasticsearch client
- * @param {Object} searchParams Argument to elasticsearch.Client#search
- * @return {Object[]} Combined list of hits from all responses
  */
-export const fetchAllSearchResults = async (client: any, searchParams: any) => {
-  const allResults: any = []
-  const responseQueue = []
+export const fetchAllSearchResults = async (client: LimitedElasticClient, searchParams: any) => {
+  const allResults: SearchHit[] = []
+  const responseQueue: SearchResponse[] = []
 
   const size = searchParams.size || 1000
   const scroll = searchParams.scroll || '30s'
 
   responseQueue.push(
-    await client.search({
+    await (client.search({
       ...searchParams,
       scroll,
       size,
-    })
+    }) as Promise<SearchResponse>)
   )
 
   while (responseQueue.length) {
-    const response = responseQueue.shift()
+    const response = responseQueue.shift()!
     allResults.push(...response.body.hits.hits)
 
     if (allResults.length === response.body.hits.total.value) {
       // eslint-disable-next-line no-await-in-loop
       await client.clearScroll({
-        scrollId: response.body._scroll_id, // eslint-disable-line no-underscore-dangle
+        scroll_id: response.body._scroll_id, // eslint-disable-line no-underscore-dangle
       })
       break
     }
 
     responseQueue.push(
       // eslint-disable-next-line no-await-in-loop
-      await client.scroll({
+      await (client.scroll({
         scroll,
         scrollId: response.body._scroll_id, // eslint-disable-line no-underscore-dangle
-      })
+      }) as Promise<SearchResponse>)
     )
   }
 

From 17d59da0c1901049133b82ac599c8a3791d6a891 Mon Sep 17 00:00:00 2001
From: Phil Darnowsky <pdarnows@broadinstitute.org>
Date: Tue, 30 Sep 2025 15:30:34 -0400
Subject: [PATCH 3/9] Build RNU4ATAC variant patches and export to ES

---
 .../variant/patch_rnu4atac_variants.py        | 48 +++++++++++++++++++
 .../annotate_transcript_consequences.py       | 48 ++++++++++---------
 .../pipelines/export_to_elasticsearch.py      | 24 ++++++++++
 .../pipelines/variant_patches.py              | 20 ++++++++
 4 files changed, 118 insertions(+), 22 deletions(-)
 create mode 100644 data-pipeline/src/data_pipeline/data_types/variant/patch_rnu4atac_variants.py
 create mode 100644 data-pipeline/src/data_pipeline/pipelines/variant_patches.py

diff --git a/data-pipeline/src/data_pipeline/data_types/variant/patch_rnu4atac_variants.py b/data-pipeline/src/data_pipeline/data_types/variant/patch_rnu4atac_variants.py
new file mode 100644
index 000000000..e682c398e
--- /dev/null
+++ b/data-pipeline/src/data_pipeline/data_types/variant/patch_rnu4atac_variants.py
@@ -0,0 +1,48 @@
+import hail as hl
+
+from data_pipeline.data_types.variant.transcript_consequence.annotate_transcript_consequences import (
+    annotate_transcript_consequences_in_table,
+)
+
+
+def patch_rnu4atac_variants(vepped_path=None, freq_path=None, transcripts_data=None):
+    """Build the RNU4ATAC patch table: ENST00000580972 consequences plus the variant index fields."""
+    veps = hl.read_table(vepped_path)
+    freqs = hl.read_table(freq_path)
+    # Drop all consequences except for gene RNU4ATAC and transcript ENST00000580972
+    veps = veps.filter(veps.vep.transcript_consequences.any(lambda tc: tc.gene_symbol == "RNU4ATAC"))
+    veps = veps.annotate(
+        vep=veps.vep.annotate(
+            transcript_consequences=veps.vep.transcript_consequences.filter(
+                lambda tc: tc.transcript_id == "ENST00000580972"
+            )
+        )
+    )
+    veps = veps.filter(veps.vep.transcript_consequences.length() > 0)
+    veps = annotate_transcript_consequences_in_table(veps)  # transcripts_data unused: helper takes no such kwarg
+    # We filter the data again here because annotate_transcript_consequences_in_table removes consequences with unimportant consequences terms
+    veps = veps.filter(veps.transcript_consequences.length() > 0)
+    veps = veps.annotate(
+        transcript_consequences=veps.transcript_consequences.map(
+            lambda tc: tc.annotate(
+                transcript_version="2",
+                gene_version="2",
+                is_mane_select=False,
+                is_mane_select_version=False,
+                refseq_id=hl.null(hl.tstr),
+                refseq_version=hl.null(hl.tstr),
+            )
+        )
+    )
+    veps = veps.annotate(
+        transcript_consequences=veps.transcript_consequences.map(
+            lambda tc: tc.drop("polyphen_prediction", "sift_prediction")
+        )
+    )
+    # Drop the frequency table's own consequences first; presumably this avoids a field-name clash in the join.
+    freqs = freqs.drop("transcript_consequences")
+    veps = veps.join(freqs)
+
+    # Include just consequences and index fields
+    veps = veps.select(veps.variant_id, veps.rsids, veps.caid, veps.vrs, veps.transcript_consequences)
+    return veps
diff --git a/data-pipeline/src/data_pipeline/data_types/variant/transcript_consequence/annotate_transcript_consequences.py b/data-pipeline/src/data_pipeline/data_types/variant/transcript_consequence/annotate_transcript_consequences.py
index 1ec656b03..103d876ce 100644
--- a/data-pipeline/src/data_pipeline/data_types/variant/transcript_consequence/annotate_transcript_consequences.py
+++ b/data-pipeline/src/data_pipeline/data_types/variant/transcript_consequence/annotate_transcript_consequences.py
@@ -3,13 +3,18 @@
 from .hgvs import hgvsp_from_consequence_amino_acids
 from .vep import consequence_term_rank
 
-
 OMIT_CONSEQUENCE_TERMS = hl.set(["upstream_gene_variant", "downstream_gene_variant"])
 
+# ruff doesn't like explicit comparisons to None, but we need them in here, so:
+# ruff: noqa: E711
+
 
-def annotate_transcript_consequences(variants_path, transcripts_path, mane_transcripts_path=None):
+def annotate_transcript_consequences(variants_path, transcripts_path=None, mane_transcripts_path=None):
     ds = hl.read_table(variants_path)
+    return annotate_transcript_consequences_in_table(ds, transcripts_path, mane_transcripts_path)
+
 
+def annotate_transcript_consequences_in_table(ds, transcripts_path=None, mane_transcripts_path=None):
     most_severe_consequence = ds.vep.most_severe_consequence
 
     transcript_consequences = ds.vep.transcript_consequences
@@ -62,26 +67,25 @@ def annotate_transcript_consequences(variants_path, transcripts_path, mane_trans
 
     transcript_consequences = transcript_consequences.map(lambda c: c.select(*consequences))
 
-    transcripts = hl.read_table(transcripts_path)
-
-    # TODO: This can potentially be improved by removing Table.collect
-    # See https://hail.zulipchat.com/#narrow/stream/123010-Hail-0.2E2.20support/topic/Optimize.20annotation.20with.20small.20dataset
-    # and https://github.com/Nealelab/ukb_common/blob/ad94d20f8c9f3b711e40a473425925775f0b1f30/utils/generic.py#L18
-    transcript_info = hl.dict(
-        [
-            (row.transcript_id, row.transcript_info)
-            for row in transcripts.select(
-                transcript_info=hl.struct(
-                    transcript_version=transcripts.transcript_version,
-                    gene_version=transcripts.gene.gene_version,
-                )
-            ).collect()
-        ]
-    )
-
-    transcript_consequences = transcript_consequences.map(
-        lambda csq: csq.annotate(**transcript_info.get(csq.transcript_id))
-    )
+    if transcripts_path != None:
+        transcripts = hl.read_table(transcripts_path)
+        # TODO: This can potentially be improved by removing Table.collect
+        # See https://hail.zulipchat.com/#narrow/stream/123010-Hail-0.2E2.20support/topic/Optimize.20annotation.20with.20small.20dataset
+        # and https://github.com/Nealelab/ukb_common/blob/ad94d20f8c9f3b711e40a473425925775f0b1f30/utils/generic.py#L18
+        transcript_info = hl.dict(
+            [
+                (row.transcript_id, row.transcript_info)
+                for row in transcripts.select(
+                    transcript_info=hl.struct(
+                        transcript_version=transcripts.transcript_version,
+                        gene_version=transcripts.gene.gene_version,
+                    )
+                ).collect()
+            ]
+        )
+        transcript_consequences = transcript_consequences.map(
+            lambda csq: csq.annotate(**transcript_info.get(csq.transcript_id))
+        )
 
     if mane_transcripts_path:
         mane_transcripts = hl.read_table(mane_transcripts_path)
diff --git a/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py b/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py
index 680a96f41..acdbe8be2 100644
--- a/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py
+++ b/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py
@@ -143,6 +143,30 @@ def add_liftover_document_id(ds):
             "block_size": 1_000,
         },
     },
+    "gnomad_v4_variant_patches": {
+        "get_table": lambda: subset_table(
+            add_variant_document_id(
+                hl.read_table(
+                    "gs://gnomad-browser-data-pipeline/phil-scratch/output/gnomad_v4/gnomad_v4_variants_patched.ht"
+                )
+            )
+        ),
+        "args": {
+            "index": "gnomad_v4_variants_patches",
+            "index_fields": [
+                "document_id",
+                "variant_id",
+                "rsids",
+                "caid",
+                "locus",
+                "transcript_consequences.gene_id",
+                "transcript_consequences.transcript_id",
+                "vrs.alt.allele_id",
+            ],
+            "id_field": "document_id",
+            "block_size": 1_000,
+        },
+    },
     "gnomad_v4_exome_coverage": {
         "get_table": lambda: subset_table(
             hl.read_table(gnomad_v4_coverage_pipeline.get_output("exome_coverage").get_output_path())
diff --git a/data-pipeline/src/data_pipeline/pipelines/variant_patches.py b/data-pipeline/src/data_pipeline/pipelines/variant_patches.py
new file mode 100644
index 000000000..0762e6063
--- /dev/null
+++ b/data-pipeline/src/data_pipeline/pipelines/variant_patches.py
@@ -0,0 +1,20 @@
+from data_pipeline.pipeline import Pipeline, run_pipeline
+
+from data_pipeline.data_types.variant.patch_rnu4atac_variants import patch_rnu4atac_variants
+
+pipeline = Pipeline()
+
+pipeline.add_task(
+    "patch_rnu4atac_variants",
+    patch_rnu4atac_variants,
+    "/gnomad_v4/gnomad_v4_variants_patched.ht",
+    {
+        "vepped_path": "gs://gnomad-v4-data-pipeline/inputs/secondary-analyses/gnomad_v4.1.RNU4ATAC.vep115.ht",
+        "freq_path": "gs://gnomad-v4-data-pipeline/output/gnomad_v4/gnomad_v4_variants_annotated_4.ht",
+    },
+)
+
+pipeline.set_outputs({"variant_patches": "patch_rnu4atac_variants"})
+
+if __name__ == "__main__":
+    run_pipeline(pipeline)

From 517188951144b6e909b7a82eac4e8d0ecae8f8ca Mon Sep 17 00:00:00 2001
From: Phil Darnowsky <pdarnows@broadinstitute.org>
Date: Wed, 15 Oct 2025 14:31:33 -0400
Subject: [PATCH 4/9] Use RNU4ATAC variant patches in API

---
 graphql-api/src/queries/gene-queries.ts       |  17 +-
 .../queries/helpers/elasticsearch-helpers.ts  |  16 ++
 .../gnomad-v4-variant-queries.ts              | 247 ++++++++++++------
 3 files changed, 182 insertions(+), 98 deletions(-)

diff --git a/graphql-api/src/queries/gene-queries.ts b/graphql-api/src/queries/gene-queries.ts
index 57b05e986..51e28bdaf 100644
--- a/graphql-api/src/queries/gene-queries.ts
+++ b/graphql-api/src/queries/gene-queries.ts
@@ -1,7 +1,7 @@
 import elasticsearch from '@elastic/elasticsearch'
 import { withCache } from '../cache'
 
-import { fetchAllSearchResults } from './helpers/elasticsearch-helpers'
+import { fetchAllSearchResultsFromMultipleIndices } from './helpers/elasticsearch-helpers'
 
 import { ReferenceGenome } from '@gnomad/dataset-metadata/metadata'
 import { LimitedElasticClient, GetResponse, SearchResponse, SearchHit } from '../elasticsearch'
@@ -130,21 +130,6 @@ export const fetchGenesByRegion = async (
   return mergedHits.map((hit) => hit._source.value)
 }
 
-const fetchAllSearchResultsFromMultipleIndices = async (
-  esClient: LimitedElasticClient,
-  indices: string[],
-  searchParams: elasticsearch.RequestParams.Search<any>
-) => {
-  const requests = indices.map((index) =>
-    fetchAllSearchResults(esClient, {
-      index,
-      type: '_doc',
-      ...searchParams,
-    })
-  )
-  return Promise.all(requests)
-}
-
 const searchMultipleIndices = async (
   esClient: LimitedElasticClient,
   indices: string[],
diff --git a/graphql-api/src/queries/helpers/elasticsearch-helpers.ts b/graphql-api/src/queries/helpers/elasticsearch-helpers.ts
index ee70cb1fa..a18f237cb 100644
--- a/graphql-api/src/queries/helpers/elasticsearch-helpers.ts
+++ b/graphql-api/src/queries/helpers/elasticsearch-helpers.ts
@@ -1,3 +1,4 @@
+import elasticsearch from '@elastic/elasticsearch'
 import { LimitedElasticClient, SearchResponse, SearchHit } from '../../elasticsearch'
 
 /**
@@ -43,6 +44,21 @@ export const fetchAllSearchResults = async (client: LimitedElasticClient, search
   return allResults
 }
 
+export const fetchAllSearchResultsFromMultipleIndices = async (
+  esClient: LimitedElasticClient,
+  indices: string[],
+  searchParams: elasticsearch.RequestParams.Search<any>
+) => {
+  const requests = indices.map((index) =>
+    fetchAllSearchResults(esClient, {
+      index,
+      type: '_doc',
+      ...searchParams,
+    })
+  )
+  return Promise.all(requests)
+}
+
 // Retrieve index metadata set by data pipeline
 export const fetchIndexMetadata = async (esClient: any, index: any) => {
   const response = await esClient.indices.getMapping({
diff --git a/graphql-api/src/queries/variant-datasets/gnomad-v4-variant-queries.ts b/graphql-api/src/queries/variant-datasets/gnomad-v4-variant-queries.ts
index 46955fb37..5d96a23a4 100644
--- a/graphql-api/src/queries/variant-datasets/gnomad-v4-variant-queries.ts
+++ b/graphql-api/src/queries/variant-datasets/gnomad-v4-variant-queries.ts
@@ -5,7 +5,10 @@ import { isRsId } from '@gnomad/identifiers'
 import { UserVisibleError } from '../../errors'
 
 import { fetchLocalAncestryPopulationsByVariant } from '../local-ancestry-queries'
-import { fetchAllSearchResults } from '../helpers/elasticsearch-helpers'
+import {
+  fetchAllSearchResults,
+  fetchAllSearchResultsFromMultipleIndices,
+} from '../helpers/elasticsearch-helpers'
 import { mergeOverlappingRegions } from '../helpers/region-helpers'
 import {
   fetchLofCurationResultsByVariant,
@@ -16,10 +19,67 @@ import {
 import { getFlagsForContext } from './shared/flags'
 import { getConsequenceForContext } from './shared/transcriptConsequence'
 import largeGenes from '../helpers/large-genes'
+import { LimitedElasticClient, SearchResponse } from '../../elasticsearch'
 
 const GNOMAD_V4_VARIANT_INDEX = 'gnomad_v4_variants'
+const GNOMAD_V4_VARIANT_INDEX_PATCHES = 'gnomad_v4_variants_patches-2025-10-14--20-02'
 
 type Subset = 'all' | 'non_ukb'
+type ESTranscriptConsequence = {
+  biotype: string
+  consequence_terms: string[]
+  gene_id: string
+  gene_symbol: string
+  gene_version: string
+  is_canonical: boolean
+  major_consequence: string
+  transcript_id: string
+  transcript_version: string
+}
+type ESPatch = {
+  variant_id: string
+  transcript_consequences: ESTranscriptConsequence[]
+}
+
+// Overlay patched consequences onto a variant's own consequences, matched by transcript_id.
+// Consequences with no matching patch are kept as-is; patches for transcripts the variant does
+// not already have are ignored. Returns a new array; neither input is modified.
+const mergeTranscriptConsequences = (
+  transcriptConsequences: ESTranscriptConsequence[] | null | undefined,
+  patchedTranscriptConsequences?: ESTranscriptConsequence[] | null
+) => {
+  // The code this helper replaced guarded with "|| []" and callers chain .filter() on the
+  // result, so a variant without transcript_consequences must yield [] instead of throwing.
+  const consequences = transcriptConsequences || []
+  const patches = patchedTranscriptConsequences || []
+
+  // find() returns the first patch for a transcript, so duplicate patches resolve in list order.
+  return consequences.map(
+    (csq) => patches.find((patch) => patch.transcript_id === csq.transcript_id) || csq
+  )
+}
+
+// Return the variant with its transcript consequences patched, or the same variant object when
+// no patch shares its variant_id. Only transcript_consequences is replaced.
+const mergeTranscriptConsequencesInVariant = (
+  variant: { variant_id: string; transcript_consequences: ESTranscriptConsequence[] },
+  patches: ESPatch[]
+) => {
+  const patchForVariant = patches.find((patch) => patch.variant_id === variant.variant_id)
+  return patchForVariant === undefined
+    ? variant
+    : {
+        ...variant,
+        transcript_consequences: mergeTranscriptConsequences(
+          variant.transcript_consequences,
+          patchForVariant.transcript_consequences
+        ),
+      }
+}
+
+// True when ac_raw is positive in the genome "all" frequencies or in the exome `subset` ones.
+const hasPositiveAC = (variant: any, subset: string) =>
+  variant.genome.freq.all?.ac_raw > 0 || variant.exome.freq[subset].ac_raw > 0
 
 // ================================================================================================
 // Count query
@@ -69,30 +129,50 @@ const chooseIdField = (variantId: string) => {
   return 'variant_id'
 }
 
-const fetchVariantById = async (esClient: any, variantId: any, subset: Subset) => {
+const fetchVariantById = async (
+  esClient: LimitedElasticClient,
+  variantId: string,
+  subset: Subset
+) => {
   const idField = chooseIdField(variantId)
-  const response = await esClient.search({
+  const query = {
+    bool: {
+      filter: { term: { [idField]: variantId } },
+    },
+  }
+
+  const variantResponsePromise = esClient.search({
     index: GNOMAD_V4_VARIANT_INDEX,
     body: {
-      query: {
-        bool: {
-          filter: { term: { [idField]: variantId } },
-        },
-      },
+      query,
     },
     size: 1,
-  })
+  }) as Promise<SearchResponse>
+  const patchResponsePromise = esClient.search({
+    index: GNOMAD_V4_VARIANT_INDEX_PATCHES,
+    body: { query },
+    size: 1,
+  }) as Promise<SearchResponse>
 
-  if (response.body.hits.total.value === 0) {
+  const searches = [variantResponsePromise, patchResponsePromise]
+  const [variantResponse, patchResponse] = await Promise.all(searches)
+  if (variantResponse.body.hits.total.value === 0) {
     throw new UserVisibleError('Variant not found')
   }
 
   // An rsID may match multiple variants
-  if (response.body.hits.total.value > 1) {
+  if (variantResponse.body.hits.total.value > 1) {
     throw new UserVisibleError('Multiple variants found, query using variant ID to select one.')
   }
 
-  const variant = response.body.hits.hits[0]._source.value
+  // Both searches were awaited together above, so neither can reject unobserved on an early throw.
+  const patchedTranscriptConsequences =
+    patchResponse.body.hits.total.value > 0
+      ? (patchResponse.body.hits.hits[0]._source.value
+          .transcript_consequences as ESTranscriptConsequence[])
+      : null
+
+  const variant = variantResponse.body.hits.hits[0]._source.value
 
   const subsetGenomeFreq = variant.genome.freq.all || {}
   const subsetJointFreq = variant.joint.freq[subset] || {}
@@ -244,9 +324,10 @@ const fetchVariantById = async (esClient: any, variantId: any, subset: Subset) =
     flags: variantFlags,
     // TODO: Include RefSeq transcripts once the browser supports them.
     lof_curations: lofCurationResults,
-    transcript_consequences: (variant.transcript_consequences || []).filter((csq: any) =>
-      csq.gene_id.startsWith('ENSG')
-    ),
+    transcript_consequences: mergeTranscriptConsequences(
+      variant.transcript_consequences,
+      patchedTranscriptConsequences
+    ).filter((csq: any) => csq.gene_id.startsWith('ENSG')),
     in_silico_predictors: inSilicoPredictorsList,
   }
 
@@ -454,28 +535,30 @@ const fetchVariantsByGene = async (esClient: any, gene: any, subset: Subset) =>
       },
     }))
 
-    const hits = await fetchAllSearchResults(esClient, {
-      index: GNOMAD_V4_VARIANT_INDEX,
-      type: '_doc',
-      size: pageSize,
-      _source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset),
-      body: {
-        query: {
-          bool: {
-            filter: [{ term: { gene_id: gene.gene_id } }, { bool: { should: rangeQueries } }],
+    const [hits, consequencePatchHits] = await fetchAllSearchResultsFromMultipleIndices(
+      esClient,
+      [GNOMAD_V4_VARIANT_INDEX, GNOMAD_V4_VARIANT_INDEX_PATCHES],
+      {
+        type: '_doc',
+        size: pageSize,
+        _source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset),
+        body: {
+          query: {
+            bool: {
+              filter: [{ term: { gene_id: gene.gene_id } }, { bool: { should: rangeQueries } }],
+            },
           },
+          sort: [{ 'locus.position': { order: 'asc' } }],
         },
-        sort: [{ 'locus.position': { order: 'asc' } }],
-      },
-    })
+      }
+    )
+
+    const consequencePatches: ESPatch[] = consequencePatchHits.map((hit) => hit._source.value)
 
     const shapedHits = hits
       .map((hit: any) => hit._source.value)
-      .filter(
-        (variant: any) =>
-          (variant.genome.freq.all && variant.genome.freq.all.ac_raw > 0) ||
-          variant.exome.freq[subset].ac_raw > 0
-      )
+      .filter((variant) => hasPositiveAC(variant, subset))
+      .map((variant) => mergeTranscriptConsequencesInVariant(variant, consequencePatches))
       .map(shapeVariantSummary(subset, { type: 'gene', geneId: gene.gene_id }))
 
     const lofCurationResults = await fetchLofCurationResultsByGene(esClient, 'v4', gene)
@@ -507,38 +590,40 @@ const fetchVariantsByRegion = async (esClient: any, region: any, subset: Subset)
   const genomeSubset = 'all'
   const jointSubset = 'all'
 
-  const hits = await fetchAllSearchResults(esClient, {
-    index: GNOMAD_V4_VARIANT_INDEX,
-    type: '_doc',
-    size: 10000,
-    _source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset),
-    body: {
-      query: {
-        bool: {
-          filter: [
-            { term: { 'locus.contig': `chr${region.chrom}` } },
-            {
-              range: {
-                'locus.position': {
-                  gte: region.start,
-                  lte: region.stop,
+  const [hits, consequencePatchHits] = await fetchAllSearchResultsFromMultipleIndices(
+    esClient,
+    [GNOMAD_V4_VARIANT_INDEX, GNOMAD_V4_VARIANT_INDEX_PATCHES],
+    {
+      type: '_doc',
+      size: 10000,
+      _source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset),
+      body: {
+        query: {
+          bool: {
+            filter: [
+              { term: { 'locus.contig': `chr${region.chrom}` } },
+              {
+                range: {
+                  'locus.position': {
+                    gte: region.start,
+                    lte: region.stop,
+                  },
                 },
               },
-            },
-          ],
+            ],
+          },
         },
+        sort: [{ 'locus.position': { order: 'asc' } }],
       },
-      sort: [{ 'locus.position': { order: 'asc' } }],
-    },
-  })
+    }
+  )
+
+  const consequencePatches: ESPatch[] = consequencePatchHits.map((hit) => hit._source.value)
 
   const variants = hits
     .map((hit: any) => hit._source.value)
-    .filter(
-      (variant: any) =>
-        (variant.genome.freq.all && variant.genome.freq.all.ac_raw > 0) ||
-        variant.exome.freq[subset].ac_raw > 0
-    )
+    .filter((variant) => hasPositiveAC(variant, subset))
+    .map((variant) => mergeTranscriptConsequencesInVariant(variant, consequencePatches))
     .map(shapeVariantSummary(subset, { type: 'region' }))
 
   const lofCurationResults = await fetchLofCurationResultsByRegion(esClient, 'v4', region)
@@ -599,31 +684,33 @@ const fetchVariantsByTranscript = async (esClient: any, transcript: any, subset:
     },
   }))
 
-  const hits = await fetchAllSearchResults(esClient, {
-    index: GNOMAD_V4_VARIANT_INDEX,
-    type: '_doc',
-    size: 10000,
-    _source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset),
-    body: {
-      query: {
-        bool: {
-          filter: [
-            { term: { transcript_id: transcript.transcript_id } },
-            { bool: { should: rangeQueries } },
-          ],
+  const [hits, consequencePatchHits] = await fetchAllSearchResultsFromMultipleIndices(
+    esClient,
+    [GNOMAD_V4_VARIANT_INDEX, GNOMAD_V4_VARIANT_INDEX_PATCHES],
+    {
+      type: '_doc',
+      size: 10000,
+      _source: getMultiVariantSourceFields(exomeSubset, genomeSubset, jointSubset),
+      body: {
+        query: {
+          bool: {
+            filter: [
+              { term: { transcript_id: transcript.transcript_id } },
+              { bool: { should: rangeQueries } },
+            ],
+          },
         },
+        sort: [{ 'locus.position': { order: 'asc' } }],
       },
-      sort: [{ 'locus.position': { order: 'asc' } }],
-    },
-  })
+    }
+  )
+
+  const consequencePatches: ESPatch[] = consequencePatchHits.map((hit) => hit._source.value)
 
   return hits
     .map((hit: any) => hit._source.value)
-    .filter(
-      (variant: any) =>
-        (variant.genome.freq.all && variant.genome.freq.all.ac_raw > 0) ||
-        variant.exome.freq[subset].ac_raw > 0
-    )
+    .filter((variant) => hasPositiveAC(variant, subset))
+    .map((variant) => mergeTranscriptConsequencesInVariant(variant, consequencePatches))
     .map(
       shapeVariantSummary(subset, { type: 'transcript', transcriptId: transcript.transcript_id })
     )
@@ -665,11 +752,7 @@ const fetchMatchingVariants = async (
 
   return hits
     .map((hit: any) => hit._source.value)
-    .filter(
-      (variant: any) =>
-        (variant.genome.freq.all && variant.genome.freq.all.ac_raw > 0) ||
-        variant.exome.freq[subset].ac_raw > 0
-    )
+    .filter((variant) => hasPositiveAC(variant, subset))
     .map((variant: any) => ({
       variant_id: variant.variant_id,
     }))

From 12a856344cc76ab071b2a5d2fc2716e98ba0311a Mon Sep 17 00:00:00 2001
From: Phil Darnowsky <pdarnows@broadinstitute.org>
Date: Fri, 17 Oct 2025 14:20:49 -0400
Subject: [PATCH 5/9] Build RNU4ATAC transcript patches and export to ES

---
 .../pipelines/export_to_elasticsearch.py      | 13 +++++++++
 .../pipelines/transcript_patches.py           | 28 +++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 data-pipeline/src/data_pipeline/pipelines/transcript_patches.py

diff --git a/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py b/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py
index acdbe8be2..1446aa58e 100644
--- a/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py
+++ b/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py
@@ -42,6 +42,8 @@
 from data_pipeline.pipelines.gnomad_v4_lof_curation_results import pipeline as gnomad_v4_lof_curation_results_pipeline
 
 from data_pipeline.pipelines.gene_patches import pipeline as gnomad_v4_gene_patches
+from data_pipeline.pipelines.transcript_patches import pipeline as gnomad_v4_transcript_patches
+
 
 logger = logging.getLogger("gnomad_data_pipeline")
 
@@ -119,6 +121,17 @@ def add_liftover_document_id(ds):
             "block_size": 1_000,
         },
     },
+    "transcripts_grch38_patched": {
+        "get_table": lambda: hl.read_table(
+            gnomad_v4_transcript_patches.get_output("transcripts_grch38_patched").get_output_path()
+        ),
+        "args": {
+            "index": "transcripts_grch38_patched",
+            "index_fields": ["transcript_id"],
+            "id_field": "transcript_id",
+            "block_size": 1_000,
+        },
+    },
     ##############################################################################################################
     # gnomAD v4
     ##############################################################################################################
diff --git a/data-pipeline/src/data_pipeline/pipelines/transcript_patches.py b/data-pipeline/src/data_pipeline/pipelines/transcript_patches.py
new file mode 100644
index 000000000..fe332d898
--- /dev/null
+++ b/data-pipeline/src/data_pipeline/pipelines/transcript_patches.py
@@ -0,0 +1,42 @@
+"""Build the patched GRCh38 transcripts table.
+
+Extracts transcripts from the patched GRCh38 genes table, passes the result through
+annotate_table together with the gnomAD v4 constraint table, and exposes the annotated table as
+the "transcripts_grch38_patched" pipeline output.
+"""
+
+from data_pipeline.pipeline import Pipeline, run_pipeline
+
+from data_pipeline.data_types.transcript import extract_transcripts
+from data_pipeline.helpers import annotate_table
+
+pipeline = Pipeline()
+
+# Step 1: extract transcripts from the patched genes table.
+# NOTE(review): genes_path points at a hard-coded scratch location ("phil-scratch") rather than
+# at another pipeline's output -- confirm this is the intended long-term input.
+pipeline.add_task(
+    "extract_patched_transcripts",
+    extract_transcripts,
+    "/transcripts/transcripts_grch38_patched_base.ht",
+    {"genes_path": "gs://gnomad-browser-data-pipeline/phil-scratch/output/genes/genes_grch38_patched.ht"},
+)
+
+# Step 2: annotate the extracted transcripts, supplying the gnomAD v4 constraint table.
+# NOTE(review): unlike the base table above, this output name does not contain "patched" --
+# confirm it cannot collide with a table written by the unpatched transcripts pipeline.
+pipeline.add_task(
+    "annotate_patched_transcripts",
+    annotate_table,
+    "/transcripts/transcripts_grch38_annotated_1.ht",
+    {
+        "table_path": pipeline.get_task("extract_patched_transcripts"),
+        "gnomad_constraint": "gs://gnomad-v4-data-pipeline/output/constraint/gnomad_v4_constraint.ht",
+    },
+)
+
+# Expose the annotated table under the name downstream code passes to get_output().
+pipeline.set_outputs({"transcripts_grch38_patched": "annotate_patched_transcripts"})
+
+if __name__ == "__main__":
+    run_pipeline(pipeline)

From 427892df28b88327ade578445c7e0f90d3139b68 Mon Sep 17 00:00:00 2001
From: Phil Darnowsky <pdarnows@broadinstitute.org>
Date: Fri, 17 Oct 2025 14:44:39 -0400
Subject: [PATCH 6/9] Use RNU4ATAC transcript patches in API

---
 graphql-api/src/queries/gene-queries.ts       | 17 ++----
 .../queries/helpers/elasticsearch-helpers.ts  | 15 ++++-
 graphql-api/src/queries/transcript-queries.ts | 55 ++++++++++++-------
 3 files changed, 54 insertions(+), 33 deletions(-)

diff --git a/graphql-api/src/queries/gene-queries.ts b/graphql-api/src/queries/gene-queries.ts
index 51e28bdaf..cc8b85a96 100644
--- a/graphql-api/src/queries/gene-queries.ts
+++ b/graphql-api/src/queries/gene-queries.ts
@@ -1,7 +1,10 @@
 import elasticsearch from '@elastic/elasticsearch'
 import { withCache } from '../cache'
 
-import { fetchAllSearchResultsFromMultipleIndices } from './helpers/elasticsearch-helpers'
+import {
+  fetchAllSearchResultsFromMultipleIndices,
+  getFromMultipleIndices,
+} from './helpers/elasticsearch-helpers'
 
 import { ReferenceGenome } from '@gnomad/dataset-metadata/metadata'
 import { LimitedElasticClient, GetResponse, SearchResponse, SearchHit } from '../elasticsearch'
@@ -38,17 +41,7 @@ const _fetchGeneById = async (
           throw err
         }) as Promise<GetResponse | null>
   )
-  return Promise.all(requests).then(
-    (responses) => {
-      const responsesWithValue = responses.filter((response) => response !== null)
-      return responsesWithValue.length > 0
-        ? responsesWithValue[responsesWithValue.length - 1]!.body._source.value
-        : null
-    },
-    (err) => {
-      throw err
-    }
-  )
+  return getFromMultipleIndices(requests)
 }
 
 export const fetchGeneById = withCache(
diff --git a/graphql-api/src/queries/helpers/elasticsearch-helpers.ts b/graphql-api/src/queries/helpers/elasticsearch-helpers.ts
index a18f237cb..ab946c26c 100644
--- a/graphql-api/src/queries/helpers/elasticsearch-helpers.ts
+++ b/graphql-api/src/queries/helpers/elasticsearch-helpers.ts
@@ -1,5 +1,5 @@
 import elasticsearch from '@elastic/elasticsearch'
-import { LimitedElasticClient, SearchResponse, SearchHit } from '../../elasticsearch'
+import { LimitedElasticClient, SearchResponse, SearchHit, GetResponse } from '../../elasticsearch'
 
 /**
  * Search and then scroll to retrieve all pages of search results.
@@ -69,3 +69,16 @@ export const fetchIndexMetadata = async (esClient: any, index: any) => {
   // eslint-disable-next-line no-underscore-dangle
   return Object.values(response.body)[0].mappings._meta
 }
+
+// Resolve to the value stored by the last request that found a document, so later requests take
+// precedence over earlier ones; resolve to null when none did. Any rejection is passed through.
+export const getFromMultipleIndices = async (requests: Promise<GetResponse | null>[]) => {
+  const responses = await Promise.all(requests)
+  for (let i = responses.length - 1; i >= 0; i -= 1) {
+    const response = responses[i]
+    if (response !== null) {
+      return response.body._source.value
+    }
+  }
+  return null
+}
diff --git a/graphql-api/src/queries/transcript-queries.ts b/graphql-api/src/queries/transcript-queries.ts
index e36b22969..e60ffd726 100644
--- a/graphql-api/src/queries/transcript-queries.ts
+++ b/graphql-api/src/queries/transcript-queries.ts
@@ -1,24 +1,39 @@
-const TRANSCRIPT_INDICES = {
-  GRCh37: 'transcripts_grch37',
-  GRCh38: 'transcripts_grch38',
+import { ReferenceGenome } from '@gnomad/dataset-metadata/metadata'
+import { GetResponse, LimitedElasticClient } from '../elasticsearch'
+import { getFromMultipleIndices } from './helpers/elasticsearch-helpers'
+
+type TranscriptIndex =
+  | 'transcripts_grch37'
+  | 'transcripts_grch38'
+  | 'transcripts_grch38_patched-2025-10-23--19-36'
+
+const TRANSCRIPT_INDICES: Record<ReferenceGenome, TranscriptIndex[]> = {
+  GRCh37: ['transcripts_grch37'],
+  GRCh38: ['transcripts_grch38', 'transcripts_grch38_patched-2025-10-23--19-36'],
 }
 
-export const fetchTranscriptById = async (es: any, transcriptId: any, referenceGenome: any) => {
-  try {
-    const response = await es.get({
-      // @ts-expect-error TS(7053) FIXME: Element implicitly has an 'any' type because expre... Remove this comment to see the full error message
-      index: TRANSCRIPT_INDICES[referenceGenome],
-      type: '_doc',
-      id: transcriptId,
-    })
+// Fetch a transcript from every index configured for the reference genome. The lookups run
+// concurrently and getFromMultipleIndices picks the hit from the last index that has one, so
+// indices later in TRANSCRIPT_INDICES override earlier ones. Resolves to null if none has it.
+export const fetchTranscriptById = async (
+  esClient: LimitedElasticClient,
+  transcriptId: string,
+  referenceGenome: ReferenceGenome
+) => {
+  const requests = TRANSCRIPT_INDICES[referenceGenome].map(
+    (index) =>
+      esClient
+        .get({ index, type: '_doc', id: transcriptId })
+        .catch((err) => {
+          // meta will not be present if the request times out in the queue before reaching ES,
+          // and meta.body is presumably empty for errors that carry no response (TODO confirm),
+          // so check both before reading "found" rather than masking err with a TypeError.
+          if (err.meta && err.meta.body && err.meta.body.found === false) {
+            return null
+          }
+          throw err
+        }) as Promise<GetResponse | null>
+  )
+
-    return response.body._source.value
-  } catch (err) {
-    // meta will not be present if the request times out in the queue before reaching ES
-    // @ts-expect-error TS(2571) FIXME: Object is of type 'unknown'.
-    if (err.meta && err.meta.body.found === false) {
-      return null
-    }
-    throw err
-  }
+  return getFromMultipleIndices(requests)
 }

From 555cdd3a72e0fa9fcb6173a9948e689bc5d021df Mon Sep 17 00:00:00 2001
From: Phil Darnowsky <pdarnows@broadinstitute.org>
Date: Thu, 23 Oct 2025 11:02:16 -0400
Subject: [PATCH 7/9] Add VEP 115 warning to RNU4ATAC

---
 browser/src/GenePage/GeneFlags.spec.tsx       |  8 +++++++
 browser/src/GenePage/GeneFlags.tsx            | 12 +++++++++++
 .../__snapshots__/GeneFlags.spec.tsx.snap     | 21 +++++++++++++++++++
 3 files changed, 41 insertions(+)

diff --git a/browser/src/GenePage/GeneFlags.spec.tsx b/browser/src/GenePage/GeneFlags.spec.tsx
index 45d29ef28..b10c4400a 100644
--- a/browser/src/GenePage/GeneFlags.spec.tsx
+++ b/browser/src/GenePage/GeneFlags.spec.tsx
@@ -29,4 +29,12 @@ describe('GeneFlags', () => {
 
     expect(tree).toMatchSnapshot()
   })
+
+  test('renders VEP 115 warning for RNU4ATAC', () => {
+    const testGene = geneFactory.build({ symbol: 'RNU4ATAC', reference_genome: 'GRCh38' })
+
+    const tree = renderer.create(<GeneFlags gene={testGene} />)
+
+    expect(tree).toMatchSnapshot()
+  })
 })
diff --git a/browser/src/GenePage/GeneFlags.tsx b/browser/src/GenePage/GeneFlags.tsx
index 3150800a8..dcf08e848 100644
--- a/browser/src/GenePage/GeneFlags.tsx
+++ b/browser/src/GenePage/GeneFlags.tsx
@@ -13,11 +13,15 @@ type Props = {
 }
 
 const allOfUsCMRGGenes = ['CBS', 'KCNE1', 'CRYAA']
+const vep115Genes = ['RNU4ATAC']
 
 const GeneFlags = ({ gene }: Props) => {
   const shouldDisplayCMRGWarning =
     gene.reference_genome === 'GRCh38' && allOfUsCMRGGenes.includes(gene.symbol)
 
+  const shouldDisplayVEP115Warning =
+    gene.reference_genome === 'GRCh38' && vep115Genes.includes(gene.symbol)
+
   return (
     <>
       {shouldDisplayCMRGWarning && (
@@ -35,6 +39,14 @@ const GeneFlags = ({ gene }: Props) => {
           ) callset to remedy this issue in the future.
         </p>
       )}
+      {shouldDisplayVEP115Warning && (
+        <p>
+          <Badge level="warning">Warning</Badge> MANE Select and variant consequence information in
+          this gene were annotated using Ensembl VEP version 115 (GENCODE v49). For more
+          information, see our{' '}
+          <ExternalLink href="https://gnomad.broadinstitute.org/help/vep">help page</ExternalLink>.
+        </p>
+      )}
       {gene.flags.includes('chip') && (
         <p>
           <Badge level="warning">Note</Badge> Analysis of allele balance and age data indicates that
diff --git a/browser/src/GenePage/__snapshots__/GeneFlags.spec.tsx.snap b/browser/src/GenePage/__snapshots__/GeneFlags.spec.tsx.snap
index 72aeb4bab..4d1b9affe 100644
--- a/browser/src/GenePage/__snapshots__/GeneFlags.spec.tsx.snap
+++ b/browser/src/GenePage/__snapshots__/GeneFlags.spec.tsx.snap
@@ -32,6 +32,27 @@ exports[`GeneFlags renders CMRG flag if one of 3 relevant genes 1`] = `
 </p>
 `;
 
+exports[`GeneFlags renders VEP 115 warning for RNU4ATAC 1`] = `
+<p>
+  <span
+    className="Badge__BadgeWrapper-sc-j4izdp-1 gRPPXC"
+  >
+    Warning
+  </span>
+   MANE Select and variant consequence information in this gene were annotated using Ensembl VEP version 115 (GENCODE v49). For more information, see our
+   
+  <a
+    className="Link-sc-14lgydv-0 Link__ExternalLink-sc-14lgydv-1 kswbwW"
+    href="https://gnomad.broadinstitute.org/help/vep"
+    rel="noopener noreferrer"
+    target="_blank"
+  >
+    help page
+  </a>
+  .
+</p>
+`;
+
 exports[`GeneFlags renders chip flag if present on gene 1`] = `
 <p>
   <span

From 5063a1bf30d13feac505be84515711bac81db498 Mon Sep 17 00:00:00 2001
From: Phil Darnowsky <pdarnows@broadinstitute.org>
Date: Thu, 23 Oct 2025 12:22:37 -0400
Subject: [PATCH 8/9] Correct typo in identifier

---
 browser/src/GenePage/GeneInfo.tsx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/browser/src/GenePage/GeneInfo.tsx b/browser/src/GenePage/GeneInfo.tsx
index ccf461f22..526a9f9f6 100644
--- a/browser/src/GenePage/GeneInfo.tsx
+++ b/browser/src/GenePage/GeneInfo.tsx
@@ -23,12 +23,12 @@ type ManeSelectTranscriptIdProps = {
 }
 
 const ManeSelectTranscriptId = ({ gene }: ManeSelectTranscriptIdProps) => {
-  const gencodeVersionOfManeSelectTransript = gene.transcripts.find(
+  const gencodeVersionOfManeSelectTranscript = gene.transcripts.find(
     (transcript: any) => transcript.transcript_id === gene.mane_select_transcript.ensembl_id
   )
   const shouldLinkToTranscriptPage =
-    gencodeVersionOfManeSelectTransript &&
-    gencodeVersionOfManeSelectTransript.transcript_version ===
+    gencodeVersionOfManeSelectTranscript &&
+    gencodeVersionOfManeSelectTranscript.transcript_version ===
       gene.mane_select_transcript.ensembl_version
 
   return (

From 4a4e2b44635eb66a45ff690c92207720a8365fe6 Mon Sep 17 00:00:00 2001
From: Phil Darnowsky <pdarnows@broadinstitute.org>
Date: Thu, 23 Oct 2025 12:27:01 -0400
Subject: [PATCH 9/9] Tighten types around MANE select ID

---
 browser/src/GenePage/GeneInfo.tsx | 49 ++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/browser/src/GenePage/GeneInfo.tsx b/browser/src/GenePage/GeneInfo.tsx
index 526a9f9f6..a3bbaa35e 100644
--- a/browser/src/GenePage/GeneInfo.tsx
+++ b/browser/src/GenePage/GeneInfo.tsx
@@ -8,39 +8,40 @@ import Link from '../Link'
 import GeneReferences from './GeneReferences'
 
 type ManeSelectTranscriptIdProps = {
-  gene: {
-    mane_select_transcript: {
-      ensembl_id: string
-      ensembl_version: string
-      refseq_id: string
-      refseq_version: string
-    }
-    transcripts: {
-      transcript_id: string
-      transcript_version: string
-    }[]
+  mane_select_transcript: {
+    ensembl_id: string
+    ensembl_version: string
+    refseq_id: string
+    refseq_version: string
   }
+  transcripts: {
+    transcript_id: string
+    transcript_version: string
+  }[]
 }
 
-const ManeSelectTranscriptId = ({ gene }: ManeSelectTranscriptIdProps) => {
-  const gencodeVersionOfManeSelectTranscript = gene.transcripts.find(
-    (transcript: any) => transcript.transcript_id === gene.mane_select_transcript.ensembl_id
+const ManeSelectTranscriptId = ({
+  mane_select_transcript,
+  transcripts,
+}: ManeSelectTranscriptIdProps) => {
+  const gencodeVersionOfManeSelectTranscript = transcripts.find(
+    (transcript) => transcript.transcript_id === mane_select_transcript.ensembl_id
   )
   const shouldLinkToTranscriptPage =
     gencodeVersionOfManeSelectTranscript &&
     gencodeVersionOfManeSelectTranscript.transcript_version ===
-      gene.mane_select_transcript.ensembl_version
+      mane_select_transcript.ensembl_version
 
   return (
     <React.Fragment>
       {shouldLinkToTranscriptPage ? (
-        <Link to={`/transcript/${gene.mane_select_transcript.ensembl_id}`}>
-          {gene.mane_select_transcript.ensembl_id}.{gene.mane_select_transcript.ensembl_version}
+        <Link to={`/transcript/${mane_select_transcript.ensembl_id}`}>
+          {mane_select_transcript.ensembl_id}.{mane_select_transcript.ensembl_version}
         </Link>
       ) : (
-        `${gene.mane_select_transcript.ensembl_id}.${gene.mane_select_transcript.ensembl_version}`
+        `${mane_select_transcript.ensembl_id}.${mane_select_transcript.ensembl_version}`
       )}{' '}
-      / {gene.mane_select_transcript.refseq_id}.{gene.mane_select_transcript.refseq_version}
+      / {mane_select_transcript.refseq_id}.{mane_select_transcript.refseq_version}
     </React.Fragment>
   )
 }
@@ -109,8 +110,14 @@ const GeneInfo = ({ gene }: GeneInfoProps) => {
             </React.Fragment>
           }
         >
-          {/* @ts-expect-error TS(2322) FIXME: Type '{ gene_id: string; gene_version: string; sym... Remove this comment to see the full error message */}
-          {gene.mane_select_transcript ? <ManeSelectTranscriptId gene={gene} /> : 'Not available'}
+          {gene.mane_select_transcript ? (
+            <ManeSelectTranscriptId
+              mane_select_transcript={gene.mane_select_transcript}
+              transcripts={gene.transcripts}
+            />
+          ) : (
+            'Not available'
+          )}
         </AttributeListItem>
       )}