Skip to content

Commit bce7252

Browse files
Build RNU4ATAC transcript patches and export to ES
1 parent 32813e5 commit bce7252

File tree

2 files changed

+41
-0
lines changed

2 files changed

+41
-0
lines changed

data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,8 @@
4242
from data_pipeline.pipelines.gnomad_v4_lof_curation_results import pipeline as gnomad_v4_lof_curation_results_pipeline
4343

4444
from data_pipeline.pipelines.gene_patches import pipeline as gnomad_v4_gene_patches
45+
from data_pipeline.pipelines.transcript_patches import pipeline as gnomad_v4_transcript_patches
46+
4547

4648
logger = logging.getLogger("gnomad_data_pipeline")
4749

@@ -119,6 +121,17 @@ def add_liftover_document_id(ds):
119121
"block_size": 1_000,
120122
},
121123
},
124+
"transcripts_grch38_patched": {
125+
"get_table": lambda: hl.read_table(
126+
gnomad_v4_transcript_patches.get_output("transcripts_grch38_patched").get_output_path()
127+
),
128+
"args": {
129+
"index": "transcripts_grch38_patched",
130+
"index_fields": ["transcript_id"],
131+
"id_field": "transcript_id",
132+
"block_size": 1_000,
133+
},
134+
},
122135
##############################################################################################################
123136
# gnomAD v4
124137
##############################################################################################################
data-pipeline/src/data_pipeline/pipelines/transcript_patches.py

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,28 @@
1+
from data_pipeline.pipeline import Pipeline, run_pipeline
2+
3+
from data_pipeline.data_types.transcript import extract_transcripts
4+
from data_pipeline.helpers import annotate_table
5+
6+
pipeline = Pipeline()
7+
8+
pipeline.add_task(
9+
"extract_patched_transcripts",
10+
extract_transcripts,
11+
"/transcripts/transcripts_grch38_patched_base.ht",
12+
{"genes_path": "gs://gnomad-browser-data-pipeline/phil-scratch/output/genes/genes_grch38_patched.ht"},
13+
)
14+
15+
pipeline.add_task(
16+
"annotate_patched_transcripts",
17+
annotate_table,
18+
"/transcripts/transcripts_grch38_annotated_1.ht",
19+
{
20+
"table_path": pipeline.get_task("extract_patched_transcripts"),
21+
"gnomad_constraint": "gs://gnomad-v4-data-pipeline/output/constraint/gnomad_v4_constraint.ht",
22+
},
23+
)
24+
25+
pipeline.set_outputs({"transcripts_grch38_patched": "annotate_patched_transcripts"})
26+
27+
if __name__ == "__main__":
28+
run_pipeline(pipeline)

0 commit comments

Comments
 (0)