-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
2,964 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"BroadInternalRNAWithUMIs.environment":"prod","BroadInternalRNAWithUMIs.library_name":"${this.library_name}","BroadInternalRNAWithUMIs.output_basename":"${this.collaborator_sample_id}","BroadInternalRNAWithUMIs.platform":"${this.platform}","BroadInternalRNAWithUMIs.platform_unit":"${this.platform_unit}","BroadInternalRNAWithUMIs.r1_fastq":"${this.fastq1}","BroadInternalRNAWithUMIs.r2_fastq":"${this.fastq2}","BroadInternalRNAWithUMIs.read1Structure":"${this.read1Structure}","BroadInternalRNAWithUMIs.read2Structure":"${this.read2Structure}","BroadInternalRNAWithUMIs.read_group_name":"${this.read_group_name}","BroadInternalRNAWithUMIs.reference_build":"${this.reference_build}","BroadInternalRNAWithUMIs.sample_lsid":"${this.sample_lsid}","BroadInternalRNAWithUMIs.sequencing_center":"${this.sequencing_center}","BroadInternalRNAWithUMIs.tdr_dataset_uuid":"${}","BroadInternalRNAWithUMIs.tdr_sample_id":"${}","BroadInternalRNAWithUMIs.vault_token_path":"gs://broad-dsp-gotc-arrays-prod-tokens/arrayswdl.token"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,205 @@ | ||
version 1.0 | ||
|
||
import "./subworkflows/RNAWithUMIsPipeline.wdl" as RNAWithUMIs | ||
import "./subworkflows/CheckFingerprint.wdl" as FP | ||
import "./subworkflows/RNAWithUMIsTasks.wdl" as tasks | ||
import "./subworkflows/Utilities.wdl" as utils | ||
|
||
workflow BroadInternalRNAWithUMIs { | ||
|
||
String pipeline_version = "1.0.33" | ||
|
||
input { | ||
# input needs to be either "hg19" or "hg38" | ||
String reference_build | ||
|
||
String sample_lsid | ||
|
||
# RNAWithUMIs inputs | ||
File r1_fastq | ||
File r2_fastq | ||
String read1Structure | ||
String read2Structure | ||
String output_basename | ||
|
||
String platform | ||
String library_name | ||
String platform_unit | ||
String read_group_name | ||
String sequencing_center = "BI" | ||
|
||
# Terra Data Repo dataset information | ||
String? tdr_dataset_uuid | ||
String? tdr_sample_id | ||
|
||
String environment | ||
File vault_token_path | ||
} | ||
|
||
File ref = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/Homo_sapiens_assembly19.fasta" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/Homo_sapiens_assembly38_noALT_noHLA_noDecoy.fasta" | ||
File refIndex = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/Homo_sapiens_assembly19.fasta.fai" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/Homo_sapiens_assembly38_noALT_noHLA_noDecoy.fasta.fai" | ||
File refDict = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/Homo_sapiens_assembly19.dict" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/Homo_sapiens_assembly38_noALT_noHLA_noDecoy.dict" | ||
File haplotype_database_file = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/Homo_sapiens_assembly19.haplotype_database.txt" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/Homo_sapiens_assembly38_noALT_noHLA_noDecoy.haplotype_database.txt" | ||
File refFlat = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/annotation/Homo_sapiens_assembly19.refFlat.txt" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/annotation/hg38_GENCODE_v34_refFlat.txt" | ||
File starIndex = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/star/STAR2.7.10a_genome_hg19_noALT_noHLA_noDecoy_v19_oh145.tar.gz" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/star/STAR2.7.10a_genome_GRCh38_noALT_noHLA_noDecoy_v34_oh145.tar.gz" | ||
File gtf = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/annotation/gencode.v19.genes.v7.collapsed_only.patched_contigs.gtf" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/annotation/gencode.v34.annotation_collapsed_only.gtf" | ||
File ribosomalIntervals = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/annotation/Homo_sapiens_assembly19.rRNA.interval_list" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/annotation/gencode_v34_rRNA.interval_list" | ||
File exonBedFile = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/annotation/gencode.v19.hg19.insert_size_intervals_geq1000bp.bed" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/annotation/gencode.v34.GRCh38.insert_size_intervals_geq1000bp.bed" | ||
File population_vcf = if (reference_build == "hg19") then "gs://gatk-best-practices/somatic-b37/small_exac_common_3.vcf" else "gs://gatk-best-practices/somatic-hg38/small_exac_common_3.hg38.vcf.gz" | ||
File population_vcf_index = if (reference_build == "hg19") then "gs://gatk-best-practices/somatic-b37/small_exac_common_3.vcf.idx" else "gs://gatk-best-practices/somatic-hg38/small_exac_common_3.hg38.vcf.gz.tbi" | ||
|
||
# Human-readable descriptions for each workflow input declared in the input section above | ||
parameter_meta { | ||
reference_build: "String used to define the reference genome build; should be set to 'hg19' or 'hg38'" | ||
sample_lsid: "The sample lsid (an identifier used to retrieve fingerprints from Mercury)" | ||
r1_fastq: "Read 1 FASTQ file" | ||
r2_fastq: "Read 2 FASTQ file" | ||
read1Structure: "String describing how the bases in a sequencing run should be allocated into logical reads for read 1" | ||
read2Structure: "String describing how the bases in a sequencing run should be allocated into logical reads for read 2" | ||
output_basename: "String used as a prefix in workflow output files" | ||
platform: "String used to describe the sequencing platform" | ||
library_name: "String used to describe the library" | ||
platform_unit: "String used to describe the platform unit" | ||
read_group_name: "String used to describe the read group name" | ||
sequencing_center: "String used to describe the sequencing center; default is set to 'BI'" | ||
environment: "The environment (dev or prod) used for determining which service to use to retrieve Mercury fingerprints" | ||
vault_token_path: "The path to the vault token used for accessing the Mercury Fingerprint Store" | ||
tdr_dataset_uuid: "Optional string used to define the Terra Data Repo (TDR) dataset to which outputs will be ingested" | ||
tdr_sample_id: "Optional string used to identify the sample being processed; this must be the primary key in the TDR dataset" | ||
} | ||
|
||
# make sure either hg19 or hg38 is supplied as reference_build input | ||
if ((reference_build != "hg19") && (reference_build != "hg38")) { | ||
call utils.ErrorWithMessage as ErrorMessageIncorrectInput { | ||
input: | ||
message = "reference_build must be supplied with either 'hg19' or 'hg38'." | ||
} | ||
} | ||
|
||
call RNAWithUMIs.RNAWithUMIsPipeline as RNAWithUMIs { | ||
input: | ||
r1_fastq = r1_fastq, | ||
r2_fastq = r2_fastq, | ||
read1Structure = read1Structure, | ||
read2Structure = read2Structure, | ||
starIndex = starIndex, | ||
output_basename = output_basename, | ||
gtf = gtf, | ||
platform = platform, | ||
library_name = library_name, | ||
platform_unit = platform_unit, | ||
read_group_name = read_group_name, | ||
sequencing_center = sequencing_center, | ||
ref = ref, | ||
refIndex = refIndex, | ||
refDict = refDict, | ||
refFlat = refFlat, | ||
ribosomalIntervals = ribosomalIntervals, | ||
exonBedFile = exonBedFile, | ||
population_vcf = population_vcf, | ||
population_vcf_index = population_vcf_index | ||
} | ||
|
||
call FP.CheckFingerprint as CheckFingerprint { | ||
input: | ||
input_bam = RNAWithUMIs.output_bam, | ||
input_bam_index = RNAWithUMIs.output_bam_index, | ||
sample_alias = RNAWithUMIs.sample_name, | ||
sample_lsid = sample_lsid, | ||
output_basename = output_basename, | ||
ref_fasta = ref, | ||
ref_fasta_index = refIndex, | ||
ref_dict = refDict, | ||
read_fingerprint_from_mercury = true, | ||
haplotype_database_file = haplotype_database_file, | ||
environment = environment, | ||
vault_token_path = vault_token_path, | ||
allow_lod_zero = true | ||
} | ||
|
||
call tasks.MergeMetrics { | ||
input: | ||
alignment_summary_metrics = RNAWithUMIs.picard_alignment_summary_metrics, | ||
insert_size_metrics = RNAWithUMIs.picard_insert_size_metrics, | ||
picard_rna_metrics = RNAWithUMIs.picard_rna_metrics, | ||
duplicate_metrics = RNAWithUMIs.duplicate_metrics, | ||
rnaseqc2_metrics = RNAWithUMIs.rnaseqc2_metrics, | ||
fingerprint_summary_metrics = CheckFingerprint.fingerprint_summary_metrics_file, | ||
output_basename = RNAWithUMIs.sample_name | ||
} | ||
|
||
if (defined(tdr_dataset_uuid) && defined(tdr_sample_id)) { | ||
call tasks.formatPipelineOutputs { | ||
input: | ||
sample_id = select_first([tdr_sample_id, ""]), | ||
transcriptome_bam = RNAWithUMIs.transcriptome_bam, | ||
transcriptome_duplicate_metrics = RNAWithUMIs.transcriptome_duplicate_metrics, | ||
output_bam = RNAWithUMIs.output_bam, | ||
output_bam_index = RNAWithUMIs.output_bam_index, | ||
duplicate_metrics = RNAWithUMIs.duplicate_metrics, | ||
rnaseqc2_gene_tpm = RNAWithUMIs.rnaseqc2_gene_tpm, | ||
rnaseqc2_gene_counts = RNAWithUMIs.rnaseqc2_gene_counts, | ||
rnaseqc2_exon_counts = RNAWithUMIs.rnaseqc2_exon_counts, | ||
rnaseqc2_fragment_size_histogram = RNAWithUMIs.rnaseqc2_fragment_size_histogram, | ||
rnaseqc2_metrics = RNAWithUMIs.rnaseqc2_metrics, | ||
picard_rna_metrics = RNAWithUMIs.picard_rna_metrics, | ||
picard_alignment_summary_metrics = RNAWithUMIs.picard_alignment_summary_metrics, | ||
picard_insert_size_metrics = RNAWithUMIs.picard_insert_size_metrics, | ||
picard_insert_size_histogram = RNAWithUMIs.picard_insert_size_histogram, | ||
picard_base_distribution_by_cycle_metrics = RNAWithUMIs.picard_base_distribution_by_cycle_metrics, | ||
picard_base_distribution_by_cycle_pdf = RNAWithUMIs.picard_base_distribution_by_cycle_pdf, | ||
picard_quality_by_cycle_metrics = RNAWithUMIs.picard_quality_by_cycle_metrics, | ||
picard_quality_by_cycle_pdf = RNAWithUMIs.picard_quality_by_cycle_pdf, | ||
picard_quality_distribution_metrics = RNAWithUMIs.picard_quality_distribution_metrics, | ||
picard_quality_distribution_pdf = RNAWithUMIs.picard_quality_distribution_pdf, | ||
picard_fingerprint_summary_metrics = CheckFingerprint.fingerprint_summary_metrics_file, | ||
picard_fingerprint_detail_metrics = CheckFingerprint.fingerprint_detail_metrics_file, | ||
unified_metrics = MergeMetrics.unified_metrics, | ||
contamination = RNAWithUMIs.contamination, | ||
contamination_error = RNAWithUMIs.contamination_error, | ||
fastqc_html_report = RNAWithUMIs.fastqc_html_report, | ||
fastqc_percent_reads_with_adapter = RNAWithUMIs.fastqc_percent_reads_with_adapter | ||
} | ||
|
||
call tasks.updateOutputsInTDR { | ||
input: | ||
tdr_dataset_uuid = select_first([tdr_dataset_uuid, ""]), | ||
outputs_json = formatPipelineOutputs.pipeline_outputs_json | ||
} | ||
} | ||
|
||
output { | ||
File transcriptome_bam = RNAWithUMIs.transcriptome_bam | ||
File output_bam = RNAWithUMIs.output_bam | ||
File output_bam_index = RNAWithUMIs.output_bam_index | ||
|
||
File duplicate_metrics = RNAWithUMIs.duplicate_metrics | ||
File transcriptome_duplicate_metrics = RNAWithUMIs.transcriptome_duplicate_metrics | ||
|
||
File rnaseqc2_gene_tpm = RNAWithUMIs.rnaseqc2_gene_tpm | ||
File rnaseqc2_gene_counts = RNAWithUMIs.rnaseqc2_gene_counts | ||
File rnaseqc2_exon_counts = RNAWithUMIs.rnaseqc2_exon_counts | ||
File rnaseqc2_fragment_size_histogram = RNAWithUMIs.rnaseqc2_fragment_size_histogram | ||
File rnaseqc2_metrics = RNAWithUMIs.rnaseqc2_metrics | ||
File picard_rna_metrics = RNAWithUMIs.picard_rna_metrics | ||
File picard_alignment_summary_metrics = RNAWithUMIs.picard_alignment_summary_metrics | ||
File picard_insert_size_metrics = RNAWithUMIs.picard_insert_size_metrics | ||
File picard_insert_size_histogram = RNAWithUMIs.picard_insert_size_histogram | ||
File picard_base_distribution_by_cycle_metrics = RNAWithUMIs.picard_base_distribution_by_cycle_metrics | ||
File picard_base_distribution_by_cycle_pdf = RNAWithUMIs.picard_base_distribution_by_cycle_pdf | ||
File picard_quality_by_cycle_metrics = RNAWithUMIs.picard_quality_by_cycle_metrics | ||
File picard_quality_by_cycle_pdf = RNAWithUMIs.picard_quality_by_cycle_pdf | ||
File picard_quality_distribution_metrics = RNAWithUMIs.picard_quality_distribution_metrics | ||
File picard_quality_distribution_pdf = RNAWithUMIs.picard_quality_distribution_pdf | ||
File? picard_fingerprint_summary_metrics = CheckFingerprint.fingerprint_summary_metrics_file | ||
File? picard_fingerprint_detail_metrics = CheckFingerprint.fingerprint_detail_metrics_file | ||
File unified_metrics = MergeMetrics.unified_metrics | ||
Float contamination = RNAWithUMIs.contamination | ||
Float contamination_error = RNAWithUMIs.contamination_error | ||
File fastqc_html_report = RNAWithUMIs.fastqc_html_report | ||
Float fastqc_percent_reads_with_adapter = RNAWithUMIs.fastqc_percent_reads_with_adapter | ||
} | ||
|
||
meta { | ||
allowNestedInputs: true | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
version 1.0 | ||
|
||
import "./Utilities.wdl" as utils | ||
import "./InternalTasks.wdl" as InternalTasks | ||
import "./Qc.wdl" as Qc | ||
|
||
|
||
## Copyright Broad Institute, 2022 | ||
## | ||
## This WDL pipeline implements a CheckFingerprint task | ||
## It runs the Picard tool 'CheckFingerprint' against a supplied input file (VCF, CRAM, BAM or SAM) using a set of 'fingerprint' genotypes. | ||
## These genotypes can either be generated by pulling them from the (Broad-internal) Mercury Fingerprint Store or be supplied as inputs to the pipeline. | ||
## | ||
## Runtime parameters are optimized for Broad's Google Cloud Platform implementation. | ||
## For program versions, see docker containers. | ||
## | ||
## LICENSING : | ||
## This script is released under the WDL source code license (BSD-3) (see LICENSE in | ||
## https://github.com/broadinstitute/wdl). Note however that the programs it calls may | ||
## be subject to different licenses. Users are responsible for checking that they are | ||
## authorized to run all programs before running this script. Please see the docker | ||
## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed | ||
## licensing information pertaining to the included programs. | ||
workflow CheckFingerprint { | ||
|
||
String pipeline_version = "1.0.20" | ||
|
||
input { | ||
File? input_vcf | ||
File? input_vcf_index | ||
File? input_bam | ||
File? input_bam_index | ||
|
||
# The name of the sample in the input_vcf. Not required if there is only one sample in the VCF | ||
String? input_sample_alias | ||
|
||
# If this is true, we will read fingerprints from Mercury | ||
# Otherwise, we will use the optional input fingerprint VCFs below | ||
Boolean read_fingerprint_from_mercury = false | ||
File? fingerprint_genotypes_vcf | ||
File? fingerprint_genotypes_vcf_index | ||
|
||
String? sample_lsid | ||
String sample_alias | ||
|
||
String output_basename | ||
|
||
File ref_fasta | ||
File ref_fasta_index | ||
File ref_dict | ||
|
||
File haplotype_database_file | ||
Boolean allow_lod_zero = false | ||
|
||
String? environment | ||
File? vault_token_path | ||
} | ||
|
||
if (defined(input_vcf) && defined(input_bam)) { | ||
call utils.ErrorWithMessage as ErrorMessageDoubleInput { | ||
input: | ||
message = "input_vcf and input_bam cannot both be defined as input" | ||
} | ||
} | ||
|
||
# Reading from Mercury needs sample_lsid, environment, and vault_token_path; | ||
# if any of them is missing, report it through utils.ErrorWithMessage. | ||
if (read_fingerprint_from_mercury && (!defined(sample_lsid) || !defined(environment) || !defined(vault_token_path))) { | ||
call utils.ErrorWithMessage as ErrorMessageIncompleteForReadingFromMercury { | ||
input: | ||
message = "sample_lsid, environment, and vault_token_path must be defined when reading from Mercury" | ||
} | ||
} | ||
|
||
# sample_alias may contain spaces, so make a filename-safe version for the downloaded fingerprint file | ||
call InternalTasks.MakeSafeFilename { | ||
input: | ||
name = sample_alias | ||
} | ||
|
||
if (read_fingerprint_from_mercury) { | ||
call InternalTasks.DownloadGenotypes { | ||
input: | ||
sample_alias = sample_alias, | ||
sample_lsid = select_first([sample_lsid]), | ||
output_vcf_base_name = MakeSafeFilename.output_safe_name + ".reference.fingerprint", | ||
haplotype_database_file = haplotype_database_file, | ||
ref_fasta = ref_fasta, | ||
ref_fasta_index = ref_fasta_index, | ||
ref_dict = ref_dict, | ||
environment = select_first([environment]), | ||
vault_token_path = select_first([vault_token_path]) | ||
} | ||
} | ||
|
||
Boolean fingerprint_downloaded_from_mercury = select_first([DownloadGenotypes.fingerprint_retrieved, false]) | ||
|
||
File? fingerprint_vcf_to_use = if (fingerprint_downloaded_from_mercury) then DownloadGenotypes.reference_fingerprint_vcf else fingerprint_genotypes_vcf | ||
File? fingerprint_vcf_index_to_use = if (fingerprint_downloaded_from_mercury) then DownloadGenotypes.reference_fingerprint_vcf_index else fingerprint_genotypes_vcf_index | ||
|
||
if ((defined(fingerprint_vcf_to_use)) && (defined(input_vcf) || defined(input_bam))) { | ||
call Qc.CheckFingerprintTask { | ||
input: | ||
input_bam = input_bam, | ||
input_bam_index = input_bam_index, | ||
input_vcf = input_vcf, | ||
input_vcf_index = input_vcf_index, | ||
input_sample_alias = input_sample_alias, | ||
genotypes = select_first([fingerprint_vcf_to_use]), | ||
genotypes_index = fingerprint_vcf_index_to_use, | ||
expected_sample_alias = sample_alias, | ||
output_basename = output_basename, | ||
haplotype_database_file = haplotype_database_file, | ||
ref_fasta = ref_fasta, | ||
ref_fasta_index = ref_fasta_index, | ||
allow_lod_zero = allow_lod_zero | ||
} | ||
} | ||
|
||
output { | ||
Boolean fingerprint_read_from_mercury = fingerprint_downloaded_from_mercury | ||
File? reference_fingerprint_vcf = fingerprint_vcf_to_use | ||
File? reference_fingerprint_vcf_index = fingerprint_vcf_index_to_use | ||
File? fingerprint_summary_metrics_file = CheckFingerprintTask.summary_metrics | ||
File? fingerprint_detail_metrics_file = CheckFingerprintTask.detail_metrics | ||
Float? lod_score = CheckFingerprintTask.lod | ||
} | ||
meta { | ||
allowNestedInputs: true | ||
} | ||
} |
Oops, something went wrong.