Skip to content

Commit

Permalink
adding files for TCap wdl
Browse files Browse the repository at this point in the history
  • Loading branch information
MicahR-Y committed Aug 26, 2024
1 parent d507386 commit 5381736
Show file tree
Hide file tree
Showing 10 changed files with 2,964 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,8 @@ workflows:
primaryDescriptorPath: /PECGS-QUICviz/QUICviz.wdl
testParameterFiles:
- /PECGS-QUICviz/QUICviz.inputs.json
- name: BroadInternalRNAWithUMIs
subclass: WDL
primaryDescriptorPath: /TCapRNAPipeline/BroadInternalRNAWithUMIs.wdl
testParameterFiles:
- /TCapRNAPipeline/BroadInternalRNAWithUMIs.inputs.json
1 change: 1 addition & 0 deletions TCapRNAPipeline/BroadInternalRNAWithUMIs.inputs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"BroadInternalRNAWithUMIs.environment":"prod","BroadInternalRNAWithUMIs.library_name":"${this.library_name}","BroadInternalRNAWithUMIs.output_basename":"${this.collaborator_sample_id}","BroadInternalRNAWithUMIs.platform":"${this.platform}","BroadInternalRNAWithUMIs.platform_unit":"${this.platform_unit}","BroadInternalRNAWithUMIs.r1_fastq":"${this.fastq1}","BroadInternalRNAWithUMIs.r2_fastq":"${this.fastq2}","BroadInternalRNAWithUMIs.read1Structure":"${this.read1Structure}","BroadInternalRNAWithUMIs.read2Structure":"${this.read2Structure}","BroadInternalRNAWithUMIs.read_group_name":"${this.read_group_name}","BroadInternalRNAWithUMIs.reference_build":"${this.reference_build}","BroadInternalRNAWithUMIs.sample_lsid":"${this.sample_lsid}","BroadInternalRNAWithUMIs.sequencing_center":"${this.sequencing_center}","BroadInternalRNAWithUMIs.tdr_dataset_uuid":"${}","BroadInternalRNAWithUMIs.tdr_sample_id":"${}","BroadInternalRNAWithUMIs.vault_token_path":"gs://broad-dsp-gotc-arrays-prod-tokens/arrayswdl.token"}
205 changes: 205 additions & 0 deletions TCapRNAPipeline/BroadInternalRNAWithUMIs.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
version 1.0

import "./subworkflows/RNAWithUMIsPipeline.wdl" as RNAWithUMIs
import "./subworkflows/CheckFingerprint.wdl" as FP
import "./subworkflows/RNAWithUMIsTasks.wdl" as tasks
import "./subworkflows/Utilities.wdl" as utils

workflow BroadInternalRNAWithUMIs {

  # Broad-internal wrapper around RNAWithUMIsPipeline. In order, it:
  #   1. selects reference/annotation files from `reference_build`,
  #   2. calls RNAWithUMIsPipeline on the r1/r2 FASTQ pair,
  #   3. calls CheckFingerprint on the resulting BAM with read_fingerprint_from_mercury = true,
  #   4. calls MergeMetrics to produce `unified_metrics`, and
  #   5. only when BOTH tdr_dataset_uuid and tdr_sample_id are supplied, calls
  #      formatPipelineOutputs followed by updateOutputsInTDR.

  String pipeline_version = "1.0.33"

  input {
    # input needs to be either "hg19" or "hg38"
    String reference_build

    String sample_lsid

    # RNAWithUMIs inputs
    File r1_fastq
    File r2_fastq
    String read1Structure
    String read2Structure
    String output_basename

    String platform
    String library_name
    String platform_unit
    String read_group_name
    String sequencing_center = "BI"

    # Terra Data Repo dataset information
    String? tdr_dataset_uuid
    String? tdr_sample_id

    String environment
    File vault_token_path
  }

  # Reference bundle selection.
  # Each declaration picks the hg19 path when reference_build == "hg19" and the hg38 path for
  # ANY other value; unsupported values are rejected separately by the check further down.
  File ref = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/Homo_sapiens_assembly19.fasta" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/Homo_sapiens_assembly38_noALT_noHLA_noDecoy.fasta"
  File refIndex = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/Homo_sapiens_assembly19.fasta.fai" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/Homo_sapiens_assembly38_noALT_noHLA_noDecoy.fasta.fai"
  File refDict = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/Homo_sapiens_assembly19.dict" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/Homo_sapiens_assembly38_noALT_noHLA_noDecoy.dict"
  File haplotype_database_file = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/Homo_sapiens_assembly19.haplotype_database.txt" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/Homo_sapiens_assembly38_noALT_noHLA_noDecoy.haplotype_database.txt"
  File refFlat = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/annotation/Homo_sapiens_assembly19.refFlat.txt" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/annotation/hg38_GENCODE_v34_refFlat.txt"
  File starIndex = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/star/STAR2.7.10a_genome_hg19_noALT_noHLA_noDecoy_v19_oh145.tar.gz" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/star/STAR2.7.10a_genome_GRCh38_noALT_noHLA_noDecoy_v34_oh145.tar.gz"
  File gtf = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/annotation/gencode.v19.genes.v7.collapsed_only.patched_contigs.gtf" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/annotation/gencode.v34.annotation_collapsed_only.gtf"
  File ribosomalIntervals = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/annotation/Homo_sapiens_assembly19.rRNA.interval_list" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/annotation/gencode_v34_rRNA.interval_list"
  File exonBedFile = if (reference_build == "hg19") then "gs://gcp-public-data--broad-references/hg19/v0/annotation/gencode.v19.hg19.insert_size_intervals_geq1000bp.bed" else "gs://gcp-public-data--broad-references/Homo_sapiens_assembly38_noALT_noHLA_noDecoy/v0/annotation/gencode.v34.GRCh38.insert_size_intervals_geq1000bp.bed"
  File population_vcf = if (reference_build == "hg19") then "gs://gatk-best-practices/somatic-b37/small_exac_common_3.vcf" else "gs://gatk-best-practices/somatic-hg38/small_exac_common_3.hg38.vcf.gz"
  File population_vcf_index = if (reference_build == "hg19") then "gs://gatk-best-practices/somatic-b37/small_exac_common_3.vcf.idx" else "gs://gatk-best-practices/somatic-hg38/small_exac_common_3.hg38.vcf.gz.tbi"

  parameter_meta {
    reference_build: "String used to define the reference genome build; should be set to 'hg19' or 'hg38'"
    sample_lsid: "The sample lsid (an identifier used to retrieve fingerprints from Mercury)"
    r1_fastq: "Read 1 FASTQ file"
    r2_fastq: "Read 2 FASTQ file"
    read1Structure: "String describing how the bases in a sequencing run should be allocated into logical reads for read 1"
    read2Structure: "String describing how the bases in a sequencing run should be allocated into logical reads for read 2"
    output_basename: "String used as a prefix in workflow output files"
    platform: "String used to describe the sequencing platform"
    library_name: "String used to describe the library"
    platform_unit: "String used to describe the platform unit"
    read_group_name: "String used to describe the read group name"
    sequencing_center: "String used to describe the sequencing center; default is set to 'BI'"
    environment: "The environment (dev or prod) used for determining which service to use to retrieve Mercury fingerprints"
    vault_token_path: "The path to the vault token used for accessing the Mercury Fingerprint Store"
    tdr_dataset_uuid: "Optional string used to define the Terra Data Repo (TDR) dataset to which outputs will be ingested"
    tdr_sample_id: "Optional string used to identify the sample being processed; this must be the primary key in the TDR dataset"
  }

  # make sure either hg19 or hg38 is supplied as reference_build input
  # NOTE(review): the RNAWithUMIs call below is not nested under this check, so nothing in this
  # file orders it after the validation. Presumably ErrorWithMessage fails the workflow; confirm
  # against Utilities.wdl and the engine's failure mode.
  if ((reference_build != "hg19") && (reference_build != "hg38")) {
    call utils.ErrorWithMessage as ErrorMessageIncorrectInput {
      input:
        message = "reference_build must be supplied with either 'hg19' or 'hg38'."
    }
  }

  # Main pipeline run, using the references selected above.
  call RNAWithUMIs.RNAWithUMIsPipeline as RNAWithUMIs {
    input:
      r1_fastq = r1_fastq,
      r2_fastq = r2_fastq,
      read1Structure = read1Structure,
      read2Structure = read2Structure,
      starIndex = starIndex,
      output_basename = output_basename,
      gtf = gtf,
      platform = platform,
      library_name = library_name,
      platform_unit = platform_unit,
      read_group_name = read_group_name,
      sequencing_center = sequencing_center,
      ref = ref,
      refIndex = refIndex,
      refDict = refDict,
      refFlat = refFlat,
      ribosomalIntervals = ribosomalIntervals,
      exonBedFile = exonBedFile,
      population_vcf = population_vcf,
      population_vcf_index = population_vcf_index
  }

  # Fingerprint the pipeline's output BAM. The reference fingerprint is always requested from
  # Mercury here (read_fingerprint_from_mercury = true); no fingerprint VCF is passed in.
  call FP.CheckFingerprint as CheckFingerprint {
    input:
      input_bam = RNAWithUMIs.output_bam,
      input_bam_index = RNAWithUMIs.output_bam_index,
      sample_alias = RNAWithUMIs.sample_name,
      sample_lsid = sample_lsid,
      output_basename = output_basename,
      ref_fasta = ref,
      ref_fasta_index = refIndex,
      ref_dict = refDict,
      read_fingerprint_from_mercury = true,
      haplotype_database_file = haplotype_database_file,
      environment = environment,
      vault_token_path = vault_token_path,
      allow_lod_zero = true
  }

  # Combine the individual metrics files into `unified_metrics`.
  # CheckFingerprint.fingerprint_summary_metrics_file is declared File? by CheckFingerprint,
  # so the fingerprint input passed here may be undefined.
  call tasks.MergeMetrics {
    input:
      alignment_summary_metrics = RNAWithUMIs.picard_alignment_summary_metrics,
      insert_size_metrics = RNAWithUMIs.picard_insert_size_metrics,
      picard_rna_metrics = RNAWithUMIs.picard_rna_metrics,
      duplicate_metrics = RNAWithUMIs.duplicate_metrics,
      rnaseqc2_metrics = RNAWithUMIs.rnaseqc2_metrics,
      fingerprint_summary_metrics = CheckFingerprint.fingerprint_summary_metrics_file,
      output_basename = RNAWithUMIs.sample_name
  }

  # TDR update runs only when BOTH optional TDR inputs are supplied; supplying just one
  # skips this section without an error.
  if (defined(tdr_dataset_uuid) && defined(tdr_sample_id)) {
    call tasks.formatPipelineOutputs {
      input:
        # select_first only unwraps the optional here; the "" fallback is never taken
        # because defined() was already checked by the enclosing if.
        sample_id = select_first([tdr_sample_id, ""]),
        transcriptome_bam = RNAWithUMIs.transcriptome_bam,
        transcriptome_duplicate_metrics = RNAWithUMIs.transcriptome_duplicate_metrics,
        output_bam = RNAWithUMIs.output_bam,
        output_bam_index = RNAWithUMIs.output_bam_index,
        duplicate_metrics = RNAWithUMIs.duplicate_metrics,
        rnaseqc2_gene_tpm = RNAWithUMIs.rnaseqc2_gene_tpm,
        rnaseqc2_gene_counts = RNAWithUMIs.rnaseqc2_gene_counts,
        rnaseqc2_exon_counts = RNAWithUMIs.rnaseqc2_exon_counts,
        rnaseqc2_fragment_size_histogram = RNAWithUMIs.rnaseqc2_fragment_size_histogram,
        rnaseqc2_metrics = RNAWithUMIs.rnaseqc2_metrics,
        picard_rna_metrics = RNAWithUMIs.picard_rna_metrics,
        picard_alignment_summary_metrics = RNAWithUMIs.picard_alignment_summary_metrics,
        picard_insert_size_metrics = RNAWithUMIs.picard_insert_size_metrics,
        picard_insert_size_histogram = RNAWithUMIs.picard_insert_size_histogram,
        picard_base_distribution_by_cycle_metrics = RNAWithUMIs.picard_base_distribution_by_cycle_metrics,
        picard_base_distribution_by_cycle_pdf = RNAWithUMIs.picard_base_distribution_by_cycle_pdf,
        picard_quality_by_cycle_metrics = RNAWithUMIs.picard_quality_by_cycle_metrics,
        picard_quality_by_cycle_pdf = RNAWithUMIs.picard_quality_by_cycle_pdf,
        picard_quality_distribution_metrics = RNAWithUMIs.picard_quality_distribution_metrics,
        picard_quality_distribution_pdf = RNAWithUMIs.picard_quality_distribution_pdf,
        picard_fingerprint_summary_metrics = CheckFingerprint.fingerprint_summary_metrics_file,
        picard_fingerprint_detail_metrics = CheckFingerprint.fingerprint_detail_metrics_file,
        unified_metrics = MergeMetrics.unified_metrics,
        contamination = RNAWithUMIs.contamination,
        contamination_error = RNAWithUMIs.contamination_error,
        fastqc_html_report = RNAWithUMIs.fastqc_html_report,
        fastqc_percent_reads_with_adapter = RNAWithUMIs.fastqc_percent_reads_with_adapter
    }

    # Consumes the JSON produced by formatPipelineOutputs, so it runs after that call.
    call tasks.updateOutputsInTDR {
      input:
        tdr_dataset_uuid = select_first([tdr_dataset_uuid, ""]),
        outputs_json = formatPipelineOutputs.pipeline_outputs_json
    }
  }

  output {
    File transcriptome_bam = RNAWithUMIs.transcriptome_bam
    File output_bam = RNAWithUMIs.output_bam
    File output_bam_index = RNAWithUMIs.output_bam_index

    File duplicate_metrics = RNAWithUMIs.duplicate_metrics
    File transcriptome_duplicate_metrics = RNAWithUMIs.transcriptome_duplicate_metrics

    File rnaseqc2_gene_tpm = RNAWithUMIs.rnaseqc2_gene_tpm
    File rnaseqc2_gene_counts = RNAWithUMIs.rnaseqc2_gene_counts
    File rnaseqc2_exon_counts = RNAWithUMIs.rnaseqc2_exon_counts
    File rnaseqc2_fragment_size_histogram = RNAWithUMIs.rnaseqc2_fragment_size_histogram
    File rnaseqc2_metrics = RNAWithUMIs.rnaseqc2_metrics
    File picard_rna_metrics = RNAWithUMIs.picard_rna_metrics
    File picard_alignment_summary_metrics = RNAWithUMIs.picard_alignment_summary_metrics
    File picard_insert_size_metrics = RNAWithUMIs.picard_insert_size_metrics
    File picard_insert_size_histogram = RNAWithUMIs.picard_insert_size_histogram
    File picard_base_distribution_by_cycle_metrics = RNAWithUMIs.picard_base_distribution_by_cycle_metrics
    File picard_base_distribution_by_cycle_pdf = RNAWithUMIs.picard_base_distribution_by_cycle_pdf
    File picard_quality_by_cycle_metrics = RNAWithUMIs.picard_quality_by_cycle_metrics
    File picard_quality_by_cycle_pdf = RNAWithUMIs.picard_quality_by_cycle_pdf
    File picard_quality_distribution_metrics = RNAWithUMIs.picard_quality_distribution_metrics
    File picard_quality_distribution_pdf = RNAWithUMIs.picard_quality_distribution_pdf
    # Optional because CheckFingerprint declares these outputs as File?.
    File? picard_fingerprint_summary_metrics = CheckFingerprint.fingerprint_summary_metrics_file
    File? picard_fingerprint_detail_metrics = CheckFingerprint.fingerprint_detail_metrics_file
    File unified_metrics = MergeMetrics.unified_metrics
    Float contamination = RNAWithUMIs.contamination
    Float contamination_error = RNAWithUMIs.contamination_error
    File fastqc_html_report = RNAWithUMIs.fastqc_html_report
    Float fastqc_percent_reads_with_adapter = RNAWithUMIs.fastqc_percent_reads_with_adapter
  }

  meta {
    allowNestedInputs: true
  }
}
130 changes: 130 additions & 0 deletions TCapRNAPipeline/subworkflows/CheckFingerprint.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
version 1.0

import "./Utilities.wdl" as utils
import "./InternalTasks.wdl" as InternalTasks
import "./Qc.wdl" as Qc


## Copyright Broad Institute, 2022
##
## This WDL pipeline implements a CheckFingerprint task.
## It runs the Picard tool 'CheckFingerprint' against a supplied input file (VCF, CRAM, BAM or SAM) using a set of 'fingerprint' genotypes.
## These genotypes can either be generated by pulling them from the (Broad-internal) Mercury Fingerprint Store or be supplied as inputs to the pipeline.
##
## Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
## For program versions, see docker containers.
##
## LICENSING :
## This script is released under the WDL source code license (BSD-3) (see LICENSE in
## https://github.com/broadinstitute/wdl). Note however that the programs it calls may
## be subject to different licenses. Users are responsible for checking that they are
## authorized to run all programs before running this script. Please see the docker
## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed
## licensing information pertaining to the included programs.
workflow CheckFingerprint {

  # Flow of this workflow:
  #   1. reject contradictory or incomplete inputs via ErrorWithMessage,
  #   2. derive a filename-safe version of sample_alias,
  #   3. optionally call DownloadGenotypes (when read_fingerprint_from_mercury is true),
  #   4. choose the fingerprint VCF to use (downloaded one, else the supplied one),
  #   5. call CheckFingerprintTask only if a fingerprint VCF is available AND one of
  #      input_vcf / input_bam is defined; otherwise the metrics outputs stay undefined.

  String pipeline_version = "1.0.20"

  input {
    File? input_vcf
    File? input_vcf_index
    File? input_bam
    File? input_bam_index

    # The name of the sample in the input_vcf. Not required if there is only one sample in the VCF
    String? input_sample_alias

    # If this is true, we will read fingerprints from Mercury
    # Otherwise, we will use the optional input fingerprint VCFs below
    Boolean read_fingerprint_from_mercury = false
    File? fingerprint_genotypes_vcf
    File? fingerprint_genotypes_vcf_index

    String? sample_lsid
    String sample_alias

    String output_basename

    File ref_fasta
    File ref_fasta_index
    File ref_dict

    File haplotype_database_file
    Boolean allow_lod_zero = false

    String? environment
    File? vault_token_path
  }

  # input_vcf and input_bam are mutually exclusive.
  if (defined(input_vcf) && defined(input_bam)) {
    call utils.ErrorWithMessage as ErrorMessageDoubleInput {
      input:
        message = "input_vcf and input_bam cannot both be defined as input"
    }
  }

  # Reading from Mercury needs all three of sample_lsid, environment and vault_token_path.
  if (read_fingerprint_from_mercury && (!defined(sample_lsid) || !defined(environment) || !defined(vault_token_path))) {
    call utils.ErrorWithMessage as ErrorMessageIncompleteForReadingFromMercury {
      input:
        message = "sample_lsid, environment, and vault_token_path must be defined when reading from Mercury"
    }
  }

  # sample_alias may contain spaces, so make a filename-safe version for the downloaded fingerprint file
  call InternalTasks.MakeSafeFilename {
    input:
      name = sample_alias
  }

  if (read_fingerprint_from_mercury) {
    call InternalTasks.DownloadGenotypes {
      input:
        sample_alias = sample_alias,
        # select_first([x]) unwraps the optional inputs; no fallback value is provided.
        sample_lsid = select_first([sample_lsid]),
        output_vcf_base_name = MakeSafeFilename.output_safe_name + ".reference.fingerprint",
        haplotype_database_file = haplotype_database_file,
        ref_fasta = ref_fasta,
        ref_fasta_index = ref_fasta_index,
        ref_dict = ref_dict,
        environment = select_first([environment]),
        vault_token_path = select_first([vault_token_path])
    }
  }

  # False when DownloadGenotypes did not run (its output is then undefined); otherwise the
  # value DownloadGenotypes reports in fingerprint_retrieved.
  Boolean fingerprint_downloaded_from_mercury = select_first([DownloadGenotypes.fingerprint_retrieved, false])

  # Use the downloaded fingerprint when one was retrieved; otherwise fall back to the
  # caller-supplied fingerprint VCF, which may itself be undefined.
  File? fingerprint_vcf_to_use = if (fingerprint_downloaded_from_mercury) then DownloadGenotypes.reference_fingerprint_vcf else fingerprint_genotypes_vcf
  File? fingerprint_vcf_index_to_use = if (fingerprint_downloaded_from_mercury) then DownloadGenotypes.reference_fingerprint_vcf_index else fingerprint_genotypes_vcf_index

  # Skipped (without an error) when there is no fingerprint VCF or no input_vcf/input_bam.
  if ((defined(fingerprint_vcf_to_use)) && (defined(input_vcf) || defined(input_bam))) {
    call Qc.CheckFingerprintTask {
      input:
        input_bam = input_bam,
        input_bam_index = input_bam_index,
        input_vcf = input_vcf,
        input_vcf_index = input_vcf_index,
        input_sample_alias = input_sample_alias,
        genotypes = select_first([fingerprint_vcf_to_use]),
        genotypes_index = fingerprint_vcf_index_to_use,
        expected_sample_alias = sample_alias,
        output_basename = output_basename,
        haplotype_database_file = haplotype_database_file,
        ref_fasta = ref_fasta,
        ref_fasta_index = ref_fasta_index,
        allow_lod_zero = allow_lod_zero
    }
  }

  output {
    Boolean fingerprint_read_from_mercury = fingerprint_downloaded_from_mercury
    File? reference_fingerprint_vcf = fingerprint_vcf_to_use
    File? reference_fingerprint_vcf_index = fingerprint_vcf_index_to_use
    # The three outputs below are undefined whenever CheckFingerprintTask was skipped.
    File? fingerprint_summary_metrics_file = CheckFingerprintTask.summary_metrics
    File? fingerprint_detail_metrics_file = CheckFingerprintTask.detail_metrics
    Float? lod_score = CheckFingerprintTask.lod
  }
  meta {
    allowNestedInputs: true
  }
}
Loading

0 comments on commit 5381736

Please sign in to comment.