diff --git a/CoverageProfiler/CoverageProfiler.wdl b/CoverageProfiler/CoverageProfiler.wdl index 03b1f5e..8328eaf 100644 --- a/CoverageProfiler/CoverageProfiler.wdl +++ b/CoverageProfiler/CoverageProfiler.wdl @@ -58,10 +58,7 @@ workflow coverageProfile { Float? DepthOfCoverageMeanCoverage = DepthOfCoverage.mean_coverage File? SamtoolsDepthProfile = SamtoolsDepth.depth_profile File? SamtoolsCovProfilePlot = CovProfileViz.cov_profile_plot - Float? SamtoolsAvgChrCovStd = CovProfileViz.avg_chr_cov_std - File? SamtoolsAvgChrCovPerChr = CovProfileViz.avg_chr_cov_per_chr Float? SamtoolsAvgCovMean = CovProfileViz.avg_cov_mean - File? SamtoolsAvgChrCovPerChrPlot = CovProfileViz.avg_chr_cov_per_chr_plot } meta { author: "Yueyao Gao" @@ -150,7 +147,9 @@ workflow coverageProfile { Int? mem_gb Int? cpu Int? disk_size_gb - String samtools_docker = "euformatics/samtools:1.20" + Int preemptible = 1 + Int maxRetries = 3 + String samtools_docker = "quay.io/biocontainers/samtools:1.20--h50ea8bc_0" } command <<< # Create directories for input & output @@ -177,8 +176,8 @@ workflow coverageProfile { cpu: select_first([cpu, 1]) docker: samtools_docker disks: "local-disk ~{disk_size_gb} SSD" - preemptible: 0 - maxRetries: 3 + preemptible: preemptible + maxRetries: maxRetries } } task CovProfileViz { @@ -189,7 +188,8 @@ workflow coverageProfile { String CovProfileViz_docker = "us-central1-docker.pkg.dev/tag-team-160914/gptag-dockers/covprofileviz:0.0.0" Int mem_gb = 32 Int? cpu - Int? preemptible = 3 + Int? preemptible = 1 + Int MaxRetries = 3 Int? 
disk_size_gb = 500 } command <<< @@ -205,16 +205,10 @@ workflow coverageProfile { -o output mv output/*_samtools_cov_with_gc.png output/~{sampleName}Sample_Cov_profile.png - mv output/*_avg_cov_per_chr.png output/~{sampleName}Avg_Cov_per_chr.png - mv output/*_avg_cov_std.txt output/~{sampleName}_Per_Chr_Cov_std.txt - mv output/*_avg_cov_per_chr.csv output/~{sampleName}_Per_Chr_Avg_cov.csv mv output/*_avg_cov_mean.txt output/~{sampleName}_Avg_Cov_mean.txt >>> output { File cov_profile_plot = "output/~{sampleName}Sample_Cov_profile.png" - File avg_chr_cov_per_chr_plot = "output/~{sampleName}Avg_Cov_per_chr.png" - Float avg_chr_cov_std = read_float("output/~{sampleName}_Per_Chr_Cov_std.txt") - File avg_chr_cov_per_chr = "output/~{sampleName}_Per_Chr_Avg_cov.csv" Float avg_cov_mean = read_float("output/~{sampleName}_Avg_Cov_mean.txt") } runtime { @@ -223,6 +217,6 @@ workflow coverageProfile { docker: CovProfileViz_docker disks: "local-disk ~{disk_size_gb} SSD" preemptible: preemptible - maxRetries: 3 + maxRetries: MaxRetries } } \ No newline at end of file diff --git a/CoverageProfiler/README.md b/CoverageProfiler/README.md new file mode 100644 index 0000000..ad20c65 --- /dev/null +++ b/CoverageProfiler/README.md @@ -0,0 +1,93 @@ +# CoverageProfile WDL Workflow + +## Overview + +The `coverageProfile` workflow calculates the depth of coverage of an input sample and visualizes it. It supports two tools for depth calculation: Samtools and DepthOfCoverage (GATK). Visualization is available for Samtools and Exome data. + +## Inputs + +- `String sampleName` - The name of the sample. +- `String coverageTool` - The tool to use for coverage calculation. Default is "Samtools". +- `File alignedBam` - BAM file with aligned reads. +- `File alignedBamIndex` - Index file for the BAM. +- `File referenceFasta` - Reference genome in FASTA format. +- `File referenceDict` - Dictionary file for the reference genome. +- `File referenceFai` - Index file for the reference genome. 
+- `File intervals` - Intervals file. +- `File? interval_GCcontent_track` - (Optional) GC content track for visualization. +- `Int MinBaseQuality` - Minimum base quality for coverage calculation. Default is 20. +- `Int MinMappingQuality` - Minimum mapping quality for coverage calculation. Default is 20. +- `Boolean visualise_coverage` - Whether to visualize the coverage. Default is false. + +## Outputs + +- `File? DepthOfCoverageIntervalCov` - Depth of coverage interval summary (only for DepthOfCoverage tool). +- `Float? DepthOfCoverageMeanCoverage` - Mean coverage from DepthOfCoverage tool. +- `File? SamtoolsDepthProfile` - Depth profile generated by Samtools. +- `File? SamtoolsCovProfilePlot` - Coverage profile plot generated by the visualization task. +- `Float? SamtoolsAvgCovMean` - Average coverage mean from Samtools Depth. + + +## Workflow + +### Main Workflow + +1. **Choose Tool:** Based on the `coverageTool` input, the workflow branches to use either Samtools or DepthOfCoverage. + +2. **Samtools Workflow:** + - Convert intervals to BED format using `IntervalListToBed`. + - Calculate depth using `SamtoolsDepth`. + - If `visualise_coverage` is true, visualize the coverage using `CovProfileViz`. The visualization requires the depth profile and a GC content track. It currently only works with exome data. + +3. **DepthOfCoverage Workflow:** + - Calculate depth using `DepthOfCoverage`. + +### Tasks + +#### DepthOfCoverage + +Calculates the depth of coverage using GATK's DepthOfCoverage tool. + +**Inputs:** +- Various inputs for the BAM file, reference genome, intervals, and quality thresholds. + +**Outputs:** +- `File sample_interval_summary` - Interval summary. +- `Float mean_coverage` - Mean coverage. + +#### IntervalListToBed + +Converts an interval list to a BED file. + +**Inputs:** +- `File intervals` - Intervals file. + +**Outputs:** +- `File bed_intervals` - Converted BED file. + +#### SamtoolsDepth + +Calculates depth of coverage using Samtools. 
+ +**Inputs:** +- Various inputs for the BAM file, intervals, and quality thresholds. + +**Outputs:** +- `File depth_profile` - Depth profile. + +#### CovProfileViz + +Visualizes the coverage profile. + +**Inputs:** +- `File SamtoolsDepthProfile` - Depth profile from Samtools. +- `File? GCcontentTrack` - Optional GC content track. +- Other parameters for customization. + +**Outputs:** +- `File cov_profile_plot` - Coverage profile plot. +- `File avg_chr_cov_per_chr_plot` - Plot of average coverage per chromosome (no longer produced; removed from the task outputs). +- `Float avg_chr_cov_std` - Standard deviation of average coverage per chromosome (no longer produced; removed from the task outputs). +- `File avg_chr_cov_per_chr` - Average coverage per chromosome (no longer produced; removed from the task outputs). +- `Float avg_cov_mean` - Mean average coverage. +