diff --git a/CNV-Profiler/CNV-Profiler.inputs.json b/CNV-Profiler/CNV-Profiler.inputs.json new file mode 100644 index 0000000..e10b888 --- /dev/null +++ b/CNV-Profiler/CNV-Profiler.inputs.json @@ -0,0 +1,37 @@ +{ + "CNV_Profiler.CramToBam.mem": "Int (optional, default = 64)", + "CNV_Profiler.SamtoolsDepth.cpu": "Int? (optional)", + "CNV_Profiler.GetPaddedCnvBed.mem_gb": "Int (optional, default = 1)", + "CNV_Profiler.SamtoolsDepth.disk_size_gb": "Int? (optional)", + "CNV_Profiler.cnvDepthProfiler.maxRetries": "Int (optional, default = 1)", + "CNV_Profiler.cramOrBamIndexFile": "File", + "CNV_Profiler.HeterozygosityCheck.maxRetries": "Int (optional, default = 1)", + "CNV_Profiler.HeterozygosityCheck.cpu": "Int (optional, default = 8)", + "CNV_Profiler.cramOrBamFile": "File", + "CNV_Profiler.HeterozygosityCheck.HG2_vcf_path": "File (optional, default = \"gs://dragenv4_2_validation/dragen_4_2_4/hg19/NA24385/NA24385.hard-filtered.vcf.gz\")", + "CNV_Profiler.HeterozygosityCheck.mem_gb": "Int (optional, default = 64)", + "CNV_Profiler.cnvDepthProfiler.cpu": "Int (optional, default = 8)", + "CNV_Profiler.cnvDepthProfiler.preemptible": "Int (optional, default = 0)", + "CNV_Profiler.SamtoolsDepth.minBaseQuality": "Int (optional, default = 20)", + "CNV_Profiler.heterozygosityCheck": "Boolean (optional, default = false)", + "CNV_Profiler.CramToBam.cpu": "Int (optional, default = 8)", + "CNV_Profiler.HeterozygosityCheck.disk_size_gb": "Int (optional, default = 500)", + "CNV_Profiler.hardFilteredVcfFile": "File? (optional)", + "CNV_Profiler.GetPaddedCnvBed.cpu": "Int (optional, default = 1)", + "CNV_Profiler.referenceFasta": "File", + "CNV_Profiler.GetPaddedCnvBed.disk_size_gb": "Int (optional, default = 10)", + "CNV_Profiler.cnvDepthProfiler.mem_gb": "Int (optional, default = 64)", + "CNV_Profiler.cnvDepthProfiler.disk_size_gb": "Int (optional, default = 500)", + "CNV_Profiler.sampleName": "String", + "CNV_Profiler.cnvProfiler_Docker": "String (optional, default = \"us-central1-docker.pkg.dev/tag-team-160914/gptag-dockers/covprofileviz:0.0.2\")", + "CNV_Profiler.referenceDict": "File", + "CNV_Profiler.SamtoolsDepth.samtools_docker": "String (optional, default = \"euformatics/samtools:1.20\")", + "CNV_Profiler.referenceFastaIndex": "File", + "CNV_Profiler.SamtoolsDepth.minMappingQuality": "Int (optional, default = 20)", + "CNV_Profiler.HeterozygosityCheck.HG1_vcf_path": "File (optional, default = \"gs://dragenv4_2_validation/dragen_4_2_4/hg19/NA12878/smoke.hard-filtered.vcf.gz\")", + "CNV_Profiler.SamtoolsDepth.mem_gb": "Int? (optional)", + "CNV_Profiler.CramToBam.disk_size": "Int (optional, default = 500)", + "CNV_Profiler.HeterozygosityCheck.preemptible": "Int (optional, default = 0)", + "CNV_Profiler.cnvBedFile": "File" +} + diff --git a/CNV-Profiler/CNV-Profiler.wdl b/CNV-Profiler/CNV-Profiler.wdl new file mode 100644 index 0000000..1678f8d --- /dev/null +++ b/CNV-Profiler/CNV-Profiler.wdl @@ -0,0 +1,279 @@ +version 1.0 + +workflow CNV_Profiler { + input{ + String sampleName + String cnvProfiler_Docker = "us-central1-docker.pkg.dev/tag-team-160914/gptag-dockers/covprofileviz:0.0.2" + File cramOrBamFile + File cramOrBamIndexFile + File referenceFasta + File referenceFastaIndex + File referenceDict + File cnvBedFile + Boolean heterozygosityCheck = false + File? hardFilteredVcfFile + } + if (basename(cramOrBamFile) != basename(cramOrBamFile, ".cram")) { + call CramToBam { + input: + sampleName = sampleName, + cramFile = cramOrBamFile, + cramIndexFile = cramOrBamIndexFile, + referenceFasta = referenceFasta, + referenceFastaIndex = referenceFastaIndex, + referenceDict = referenceDict + } + } + File alignedBam = select_first([cramOrBamFile, CramToBam.output_bam]) + File alignedBai = select_first([cramOrBamIndexFile, CramToBam.output_bai]) + call GetPaddedCnvBed { + input: + cnvBedFile = cnvBedFile, + cnvProfiler_Docker = cnvProfiler_Docker + } + call SamtoolsDepth { + input: + sampleName = sampleName, + alignedBam = alignedBam, + alignedBai = alignedBai, + target_bed = GetPaddedCnvBed.paddedCnvBed + + } + call cnvDepthProfiler { + input: + sampleName = sampleName, + depthProfile = SamtoolsDepth.depth_profile, + cnvBedFile = cnvBedFile, + cnvProfiler_Docker = cnvProfiler_Docker + } + if (heterozygosityCheck) { + call HeterozygosityCheck { + input: + sampleName = sampleName, + hardFilteredVcfFile = hardFilteredVcfFile, + cnvBedFile = cnvBedFile, + cnvProfiler_Docker = cnvProfiler_Docker + } + } + output { + File samtools_depth_profile = SamtoolsDepth.depth_profile + Array[File] cnv_depth_profile = cnvDepthProfiler.cnv_depth_profile + Array[File]? heterozygosity_plot = HeterozygosityCheck.heterozygosity_plot + } + meta { + description: "This workflow takes a BAM or CRAM file and a CNV bed file as input and generates a coverage profile for the CNV regions in the bed file. Optionally, it can also generate a heterozygosity plot using a hard-filtered VCF file." + author: "Yueyao Gao" + email: "tag@broadinstitute.org" + } +} + + +task CramToBam { + input { + File referenceFasta + File referenceFastaIndex + File referenceDict + #cram and crai must be optional since Normal cram is optional + File? cramFile + File? cramIndexFile + String sampleName + Int disk_size = 500 + Int mem = 64 + Int cpu = 8 + } + + Int machine_mem = if defined(mem) then mem * 1000 else 6000 + + #Calls samtools view to do the conversion + command <<< + set -e + set -o pipefail + + samtools view -h -T ~{referenceFasta} ~{cramFile} | + samtools view -b -o ~{sampleName}.bam - + samtools index -b ~{sampleName}.bam + mv ~{sampleName}.bam.bai ~{sampleName}.bai + >>> + + runtime { + docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735" + cpu: cpu + memory: machine_mem + " MB" + disks: "local-disk " + disk_size + " SSD" + } + + output { + File output_bam = "~{sampleName}.bam" + File output_bai = "~{sampleName}.bai" + } +} + +task GetPaddedCnvBed { + input { + File cnvBedFile + String cnvProfiler_Docker + Int mem_gb = 1 + Int cpu = 1 + Int disk_size_gb = 10 + } + + command <<< + source activate env_viz + python3 <>> + runtime { + docker: cnvProfiler_Docker + cpu: cpu + memory: mem_gb + " GB" + disks: "local-disk " + disk_size_gb + " HDD" + } + output { + File paddedCnvBed = "padded_cnv.bed" + } +} + +task SamtoolsDepth { + input { + String sampleName + File alignedBam + File alignedBai + File target_bed + Int minBaseQuality = 20 + Int minMappingQuality = 20 + Int? mem_gb + Int? cpu + Int? disk_size_gb + String samtools_docker = "euformatics/samtools:1.20" + } + command <<< + # Create directories for input & output + mkdir input + mkdir output + readlink -f ~{alignedBam} > input/bam_path.txt + + # Run samtools depth + # Counting fragments instead of reads using -s option + samtools depth \ + -b ~{target_bed} \ + -f input/bam_path.txt \ + --min-BQ ~{minBaseQuality} \ + --min-MQ ~{minMappingQuality} \ + -s \ + -o output/~{sampleName}_samtools.depth + + >>> + output { + File depth_profile = "output/~{sampleName}_samtools.depth" + } + runtime { + memory: select_first([mem_gb, 7]) * 1000 + " MB" + cpu: select_first([cpu, 1]) + docker: samtools_docker + disks: "local-disk ~{disk_size_gb} SSD" + preemptible: 0 + maxRetries: 3 + } +} + +task cnvDepthProfiler{ + input { + String sampleName + String cnvProfiler_Docker + File depthProfile + File cnvBedFile + Int mem_gb = 64 + Int cpu = 8 + Int preemptible = 0 + Int disk_size_gb = 500 + Int maxRetries = 1 + } + command <<< + set -e + mkdir output + + # Run the coverage profile visualization script + conda run --no-capture-output \ + -n env_viz \ + python3 /BaseImage/CovProfileViz/scripts/CNV_Depth_Profiler.py \ + -c ~{depthProfile} \ + -b ~{cnvBedFile} \ + -n output/~{sampleName} + + >>> + output { + Array[File] cnv_depth_profile = glob("output/*png") + } + runtime { + memory: mem_gb + " GB" + cpu: cpu + docker: cnvProfiler_Docker + disks: "local-disk ~{disk_size_gb} SSD" + preemptible: preemptible + maxRetries: maxRetries + } +} + +task HeterozygosityCheck{ + input { + String sampleName + String cnvProfiler_Docker + File HG1_vcf_path = "gs://dragenv4_2_validation/dragen_4_2_4/hg19/NA12878/smoke.hard-filtered.vcf.gz" + File HG2_vcf_path = "gs://dragenv4_2_validation/dragen_4_2_4/hg19/NA24385/NA24385.hard-filtered.vcf.gz" + File? hardFilteredVcfFile + File cnvBedFile + Int mem_gb = 64 + Int cpu = 8 + Int preemptible = 0 + Int disk_size_gb = 500 + Int maxRetries = 1 + } + command <<< + set -e + mkdir output + + # Run the coverage profile visualization script + conda run --no-capture-output \ + -n env_viz \ + python3 /BaseImage/CovProfileViz/scripts/CNV_SNP_HET_Profiler.py \ + -v1 ~{hardFilteredVcfFile} \ + -v2 ~{HG1_vcf_path} \ + -v3 ~{HG2_vcf_path} \ + -b ~{cnvBedFile} \ + -n1 ~{sampleName} \ + -n2 HG001 \ + -n3 HG002 \ + -o output/~{sampleName} + + >>> + output { + Array[File] heterozygosity_plot = glob("output/*png") + } + runtime { + memory: mem_gb + " GB" + cpu: cpu + docker: cnvProfiler_Docker + disks: "local-disk ~{disk_size_gb} SSD" + preemptible: preemptible + maxRetries: maxRetries + } +} + + + + + + +