Skip to content

Commit

Permalink
first commit of CNV profiler
Browse files Browse the repository at this point in the history
  • Loading branch information
yueyaog committed Jul 2, 2024
1 parent d507386 commit 47a44af
Show file tree
Hide file tree
Showing 2 changed files with 316 additions and 0 deletions.
37 changes: 37 additions & 0 deletions CNV-Profiler/CNV-Profiler.inputs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"CNV_Profiler.CramToBam.mem": "Int (optional, default = 64)",
"CNV_Profiler.SamtoolsDepth.cpu": "Int? (optional)",
"CNV_Profiler.GetPaddedCnvBed.mem_gb": "Int (optional, default = 1)",
"CNV_Profiler.SamtoolsDepth.disk_size_gb": "Int? (optional)",
"CNV_Profiler.cnvDepthProfiler.maxRetries": "Int (optional, default = 1)",
"CNV_Profiler.cramOrBamIndexFile": "File",
"CNV_Profiler.HeterozygosityCheck.maxRetries": "Int (optional, default = 1)",
"CNV_Profiler.HeterozygosityCheck.cpu": "Int (optional, default = 8)",
"CNV_Profiler.cramOrBamFile": "File",
"CNV_Profiler.HeterozygosityCheck.HG2_vcf_path": "File (optional, default = \"gs://dragenv4_2_validation/dragen_4_2_4/hg19/NA24385/NA24385.hard-filtered.vcf.gz\")",
"CNV_Profiler.HeterozygosityCheck.mem_gb": "Int (optional, default = 64)",
"CNV_Profiler.cnvDepthProfiler.cpu": "Int (optional, default = 8)",
"CNV_Profiler.cnvDepthProfiler.preemptible": "Int (optional, default = 0)",
"CNV_Profiler.SamtoolsDepth.minBaseQuality": "Int (optional, default = 20)",
"CNV_Profiler.heterozygosityCheck": "Boolean (optional, default = false)",
"CNV_Profiler.CramToBam.cpu": "Int (optional, default = 8)",
"CNV_Profiler.HeterozygosityCheck.disk_size_gb": "Int (optional, default = 500)",
"CNV_Profiler.hardFilteredVcfFile": "File? (optional)",
"CNV_Profiler.GetPaddedCnvBed.cpu": "Int (optional, default = 1)",
"CNV_Profiler.referenceFasta": "File",
"CNV_Profiler.GetPaddedCnvBed.disk_size_gb": "Int (optional, default = 10)",
"CNV_Profiler.cnvDepthProfiler.mem_gb": "Int (optional, default = 64)",
"CNV_Profiler.cnvDepthProfiler.disk_size_gb": "Int (optional, default = 500)",
"CNV_Profiler.sampleName": "String",
"CNV_Profiler.cnvProfiler_Docker": "String (optional, default = \"us-central1-docker.pkg.dev/tag-team-160914/gptag-dockers/covprofileviz:0.0.2\")",
"CNV_Profiler.referenceDict": "File",
"CNV_Profiler.SamtoolsDepth.samtools_docker": "String (optional, default = \"euformatics/samtools:1.20\")",
"CNV_Profiler.referenceFastaIndex": "File",
"CNV_Profiler.SamtoolsDepth.minMappingQuality": "Int (optional, default = 20)",
"CNV_Profiler.HeterozygosityCheck.HG1_vcf_path": "File (optional, default = \"gs://dragenv4_2_validation/dragen_4_2_4/hg19/NA12878/smoke.hard-filtered.vcf.gz\")",
"CNV_Profiler.SamtoolsDepth.mem_gb": "Int? (optional)",
"CNV_Profiler.CramToBam.disk_size": "Int (optional, default = 500)",
"CNV_Profiler.HeterozygosityCheck.preemptible": "Int (optional, default = 0)",
"CNV_Profiler.cnvBedFile": "File"
}

279 changes: 279 additions & 0 deletions CNV-Profiler/CNV-Profiler.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
version 1.0

workflow CNV_Profiler {
    # Generates per-CNV depth-of-coverage plots for one sample and, optionally,
    # SNP heterozygosity plots over the same CNV intervals.
    #
    # Inputs:
    #   sampleName           - prefix used for output file names
    #   cnvProfiler_Docker   - image used by the BED padding and plotting tasks
    #   cramOrBamFile        - aligned reads; a ".cram" suffix triggers conversion to BAM
    #   cramOrBamIndexFile   - index matching cramOrBamFile
    #   referenceFasta/Index/Dict - reference passed to CramToBam
    #   cnvBedFile           - CNV intervals to profile
    #   heterozygosityCheck  - when true, HeterozygosityCheck is also run
    #   hardFilteredVcfFile  - sample VCF forwarded to HeterozygosityCheck
    input {
        String sampleName
        String cnvProfiler_Docker = "us-central1-docker.pkg.dev/tag-team-160914/gptag-dockers/covprofileviz:0.0.2"
        File cramOrBamFile
        File cramOrBamIndexFile
        File referenceFasta
        File referenceFastaIndex
        File referenceDict
        File cnvBedFile
        Boolean heterozygosityCheck = false
        File? hardFilteredVcfFile
    }

    # basename(x, ".cram") only strips the suffix when it is present, so the two
    # basenames differ exactly when the input file name ends in ".cram".
    if (basename(cramOrBamFile) != basename(cramOrBamFile, ".cram")) {
        call CramToBam {
            input:
                sampleName = sampleName,
                cramFile = cramOrBamFile,
                cramIndexFile = cramOrBamIndexFile,
                referenceFasta = referenceFasta,
                referenceFastaIndex = referenceFastaIndex,
                referenceDict = referenceDict
        }
    }

    # The CramToBam outputs are optional here (the call is conditional), so they
    # must come FIRST: select_first returns the first defined element, and the
    # required workflow inputs are always defined. Listing the inputs first would
    # silently ignore the converted BAM and hand the CRAM to downstream tasks.
    File alignedBam = select_first([CramToBam.output_bam, cramOrBamFile])
    File alignedBai = select_first([CramToBam.output_bai, cramOrBamIndexFile])

    call GetPaddedCnvBed {
        input:
            cnvBedFile = cnvBedFile,
            cnvProfiler_Docker = cnvProfiler_Docker
    }

    call SamtoolsDepth {
        input:
            sampleName = sampleName,
            alignedBam = alignedBam,
            alignedBai = alignedBai,
            target_bed = GetPaddedCnvBed.paddedCnvBed
    }

    call cnvDepthProfiler {
        input:
            sampleName = sampleName,
            depthProfile = SamtoolsDepth.depth_profile,
            cnvBedFile = cnvBedFile,
            cnvProfiler_Docker = cnvProfiler_Docker
    }

    if (heterozygosityCheck) {
        call HeterozygosityCheck {
            input:
                sampleName = sampleName,
                hardFilteredVcfFile = hardFilteredVcfFile,
                cnvBedFile = cnvBedFile,
                cnvProfiler_Docker = cnvProfiler_Docker
        }
    }

    output {
        File samtools_depth_profile = SamtoolsDepth.depth_profile
        Array[File] cnv_depth_profile = cnvDepthProfiler.cnv_depth_profile
        # Undefined unless heterozygosityCheck is true.
        Array[File]? heterozygosity_plot = HeterozygosityCheck.heterozygosity_plot
    }

    meta {
        description: "This workflow takes a BAM or CRAM file and a CNV bed file as input and generates a coverage profile for the CNV regions in the bed file. Optionally, it can also generate a heterozygosity plot using a hard-filtered VCF file."
        author: "Yueyao Gao"
        email: "[email protected]"
    }
}


task CramToBam {
    # Converts a CRAM to a BAM with samtools and indexes the result.
    # Outputs are named <sampleName>.bam and <sampleName>.bai.
    input {
        String sampleName
        # Declared optional, but the command below needs cramFile to be set.
        File? cramFile
        File? cramIndexFile
        File referenceFasta
        File referenceFastaIndex
        File referenceDict
        Int cpu = 8
        Int mem = 64
        Int disk_size = 500
    }

    # mem is given in GB and converted to MB for the runtime block. Because mem
    # carries a default it is always defined, so the 6000 fallback is not
    # expected to be reached.
    Int machine_mem = if defined(mem) then mem * 1000 else 6000

    # Decode the CRAM against the reference, re-encode as BAM, then index it and
    # rename the index from <name>.bam.bai to <name>.bai.
    command <<<
        set -e
        set -o pipefail

        samtools view -h -T ~{referenceFasta} ~{cramFile} |
        samtools view -b -o ~{sampleName}.bam -
        samtools index -b ~{sampleName}.bam
        mv ~{sampleName}.bam.bai ~{sampleName}.bai
    >>>

    output {
        File output_bam = "~{sampleName}.bam"
        File output_bai = "~{sampleName}.bai"
    }

    runtime {
        docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735"
        cpu: cpu
        memory: "~{machine_mem} MB"
        disks: "local-disk ~{disk_size} SSD"
    }
}

task GetPaddedCnvBed {
    # Pads every CNV interval in cnvBedFile by twice its own length on each side
    # and writes the padded intervals to padded_cnv.bed.
    #
    # Inputs:
    #   cnvBedFile          - tab-separated file; columns 1-3 are chrom, start, end
    #   cnvProfiler_Docker  - image providing the env_viz conda environment
    #   mem_gb / cpu / disk_size_gb - runtime resources
    # Output:
    #   paddedCnvBed        - tab-separated chrom/start/end, one padded interval per line
    input {
        File cnvBedFile
        String cnvProfiler_Docker
        Int mem_gb = 1
        Int cpu = 1
        Int disk_size_gb = 10
    }

    command <<<
        source activate env_viz
        # Quoted heredoc delimiter: the shell passes the Python source through
        # untouched (WDL has already substituted the input path by then).
        python3 <<'CODE'
        padded_intervals = []
        # The path must be a quoted Python string literal; interpolating it
        # bare produces a SyntaxError.
        with open("~{cnvBedFile}", "r") as bed_in:
            for line in bed_in:
                fields = line.rstrip("\n").split("\t")
                # Skip blank lines, comments and BED track/browser headers
                # instead of crashing on them.
                if len(fields) < 3 or line.startswith(("#", "track", "browser")):
                    continue
                chrom = fields[0]
                start = int(fields[1])
                end = int(fields[2])
                svlen = end - start + 1
                # Clamp at 0 so padding near the start of a contig never
                # yields a negative coordinate.
                padded_start = max(0, start - svlen * 2)
                padded_end = end + svlen * 2
                padded_intervals.append((chrom, padded_start, padded_end))
        # Write real BED columns (not "chrom:start-end" strings): the file is
        # passed to a BED-consuming option downstream. Open with "w" so a
        # retried attempt cannot append duplicates to a leftover file.
        with open("padded_cnv.bed", "w") as bed_out:
            for chrom, padded_start, padded_end in padded_intervals:
                bed_out.write(f"{chrom}\t{padded_start}\t{padded_end}\n")
        CODE
    >>>

    runtime {
        docker: cnvProfiler_Docker
        cpu: cpu
        memory: mem_gb + " GB"
        disks: "local-disk " + disk_size_gb + " HDD"
    }

    output {
        File paddedCnvBed = "padded_cnv.bed"
    }
}
task SamtoolsDepth {
    # Runs `samtools depth` on one BAM, restricted to the intervals in target_bed.
    #
    # Inputs:
    #   sampleName         - prefix of the output depth file
    #   alignedBam / alignedBai - reads and their index
    #   target_bed         - intervals passed to `samtools depth -b`
    #   minBaseQuality     - passed as --min-BQ
    #   minMappingQuality  - passed as --min-MQ
    #   mem_gb / cpu / disk_size_gb - optional resource overrides
    #   samtools_docker    - image providing samtools
    # Output:
    #   depth_profile      - output/<sampleName>_samtools.depth
    input {
        String sampleName
        File alignedBam
        File alignedBai
        File target_bed
        Int minBaseQuality = 20
        Int minMappingQuality = 20
        Int? mem_gb
        Int? cpu
        Int? disk_size_gb
        String samtools_docker = "euformatics/samtools:1.20"
    }

    # disk_size_gb has no default; interpolating it directly would render
    # "local-disk  SSD" when it is unset. Fall back to the BAM size plus headroom.
    Int effective_disk_gb = select_first([disk_size_gb, ceil(size(alignedBam, "GB")) + 20])

    command <<<
        # Stop at the first failing command so a partial depth file is never
        # reported as success.
        set -e
        # Create directories for input & output
        mkdir input
        mkdir output
        # samtools depth -f expects a file listing the BAM paths
        readlink -f ~{alignedBam} > input/bam_path.txt
        # Run samtools depth
        # Counting fragments instead of reads using -s option
        samtools depth \
            -b ~{target_bed} \
            -f input/bam_path.txt \
            --min-BQ ~{minBaseQuality} \
            --min-MQ ~{minMappingQuality} \
            -s \
            -o output/~{sampleName}_samtools.depth
    >>>

    output {
        File depth_profile = "output/~{sampleName}_samtools.depth"
    }

    runtime {
        memory: select_first([mem_gb, 7]) * 1000 + " MB"
        cpu: select_first([cpu, 1])
        docker: samtools_docker
        disks: "local-disk ~{effective_disk_gb} SSD"
        preemptible: 0
        maxRetries: 3
    }
}
task cnvDepthProfiler {
    # Runs CNV_Depth_Profiler.py from the image's env_viz conda environment on a
    # depth profile and a CNV BED file, and collects every PNG written under
    # output/.
    input {
        String sampleName
        File depthProfile
        File cnvBedFile
        String cnvProfiler_Docker
        Int cpu = 8
        Int mem_gb = 64
        Int disk_size_gb = 500
        Int preemptible = 0
        Int maxRetries = 1
    }

    command <<<
        set -e
        mkdir output
        # Plot the depth profile; outputs are prefixed with output/<sampleName>
        conda run --no-capture-output \
            -n env_viz \
            python3 /BaseImage/CovProfileViz/scripts/CNV_Depth_Profiler.py \
            -c ~{depthProfile} \
            -b ~{cnvBedFile} \
            -n output/~{sampleName}
    >>>

    runtime {
        docker: cnvProfiler_Docker
        cpu: cpu
        memory: "~{mem_gb} GB"
        disks: "local-disk " + disk_size_gb + " SSD"
        preemptible: preemptible
        maxRetries: maxRetries
    }

    output {
        Array[File] cnv_depth_profile = glob("output/*png")
    }
}
task HeterozygosityCheck {
    # Runs CNV_SNP_HET_Profiler.py on the sample VCF together with two
    # comparison VCFs (labelled HG001 and HG002) over the CNV intervals, and
    # collects every PNG written under output/.
    #
    # Inputs:
    #   sampleName           - label for the sample (-n1) and output prefix
    #   cnvProfiler_Docker   - image providing the env_viz conda environment
    #   HG1_vcf_path / HG2_vcf_path - comparison VCFs passed as -v2 / -v3
    #   hardFilteredVcfFile  - sample VCF passed as -v1; declared optional but
    #                          this task fails with a clear message if it is unset
    #   cnvBedFile           - CNV intervals passed as -b
    #   mem_gb / cpu / preemptible / disk_size_gb / maxRetries - runtime settings
    # Output:
    #   heterozygosity_plot  - PNG files matching output/*png
    input {
        String sampleName
        String cnvProfiler_Docker
        File HG1_vcf_path = "gs://dragenv4_2_validation/dragen_4_2_4/hg19/NA12878/smoke.hard-filtered.vcf.gz"
        File HG2_vcf_path = "gs://dragenv4_2_validation/dragen_4_2_4/hg19/NA24385/NA24385.hard-filtered.vcf.gz"
        File? hardFilteredVcfFile
        File cnvBedFile
        Int mem_gb = 64
        Int cpu = 8
        Int preemptible = 0
        Int disk_size_gb = 500
        Int maxRetries = 1
    }

    command <<<
        set -e
        # An unset optional File interpolates to an empty string, which would
        # leave "-v1" without a value and make it swallow "-v2". Fail early
        # with an explicit message instead.
        if [ -z "~{hardFilteredVcfFile}" ]; then
            echo "ERROR: HeterozygosityCheck requires hardFilteredVcfFile, but none was provided." >&2
            exit 1
        fi
        mkdir output
        # Run the SNP heterozygosity profile visualization script
        conda run --no-capture-output \
            -n env_viz \
            python3 /BaseImage/CovProfileViz/scripts/CNV_SNP_HET_Profiler.py \
            -v1 ~{hardFilteredVcfFile} \
            -v2 ~{HG1_vcf_path} \
            -v3 ~{HG2_vcf_path} \
            -b ~{cnvBedFile} \
            -n1 ~{sampleName} \
            -n2 HG001 \
            -n3 HG002 \
            -o output/~{sampleName}
    >>>

    output {
        Array[File] heterozygosity_plot = glob("output/*png")
    }

    runtime {
        memory: mem_gb + " GB"
        cpu: cpu
        docker: cnvProfiler_Docker
        disks: "local-disk ~{disk_size_gb} SSD"
        preemptible: preemptible
        maxRetries: maxRetries
    }
}

0 comments on commit 47a44af

Please sign in to comment.