-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 2 changed files with 316 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
{ | ||
"CNV_Profiler.CramToBam.mem": "Int (optional, default = 64)", | ||
"CNV_Profiler.SamtoolsDepth.cpu": "Int? (optional)", | ||
"CNV_Profiler.GetPaddedCnvBed.mem_gb": "Int (optional, default = 1)", | ||
"CNV_Profiler.SamtoolsDepth.disk_size_gb": "Int? (optional)", | ||
"CNV_Profiler.cnvDepthProfiler.maxRetries": "Int (optional, default = 1)", | ||
"CNV_Profiler.cramOrBamIndexFile": "File", | ||
"CNV_Profiler.HeterozygosityCheck.maxRetries": "Int (optional, default = 1)", | ||
"CNV_Profiler.HeterozygosityCheck.cpu": "Int (optional, default = 8)", | ||
"CNV_Profiler.cramOrBamFile": "File", | ||
"CNV_Profiler.HeterozygosityCheck.HG2_vcf_path": "File (optional, default = \"gs://dragenv4_2_validation/dragen_4_2_4/hg19/NA24385/NA24385.hard-filtered.vcf.gz\")", | ||
"CNV_Profiler.HeterozygosityCheck.mem_gb": "Int (optional, default = 64)", | ||
"CNV_Profiler.cnvDepthProfiler.cpu": "Int (optional, default = 8)", | ||
"CNV_Profiler.cnvDepthProfiler.preemptible": "Int (optional, default = 0)", | ||
"CNV_Profiler.SamtoolsDepth.minBaseQuality": "Int (optional, default = 20)", | ||
"CNV_Profiler.heterozygosityCheck": "Boolean (optional, default = false)", | ||
"CNV_Profiler.CramToBam.cpu": "Int (optional, default = 8)", | ||
"CNV_Profiler.HeterozygosityCheck.disk_size_gb": "Int (optional, default = 500)", | ||
"CNV_Profiler.hardFilteredVcfFile": "File? (optional)", | ||
"CNV_Profiler.GetPaddedCnvBed.cpu": "Int (optional, default = 1)", | ||
"CNV_Profiler.referenceFasta": "File", | ||
"CNV_Profiler.GetPaddedCnvBed.disk_size_gb": "Int (optional, default = 10)", | ||
"CNV_Profiler.cnvDepthProfiler.mem_gb": "Int (optional, default = 64)", | ||
"CNV_Profiler.cnvDepthProfiler.disk_size_gb": "Int (optional, default = 500)", | ||
"CNV_Profiler.sampleName": "String", | ||
"CNV_Profiler.cnvProfiler_Docker": "String (optional, default = \"us-central1-docker.pkg.dev/tag-team-160914/gptag-dockers/covprofileviz:0.0.2\")", | ||
"CNV_Profiler.referenceDict": "File", | ||
"CNV_Profiler.SamtoolsDepth.samtools_docker": "String (optional, default = \"euformatics/samtools:1.20\")", | ||
"CNV_Profiler.referenceFastaIndex": "File", | ||
"CNV_Profiler.SamtoolsDepth.minMappingQuality": "Int (optional, default = 20)", | ||
"CNV_Profiler.HeterozygosityCheck.HG1_vcf_path": "File (optional, default = \"gs://dragenv4_2_validation/dragen_4_2_4/hg19/NA12878/smoke.hard-filtered.vcf.gz\")", | ||
"CNV_Profiler.SamtoolsDepth.mem_gb": "Int? (optional)", | ||
"CNV_Profiler.CramToBam.disk_size": "Int (optional, default = 500)", | ||
"CNV_Profiler.HeterozygosityCheck.preemptible": "Int (optional, default = 0)", | ||
"CNV_Profiler.cnvBedFile": "File" | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,279 @@ | ||
version 1.0 | ||
|
||
# CNV_Profiler
#
# Builds a per-base depth profile over (padded) CNV regions of one sample and
# plots it; optionally also produces heterozygosity plots from a VCF.
#
# Steps:
#   1. If the alignment input is a CRAM, convert it to BAM (CramToBam).
#   2. Pad the CNV intervals of cnvBedFile (GetPaddedCnvBed).
#   3. Run samtools depth over the padded intervals (SamtoolsDepth).
#   4. Plot the depth profile for the CNV regions (cnvDepthProfiler).
#   5. When heterozygosityCheck is true, run HeterozygosityCheck.
workflow CNV_Profiler {
    input {
        String sampleName
        String cnvProfiler_Docker = "us-central1-docker.pkg.dev/tag-team-160914/gptag-dockers/covprofileviz:0.0.2"
        File cramOrBamFile
        File cramOrBamIndexFile
        File referenceFasta
        File referenceFastaIndex
        File referenceDict
        File cnvBedFile
        Boolean heterozygosityCheck = false
        File? hardFilteredVcfFile
    }

    # basename(x, ".cram") only strips the suffix when it is present, so the
    # two basenames differ exactly when the input file name ends in ".cram".
    if (basename(cramOrBamFile) != basename(cramOrBamFile, ".cram")) {
        call CramToBam {
            input:
                sampleName = sampleName,
                cramFile = cramOrBamFile,
                cramIndexFile = cramOrBamIndexFile,
                referenceFasta = referenceFasta,
                referenceFastaIndex = referenceFastaIndex,
                referenceDict = referenceDict
        }
    }

    # select_first returns the first *defined* element. cramOrBamFile is a
    # required input and therefore always defined, so the converted BAM/BAI
    # must be listed first; otherwise the conversion output is never used.
    File alignedBam = select_first([CramToBam.output_bam, cramOrBamFile])
    File alignedBai = select_first([CramToBam.output_bai, cramOrBamIndexFile])

    call GetPaddedCnvBed {
        input:
            cnvBedFile = cnvBedFile,
            cnvProfiler_Docker = cnvProfiler_Docker
    }

    call SamtoolsDepth {
        input:
            sampleName = sampleName,
            alignedBam = alignedBam,
            alignedBai = alignedBai,
            target_bed = GetPaddedCnvBed.paddedCnvBed
    }

    call cnvDepthProfiler {
        input:
            sampleName = sampleName,
            depthProfile = SamtoolsDepth.depth_profile,
            cnvBedFile = cnvBedFile,
            cnvProfiler_Docker = cnvProfiler_Docker
    }

    if (heterozygosityCheck) {
        call HeterozygosityCheck {
            input:
                sampleName = sampleName,
                hardFilteredVcfFile = hardFilteredVcfFile,
                cnvBedFile = cnvBedFile,
                cnvProfiler_Docker = cnvProfiler_Docker
        }
    }

    output {
        File samtools_depth_profile = SamtoolsDepth.depth_profile
        Array[File] cnv_depth_profile = cnvDepthProfiler.cnv_depth_profile
        # Only defined when heterozygosityCheck is true.
        Array[File]? heterozygosity_plot = HeterozygosityCheck.heterozygosity_plot
    }

    meta {
        description: "This workflow takes a BAM or CRAM file and a CNV bed file as input and generates a coverage profile for the CNV regions in the bed file. Optionally, it can also generate a heterozygosity plot using a hard-filtered VCF file."
        author: "Yueyao Gao"
        email: "[email protected]"
    }
}
|
||
|
||
# CramToBam
#
# Converts a CRAM to an indexed BAM with samtools.
#
# Inputs:
#   referenceFasta / referenceFastaIndex / referenceDict - reference files;
#       only referenceFasta is named in the command, the other two are
#       localized alongside it.
#   cramFile / cramIndexFile - CRAM to convert and its index. Declared
#       optional; the command still interpolates cramFile unconditionally,
#       so callers are expected to supply it.
#   sampleName - prefix of the output files.
#   disk_size  - local SSD size (GB, per the Cromwell "local-disk" convention).
#   mem        - memory; multiplied by 1000 and requested in MB.
#   cpu        - number of CPUs requested.
#
# Outputs:
#   output_bam - <sampleName>.bam
#   output_bai - <sampleName>.bai
task CramToBam {
    input {
        File referenceFasta
        File referenceFastaIndex
        File referenceDict
        File? cramFile
        File? cramIndexFile
        String sampleName
        Int disk_size = 500
        Int mem = 64
        Int cpu = 8
    }

    # mem is a non-optional Int with a default, so it is always defined; the
    # former "else 6000" fallback could never be taken.
    Int machine_mem = mem * 1000

    # Calls samtools view to do the conversion
    command <<<
        set -e
        set -o pipefail

        samtools view -h -T ~{referenceFasta} ~{cramFile} |
        samtools view -b -o ~{sampleName}.bam -
        samtools index -b ~{sampleName}.bam
        # samtools index writes <bam>.bai; rename to the declared output name.
        mv ~{sampleName}.bam.bai ~{sampleName}.bai
    >>>

    runtime {
        docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735"
        cpu: cpu
        memory: machine_mem + " MB"
        disks: "local-disk " + disk_size + " SSD"
    }

    output {
        File output_bam = "~{sampleName}.bam"
        File output_bai = "~{sampleName}.bai"
    }
}
|
||
# GetPaddedCnvBed
#
# Reads a tab-separated CNV BED file and writes padded_cnv.bed, in which every
# interval is extended on both sides by twice its own length
# (svlen = end - start + 1).
#
# Inputs:
#   cnvBedFile         - tab-separated file; columns 1-3 are chrom, start, end.
#   cnvProfiler_Docker - image providing the "env_viz" environment and python3.
#   mem_gb / cpu / disk_size_gb - runtime resources.
#
# Outputs:
#   paddedCnvBed - padded intervals, one "chrom<TAB>start<TAB>end" per line.
task GetPaddedCnvBed {
    input {
        File cnvBedFile
        String cnvProfiler_Docker
        Int mem_gb = 1
        Int cpu = 1
        Int disk_size_gb = 10
    }

    command <<<
        source activate env_viz
        python3 <<CODE
        padded_intervals = []
        # The path is interpolated by WDL before Python sees it, so it must be
        # wrapped in quotes to become a valid string literal.
        with open("~{cnvBedFile}", 'r') as bed:
            for raw in bed:
                line = raw.strip()
                # Skip blank lines and BED header/comment lines.
                if not line or line.startswith(('#', 'track', 'browser')):
                    continue
                fields = line.split('\t')
                chrom = fields[0]
                start = int(fields[1])
                end = int(fields[2])
                svlen = end - start + 1
                # Clamp at 0: coordinates cannot be negative.
                padded_start = max(0, start - svlen * 2)
                padded_end = end + svlen * 2
                # Tab-separated BED columns: this file is consumed by
                # "samtools depth -b", which expects BED rather than
                # "chrom:start-end" region strings.
                padded_intervals.append('\t'.join([chrom, str(padded_start), str(padded_end)]))
        # 'w' rather than 'a': the output must contain only this run's intervals.
        with open('padded_cnv.bed', 'w') as out:
            for interval in padded_intervals:
                out.write(interval + '\n')
        CODE
    >>>

    runtime {
        docker: cnvProfiler_Docker
        cpu: cpu
        memory: mem_gb + " GB"
        disks: "local-disk " + disk_size_gb + " HDD"
    }

    output {
        File paddedCnvBed = "padded_cnv.bed"
    }
}
# SamtoolsDepth
#
# Runs "samtools depth" on one BAM, restricted to the regions of target_bed.
#
# Inputs:
#   sampleName        - prefix of the output depth file.
#   alignedBam        - alignment file; its resolved path is passed via -f.
#   alignedBai        - index of alignedBam (localized, not named in the command).
#   target_bed        - regions passed to "samtools depth -b".
#   minBaseQuality    - value for --min-BQ.
#   minMappingQuality - value for --min-MQ.
#   mem_gb            - memory in GB (default 7); requested as mem_gb * 1000 MB.
#   cpu               - CPUs (default 1).
#   disk_size_gb      - local SSD size in GB; when omitted it is derived from
#                       the size of the BAM and its index plus 20 GB headroom.
#   samtools_docker   - image providing samtools.
#
# Outputs:
#   depth_profile - output/<sampleName>_samtools.depth
task SamtoolsDepth {
    input {
        String sampleName
        File alignedBam
        File alignedBai
        File target_bed
        Int minBaseQuality = 20
        Int minMappingQuality = 20
        Int? mem_gb
        Int? cpu
        Int? disk_size_gb
        String samtools_docker = "euformatics/samtools:1.20"
    }

    # disk_size_gb is optional and has no default; interpolating it directly
    # yields "local-disk  SSD" when it is unset, so resolve a concrete value.
    Int resolved_disk_gb = select_first([
        disk_size_gb,
        ceil(size(alignedBam, "GB") + size(alignedBai, "GB")) + 20
    ])

    command <<<
        set -e
        set -o pipefail

        # Create directories for input & output
        mkdir -p input
        mkdir -p output
        readlink -f ~{alignedBam} > input/bam_path.txt

        # Run samtools depth
        # Counting fragments instead of reads using -s option
        samtools depth \
            -b ~{target_bed} \
            -f input/bam_path.txt \
            --min-BQ ~{minBaseQuality} \
            --min-MQ ~{minMappingQuality} \
            -s \
            -o output/~{sampleName}_samtools.depth
    >>>

    output {
        File depth_profile = "output/~{sampleName}_samtools.depth"
    }

    runtime {
        memory: select_first([mem_gb, 7]) * 1000 + " MB"
        cpu: select_first([cpu, 1])
        docker: samtools_docker
        disks: "local-disk ~{resolved_disk_gb} SSD"
        preemptible: 0
        maxRetries: 3
    }
}
# cnvDepthProfiler
#
# Runs CNV_Depth_Profiler.py from the "env_viz" conda environment and collects
# every PNG it writes under output/.
#
# Inputs:
#   sampleName         - used as the output prefix (output/<sampleName>, -n).
#   cnvProfiler_Docker - image containing the script and the conda environment.
#   depthProfile       - depth file passed to the script with -c.
#   cnvBedFile         - CNV BED file passed to the script with -b.
#   mem_gb / cpu / disk_size_gb / preemptible / maxRetries - runtime settings.
#
# Outputs:
#   cnv_depth_profile - all files matching output/*png.
task cnvDepthProfiler {
    input {
        String sampleName
        String cnvProfiler_Docker
        File depthProfile
        File cnvBedFile
        Int mem_gb = 64
        Int cpu = 8
        Int preemptible = 0
        Int disk_size_gb = 500
        Int maxRetries = 1
    }

    command <<<
        set -e
        mkdir output
        # Run the coverage profile visualization script
        conda run --no-capture-output \
            -n env_viz \
            python3 /BaseImage/CovProfileViz/scripts/CNV_Depth_Profiler.py \
            -c ~{depthProfile} \
            -b ~{cnvBedFile} \
            -n output/~{sampleName}
    >>>

    runtime {
        docker: cnvProfiler_Docker
        cpu: cpu
        memory: "~{mem_gb} GB"
        disks: "local-disk " + disk_size_gb + " SSD"
        preemptible: preemptible
        maxRetries: maxRetries
    }

    output {
        Array[File] cnv_depth_profile = glob("output/*png")
    }
}
# HeterozygosityCheck
#
# Runs CNV_SNP_HET_Profiler.py from the "env_viz" conda environment on the
# sample VCF together with two further VCFs labelled HG001 and HG002, and
# collects every PNG written under output/.
#
# Inputs:
#   sampleName          - label for the sample VCF (-n1) and output prefix (-o).
#   cnvProfiler_Docker  - image containing the script and the conda environment.
#   HG1_vcf_path        - VCF passed as -v2 (labelled HG001).
#   HG2_vcf_path        - VCF passed as -v3 (labelled HG002).
#   hardFilteredVcfFile - sample VCF passed as -v1. Declared optional, but the
#                         task fails immediately when it is not supplied.
#   cnvBedFile          - CNV BED file passed with -b.
#   mem_gb / cpu / disk_size_gb / preemptible / maxRetries - runtime settings.
#
# Outputs:
#   heterozygosity_plot - all files matching output/*png.
task HeterozygosityCheck {
    input {
        String sampleName
        String cnvProfiler_Docker
        File HG1_vcf_path = "gs://dragenv4_2_validation/dragen_4_2_4/hg19/NA12878/smoke.hard-filtered.vcf.gz"
        File HG2_vcf_path = "gs://dragenv4_2_validation/dragen_4_2_4/hg19/NA24385/NA24385.hard-filtered.vcf.gz"
        File? hardFilteredVcfFile
        File cnvBedFile
        Int mem_gb = 64
        Int cpu = 8
        Int preemptible = 0
        Int disk_size_gb = 500
        Int maxRetries = 1
    }

    command <<<
        set -e

        # An unset optional interpolates to an empty string, which would leave
        # "-v1" without a value and shift the remaining arguments. Fail with a
        # clear message instead.
        if [ -z "~{hardFilteredVcfFile}" ]; then
            echo "ERROR: hardFilteredVcfFile is required to run HeterozygosityCheck" >&2
            exit 1
        fi

        mkdir output
        # Run the coverage profile visualization script
        conda run --no-capture-output \
            -n env_viz \
            python3 /BaseImage/CovProfileViz/scripts/CNV_SNP_HET_Profiler.py \
            -v1 ~{hardFilteredVcfFile} \
            -v2 ~{HG1_vcf_path} \
            -v3 ~{HG2_vcf_path} \
            -b ~{cnvBedFile} \
            -n1 ~{sampleName} \
            -n2 HG001 \
            -n3 HG002 \
            -o output/~{sampleName}
    >>>

    output {
        Array[File] heterozygosity_plot = glob("output/*png")
    }

    runtime {
        memory: mem_gb + " GB"
        cpu: cpu
        docker: cnvProfiler_Docker
        disks: "local-disk ~{disk_size_gb} SSD"
        preemptible: preemptible
        maxRetries: maxRetries
    }
}