Skip to content

Commit cdfb1c7

Browse files
author
Shixiang Wang (王诗翔)
committed
Add a whole workflow to debug
1 parent 1f1dbd2 commit cdfb1c7

File tree

5 files changed

+107
-1
lines changed

5 files changed

+107
-1
lines changed

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ rv
123123

124124
### Pipeline (WES bam data only)
125125

126-
- for one tumor-normal pair, you can refer to [one-pair.R](https://github.com/ShixiangWang/gcap/blob/master/test-workflow/one-pair.R).
126+
- for one tumor-normal pair, you can refer to [one-pair.R](https://github.com/ShixiangWang/gcap/blob/master/test-workflow/one-pair.R). [test-workflow/debug](test-workflow/debug) contains a full workflow for data obtained from SRA.
127127
- for multiple tumor-normal pairs, you can refer to [two-pairs.R](https://github.com/ShixiangWang/gcap/blob/master/test-workflow/two-pairs.R).
128128

129129
To run **gcap** from bam files, a machine with **at least 80GB RAM** is required for

test-workflow/debug/0-dump-sra.sh

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/bin/bash
# Dump the two SRA runs used by the debug workflow (ERR5242993 = tumor,
# ERR5243012 = normal) as gzipped FASTQ files into
# /data3/wsx/share/gcap_debug.
#
# Exit status: 0 when every run was dumped, 1 if the working directory could
# not be prepared or any run failed.

share_dir=/data3/wsx/share

# Stop early if the output location is unusable; otherwise the dump would
# write to gcap_debug/ relative to whatever directory we happen to be in.
mkdir -p "${share_dir}/gcap_debug" || exit 1
cd "${share_dir}" || exit 1

export PATH=$HOME/soft/sratoolkit/bin:$PATH

status=0
for i in ERR5242993 ERR5243012
do
    echo handling $i
    # Keep going after a failure so the remaining run is still attempted,
    # but remember it so the script does not report success.
    if ! parallel-fastq-dump -t 20 -O gcap_debug/ --split-3 --gzip -s "$i"
    then
        echo "Failed to dump $i" >&2
        status=1
    fi
done

exit $status

test-workflow/debug/1-align.sh

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#!/bin/bash
# Trim paired-end reads with fastp, align them with bwa mem, then sort and
# index the alignments with samtools. One BAM per sample is written to
# bam/<id>.bam under /data3/wsx/share/gcap_debug.
#
# The script is restartable: a sample whose BAM already exists is skipped,
# and a sample whose SAM already exists goes straight to the sort step.
# Partial outputs of a failed step are removed so that a rerun does not
# mistake them for finished files.
source activate circlemap
cd /data3/wsx/share/gcap_debug || exit 1

cores=24

# -p: do not complain when the directory is left over from a previous run.
mkdir -p bam

# Derive sample IDs from the read-1 files only. With --split-3 the dump step
# may also write an unpaired <id>.fastq.gz, which must not become a sample.
sn=$(ls *_1.fastq.gz | sed 's/_1\.fastq\.gz$//' | sort -u)
#sn=ERR5243012
INDEX=/data1/database/human/hg38/bwa_index/hg38_p7

for id in ${sn}; do

    # NOTE(review): with --dont_overwrite fastp is expected to refuse existing
    # outputs on a rerun, so its exit status is deliberately not checked here.
    fastp -i "${id}_1.fastq.gz" -I "${id}_2.fastq.gz" -o "${id}_1.fq.gz" -O "${id}_2.fq.gz" -h "${id}.html" -j "${id}.json" --thread 16 --dont_overwrite

    # Sorted BAM already present: nothing left to do for this sample.
    if [ -f "bam/${id}.bam" ]
    then
        continue
    fi

    if [ ! -f "bam/${id}.sam" ]
    then
        fq1=${id}_1.fq.gz
        fq2=${id}_2.fq.gz

        if [ ! -f "${fq1}" ] || [ ! -f "${fq2}" ]
        then
            echo "Trimmed reads for ${id} are missing, skip aligning." `date`
            continue
        fi

        echo "Start aligning for ${id}"
        # Test bwa's own exit status directly instead of reading $? later.
        if ! bwa mem -M -t $cores -R "@RG\tID:${id}\tSM:${id}\tLB:WXS\tPL:Illumina" ${INDEX} "${fq1}" "${fq2}" \
            > "bam/${id}.sam" 2> "bam/${id}_bwa.log"
        then
            # A truncated SAM would be treated as a finished alignment on the
            # next run, so drop it.
            rm -f "bam/${id}.sam"
            echo "Failed for ${id} in bwa." `date`
            continue
        fi
    else
        echo "BWA align for ${id} is done before, directly go to sam > bam step"
    fi

    # Both sort and index must succeed before the SAM is deleted.
    if samtools sort -@ $cores "bam/${id}.sam" -o "bam/${id}.bam" 2> "bam/${id}_bam.log" \
        && samtools index "bam/${id}.bam"
    then
        echo "Removing sam files"
        rm "bam/${id}.sam"
    else
        # Remove the incomplete BAM/index so a rerun redoes this step from
        # the SAM that is still on disk.
        rm -f "bam/${id}.bam" "bam/${id}.bam.bai"
        echo "Failed when using samtools sort or index, please check"
        exit 1
    fi

    echo "Done for ${id}." `date`

done

test-workflow/debug/2-prepare.sh

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/bin/bash
# Download and unpack the hg38 reference files (1000G loci, GC correction,
# replication-timing correction) into /data3/wsx/share/gcap_reference.
#
# Set up conda env
# mamba create -n cancerit -c bioconda cancerit-allelecount

ref_dir=/data3/wsx/share/gcap_reference
base_url=https://zenodo.org/records/6524005/files

# Create the target directory if needed and refuse to continue elsewhere;
# an unchecked cd would drop the downloads into the current directory.
mkdir -p "${ref_dir}" || exit 1
cd "${ref_dir}" || exit 1

# -c resumes a partially downloaded file instead of starting over.
for f in 1000G_loci_hg38.tar.gz GC_correction_hg38.txt.gz RT_correction_hg38.txt.gz
do
    if ! wget -c "${base_url}/${f}"
    then
        echo "Failed to download ${f}" >&2
        exit 1
    fi
done

tar zxvf 1000G_loci_hg38.tar.gz || exit 1
# -f: overwrite the uncompressed file left by a previous run instead of
# stopping to ask.
gunzip -f GC_correction_hg38.txt.gz || exit 1
gunzip -f RT_correction_hg38.txt.gz || exit 1

test-workflow/debug/3-gcap.R

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# One-time installation (run manually if needed):
# remotes::install_github("ShixiangWang/ascat@v3-for-gcap-v1", subdir = "ASCAT")
# remotes::install_github("ShixiangWang/gcap")
# install.packages("https://cran.r-project.org/src/contrib/Archive/xgboost/xgboost_1.5.2.1.tar.gz", repos = NULL)

library(gcap)

# Project ID is PRJEB42904.
# wes_395LC is the tumor sample (run ID: ERR5242993);
# wes_395N is the normal sample (run ID: ERR5243012).

# Inputs ----------------
tumour_bam <- "~/share/gcap_debug/bam/ERR5242993.bam"
normal_bam <- "~/share/gcap_debug/bam/ERR5243012.bam"
result_dir <- "~/share/gcap_debug/gcap_result"
allele_counter <- "~/miniconda3/envs/cancerit/bin/alleleCounter"

# Reference files ----------------
loci_dir <- "~/share/gcap_reference/1000G_loci_hg38/"
alleles_prefix <- file.path(
  loci_dir,
  "1kg.phase3.v5a_GRCh38nounref_allele_index_chr"
)
loci_prefix <- file.path(
  loci_dir,
  "1kg.phase3.v5a_GRCh38nounref_loci_chrstring_chr"
)
gc_file <- "~/share/gcap_reference/GC_correction_hg38.txt"
rt_file <- "~/share/gcap_reference/RT_correction_hg38.txt"

# hg38 ----------------
gcap.workflow(
  # sample pair
  tumourseqfile = tumour_bam,
  normalseqfile = normal_bam,
  tumourname = "wes_395LC",
  normalname = "wes_395N",
  # output
  jobname = "wes_395",
  outdir = result_dir,
  result_file_prefix = "wes_395",
  # external tool and reference data
  allelecounter_exe = allele_counter,
  g1000allelesprefix = alleles_prefix,
  g1000lociprefix = loci_prefix,
  GCcontentfile = gc_file,
  replictimingfile = rt_file,
  # run options
  skip_finished_ASCAT = TRUE,
  skip_ascat_call = FALSE,
  genome_build = "hg38",
  model = "XGB11"
)
33+

0 commit comments

Comments
 (0)