From b8cf8a2e004b57ce7952a83b3c3bd18d8b0f44fb Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Tue, 28 Jun 2022 11:24:50 +0000 Subject: [PATCH 1/9] change primer file formatting --- workflow/scripts/{bed-to-bedpe.py => bed-to-tsv.py} | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) rename workflow/scripts/{bed-to-bedpe.py => bed-to-tsv.py} (85%) diff --git a/workflow/scripts/bed-to-bedpe.py b/workflow/scripts/bed-to-tsv.py similarity index 85% rename from workflow/scripts/bed-to-bedpe.py rename to workflow/scripts/bed-to-tsv.py index 4a3b343df..098a18cb7 100644 --- a/workflow/scripts/bed-to-bedpe.py +++ b/workflow/scripts/bed-to-tsv.py @@ -23,10 +23,9 @@ df_sense["chrom"], df_sense["start"], df_sense["end"], - df_antisense["chrom"], df_antisense["start"], df_antisense["end"], ] -headers = ["chrom1", "start1", "end1", "chrom2", "start2", "end2"] +headers = ["chrom", "left_start", "left_end", "right_start", "right_end"] df_bedpe = pd.concat(data, axis=1, keys=headers) -df_bedpe.to_csv(snakemake.output[0], header=None, index=None, sep="\t", mode="a") +df_bedpe.to_csv(snakemake.output[0], index=None, sep="\t", mode="a") From da5d5c15387cae764e394b47ef90244f423130e5 Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Tue, 28 Jun 2022 11:25:02 +0000 Subject: [PATCH 2/9] change read clipping --- workflow/rules/read_clipping.smk | 49 +++++++++++--------------------- 1 file changed, 17 insertions(+), 32 deletions(-) diff --git a/workflow/rules/read_clipping.smk b/workflow/rules/read_clipping.smk index 8abdde7ed..72dccfa96 100644 --- a/workflow/rules/read_clipping.smk +++ b/workflow/rules/read_clipping.smk @@ -25,59 +25,44 @@ rule bed_to_bedpe: input: check_bed_for_URL(config["preprocessing"]["amplicon-primers"]), output: - "resources/primer.bedpe", + "resources/primer.tsv", log: "logs/bed-to-bedpe.log", conda: "../envs/python.yaml" script: - "../scripts/bed-to-bedpe.py" + "../scripts/bed-to-tsv.py" -rule bamclipper: +rule trim_primers_fgbio: input: bam="results/{date}/read-sorted/{read_type}~position/{sample}.initial.bam", bai="results/{date}/read-sorted/{read_type}~position/{sample}.initial.bam.bai", - bedpe="resources/primer.bedpe", - output: - temp( - "results/{date}/read-clipping/softclipped/{read_type}/{sample}/{sample}.initial.primerclipped.bam" + ref="resources/genomes/{reference}.fasta".format( + reference=config["preprocessing"]["amplicon-reference"] ), - params: - output_dir=get_output_dir, - cwd=lambda w: os.getcwd(), - bed_path=lambda w, input: os.path.join(os.getcwd(), input.bedpe), - bam=lambda w, input: os.path.basename(input.bam), + primers="resources/primer.tsv", + output: + "results/{date}/read-clipping/hardclipped/{read_type}/{sample}/{sample}.bam" log: - "logs/{date}/bamclipper/{read_type}/{sample}.log", + "logs/{date}/fgbio_primer_clipping/{read_type}/{sample}.log" conda: - "../envs/bamclipper.yaml" - threads: 6 + "../envs/fgbio.yaml" shell: - "(cp {input.bam} {params.output_dir} &&" - " cp {input.bai} {params.output_dir} &&" - " cd {params.output_dir} &&" - " bamclipper.sh -b {params.bam} -p {params.bed_path} -n {threads} -u 5 -d 5) " - " > {params.cwd}/{log} 2>&1" + "fgbio TrimPrimers -i {input.bam} -p {input.primers} -o {output} -H true -r {input.ref} > {log} 2>&1" -rule fgbio: +rule filter_bam_fgbio: input: - bam="results/{date}/read-clipping/softclipped/{read_type}/{sample}/{sample}.initial.primerclipped.bam", - bai="results/{date}/read-clipping/softclipped/{read_type}/{sample}/{sample}.initial.primerclipped.bam.bai", - ref="resources/genomes/{reference}.fasta".format( - reference=config["preprocessing"]["amplicon-reference"] - ), + bam="results/{date}/read-clipping/hardclipped/{read_type}/{sample}/{sample}.bam" output: - temp( - "results/{date}/read-clipping/hardclipped/{read_type}/{sample}/{sample}.bam" - ), + "results/{date}/read-clipping/hc_filtered/{read_type}/{sample}/{sample}.bam" log: - "logs/{date}/fgbio/{read_type}/{sample}.log", + "logs/{date}/fgbio_filter_bam/{read_type}/{sample}.log" conda: "../envs/fgbio.yaml" shell: - "fgbio --sam-validation-stringency=LENIENT ClipBam -i {input.bam} -o {output} -H true -r {input.ref} > {log} 2>&1" + "fgbio FilterBam -i {input.bam} -o {output} --min-insert-size 100 --remove-single-end-mappings > {log} 2>&1" rule samtools_fastq_pe: @@ -132,7 +117,7 @@ rule plot_primer_clipping: ), params: samples=lambda wildcards: get_samples_for_date(wildcards.date), - bedpe="resources/primer.bedpe", + bedpe="resources/primer.tsv", log: "logs/{date}/plot-primer-clipping.log", conda: From c82a050c7d24d4996e25fa496c889316ebbd2575 Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Tue, 28 Jun 2022 11:33:54 +0000 Subject: [PATCH 3/9] fmt --- workflow/rules/read_clipping.smk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/workflow/rules/read_clipping.smk b/workflow/rules/read_clipping.smk index 72dccfa96..40901ae87 100644 --- a/workflow/rules/read_clipping.smk +++ b/workflow/rules/read_clipping.smk @@ -43,9 +43,9 @@ rule trim_primers_fgbio: ), primers="resources/primer.tsv", output: - "results/{date}/read-clipping/hardclipped/{read_type}/{sample}/{sample}.bam" + "results/{date}/read-clipping/hardclipped/{read_type}/{sample}/{sample}.bam", log: - "logs/{date}/fgbio_primer_clipping/{read_type}/{sample}.log" + "logs/{date}/fgbio_primer_clipping/{read_type}/{sample}.log", conda: "../envs/fgbio.yaml" shell: @@ -54,11 +54,11 @@ rule trim_primers_fgbio: rule filter_bam_fgbio: input: - bam="results/{date}/read-clipping/hardclipped/{read_type}/{sample}/{sample}.bam" + bam="results/{date}/read-clipping/hardclipped/{read_type}/{sample}/{sample}.bam", output: - "results/{date}/read-clipping/hc_filtered/{read_type}/{sample}/{sample}.bam" + "results/{date}/read-clipping/hc_filtered/{read_type}/{sample}/{sample}.bam", log: - "logs/{date}/fgbio_filter_bam/{read_type}/{sample}.log" + "logs/{date}/fgbio_filter_bam/{read_type}/{sample}.log", conda: "../envs/fgbio.yaml" shell: From 7ecd981f1b0fa2624988cf85cc663e5875ac3dc2 Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Wed, 29 Jun 2022 10:21:34 +0000 Subject: [PATCH 4/9] fix primer clipping plot --- workflow/scripts/plot-primer-clipping.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/workflow/scripts/plot-primer-clipping.py b/workflow/scripts/plot-primer-clipping.py index c6dabb8e3..967323235 100644 --- a/workflow/scripts/plot-primer-clipping.py +++ b/workflow/scripts/plot-primer-clipping.py @@ -14,7 +14,9 @@ # read primer bedpe to df PRIMER = pd.read_csv(snakemake.params.get("bedpe", ""), delimiter="\t", header=None) -PRIMER.drop(PRIMER.columns[[0, 3]], axis=1, inplace=True) +print(PRIMER) +PRIMER.drop(PRIMER.columns[[0]], axis=1, inplace=True) +print(PRIMER) PRIMER.columns = ["p1_start", "p1_end", "p2_start", "p2_end"] # convert df to interval trees From b205c827ae76235939c189326e3716a43aef61ff Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Wed, 29 Jun 2022 10:21:34 +0000 Subject: [PATCH 5/9] fix primer clipping plot --- workflow/scripts/plot-primer-clipping.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/workflow/scripts/plot-primer-clipping.py b/workflow/scripts/plot-primer-clipping.py index c6dabb8e3..7d8289ddc 100644 --- a/workflow/scripts/plot-primer-clipping.py +++ b/workflow/scripts/plot-primer-clipping.py @@ -13,8 +13,10 @@ from intervaltree import IntervalTree # read primer bedpe to df -PRIMER = pd.read_csv(snakemake.params.get("bedpe", ""), delimiter="\t", header=None) -PRIMER.drop(PRIMER.columns[[0, 3]], axis=1, inplace=True) +PRIMER = pd.read_csv(snakemake.params.get("bedpe", ""), delimiter="\t", header=0) +print(PRIMER) +PRIMER.drop(PRIMER.columns[[0]], axis=1, inplace=True) +print(PRIMER) PRIMER.columns = ["p1_start", "p1_end", "p2_start", "p2_end"] # convert df to interval trees From 209f0ba7409201d396ee555b4293dbc9970912a6 Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Wed, 29 Jun 2022 10:45:10 +0000 Subject: [PATCH 6/9] fix --- workflow/scripts/plot-primer-clipping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/scripts/plot-primer-clipping.py b/workflow/scripts/plot-primer-clipping.py index 7d8289ddc..967323235 100644 --- a/workflow/scripts/plot-primer-clipping.py +++ b/workflow/scripts/plot-primer-clipping.py @@ -13,7 +13,7 @@ from intervaltree import IntervalTree # read primer bedpe to df -PRIMER = pd.read_csv(snakemake.params.get("bedpe", ""), delimiter="\t", header=0) +PRIMER = pd.read_csv(snakemake.params.get("bedpe", ""), delimiter="\t", header=None) print(PRIMER) PRIMER.drop(PRIMER.columns[[0]], axis=1, inplace=True) print(PRIMER) From 9f4150d3cf0eaee58cbfa08be33c340b40aecca2 Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Wed, 29 Jun 2022 10:45:28 +0000 Subject: [PATCH 7/9] fix --- workflow/scripts/plot-primer-clipping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/scripts/plot-primer-clipping.py b/workflow/scripts/plot-primer-clipping.py index 967323235..7d8289ddc 100644 --- a/workflow/scripts/plot-primer-clipping.py +++ b/workflow/scripts/plot-primer-clipping.py @@ -13,7 +13,7 @@ from intervaltree import IntervalTree # read primer bedpe to df -PRIMER = pd.read_csv(snakemake.params.get("bedpe", ""), delimiter="\t", header=None) +PRIMER = pd.read_csv(snakemake.params.get("bedpe", ""), delimiter="\t", header=0) print(PRIMER) PRIMER.drop(PRIMER.columns[[0]], axis=1, inplace=True) print(PRIMER) From 46c6139b9ba144f1c5287cae5a6e2e8b02e5c406 Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Wed, 29 Jun 2022 10:47:09 +0000 Subject: [PATCH 8/9] typo --- workflow/scripts/plot-primer-clipping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/scripts/plot-primer-clipping.py b/workflow/scripts/plot-primer-clipping.py index 7d8289ddc..a99b55a8f 100644 --- a/workflow/scripts/plot-primer-clipping.py +++ b/workflow/scripts/plot-primer-clipping.py @@ -118,7 +118,7 @@ def count_intervals(file): "uncut primer within", "cut primer exact", "cut primer within", - "no mathing win", + "no matching win", ], } ) From 2429757993bf290ec50715e0b4db707fae2da4c8 Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Thu, 30 Jun 2022 09:45:15 +0000 Subject: [PATCH 9/9] test --- workflow/rules/read_clipping.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/rules/read_clipping.smk b/workflow/rules/read_clipping.smk index 40901ae87..4502dfcbb 100644 --- a/workflow/rules/read_clipping.smk +++ b/workflow/rules/read_clipping.smk @@ -21,7 +21,7 @@ rule samtools_sort: "0.74.0/bio/samtools/sort" -rule bed_to_bedpe: +rule bed_to_tsv: input: check_bed_for_URL(config["preprocessing"]["amplicon-primers"]), output: