Skip to content

Commit b8aa5c3

Browse files
refactor: logs into compressed file (#159)
* Refactor logs into compressed file
* fmt
* fmt
* fmt
* fmt
* Additional all-filter fix
* fmt
1 parent 1a518b7 commit b8aa5c3

6 files changed

Lines changed: 122 additions & 58 deletions

File tree

workflow/Snakefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ rule all:
7474
"results/{date}/{date}.tar.gz",
7575
date=get_date(),
7676
),
77+
expand(
78+
"logs/{date}_logs.tar.gz",
79+
date=get_date(),
80+
),
7781
output:
7882
touch(
7983
expand(

workflow/rules/outputs_DADA2.smk

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,3 +615,15 @@ rule zip_report:
615615
cp results/{wildcards.date}/{wildcards.date}.tar.gz {params.outpath}
616616
rm -r results/{wildcards.date}/16S-report
617617
"""
618+
619+
620+
rule concatenate_logs:
621+
input:
622+
"results/{date}/{date}.tar.gz",
623+
output:
624+
"logs/{date}_logs.tar.gz",
625+
shell:
626+
"""
627+
tar -czvf {output} logs/{wildcards.date}/
628+
rm -r logs/{wildcards.date}
629+
"""

workflow/rules/outputs_vsearch.smk

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,3 +613,15 @@ rule zip_report:
613613
cp results/{wildcards.date}/{wildcards.date}.tar.gz {params.outpath}
614614
rm -r results/{wildcards.date}/16S-report
615615
"""
616+
617+
618+
rule concatenate_logs:
619+
input:
620+
"results/{date}/{date}.tar.gz",
621+
output:
622+
"logs/{date}_logs.tar.gz",
623+
shell:
624+
"""
625+
tar -czvf {output} logs/{wildcards.date}/
626+
rm -r logs/{wildcards.date}
627+
"""

workflow/rules/reduced_analysis.smk

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -833,3 +833,15 @@ rule export_parameters:
833833
"../envs/python.yaml"
834834
script:
835835
"../scripts/yaml_to_table.py"
836+
837+
838+
rule concatenate_logs:
839+
input:
840+
"results/{date}/{date}.tar.gz",
841+
output:
842+
"logs/{date}_logs.tar.gz",
843+
shell:
844+
"""
845+
tar -czvf {output} logs/{wildcards.date}/
846+
rm -r logs/{wildcards.date}
847+
"""

workflow/rules/visualisation.smk

Lines changed: 28 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -30,20 +30,34 @@ rule visualise_trimmed:
3030
"--verbose 2> {log}"
3131

3232

33-
rule visualise_joined:
34-
input:
35-
"results/{date}/out/joined-seqs.qza",
36-
output:
37-
"results/{date}/visual/joined-seqs.qzv",
38-
log:
39-
"logs/{date}/visualisation/visualise-joined.log",
40-
conda:
41-
"../envs/qiime-only-env.yaml"
42-
shell:
43-
"qiime demux summarize "
44-
"--i-data {input} "
45-
"--o-visualization {output} "
46-
"--verbose 2> {log}"
33+
if config["datatype"] == "SampleData[PairedEndSequencesWithQuality]":
34+
35+
rule visualise_joined:
36+
input:
37+
"results/{date}/out/joined-seqs.qza",
38+
output:
39+
"results/{date}/visual/joined-seqs.qzv",
40+
log:
41+
"logs/{date}/visualisation/visualise-joined.log",
42+
conda:
43+
"../envs/qiime-only-env.yaml"
44+
shell:
45+
"qiime demux summarize "
46+
"--i-data {input} "
47+
"--o-visualization {output} "
48+
"--verbose 2> {log}"
49+
50+
rule unzip_joined:
51+
input:
52+
"results/{date}/visual/joined-seqs.qzv",
53+
output:
54+
temp(directory("results/{date}/visual/joined-seqs")),
55+
log:
56+
"logs/{date}/outputs/unzip-joined.log",
57+
conda:
58+
"../envs/python.yaml"
59+
script:
60+
"../scripts/rename_qzv.py"
4761

4862

4963
rule unzip_samples:
@@ -72,19 +86,6 @@ rule unzip_trimmed:
7286
"../scripts/rename_qzv.py"
7387

7488

75-
rule unzip_joined:
76-
input:
77-
"results/{date}/visual/joined-seqs.qzv",
78-
output:
79-
temp(directory("results/{date}/visual/joined-seqs")),
80-
log:
81-
"logs/{date}/outputs/unzip-joined.log",
82-
conda:
83-
"../envs/python.yaml"
84-
script:
85-
"../scripts/rename_qzv.py"
86-
87-
8889
rule visualise_table:
8990
input:
9091
"results/{date}/out/table-cluster-lengthfilter.qza",

workflow/scripts/complete_filter.py

Lines changed: 54 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -11,37 +11,45 @@
1111

1212
sys.stderr = open(snakemake.log[0], "w")
1313

14+
config = snakemake.config
15+
datatype = str(config["datatype"])
16+
1417
samples = pd.read_csv(
1518
str(snakemake.input.samples) + "/paired-seqs/data/per-sample-fastq-counts.tsv",
1619
sep="\t",
1720
header=0,
1821
index_col=0,
1922
)
20-
samples.drop(
21-
["reverse sequence count"],
22-
axis=1,
23-
inplace=True,
24-
)
23+
if datatype == "SampleData[PairedEndSequencesWithQuality]":
24+
samples.drop(
25+
["reverse sequence count"],
26+
axis=1,
27+
inplace=True,
28+
)
2529
samples.rename(columns={"forward sequence count": "Raw reads"}, inplace=True)
2630
trimmed = pd.read_csv(
2731
str(snakemake.input.trimmed) + "/trimmed-seqs/data/per-sample-fastq-counts.tsv",
2832
sep="\t",
2933
header=0,
3034
index_col=0,
3135
)
32-
trimmed.drop(
33-
["reverse sequence count"],
34-
axis=1,
35-
inplace=True,
36-
)
36+
if datatype == "SampleData[PairedEndSequencesWithQuality]":
37+
trimmed.drop(
38+
["reverse sequence count"],
39+
axis=1,
40+
inplace=True,
41+
)
3742
trimmed.rename(columns={"forward sequence count": "Reads after trimming"}, inplace=True)
38-
joined = pd.read_csv(
39-
str(snakemake.input.joined) + "/joined-seqs/data/per-sample-fastq-counts.tsv",
40-
sep="\t",
41-
header=0,
42-
index_col=0,
43-
)
44-
joined.rename(columns={"forward sequence count": "Reads after joining"}, inplace=True)
43+
if datatype == "SampleData[PairedEndSequencesWithQuality]":
44+
joined = pd.read_csv(
45+
str(snakemake.input.joined) + "/joined-seqs/data/per-sample-fastq-counts.tsv",
46+
sep="\t",
47+
header=0,
48+
index_col=0,
49+
)
50+
joined.rename(
51+
columns={"forward sequence count": "Reads after joining"}, inplace=True
52+
)
4553

4654
first = pd.read_csv(
4755
str(snakemake.input.first) + "/metadata.tsv", sep="\t", header=0, index_col=0
@@ -92,20 +100,35 @@
92100
)
93101
complete.rename(columns={"0": "Reads after abundance filter"}, inplace=True)
94102

95-
merged_df = pd.concat(
96-
[
97-
samples,
98-
trimmed,
99-
joined,
100-
first,
101-
human,
102-
wo_chimera,
103-
length,
104-
before_abundance,
105-
complete,
106-
],
107-
axis=1,
108-
)
103+
if datatype == "SampleData[PairedEndSequencesWithQuality]":
104+
merged_df = pd.concat(
105+
[
106+
samples,
107+
trimmed,
108+
joined,
109+
first,
110+
human,
111+
wo_chimera,
112+
length,
113+
before_abundance,
114+
complete,
115+
],
116+
axis=1,
117+
)
118+
elif datatype == "SampleData[SequencesWithQuality]":
119+
merged_df = pd.concat(
120+
[
121+
samples,
122+
trimmed,
123+
first,
124+
human,
125+
wo_chimera,
126+
length,
127+
before_abundance,
128+
complete,
129+
],
130+
axis=1,
131+
)
109132

110133
merged_df = merged_df.fillna(0)
111134

0 commit comments

Comments (0)