# Snakefile: top-level Snakemake workflow definition.
# Require Snakemake >= 8.9.0 before anything else in the workflow is evaluated
# (per the Snakemake docs, min_version raises a workflow error when the running
# version is older than the one given).
from snakemake.utils import min_version
min_version("8.9.0")
##################################################
# Config
##################################################
# Populates the global `config` mapping that is read below.
configfile: "config.yml"

##################################################
# Python modules
##################################################
# NOTE(review): most of these names are not used in this file; presumably the
# included rule files rely on them being in scope. Confirm before pruning.
from os.path import (
    abspath,
    basename,
    dirname,
    exists,
    join,
    splitext,
)
from tempfile import gettempdir

##################################################
# Some settings
##################################################
# Root output directory, taken from the configuration.
OUTDIR = config['outdir']
# "filtering" subdirectory directly under the root output directory.
OUTDIR_filtering = join(OUTDIR, "filtering")
##################################################
# Rule includes
##################################################
# Each `include:` pulls the rules from the named file under rules/ into this
# workflow.
# NOTE(review): `rule all` refers to rules through `rules.<name>.output`, and
# none of those rules are defined in this file, so they presumably come from
# these includes. Keep the includes ahead of `rule all`; the order among them
# may matter too. Confirm before reordering.
# Data collection
include: "rules/data_collection.smk"
include: "rules/metadata.smk"
include: "rules/fasta.smk"
include: "rules/deduplication.smk"
# Filtering
include: "rules/chromosomal.smk"
# Embedding
include: "rules/mashdb.smk"
# Sequence annotation
include: "rules/viral.smk"
include: "rules/bgc.smk"
include: "rules/amr.smk"
include: "rules/features.smk"
include: "rules/proteins.smk"
include: "rules/typing.smk"
# Metadata annotation
include: "rules/ecosystem.smk"
include: "rules/disease.smk"
include: "rules/locations.smk"
# Downstream analysis
include: "rules/krona.smk"
include: "rules/module_downstream.smk"
##################################################
# ALL rule
##################################################
rule all:
input:
### Data Collection
rules.nuccore_queries.output,
rules.biosample_queries.output,
rules.biosample_extraction.output,
rules.join_NABT.output,
rules.nuccorechr_queries.output,
rules.nuccorechr_fasta_join.output,
rules.deduplication.output,
rules.fasta_queries.output,
rules.rmlst_api.output,
### Filtering
rules.rmlst_blastn.output,
rules.chr_putative_ids.output,
### VIRAL
# rules.virsorter2_2.output,
# rules.dramv_annotate.output,
# rules.dramv_summarize.output,
# rules.viral_curation.output,
### BGC
# rules.bgc_split.output,
# rules.antismash_join.output,
### AMR
# rules.seqkit_split_amr.output,
# rules.AMRFinderPlus_getdb.output,
# rules.AMRFinderPlus_join.output,
# rules.rgi_getdb.output,
# rules.rgi_run.output,
# rules.hamronize.output,
### Features
# rules.features_gbk.output,
# rules.features_json.output,
rules.ipg_join.output,
# rules.eggnog_join.output,
### Typing
# rules.mob_typer.output,
# rules.pmlst_join.output,
# ### Ecosystem
rules.ecosystem_taxid.output,
### Disease Ontology
rules.disease_infer_check.output,
### Location
rules.locations_infer.output,
### Downstream analyses
rules.bio_table.output,
rules.taxid_table.output,
rules.final_fasta.output,
rules.ecopaths_table.output,
rules.krona_taxonomy.output,
rules.createmash.output,
# rules.nucc_table.output,
# rules.dstream_summary.output,
# rules.dstream_compare.output,
# rules.dstream_server_data.output,
# rules.dstream_blastndb.output