-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathgenemark.wdl
executable file
·142 lines (131 loc) · 3.57 KB
/
genemark.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
version 1.0
workflow genemark {
  # Dispatches gene calling to gm_isolate or gm_meta depending on
  # imgap_project_type, then hands whichever results exist to
  # clean_and_unify and exposes that task's three files as outputs.
  input {
    File imgap_input_fasta
    String imgap_project_id
    String imgap_project_type
    String container
  }

  # The two conditions compare the same string against different literals,
  # so at most one branch runs. Outputs of a branch that did not run are
  # undefined (optional) when referenced below.
  if (imgap_project_type == "isolate") {
    call gm_isolate {
      input:
        container = container,
        project_id = imgap_project_id,
        input_fasta = imgap_input_fasta
    }
  }

  if (imgap_project_type == "metagenome") {
    call gm_meta {
      input:
        container = container,
        project_id = imgap_project_id,
        input_fasta = imgap_input_fasta
    }
  }

  # Always called; receives the isolate and metagenome results side by side,
  # grouped here by producing task.
  call clean_and_unify {
    input:
      container = container,
      project_id = imgap_project_id,
      iso_gff = gm_isolate.gff,
      iso_genes_fasta = gm_isolate.genes,
      iso_proteins_fasta = gm_isolate.proteins,
      meta_gff = gm_meta.gff,
      meta_genes_fasta = gm_meta.genes,
      meta_proteins_fasta = gm_meta.proteins
  }

  output {
    File gff = clean_and_unify.gff
    File genes = clean_and_unify.genes
    File proteins = clean_and_unify.proteins
  }
}
task gm_isolate {
  # Runs the executable at `bin` on `input_fasta` with --genome-type auto and
  # asks it for three files named after `prefix`: a GFF (--output/--format gff),
  # a gene FASTA (--fnn) and a protein FASTA (--faa).
  #
  # Inputs:
  #   bin         path of the executable inside the container
  #   input_fasta sequence file passed via --seq
  #   project_id  identifier used to build output names
  #   prefix      project_id with ":" replaced by "_" (overridable)
  #   container   image name used for the docker runtime attribute
  input {
    String bin="/opt/omics/bin/gms2.pl"
    File input_fasta
    String project_id
    String prefix=sub(project_id, ":", "_")
    String container
  }

  command <<<
    set -euo pipefail
    ~{bin} \
      --seq ~{input_fasta} \
      --genome-type auto \
      --output ~{prefix}_genemark.gff \
      --format gff \
      --fnn ~{prefix}_genemark_genes.fna \
      --faa ~{prefix}_genemark_proteins.faa
  >>>

  runtime {
    docker: container
    memory: "86G"
    time: "1:00:00"
  }

  # File names must match the ones given on the command line above.
  output {
    File gff = "~{prefix}_genemark.gff"
    File genes = "~{prefix}_genemark_genes.fna"
    File proteins = "~{prefix}_genemark_proteins.faa"
  }
}
task gm_meta {
  # Runs the executable at `bin` on `input_fasta` with the model file given by
  # `model` (--Meta) and --incomplete_at_gaps 30, requesting three files named
  # after `prefix`: a GFF (-o/--format gff), a nucleotide FASTA (--NT) and a
  # protein FASTA (--AA).
  #
  # Inputs:
  #   bin         path of the executable inside the container
  #   model       path of the model file inside the container
  #   input_fasta sequence file passed via --seq
  #   project_id  identifier used to build output names
  #   prefix      project_id with ":" replaced by "_" (overridable)
  #   container   image name used for the docker runtime attribute
  input {
    String bin="/opt/omics/bin/gmhmmp2"
    String model="/opt/omics/programs/gms2_linux_64/mgm_11.mod"
    File input_fasta
    String project_id
    String prefix=sub(project_id, ":", "_")
    String container
  }

  command <<<
    set -euo pipefail
    ~{bin} \
      --Meta ~{model} \
      --incomplete_at_gaps 30 \
      -o ~{prefix}_genemark.gff \
      --format gff \
      --NT ~{prefix}_genemark_genes.fna \
      --AA ~{prefix}_genemark_proteins.faa \
      --seq ~{input_fasta}
  >>>

  runtime {
    docker: container
    memory: "86G"
    time: "1:00:00"
  }

  # File names must match the ones given on the command line above.
  output {
    File gff = "~{prefix}_genemark.gff"
    File genes = "~{prefix}_genemark_genes.fna"
    File proteins = "~{prefix}_genemark_proteins.faa"
  }
}
task clean_and_unify {
  # Post-processes gene-calling results and republishes them from the task's
  # working directory.
  #
  # Steps performed by the command:
  #   1. Replace every "*" with "X" in the supplied protein FASTA, in place.
  #   2. Run `unify_bin` with the supplied GFF, gene FASTA and protein FASTA
  #      as positional arguments.
  #   3. Move every supplied file into the working directory so the output
  #      section can pick them up by name.
  #
  # Inputs:
  #   iso_* / meta_*  optional results from the isolate and metagenome
  #                   gene-calling tasks; an undefined optional renders as an
  #                   empty string in the command and therefore disappears
  #                   from the argument lists
  #   unify_bin       path of the ID-unification script inside the container
  #   project_id      identifier used to build output names
  #   prefix          project_id with ":" replaced by "_" (overridable)
  #   container       image name used for the docker runtime attribute
  #
  # NOTE(review): the outputs are located purely by name, so the incoming
  # files are assumed to already be called <prefix>_genemark.* as produced by
  # gm_isolate / gm_meta -- confirm if other producers are ever wired in.
  input {
    File? iso_genes_fasta
    File? meta_genes_fasta
    File? iso_proteins_fasta
    File? meta_proteins_fasta
    File? iso_gff
    File? meta_gff
    String unify_bin="/opt/omics/bin/structural_annotation/unify_gene_ids.py"
    String project_id
    String prefix=sub(project_id, ":", "_")
    String container
  }

  command <<<
    set -ueo pipefail
    sed -i 's/\*/X/g' ~{iso_proteins_fasta} ~{meta_proteins_fasta}
    ~{unify_bin} ~{iso_gff} ~{meta_gff} \
      ~{iso_genes_fasta} ~{meta_genes_fasta} \
      ~{iso_proteins_fasta} ~{meta_proteins_fasta}
    # Move only the files that were actually supplied. Issuing one mv per
    # optional input would run "mv ." with no source for each undefined one,
    # which exits non-zero and, under "set -e", aborts the task. Undefined
    # placeholders expand to nothing here, so the loop simply skips them.
    # Errors are no longer discarded: a failed move means a missing output.
    for staged in ~{iso_proteins_fasta} ~{meta_proteins_fasta} \
      ~{iso_genes_fasta} ~{meta_genes_fasta} \
      ~{iso_gff} ~{meta_gff}
    do
      mv "$staged" .
    done
  >>>

  runtime {
    time: "1:00:00"
    memory: "86G"
    docker: container
  }

  output {
    File gff = "~{prefix}_genemark.gff"
    File genes = "~{prefix}_genemark_genes.fna"
    File proteins = "~{prefix}_genemark_proteins.faa"
  }
}