-
Notifications
You must be signed in to change notification settings - Fork 1
/
VarTools.py
545 lines (506 loc) · 26.3 KB
/
VarTools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
#!/usr/bin/python3
# -*- coding:utf-8 -*-
##################################################
# Author: Shi-Yuan Tong
# Email: [email protected]
# Created Time: 2021-5-14
# log:
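# 21.05.31: Reworked for the new framework to improve extensibility and adaptability
# 21.08.31: Refined the framework and fixed some known bugs
# 21.09.03: Changed the input parameters; added GRIPT-style case-control (re-implemented from the paper rather than the source code, so results may differ from the original tool)
# 21.09.08: Finished testing the GRIPT case-control method; started re-implementing TRAPD-style burden testing
# 21.10.08: Refactored bam qc
# 21.10.14: Introduced a fast mode, adjusted how parameters are passed, refactored part of the code
# 21.10.29: Integrated multiple variant callers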
##################################################
import os
import sys
import time
import argparse
from script.function import *
from script.case_control import build_snvdb
def format_time(seconds):
    """Render an elapsed time in seconds as a short human-readable string.

    The coarsest unit shown depends on the magnitude: seconds only below one
    minute, then minutes, hours and finally days. Every field is formatted
    with ``%d``, so fractional parts are truncated.

    :param seconds: elapsed time in seconds (int or float).
    :return: text such as ``'5 s.'`` or ``'1 h 2 min 5 s.'``.
    """
    if seconds < 60:
        return '%d s.' % seconds
    if seconds < 3600:
        return '%d min %d s.' % divmod(seconds, 60)
    if seconds < 86400:
        hours, rest = divmod(seconds, 3600)
        minutes, secs = divmod(rest, 60)
        return '%d h %d min %d s.' % (hours, minutes, secs)
    days, rest = divmod(seconds, 86400)
    hours, rest = divmod(rest, 3600)
    minutes, secs = divmod(rest, 60)
    return '%d day %d h %d min %d s.' % (days, hours, minutes, secs)
def check_bed(bed):
    """Abort the program when a bed file was given but cannot be found.

    :param bed: path of the bed file; any falsy value means that no bed file
        was supplied and is accepted as is.
    :raises SystemExit: if ``bed`` is truthy and is not an existing file.
    """
    if not bed:
        return
    if os.path.isfile(bed):
        return
    sys.exit('[ Error: Can not find bed file!]')
def f2v_args(args):
    """Convert the parsed ``f2v`` command line into a plain option dict.

    :param args: namespace produced by the ``f2v`` argument parser.
    :return: dict of the f2v options, keyed by option name.
    :raises SystemExit: if no input directory was given, or (through
        ``check_bed``) if the given bed file does not exist.
    """
    if not args.in_dir:
        sys.exit('[ Error: Parameter is incomplete ! ]')
    argsd = {name: getattr(args, name) for name in ('in_dir', 'out_dir', 'bed')}
    # Validate the bed file before the remaining options are collected.
    check_bed(argsd['bed'])
    remaining = (
        'prefix', 'vcf', 'fastqc', 'qualimap',
        'fast_mark_dup', 'rm_dup', 'fast_rm_dup',
        'thread', 'tmp_dir', 'keep_tmp', 'config', 'gender_rate',
    )
    for name in remaining:
        argsd[name] = getattr(args, name)
    return argsd
def bqc_args(args):
    """Convert the parsed ``bqc`` command line into a plain option dict.

    :param args: namespace produced by the ``bqc`` argument parser.
    :return: dict of the bam QC options, keyed by option name.
    :raises SystemExit: if no bam file was given, or (through ``check_bed``)
        if the given bed file does not exist.
    """
    if not args.bam:
        sys.exit('[ Error: Parameter is incomplete ! ]')
    argsd = {'bam': args.bam, 'out_dir': args.out_dir, 'bed': args.bed}
    # The bed file is checked before the remaining options are copied over.
    check_bed(argsd['bed'])
    argsd.update(
        qualimap=args.qualimap,
        thread=args.thread,
        tmp_dir=args.tmp_dir,
        keep_tmp=args.keep_tmp,
        gender_rate=args.gender_rate,
    )
    return argsd
def tGT_args(args):
    """Convert the parsed ``tGT`` command line into a plain option dict.

    The proband, father, mother and sibling g.vcf paths are resolved with
    ``os.path.realpath``. Siblings are given as one comma-separated string;
    when none are given the ``'s_gvcfs'`` entry is an empty string.

    :param args: namespace produced by the ``tGT`` argument parser.
    :return: dict of the trio genotyping options.
    :raises SystemExit: if any of proband/father/mother is missing, or
        (through ``check_bed``) if the given bed file does not exist.
    """
    if not (args.proband and args.father and args.mother):
        sys.exit('[ Error: Trio sample incomplete ! ]')
    argsd = {
        'p_gvcf': os.path.realpath(args.proband),
        'f_gvcf': os.path.realpath(args.father),
        'm_gvcf': os.path.realpath(args.mother),
    }
    if args.sibling:
        sibling_paths = []
        for entry in args.sibling.split(','):
            # Only surrounding blanks are stripped from each entry.
            sibling_paths.append(os.path.realpath(entry.strip(' ')))
        argsd['s_gvcfs'] = sibling_paths
        print('[ Msg: find %d sibling. ]' % len(sibling_paths))
    else:
        argsd['s_gvcfs'] = ''
    argsd['out_dir'] = args.out_dir
    argsd['bed'] = args.bed
    check_bed(argsd['bed'])
    argsd.update(
        prefix=args.prefix,
        tmp_dir=args.tmp_dir,
        keep_tmp=args.keep_tmp,
        config=args.config,
    )
    return argsd
def sGT_args(args):
    """Convert the parsed ``sGT`` command line into a plain option dict.

    :param args: namespace produced by the ``sGT`` argument parser.
    :return: dict of the single-sample genotyping options; the g.vcf path is
        resolved with ``os.path.realpath`` and ``--noqc`` is stored under
        ``'novcfqc'``.
    :raises SystemExit: if no g.vcf was given, or (through ``check_bed``) if
        the given bed file does not exist.
    """
    if not args.gvcf:
        sys.exit('[ Error: Sample incomplete ! ]')
    argsd = {
        'gvcf': os.path.realpath(args.gvcf),
        'out_dir': args.out_dir,
        'bed': args.bed,
    }
    # The bed file is checked before the remaining options are copied over.
    check_bed(argsd['bed'])
    argsd.update(
        tmp_dir=args.tmp_dir,
        keep_tmp=args.keep_tmp,
        prefix=args.prefix,
        config=args.config,
        caller=args.caller,
        novcfqc=args.noqc,
        noflt=args.noflt,
    )
    return argsd
def fp_args(args):
    """Convert the parsed ``fp`` command line into a plain option dict.

    :param args: namespace produced by the ``fp`` argument parser.
    :return: dict with the keys ``in_dir``, ``outfile``, ``file_type``,
        ``overlap_rate`` and ``tmp_dir``.
    :raises SystemExit: if no input directory was given.
    """
    if not args.in_dir:
        sys.exit('[ Error: Parameter is incomplete ! ]')
    option_names = ('in_dir', 'outfile', 'file_type', 'overlap_rate', 'tmp_dir')
    return {name: getattr(args, name) for name in option_names}
def cc_args(args):
    """Convert the parsed ``cc`` command line into a plain option dict.

    Both a case input (directory or matrix) and a control input (directory
    or matrix) are required. ``'cc_default'`` is ``False`` only when both
    the gene column and the score column were given; if exactly one of them
    was given a warning is printed and the default method is used.

    :param args: namespace produced by the ``cc`` argument parser.
    :return: dict of the case-control options.
    :raises SystemExit: if the case input or the control input is missing.
    """
    if not (args.case or args.case_matrix) or not (args.control or args.control_matrix):
        sys.exit('[ Error: Parameter is incomplete ! ]')
    argsd = {
        'case': args.case,
        'case_matrix': args.case_matrix,
        'control': args.control,
        'control_matrix': args.control_matrix,
        'out_dir': args.out_dir,
        'cutoff': args.cutoff,
        'mode': args.mode,
        'fp': args.fp,
        'gene': args.gene,
        'score': args.score,
    }
    has_gene = bool(args.gene)
    has_score = bool(args.score)
    if has_gene and has_score:
        print('[ Msg: Use the metrics that user set to calculate case-control. ]')
        argsd['cc_default'] = False
    elif has_gene or has_score:
        # Only one of the two columns was named: fall back to the default.
        print('[ Warn: Parameter is incomplete ! and use default method to calculate case-control. ]')
        argsd['cc_default'] = True
    else:
        print('[ Msg: Use default method to calculate case-control. ]')
        argsd['cc_default'] = True
    argsd['config'] = args.config
    return argsd
def gd_args(args):
    """Convert the parsed ``gd`` command line into a plain option dict.

    :param args: namespace produced by the ``gd`` argument parser.
    :return: dict with the keys ``bam``, ``bed``, ``thread``, ``tmp_dir``
        and ``rate``.
    :raises SystemExit: if no bam file was given, or (through ``check_bed``)
        if the given bed file does not exist.
    """
    if not args.bam:
        sys.exit('[ Error: Parameter is incomplete ! ]')
    argsd = {'bam': args.bam, 'bed': args.bed}
    # The bed file is checked before the remaining options are copied over.
    check_bed(argsd['bed'])
    for name in ('thread', 'tmp_dir', 'rate'):
        argsd[name] = getattr(args, name)
    return argsd
def call_args(args):
    """Convert the parsed ``call`` command line into a plain option dict.

    :param args: namespace produced by the ``call`` argument parser.
    :return: dict of the variant calling options; the bam path is resolved
        with ``os.path.realpath`` and ``--noqc`` is stored under
        ``'novcfqc'``.
    :raises SystemExit: if no bam file was given, or (through ``check_bed``)
        if the given bed file does not exist.
    """
    if not args.bam:
        sys.exit('[ Error: Sample incomplete ! ]')
    argsd = {
        'bam': os.path.realpath(args.bam),
        'out_dir': args.out_dir,
        'bed': args.bed,
    }
    # The bed file is checked before the remaining options are copied over.
    check_bed(argsd['bed'])
    argsd.update(
        thread=args.thread,
        tmp_dir=args.tmp_dir,
        keep_tmp=args.keep_tmp,
        prefix=args.prefix,
        config=args.config,
        caller=args.caller,
        novcfqc=args.noqc,
        noflt=args.noflt,
    )
    return argsd
def anno_args(args):
    """Convert the parsed ``anno`` command line into a plain option dict.

    When no prefix was given, it is derived from the vcf file name: the part
    of the base name that precedes the first ``'.vcf'``.

    :param args: namespace produced by the ``anno`` argument parser.
    :return: dict with the keys ``vcf``, ``out_dir``, ``thread``,
        ``keep_tmp``, ``prefix``, ``config`` and ``mode``.
    :raises SystemExit: if no input vcf was given.
    """
    if not args.invcf:
        sys.exit('[ Error: Args incomplete ! ]')
    vcf_path = os.path.realpath(args.invcf)
    argsd = {
        'vcf': vcf_path,
        'out_dir': args.out_dir,
        'thread': args.thread,
        'keep_tmp': args.keep_tmp,
    }
    argsd['prefix'] = args.prefix or os.path.basename(vcf_path).split('.vcf')[0]
    argsd['config'] = args.config
    argsd['mode'] = args.mode
    return argsd
def ta_args(args):
    """Convert the parsed ``tA`` command line into a plain option dict.

    When no prefix was given, it is derived from the vcf file name: the part
    of the base name that precedes the first ``'.vcf'``. The proband, father
    and mother sample names are stored under ``'pn'``, ``'fn'`` and ``'mn'``.

    :param args: namespace produced by the ``tA`` argument parser.
    :return: dict of the trio analysis options.
    :raises SystemExit: if no input vcf was given.
    """
    if not args.invcf:
        sys.exit('[ Error: Args incomplete ! ]')
    vcf_path = os.path.realpath(args.invcf)
    argsd = {
        'vcf': vcf_path,
        'out_dir': args.out_dir,
        'thread': args.thread,
        'keep_tmp': args.keep_tmp,
    }
    argsd['prefix'] = args.prefix or os.path.basename(vcf_path).split('.vcf')[0]
    argsd.update(
        config=args.config,
        mode=args.mode,
        pn=args.sample_name,
        fn=args.father_name,
        mn=args.mother_name,
    )
    return argsd
def ana_args():
description = '=' * 77 + '\nVarTools 0.1.0 20211011\nWorkflow of WGS/WES analysis.\n' + '=' * 77
print(description)
func_description = '''
Usage: VarTools.py <command> [options]
function of VarTools:
(1) f2v: analysis from fastq to gvcf.
(2) tGT: from gvcf created by GATK to vcf in trio mode.
(3) sGT: from gvcf created by GATK to vcf in single mode.
(4) fp: create false positive database from vcf files or avinput files.
(5) cc: case-control analysis with GRIPT.
(6) bqc: bam quality check.
(7) gd: gender identify.
(8) call: call variants from bam.
(9) tSV: call trio SV with clinSV (only for WGS).
(10) sSV: call single SV with clinSV (only for WGS).
(11) tA: trio analysis.
(12) sA: single case analysis.
(13) bt: Burden testing with TRAPD.
(14) anno: annotation for small variants.
To get help on a particular command, call it with -h/--help.
'''
function = {
'f2v': 'analysis from fastq to gvcf.',
'tGT': 'from gvcf created by GATK to vcf in trio mode.',
'sGT': 'from gvcf created by GATK to vcf in single mode.',
'bqc': 'bam quality check.',
'fp': 'create false positive database from vcf files or avinput files.',
'cc': 'case-control analysis with GRIPT.',
'bt': 'Burden testing with TRAPD.',
'tSV': 'call trio SV with clinSV (only for WGS).',
'sSV': 'call single SV with clinSV (only for WGS).',
'sA': 'single case analysis.',
'tA': 'trio analysis.',
'gd': 'identify gender from bam coverage.',
'call': 'call variants from bam.',
'anno': 'annotation for small variants.'
}
if len(sys.argv) == 1 or sys.argv[1] in ['--help', 'help', '-h']:
sys.exit(func_description)
elif sys.argv[1] == 'f2v':
parser = argparse.ArgumentParser(prog='VarTool.py', usage='%(prog)s f2v [options] -i INDIR')
parser.description = function['f2v']
parser.add_argument('f2v')
parser.add_argument('-i', '--in_dir', required=True, help='directory of single sample raw data.')
parser.add_argument('-o', '--out_dir', default='./', help='output directory of result, [./].')
parser.add_argument('-b', '--bed', default=False, help='regions of interest.')
parser.add_argument('-p', '--prefix', default=False,
help='prefix of output file, if not, will use input directory name.')
parser.add_argument('--vcf', action="store_true", help='Whether to generate vcf file.')
parser.add_argument('--fastqc', action="store_true",
help='in default, fastp will give reports, '
'set it if you want fastqc to check fastq with raw/clean data.')
parser.add_argument('--qualimap', action="store_true",
help='bamQC with qualimap. but maybe it is slowly with large bam.')
parser.add_argument('--fast_mark_dup', action='store_true',
help='use sambamba to mark duplication, but when bam is very big, may get something wrong.')
parser.add_argument('--rm_dup', action='store_true',
help='remove duplication rather than mark.')
parser.add_argument('--fast_rm_dup', action='store_true',
help='use fastp to remove duplication, and will skip mark duplication in follow-up steps. '
'if --fast_rm_dup option is enabled, '
'then --fast_mark_dup and --rm_dup options are ignored.')
parser.add_argument('--tmp_dir', default=False,
help='temp directory, if not, it will create in the result directory.')
parser.add_argument('--keep_tmp', action='store_true', help='keep temp directory.')
parser.add_argument('-t', '--thread', default=1, type=int, help='thread of component softwares, [1].')
parser.add_argument('--config', default=False, help='you can change config in \'lib\' or set by your need.')
parser.add_argument('-r', '--gender_rate', default=20, type=float,
help='coverage rate of X/Y for calculate gender [20].')
args = parser.parse_args()
args_dict = f2v_args(args)
elif sys.argv[1] == 'bqc':
parser = argparse.ArgumentParser(prog='VarTool.py', usage='%(prog)s bqc [options] -b in.bam --bed bed')
parser.description = function['bqc']
parser.add_argument('bqc')
parser.add_argument('-b', '--bam', required=True, help='bam file for qc. (after sort and index).')
parser.add_argument('--bed', required=True,
help='regions of interest. If WGS file, can use bed in \'lib\' or set by your self.')
parser.add_argument('-o', '--out_dir', default='./result', help='output directory of result, [./result].')
parser.add_argument('--qualimap', action="store_true",
help='bamQC with qualimap. but maybe it is slowly with large bam.')
parser.add_argument('-t', '--thread', default=1, type=int, help='thread of component softwares, [1].')
parser.add_argument('--tmp_dir', default=False,
help='temp directory, if not, it will create in the result directory.')
parser.add_argument('--keep_tmp', action='store_true', help='keep temp directory.')
parser.add_argument('-r', '--gender_rate', default=20, type=float,
help='coverage rate of X/Y for calculate gender [20].')
args = parser.parse_args()
args_dict = bqc_args(args)
elif sys.argv[1] == 'tGT':
parser = argparse.ArgumentParser(prog='VarTool.py',
usage='%(prog)s tGT [options] -p g.vcf.gz -f g.vcf.gz -m g.vcf.gz')
parser.description = function['tGT']
parser.add_argument('tGT')
parser.add_argument('-p', '--proband', required=True, help='g.vcf of proband.')
parser.add_argument('-f', '--father', required=True, help='g.vcf of father.')
parser.add_argument('-m', '--mother', required=True, help='g.vcf of mother.')
parser.add_argument('-s', '--sibling', help='g.vcf of siblings, more than one use \',\' to split.')
parser.add_argument('-o', '--out_dir', default='./result', help='output directory of result, [./result].')
parser.add_argument('-b', '--bed', default=False, help='regions of interest.')
parser.add_argument('--prefix', default=False,
help='prefix of output file.[].')
parser.add_argument('--tmp_dir', default=False,
help='temp directory, if not, it will create in the result directory.')
parser.add_argument('--keep_tmp', action='store_true', help='keep temp directory.')
parser.add_argument('--config', default=False, help='you can change config in \'lib\' or set by your need.')
args = parser.parse_args()
args_dict = tGT_args(args)
elif sys.argv[1] == 'sGT':
parser = argparse.ArgumentParser(prog='VarTool.py', usage='%(prog)s sGT [options] -g g.vcf.gz')
parser.description = function['sGT']
parser.add_argument('sGT')
parser.add_argument('-g', '--gvcf', required=True, help='directory of proband raw data.')
parser.add_argument('-o', '--out_dir', default='./result', help='output directory of result, [./result].')
parser.add_argument('-b', '--bed', default=False, help='regions of interest.')
parser.add_argument('-c', '--caller', default='gatk_hard', help='caller gatk_hard/vqsr, [gatk_hard].')
parser.add_argument('-p', '--prefix', default='', help='prefix of output file.[].')
parser.add_argument('--tmp_dir', default=False,
help='temp directory, if not, it will create in the result directory.')
parser.add_argument('--keep_tmp', action='store_true', help='keep temp directory.')
parser.add_argument('--config', default=False, help='you can change config in \'lib\' or set by your need.')
parser.add_argument('--noqc', action='store_true', help='do not vcf quality check.')
parser.add_argument('--noflt', action='store_true', help='do not filter raw vcf by base line.')
args = parser.parse_args()
args_dict = sGT_args(args)
elif sys.argv[1] == 'fp':
parser = argparse.ArgumentParser(prog='VarTool.py', usage='%(prog)s fp [options] -i INDIR')
parser.description = function['fp']
parser.add_argument('fp')
parser.add_argument('-i', '--in_dir', required=True, help='directory of single sample raw data.')
parser.add_argument('-o', '--outfile', default='fp.txt',
help='out file of result, if exist, will add result to end, [fp.txt].')
parser.add_argument('--file_type', default='vcf',
help='file type , vcf/region. region:chr,start,end,ref,alt, 1-based, split by tab. [vcf].')
parser.add_argument('--overlap_rate', default=0.5, type=float,
help='overlap rate of variants to build false positive database, [0.5].')
parser.add_argument('--tmp_dir', default='./.tmp_dir_for_fp', help='temp directory [./.tmp_dir_for_fp].')
args = parser.parse_args()
args_dict = fp_args(args)
elif sys.argv[1] == 'cc':
parser = argparse.ArgumentParser(prog='VarTool.py',
usage='%(prog)s cc [options] -c/-m case.dir/case.matrix '
'-C/-M control.dir/control.matrix')
parser.description = function['cc']
parser.add_argument('cc')
parser.add_argument('-c', '--case', help='input directory of case.')
parser.add_argument('-C', '--control', help='input directory of control.')
parser.add_argument('-m', '--case_matrix', help='matrix of case create by this program.')
parser.add_argument('-M', '--control_matrix', help='matrix of control create by this program.')
parser.add_argument('-o', '--out_dir', default='./result', help='output directory of result, [./result].')
parser.add_argument('-t', '--cutoff', default=0, type=float, help='variant score cutoff, [0].')
parser.add_argument('--mode', default='AD', help='mode of disease, AD/AR, [AD].')
parser.add_argument('--fp', help='the false positive database that filted.')
parser.add_argument('--gene', help='the columns name of gene in file.')
parser.add_argument('--score', help='the columns name of metrics score in file.')
parser.add_argument('--config', default=False, help='you can change config in \'lib\' or set by your need.')
args = parser.parse_args()
args_dict = cc_args(args)
elif sys.argv[1] == 'gd':
parser = argparse.ArgumentParser(prog='VarTool.py', usage='%(prog)s gd [options] -b bam -d bed')
parser.description = function['gd']
parser.add_argument('gd')
parser.add_argument('-b', '--bam', required=True, help='bam file for gender identify. (after sort and index).')
parser.add_argument('-d', '--bed', required=True,
help='regions of interest. If WGS file, can use bed in \'lib\' or set by your self.')
parser.add_argument('--tmp_dir', default='./.tmp_dir_for_gd', help='temp directory [./.tmp_dir_for_gd].')
parser.add_argument('-t', '--thread', default=1, type=int, help='thread of component softwares, [1].')
parser.add_argument('-r', '--rate', default=20, type=float, help='coverage rate of X/Y [20].')
args = parser.parse_args()
args_dict = gd_args(args)
elif sys.argv[1] == 'call':
parser = argparse.ArgumentParser(prog='VarTool.py', usage='%(prog)s call [options] -i bam')
parser.description = function['call']
parser.add_argument('call')
parser.add_argument('-i', '--bam', required=True, help='bam file for calling, must be sorted and index.')
parser.add_argument('-o', '--out_dir', default='./result', help='output directory of result, [./result].')
parser.add_argument('-b', '--bed', default=False, help='regions of interest.')
parser.add_argument('-c', '--caller', default='gatk',
help='caller, now support gatk(_hard)/deepvariant/bcftools/vardict/strelka2 [gatk].')
parser.add_argument('-p', '--prefix', default='result', help='prefix of output file.[result].')
parser.add_argument('--tmp_dir', default=False,
help='temp directory, if not, it will create in the result directory.')
parser.add_argument('--keep_tmp', action='store_true', help='keep temp directory.')
parser.add_argument('--config', default=False, help='you can change config in \'lib\' or set by your need.')
parser.add_argument('--noqc', action='store_true', help='do not vcf quality check.')
parser.add_argument('-t', '--thread', default=1, type=int, help='thread of component softwares, [1].')
parser.add_argument('--noflt', action='store_true', help='do not filter raw vcf by base line.')
args = parser.parse_args()
args_dict = call_args(args)
elif sys.argv[1] == 'anno':
parser = argparse.ArgumentParser(prog='VarTool.py', usage='%(prog)s anno [options] -i VCF')
parser.description = function['anno']
parser.add_argument('anno')
parser.add_argument('-i', '--invcf', required=True, help='input vcf file.')
parser.add_argument('-o', '--out_dir', default='./', help='output directory of result, [./].')
parser.add_argument('-p', '--prefix', default=False,
help='prefix of output file, if not, will use input vcf name.')
parser.add_argument('--keep_tmp', action='store_true', help='keep temp directory.')
parser.add_argument('--config', default=False, help='you can change config in \'lib\' or set by your need.')
parser.add_argument('-t', '--thread', default=1, type=int, help='thread of component softwares, [1].')
parser.add_argument('-m', '--mode', default='FA', choices=['FA', 'TA'],
help='two mode TA/FA, TA: annotate all info, FA: filter with annotation,[FA].')
args = parser.parse_args()
args_dict = anno_args(args)
elif sys.argv[1] == 'tA':
parser = argparse.ArgumentParser(prog='VarTool.py', usage='%(prog)s tA [options] -i VCF')
parser.description = function['tA']
parser.add_argument('tA')
parser.add_argument('-i', '--invcf', required=True, help='input vcf file.')
parser.add_argument('-o', '--out_dir', default='./', help='output directory of result, [./].')
parser.add_argument('-p', '--prefix', default=False,
help='prefix of output file, if not, will use input vcf name.')
parser.add_argument('--keep_tmp', action='store_true', help='keep temp directory.')
parser.add_argument('--config', default=False, help='you can change config in \'lib\' or set by your need.')
parser.add_argument('-t', '--thread', default=1, type=int, help='thread of component softwares, [1].')
parser.add_argument('-m', '--mode', default='FA', choices=['FA', 'TA'],
help='two mode TA/FA, TA: annotate all info, FA: filter with annotation,[FA].')
parser.add_argument('--sample_name', default='',
help='sample name of proband in file, if not, chose the first sample.')
parser.add_argument('--father_name', default='',
help='sample name of father in file, if not, chose the name have F/Fa/Father/f/fa/father.')
parser.add_argument('--mother_name', default='',
help='sample name of mother in file, if not, chose the name have M/Mo/Mather/m/mo/mother.')
args = parser.parse_args()
args_dict = ta_args(args)
else:
sys.exit('[ Error: Can not identify the function of <%s>]' % sys.argv[1])
# 脚本所在路径
args_dict['fun'] = sys.argv[1]
args_dict['script_path'] = os.path.split(os.path.realpath(__file__))[0]
return args_dict
def main():
    """Parse the command line and run the requested VarTools function.

    The option dict returned by ``ana_args`` is unpacked into the positional
    arguments of the worker selected through its ``'fun'`` entry.
    """
    # Set up the parameters.
    opts = ana_args()

    def pick(*keys):
        # Look the given option names up, in order, for positional unpacking.
        return [opts[key] for key in keys]

    command = opts['fun']
    if command == 'f2v':
        f2v(*pick('in_dir', 'out_dir', 'bed', 'prefix',
                  'vcf', 'fastqc', 'qualimap',
                  'fast_mark_dup', 'rm_dup', 'fast_rm_dup',
                  'thread', 'script_path', 'config', 'tmp_dir', 'keep_tmp', 'gender_rate'))
    elif command == 'bqc':
        bamQC(*pick('bam', 'bed', 'out_dir', 'tmp_dir',
                    'script_path', 'thread', 'qualimap', 'keep_tmp', 'gender_rate'))
    elif command == 'tGT':
        trio_gt(*pick('p_gvcf', 'f_gvcf', 'm_gvcf', 's_gvcfs',
                      'out_dir', 'script_path', 'config',
                      'keep_tmp', 'tmp_dir', 'bed', 'prefix'))
    elif command == 'sGT':
        single_gt(*pick('gvcf', 'out_dir', 'script_path', 'bed', 'tmp_dir', 'keep_tmp',
                        'prefix', 'config', 'caller', 'novcfqc', 'noflt'))
    elif command == 'fp':
        # The temp directory is created here only when it does not exist yet;
        # that fact is handed to build_snvdb as its last argument.
        rm_tmp = not os.path.isdir(opts['tmp_dir'])
        if rm_tmp:
            os.makedirs(opts['tmp_dir'])
        tmp_dir = os.path.abspath(opts['tmp_dir'])
        build_snvdb(opts['in_dir'], opts['outfile'], tmp_dir,
                    opts['script_path'], opts['file_type'], opts['overlap_rate'], rm_tmp)
    elif command == 'cc':
        burden_test(*pick('case', 'control', 'case_matrix', 'control_matrix',
                          'out_dir', 'fp', 'mode', 'cutoff',
                          'cc_default', 'gene', 'score', 'script_path', 'config'))
    elif command == 'gd':
        gender_identify(*pick('bam', 'bed', 'tmp_dir', 'thread', 'script_path', 'rate'))
    elif command == 'tA':
        trio_filter(*pick('vcf', 'prefix', 'out_dir',
                          'script_path', 'config', 'thread', 'mode', 'pn', 'fn', 'mn'))
    elif command == 'sA':
        single_filter()
    elif command == 'call':
        variants_call(*pick('bam', 'out_dir', 'caller', 'bed', 'prefix', 'thread',
                            'tmp_dir', 'keep_tmp', 'config', 'script_path',
                            'novcfqc', 'noflt'))
    elif command == 'anno':
        anno_variants(*pick('vcf', 'prefix', 'out_dir',
                            'script_path', 'config', 'thread', 'mode'))
if __name__ == '__main__':
    # Time the whole run so that the elapsed time can be reported at the end.
    start_time = time.perf_counter()
    main()
    elapsed = time.perf_counter() - start_time
    ruler = '=' * 77
    print('[ Msg: Use time : %s ]' % format_time(elapsed))
    print(ruler + '\nThanks for using VarTools! \nYou can report bugs to [email protected]\n' + ruler)