-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathCONST.h
118 lines (84 loc) · 3.31 KB
/
CONST.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
// Metal - A fast methylation alignment and calling tool for WGBS data.
// Copyright (C) 2017 Jonas Fischer
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// Jonas Fischer [email protected]
#ifndef CONST_H
#define CONST_H
#include <cstdint>
#include <vector>
#include <array>
#include <limits>
// Compile-time configuration shared by the whole program.
//
// The first section contains the values a user is expected to adapt to the
// data set and machine; everything below the INTERNAL banner is tuning state
// whose entries depend on each other (see the notes on SEED, SEEDBITS,
// KMERLEN and KMERMASK). Dependencies that can be verified by the compiler
// are enforced with static_assert so that a bad edit fails the build.
namespace MyConst {

// -------- SET THESE VARIABLES --------
//
//

// Maximum read length of the reads in bp.
constexpr unsigned int READLEN = 100;

// Number of cores that this program is allowed to occupy at any given point.
// NOTE: this is a preprocessor macro, so it is NOT scoped by this namespace;
// it is visible as plain CORENUM in every file that includes this header.
// NOTE(review): presumably kept as a macro for use in preprocessor/pragma
// contexts - confirm before converting it to a constexpr.
#define CORENUM 32

// Closed interval borders [MINPDIST, MAXPDIST] for the distances allowed
// between paired reads.
constexpr uint32_t MINPDIST = 50;
constexpr uint32_t MAXPDIST = 550;
static_assert(MINPDIST <= MAXPDIST,
              "MINPDIST must not exceed MAXPDIST (closed interval would be empty)");

// Number of chromosomes in organism.
constexpr unsigned int CHROMNUM = 24;

// --------------------------------------


// --------------------
// ------INTERNAL------
// --------------------

// Seed used for spaced k-mer hashing, one entry per k-mer position.
// SEEDBITS below encodes the same pattern as an integer, with SEED[0] as the
// most significant bit. The two must be edited together; because SEED is a
// std::vector it cannot be compared against SEEDBITS at compile time here.
const std::vector<bool> SEED = {1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1};
constexpr uint32_t SEEDBITS = 0b11011110111111011111111111111101;

// (More than) size in bp of biggest chromosome in organism.
constexpr unsigned int CHROMMAX = 1000000000;

// (More than) number of CpGs in organism.
constexpr unsigned int CPGMAX = 100000000;

// Number of reads that should be read per batch
// (note that very large values may increase the required amount of RAM drastically).
// Recommended is 300 000.
constexpr unsigned int CHUNKSIZE = 300000;

// Length of a kmer in bp.
// WARNING: THIS PARAMETER MUST EQUAL SEED.size() !!!
constexpr unsigned int KMERLEN = 32;
// SEEDBITS and KMERMASK are 32 bit wide, so a longer k-mer would be silently
// truncated (and a shift by >= 64 would not even be a valid constant).
static_assert(KMERLEN >= 1 && KMERLEN <= 32,
              "KMERLEN must be in [1, 32]: SEEDBITS and KMERMASK are 32-bit values");

// Kmer bitmask: the lowest KMERLEN bits set.
// KMERLEN == 32 is special-cased to the all-ones constant; for smaller
// lengths the shift is done in 64 bit and the result narrowed explicitly.
constexpr uint32_t KMERMASK = static_cast<uint32_t>(
    KMERLEN == 32 ? 0xffffffff : (static_cast<uint64_t>(1) << KMERLEN) - 1);

// Minimum number of k-mers required to test for match.
// Recommended is 5 for read length 100.
constexpr uint16_t QTHRESH = 5;

// Size of hash table.
// Recommended is 1 << 30.
constexpr uint64_t HTABSIZE = 1ULL << 30;

// Window length for meta CpGs.
// Recommended is 2048.
constexpr unsigned int WINLEN = 2048;

// Number of mismatches we allow initially (can be extended by ADDMIS constant).
// Recommended is 2.
constexpr uint8_t MISCOUNT = 2;

// Number of mismatches we allow additionally for shift-and and alignment.
// Recommended is 4.
constexpr uint8_t ADDMIS = 4;

// Maximum number of times a k-mer is allowed to occur in the whole genome.
// Recommended is 1500.
constexpr uint64_t KMERCUTOFF = 1500;

// Minimum distance for kmercount to prune to best matched kmer.
constexpr int32_t KMERDIST = 10;

// Only hash every SKIPMODth reference k-mer.
constexpr unsigned int SKIPMOD = 2;

// Dummy index for CpGs: the largest value representable in uint32_t.
constexpr uint32_t CPGDUMMY = std::numeric_limits<uint32_t>::max();

// Checks the usefulness of the set parameters.
// Only declared here; the definition is not part of this header.
void sanityChecks();

}
#endif /* CONST_H */