-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathfasta.hpp
104 lines (83 loc) · 2.66 KB
/
fasta.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#ifndef __FASTA_HPP__
#define __FASTA_HPP__
#include <string>
#include <vector>
using namespace std;
// Size limits and partitioning constants shared by the genome loaders.
// NOTE(review): only the values are visible in this header; the per-constant
// descriptions below are inferred from the names and must be confirmed against
// the code that uses them.
const long MAX_GENOME = 1000000000; // presumably an upper bound tied to genome data (size or count) — TODO confirm
const long MAX_THREADCONTAINER = 10000000; // presumably a capacity limit for a per-thread container — TODO confirm
const long MAX_PARTNUMBER = 500; // presumably the maximum number of genomes per part — TODO confirm
const long MAX_PARTNUMBERFORPERFECT = 40000; // presumably the per-part limit used in a "perfect" mode — TODO confirm
const long PART_BASE = 1000000; // 1MB (i.e. 1,000,000; unit as annotated by the original author)
// Clustering information: a single hit record. Instances are stored in the
// `clusters` and `clusterunits` vectors of GenomeClustInfo.
struct hit
{
long id; // NOTE(review): presumably the index of the genome this hit refers to — confirm
double identity; // identity value of the hit; scale (fraction vs. percent) is not visible here — confirm
char strand; // NOTE(review): presumably the orientation of the match (e.g. '+'/'-') — confirm encoding
};
// One genome record used for single-genome search: its name, its sequence
// content, and bookkeeping fields. All members are left uninitialised by
// default construction except the two strings (which start empty).
struct Genome
{
long size; // genome length (NOTE(review): presumably the length of `cont` — confirm)
long id; // index of this genome
string descript; // genome name
string cont; // genome content (the sequence text)
};
// Per-genome entry of the "total genomes" container: metadata plus clustering
// results. Unlike Genome, it carries no sequence content.
struct GenomeClustInfo
{
long size; // genome length
long id; // index of this genome
bool rep; // whether this genome is the representative of its cluster
double mumindex; // another index (NOTE(review): presumably a MUM-based similarity value — confirm)
string descript; // genome name
vector<hit> clusters, clusterunits; // hit records for this genome; NOTE(review): the difference between the two lists is not visible here — confirm
};
// Filter a sequence. The argument is taken by non-const reference and nothing
// is returned, so any result is written back into `seq_rc`.
// NOTE(review): the name suggests it handles 'N' characters in the sequence —
// confirm against the definition.
void filter_n(string &seq_rc);
// Reverse complement sequence.
// `seq_rc` is taken by non-const reference and nothing is returned, so the
// result is delivered through `seq_rc` itself.
// NOTE(review): `nucleotides_only` presumably selects how non-ACGT characters
// are treated — confirm against the definition.
void reverse_complement(string &seq_rc, bool nucleotides_only);
// Trim sequence.
// All three parameters are non-const references, so each may be updated by the
// call. NOTE(review): `start`/`end` presumably receive (or bound) the trimmed
// range within `line`; whether `end` is inclusive is not visible here — confirm.
void trim(string &line, long &start, long &end);
// Load a FASTA file.
// `filename` is passed by value; `S`, `descr` and `startpos` are non-const
// references and act as outputs (the function returns void).
// NOTE(review): presumably `S` receives the sequence text, `descr` the record
// descriptions and `startpos` the start offset of each record in `S` — confirm.
void load_fasta(string filename,
string &S,
vector<string> &descr,
vector<long> &startpos);
// Load a part of the genomes from a file.
// `partgenomes` and `totalgenomes` are non-const references and may be
// modified; `previous` and `number` are passed by value.
// NOTE(review): presumably `previous` is the count of genomes already consumed
// and `number` how many to load into `partgenomes` — confirm.
void load_part_genomes(string filename,
vector<Genome> &partgenomes,
vector<GenomeClustInfo> &totalgenomes,
long previous,
long number);
// Variant of load_part_genomes with extra control parameters.
// Differences visible in the signature: `number` is an `int` taken by
// reference (so it can be written back to the caller) rather than a `long` by
// value, and `ifend` is a bool out-parameter.
// NOTE(review): presumably `totalsize` caps the amount loaded, `ifend` reports
// that the end of input was reached, and `memiden` is a mode/identity flag —
// none of this is visible here; confirm against the definition.
void load_part_genomes_internal(string filename,
vector<Genome> &partgenomes,
vector<GenomeClustInfo> &totalgenomes,
long previous,
int &number,
long totalsize,
bool &ifend,
int memiden);
// Load genomes from `filename` into `partgenomes` (non-const reference, used
// as the output). NOTE(review): the name suggests every genome in the file is
// loaded at once — confirm.
void load_part_genomes_all(string filename, vector<Genome> &partgenomes);
// Load part genomes into memory between parts.
// Same parameter list as load_part_genomes except that the source is an
// in-memory vector (`allpartgenomes`) instead of a file name.
// NOTE(review): `allpartgenomes` is a non-const reference, so the declaration
// alone does not show whether it is modified — confirm.
void load_part_genomes_mem(vector<Genome> &allpartgenomes,
vector<Genome> &partgenomes,
vector<GenomeClustInfo> &totalgenomes,
long previous,
long number);
// Load part genomes into memory.
// Same parameter list as load_part_genomes_internal except that the source is
// an in-memory vector (`allpartgenomes`) instead of a file name. `number` and
// `ifend` are references and can be written back to the caller.
// NOTE(review): the meanings of `totalsize` and `memiden` are not visible
// here — confirm against the definition.
void load_part_genomes_internal_mem(vector<Genome> &allpartgenomes,
vector<Genome> &partgenomes,
vector<GenomeClustInfo> &totalgenomes,
long previous,
int &number,
long totalsize,
bool &ifend,
int memiden);
// NOTE(review): purpose is not visible from the declaration; the name suggests
// a diagnostic/debug helper over a loaded part — confirm. Takes `partgenomes`
// by non-const reference and returns nothing.
void test_part(vector<Genome> &partgenomes);
// Make block for Suffix Array construction.
// All parameters are non-const references. Note that `descr` here is a
// vector<long>, not the vector<string> used by load_fasta.
// NOTE(review): presumably the genomes in `partgenomes` are combined into the
// single string `S`, with `descr` holding genome indices and `startpos` the
// start offset of each genome in `S` — confirm against the definition.
void make_block_ref(vector<Genome> &partgenomes,
string &S,
vector<GenomeClustInfo> &totalgenomes,
vector<long> &descr,
vector<long> &startpos);
// Load total genomes.
// Reads from `filename` (passed by value) and delivers the result through
// `totalgenomes`, a non-const reference to a vector of GenomeClustInfo.
// NOTE(review): whether existing entries in `totalgenomes` are kept or
// replaced is not visible here — confirm.
void load_total_genomes(string filename, vector<GenomeClustInfo> &totalgenomes);
#endif // __FASTA_HPP__