forked from yangao07/abPOA
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample.c
More file actions
128 lines (113 loc) · 5.01 KB
/
example.c
File metadata and controls
128 lines (113 loc) · 5.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/* example.c libabpoa usage example
To compile:
gcc -g example.c -I ./include -L ./lib -labpoa -lz -o example
or:
gcc -g example.c -I ./include ./lib/libabpoa.a -lz -o example
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "include/abpoa.h"
// AaCcGgTtNn ==> 0,1,2,3,4
unsigned char _nt4_table[256] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5 /*'-'*/, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
};
int main(void) {
int i, j, n_seqs = 10;
char seqs[10][100] = {
"CGTCAATCTATCGAAGCATACGCGGGCAGAGCCGAAGACCTCGGCAATCCA",
"CCACGTCAATCTATCGAAGCATACGCGGCAGCCGAACTCGACCTCGGCAATCAC",
"CGTCAATCTATCGAAGCATACGCGGCAGAGCCCGGAAGACCTCGGCAATCAC",
"CGTCAATGCTAGTCGAAGCAGCTGCGGCAGAGCCGAAGACCTCGGCAATCAC",
"CGTCAATCTATCGAAGCATTCTACGCGGCAGAGCCGACCTCGGCAATCAC",
"CGTCAATCTAGAAGCATACGCGGCAAGAGCCGAAGACCTCGGCCAATCAC",
"CGTCAATCTATCGGTAAAGCATACGCTCTGTAGCCGAAGACCTCGGCAATCAC",
"CGTCAATCTATCTTCAAGCATACGCGGCAGAGCCGAAGACCTCGGCAATC",
"CGTCAATGGATCGAGTACGCGGCAGAGCCGAAGACCTCGGCAATCAC",
"CGTCAATCTAATCGAAGCATACGCGGCAGAGCCGTCTACCTCGGCAATCACGT"
};
// initialize variables
abpoa_t *ab = abpoa_init();
abpoa_para_t *abpt = abpoa_init_para();
// alignment parameters
// abpt->align_mode = 0; // 0:global 1:local, 2:extension
// abpt->match = 2; // match score
// abpt->mismatch = 4; // mismatch penalty
// abpt->gap_mode = ABPOA_CONVEX_GAP; // gap penalty mode
// abpt->gap_open1 = 4; // gap open penalty #1
// abpt->gap_ext1 = 2; // gap extension penalty #1
// abpt->gap_open2 = 24; // gap open penalty #2
// abpt->gap_ext2 = 1; // gap extension penalty #2
// gap_penalty = min{gap_open1 + gap_len * gap_ext1, gap_open2 + gap_len * gap_ext2}
// abpt->bw = 10; // extra band used in adaptive banded DP
// abpt->bf = 0.01;
// output options
abpt->out_msa = 1; // generate Row-Column multiple sequence alignment(RC-MSA), set 0 to disable
abpt->out_cons = 1; // generate consensus sequence, set 0 to disable
abpt->w = 6, abpt->k = 9; abpt->min_w = 10; // minimizer-based seeding and partition
abpt->progressive_poa = 1;
abpoa_post_set_para(abpt);
// collect sequence length, trasform ACGT to 0123
int *seq_lens = (int*)malloc(sizeof(int) * n_seqs);
uint8_t **bseqs = (uint8_t**)malloc(sizeof(uint8_t*) * n_seqs);
for (i = 0; i < n_seqs; ++i) {
seq_lens[i] = strlen(seqs[i]);
bseqs[i] = (uint8_t*)malloc(sizeof(uint8_t) * seq_lens[i]);
for (j = 0; j < seq_lens[i]; ++j)
bseqs[i][j] = _nt4_table[(int)seqs[i][j]];
}
// 1. output to stdout
fprintf(stdout, "=== output to stdout ===\n");
// perform abpoa-msa
abpoa_msa(ab, abpt, n_seqs, NULL, seq_lens, bseqs, stdout, NULL, NULL, NULL, NULL, NULL, NULL);
// 2. variables to store result
uint8_t **cons_seq; int **cons_cov, *cons_l, cons_n=0;
uint8_t **msa_seq; int msa_l=0;
// perform abpoa-msa
ab->abs->n_seq = 0; // To re-use ab, n_seq needs to be set as 0
abpoa_msa(ab, abpt, n_seqs, NULL, seq_lens, bseqs, NULL, &cons_seq, &cons_cov, &cons_l, &cons_n, &msa_seq, &msa_l);
fprintf(stdout, "=== output to variables ===\n");
for (i = 0; i < cons_n; ++i) {
fprintf(stdout, ">Consensus_sequence\n");
for (j = 0; j < cons_l[i]; ++j)
fprintf(stdout, "%c", "ACGTN"[cons_seq[i][j]]);
fprintf(stdout, "\n");
}
fprintf(stdout, ">Multiple_sequence_alignment\n");
for (i = 0; i < n_seqs; ++i) {
for (j = 0; j < msa_l; ++j) {
fprintf(stdout, "%c", "ACGTN-"[msa_seq[i][j]]);
}
fprintf(stdout, "\n");
}
if (cons_n) {
for (i = 0; i < cons_n; ++i) {
free(cons_seq[i]); free(cons_cov[i]);
} free(cons_seq); free(cons_cov); free(cons_l);
}
if (msa_l) {
for (i = 0; i < n_seqs; ++i) free(msa_seq[i]); free(msa_seq);
}
/* generate DOT partial order graph plot */
abpt->out_pog = strdup("example.png"); // dump parital order graph to file
if (abpt->out_pog != NULL) abpoa_dump_pog(ab, abpt);
for (i = 0; i < n_seqs; ++i) free(bseqs[i]); free(bseqs); free(seq_lens);
abpoa_free(ab); abpoa_free_para(abpt);
return 0;
}