-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsuffix_automaton.cpp
137 lines (116 loc) · 4.22 KB
/
suffix_automaton.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#include "test_utils.hpp"
#include "linear/matrix.hpp"
#include "strings/suffix_automaton.hpp"
#include "strings/sparse_suffix_automaton.hpp"
#include "strings/vector_suffix_automaton.hpp"
void speed_test_build() {
map<pair<string, int>, string> table;
for (int scale = 10; scale <= 18; scale++) {
START_ACC3(gsa, ssa, vsa);
LOOP_FOR_DURATION_OR_RUNS_TRACKED (2s, now, 10'000, runs) {
print_time(now, 2s, "speed test build {}", 1 << scale);
string s = rand_string(1 << scale, 'a', 'z');
START(gsa);
suffix_automaton gsa(s);
ADD_TIME(gsa);
START(ssa);
sparse_suffix_automaton ssa(s);
ADD_TIME(ssa);
START(vsa);
vector_suffix_automaton vsa(s);
ADD_TIME(vsa);
}
table[{"gsa", 1 << scale}] = FORMAT_EACH(gsa, runs);
table[{"ssa", 1 << scale}] = FORMAT_EACH(ssa, runs);
table[{"vsa", 1 << scale}] = FORMAT_EACH(vsa, runs);
}
print_time_table(table, "Suffix automaton build");
}
void speed_test_contains() {
map<pair<string, int>, string> table;
for (int scale = 10; scale <= 18; scale++) {
int len = 1 << scale;
string s = rand_string(len, 'a', 'z');
START_ACC3(gsa, ssa, vsa);
suffix_automaton gsa(s);
sparse_suffix_automaton ssa(s);
vector_suffix_automaton vsa(s);
LOOP_FOR_DURATION_OR_RUNS_TRACKED (2s, now, 250'000, runs) {
print_time(now, 2s, "speed test contains {} {}", 1 << scale, len);
string pat = rand_string(len / 64, 'a', 'z');
START(gsa);
auto c0 = gsa.count_matches(pat);
ADD_TIME(gsa);
START(ssa);
auto c1 = ssa.count_matches(pat);
ADD_TIME(ssa);
START(vsa);
auto c2 = vsa.count_matches(pat);
ADD_TIME(vsa);
assert(c0 == c1 && c0 == c2);
}
table[{"gsa", 1 << scale}] = FORMAT_EACH(gsa, runs);
table[{"ssa", 1 << scale}] = FORMAT_EACH(ssa, runs);
table[{"vsa", 1 << scale}] = FORMAT_EACH(vsa, runs);
}
print_time_table(table, "Suffix automaton count matches");
}
template <typename SA = suffix_automaton<>>
void stress_test_suffix_automaton() {
int errors = 0;
for (int i = 0; i < 1000; i++) {
string s = rand_string(10000, 'a', 'z');
SA sa(s);
for (int j = 0; j < 200; j++) {
errors += !sa.contains(s.substr(intd(0, 2000)(mt), intd(100, 500)(mt)));
}
}
for (int n = 1; n < 20; n++) {
string s(n, 'a');
SA sa(s);
int got = sa.count_distinct_substrings();
errors += got != n + 1;
}
for (int n = 0; n < 30; n++) {
for (int m = 1; m < 30; m++) {
string s = string(n, 'a') + string(m, 'b') + string(n, 'a');
SA sa(s);
int got = sa.count_distinct_substrings();
int expected = n * n + 2 * n * m + m + n + 1;
errors += got != expected;
}
}
for (int n = 1; n < 25; n++) {
for (int m = 1; m < 25; m++) {
for (int k = 1; k < 25; k++) {
string s = string(n, 'a') + string(m, 'b') + string(n, 'a') +
string(k, 'c') + string(n, 'a');
SA sa(s);
int got = sa.count_distinct_substrings();
int expected = 3 * n * (n + k + m) + n + (m + 1) * (k + 1);
errors += got != expected;
got = sa.count_matches("aaa");
expected = 3 * max(n - 2, 0);
errors += got != expected;
}
}
}
for (int n = 1; n < 20; n++) {
string s(n, '\0');
iota(begin(s), end(s), 'a');
SA sa(s);
int got = sa.count_distinct_substrings();
errors += got != 1 + n * (n + 1) / 2;
}
if (errors > 0) {
println("ERRORS: {}", errors);
}
}
int main() {
RUN_SHORT(stress_test_suffix_automaton<suffix_automaton<>>());
RUN_SHORT(stress_test_suffix_automaton<sparse_suffix_automaton<>>());
RUN_SHORT(stress_test_suffix_automaton<vector_suffix_automaton<>>());
RUN_BLOCK(speed_test_build());
RUN_BLOCK(speed_test_contains());
return 0;
}