Skip to content

Commit

Permalink
Fuzz RE2::Set and FilteredRE2.
Browse files Browse the repository at this point in the history
Change-Id: Ief0a26ce80211e444580c0a03528d678081e4bef
Reviewed-on: https://code-review.googlesource.com/c/re2/+/61050
Reviewed-by: Alex Chernyakhovsky <[email protected]>
Reviewed-by: Paul Wankadia <[email protected]>
  • Loading branch information
junyer committed Feb 20, 2023
1 parent 9049cd2 commit 3a8436a
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 6 deletions.
37 changes: 35 additions & 2 deletions re2/fuzzing/re2_fuzzer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
#include <string>
#include <vector>

#include "re2/filtered_re2.h"
#include "re2/re2.h"
#include "re2/regexp.h"
#include "re2/set.h"
#include "re2/walker-inl.h"

using re2::StringPiece;
Expand Down Expand Up @@ -96,7 +98,7 @@ class SubstringWalker : public re2::Regexp::Walker<int> {
};

void TestOneInput(StringPiece pattern, const RE2::Options& options,
StringPiece text) {
RE2::Anchor anchor, StringPiece text) {
// Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
// Otherwise, we will waste time on inputs that have long runs of various
// character classes. The fuzzer has shown itself to be easily capable of
Expand Down Expand Up @@ -209,6 +211,29 @@ void TestOneInput(StringPiece pattern, const RE2::Options& options,
dummy += re.NamedCapturingGroups().size();
dummy += re.CapturingGroupNames().size();
dummy += RE2::QuoteMeta(pattern).size();

RE2::Set set(options, anchor);
int index = set.Add(pattern, /*error=*/NULL); // -1 on error
if (index != -1 && set.Compile()) {
std::vector<int> matches;
set.Match(text, &matches);
}

re2::FilteredRE2 filter;
index = -1; // not clobbered on error
filter.Add(pattern, options, &index);
if (index != -1) {
std::vector<std::string> atoms;
filter.Compile(&atoms);
// Pretend that all atoms match, which
// triggers the AND-OR tree maximally.
std::vector<int> matched_atoms;
matched_atoms.reserve(atoms.size());
for (size_t i = 0; i < atoms.size(); ++i)
matched_atoms.push_back(static_cast<int>(i));
std::vector<int> matches;
filter.AllMatches(text, matched_atoms, &matches);
}
}

// Entry point for libFuzzer.
Expand Down Expand Up @@ -242,9 +267,17 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
options.set_word_boundary(fdp.ConsumeBool());
options.set_one_line(fdp.ConsumeBool());

// ConsumeEnum<RE2::Anchor>() would require RE2::Anchor to specify
// kMaxValue, so just use PickValueInArray<RE2::Anchor>() instead.
RE2::Anchor anchor = fdp.PickValueInArray<RE2::Anchor>({
RE2::UNANCHORED,
RE2::ANCHOR_START,
RE2::ANCHOR_BOTH,
});

std::string pattern = fdp.ConsumeRandomLengthString(999);
std::string text = fdp.ConsumeRandomLengthString(999);

TestOneInput(pattern, options, text);
TestOneInput(pattern, options, anchor, text);
return 0;
}
8 changes: 4 additions & 4 deletions util/fuzz.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);

// Standalone driver for the fuzz target: repeatedly fills a byte buffer from
// random() and passes it to LLVMFuzzerTestOneInput().
// NOTE(review): this listing is a rendered diff (4 additions, 4 deletions),
// so each changed statement appears twice below -- once with 32 and once
// with 4096. Only one form of each pair exists in the real file; presumably
// the 32 lines are the removed ones and the 4096 lines the added ones --
// confirm against the repository.
int main(int argc, char** argv) {
uint8_t data[32];
for (int i = 0; i < 32; i++) {
for (int j = 0; j < 32; j++) {
uint8_t data[4096];
for (int i = 0; i < 4096; i++) {
for (int j = 0; j < 4096; j++) {
// Keep only the low 8 bits of random() so the value fits in a uint8_t.
data[j] = random() & 0xFF;
}
// The length argument matches the size of the buffer declared above.
LLVMFuzzerTestOneInput(data, 32);
LLVMFuzzerTestOneInput(data, 4096);
}
return 0;
}

0 comments on commit 3a8436a

Please sign in to comment.