diff --git a/re2/dfa.cc b/re2/dfa.cc index f292ff10b..583303ee6 100644 --- a/re2/dfa.cc +++ b/re2/dfa.cc @@ -56,6 +56,10 @@ namespace re2 { // Controls whether the DFA should bail out early if the NFA would be faster. static bool dfa_should_bail_when_slow = true; +void Prog::TESTING_ONLY_set_dfa_should_bail_when_slow(bool b) { + dfa_should_bail_when_slow = b; +} + // Changing this to true compiles in prints that trace execution of the DFA. // Generates a lot of output -- only useful for debugging. static const bool ExtraDebug = false; @@ -1966,10 +1970,6 @@ int Prog::BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb) { return GetDFA(kind)->BuildAllStates(cb); } -void Prog::TEST_dfa_should_bail_when_slow(bool b) { - dfa_should_bail_when_slow = b; -} - // Computes min and max for matching string. // Won't return strings bigger than maxlen. bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) { diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc index 8306f887f..af1129f23 100644 --- a/re2/fuzzing/re2_fuzzer.cc +++ b/re2/fuzzing/re2_fuzzer.cc @@ -12,6 +12,7 @@ #include "re2/prefilter.h" #include "re2/re2.h" +#include "re2/regexp.h" using re2::StringPiece; @@ -50,6 +51,10 @@ void TestOneInput(StringPiece pattern, const RE2::Options& options, if (backslash_p > 1) return; + // The default is 1000. Even 100 turned out to be too generous + // for fuzzing, empirically speaking, so let's try 10 instead. + re2::Regexp::FUZZING_ONLY_set_maximum_repeat_count(10); + RE2 re(pattern, options); if (!re.ok()) return; diff --git a/re2/parse.cc b/re2/parse.cc index 3bba6137f..87ff2ca95 100644 --- a/re2/parse.cc +++ b/re2/parse.cc @@ -44,12 +44,12 @@ namespace re2 { -// Reduce the maximum repeat count by an order of magnitude when fuzzing. -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -static const int kMaxRepeat = 100; -#else -static const int kMaxRepeat = 1000; -#endif +// Controls the maximum repeat count permitted by the parser. 
+static int maximum_repeat_count = 1000; + +void Regexp::FUZZING_ONLY_set_maximum_repeat_count(int i) { + maximum_repeat_count = i; +} // Regular expression parse state. // The list of parsed regexps so far is maintained as a vector of @@ -568,7 +568,9 @@ int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) { bool Regexp::ParseState::PushRepetition(int min, int max, const StringPiece& s, bool nongreedy) { - if ((max != -1 && max < min) || min > kMaxRepeat || max > kMaxRepeat) { + if ((max != -1 && max < min) || + min > maximum_repeat_count || + max > maximum_repeat_count) { status_->set_code(kRegexpRepeatSize); status_->set_error_arg(s); return false; @@ -591,7 +593,7 @@ bool Regexp::ParseState::PushRepetition(int min, int max, stacktop_ = re; if (min >= 2 || max >= 2) { RepetitionWalker w; - if (w.Walk(stacktop_, kMaxRepeat) == 0) { + if (w.Walk(stacktop_, maximum_repeat_count) == 0) { status_->set_code(kRegexpRepeatSize); status_->set_error_arg(s); return false; diff --git a/re2/prog.h b/re2/prog.h index f12564131..8ca98807a 100644 --- a/re2/prog.h +++ b/re2/prog.h @@ -310,10 +310,6 @@ class Prog { // FOR TESTING OR EXPERIMENTAL PURPOSES ONLY. int BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb); - // Controls whether the DFA should bail out early if the NFA would be faster. - // FOR TESTING ONLY. - static void TEST_dfa_should_bail_when_slow(bool b); - // Compute bytemap. void ComputeByteMap(); @@ -402,6 +398,10 @@ class Prog { // Computes hints for ByteRange instructions in [begin, end). void ComputeHints(std::vector<Inst>* flat, int begin, int end); + // Controls whether the DFA should bail out early if the NFA would be faster. + // FOR TESTING ONLY. + static void TESTING_ONLY_set_dfa_should_bail_when_slow(bool b); + private: friend class Compiler; diff --git a/re2/regexp.h b/re2/regexp.h index 61882b579..2f406423b 100644 --- a/re2/regexp.h +++ b/re2/regexp.h @@ -449,6 +449,10 @@ class Regexp { // regardless of the return value. 
bool RequiredPrefixForAccel(std::string* prefix, bool* foldcase); + // Controls the maximum repeat count permitted by the parser. + // FOR FUZZING ONLY. + static void FUZZING_ONLY_set_maximum_repeat_count(int i); + private: // Constructor allocates vectors as appropriate for operator. explicit Regexp(RegexpOp op, ParseFlags parse_flags); diff --git a/re2/testing/dfa_test.cc b/re2/testing/dfa_test.cc index 9e15a41ed..842daafff 100644 --- a/re2/testing/dfa_test.cc +++ b/re2/testing/dfa_test.cc @@ -143,7 +143,7 @@ TEST(SingleThreaded, SearchDFA) { // NFA implementation instead. (The DFA loses its speed advantage // if it can't get a good cache hit rate.) // Tell the DFA to trudge along instead. - Prog::TEST_dfa_should_bail_when_slow(false); + Prog::TESTING_ONLY_set_dfa_should_bail_when_slow(false); state_cache_resets = 0; search_failures = 0; @@ -194,7 +194,7 @@ TEST(SingleThreaded, SearchDFA) { re->Decref(); // Reset to original behaviour. - Prog::TEST_dfa_should_bail_when_slow(true); + Prog::TESTING_ONLY_set_dfa_should_bail_when_slow(true); ASSERT_GT(state_cache_resets, 0); ASSERT_EQ(search_failures, 0); } @@ -218,7 +218,7 @@ static void DoSearch(Prog* prog, const StringPiece& match, } TEST(Multithreaded, SearchDFA) { - Prog::TEST_dfa_should_bail_when_slow(false); + Prog::TESTING_ONLY_set_dfa_should_bail_when_slow(false); state_cache_resets = 0; search_failures = 0; @@ -259,7 +259,7 @@ TEST(Multithreaded, SearchDFA) { re->Decref(); // Reset to original behaviour. - Prog::TEST_dfa_should_bail_when_slow(true); + Prog::TESTING_ONLY_set_dfa_should_bail_when_slow(true); ASSERT_GT(state_cache_resets, 0); ASSERT_EQ(search_failures, 0); }