From ee52f030e7fed3fb0cfc5f41c367a898194cf776 Mon Sep 17 00:00:00 2001 From: Paul Wankadia Date: Thu, 16 Nov 2017 20:57:41 +1100 Subject: [PATCH] Allow RE2::Set::Match() to output error information. Change-Id: Id249fbd13ada10432516461bc2be6b7287baf889 Reviewed-on: https://code-review.googlesource.com/19670 Reviewed-by: Paul Wankadia --- re2/set.cc | 20 ++++++++++++++++++-- re2/set.h | 40 ++++++++++++++++++++++++++++------------ re2/testing/set_test.cc | 15 +++++++++++++++ 3 files changed, 61 insertions(+), 14 deletions(-) diff --git a/re2/set.cc b/re2/set.cc index d04a4873a..8f736c49e 100644 --- a/re2/set.cc +++ b/re2/set.cc @@ -104,8 +104,15 @@ bool RE2::Set::Compile() { } bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const { + return Match(text, v, NULL); +} + +bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v, + ErrorInfo* error_info) const { if (!compiled_) { LOG(DFATAL) << "RE2::Set::Match() called before compiling"; + if (error_info != NULL) + error_info->kind = kNotCompiled; return false; } bool dfa_failed = false; @@ -121,17 +128,26 @@ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const { LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", " << "bytemap range " << prog_->bytemap_range() << ", " << "list count " << prog_->list_count(); + if (error_info != NULL) + error_info->kind = kOutOfMemory; return false; } - if (ret == false) + if (ret == false) { + if (error_info != NULL) + error_info->kind = kNoError; return false; + } if (v != NULL) { if (matches->empty()) { - LOG(DFATAL) << "RE2::Set::Match() matched, but matches unknown"; + LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!"; + if (error_info != NULL) + error_info->kind = kInconsistent; return false; } v->assign(matches->begin(), matches->end()); } + if (error_info != NULL) + error_info->kind = kNoError; return true; } diff --git a/re2/set.h b/re2/set.h index f97d8a573..a8c2caa4a 100644 --- a/re2/set.h +++ b/re2/set.h @@ 
-22,29 +22,45 @@ namespace re2 { // be searched for simultaneously. class RE2::Set { public: + enum ErrorKind { + kNoError = 0, + kNotCompiled, // The set is not compiled. + kOutOfMemory, // The DFA ran out of memory. + kInconsistent, // The result is inconsistent. This should never happen. + }; + + struct ErrorInfo { + ErrorKind kind; + }; + Set(const RE2::Options& options, RE2::Anchor anchor); ~Set(); - // Add adds regexp pattern to the set, interpreted using the RE2 options. - // (The RE2 constructor's default options parameter is RE2::UTF8.) - // Add returns the regexp index that will be used to identify - // it in the result of Match, or -1 if the regexp cannot be parsed. + // Adds pattern to the set using the options passed to the constructor. + // Returns the index that will identify the regexp in the output of Match(), + // or -1 if the regexp cannot be parsed. // Indices are assigned in sequential order starting from 0. - // Error returns do not increment the index. - // If an error occurs and error != NULL, *error will hold an error message. + // Errors do not increment the index; if error is not NULL, *error will hold + // the error message from the parser. int Add(const StringPiece& pattern, string* error); - // Compile prepares the Set for matching. - // Add must not be called again after Compile. - // Compile must be called before Match. - // Compile may return false if it runs out of memory. + // Compiles the set in preparation for matching. + // Returns false if the compiler runs out of memory. + // Add() must not be called again after Compile(). + // Compile() must be called before Match(). bool Compile(); - // Match returns true if text matches any of the regexps in the set. - // If so, it fills v (if not NULL) with the indices of the matching regexps. + // Returns true if text matches at least one of the regexps in the set. + // Fills v (if not NULL) with the indices of the matching regexps. // Callers must not expect v to be sorted. 
 bool Match(const StringPiece& text, std::vector<int>* v) const; + // As above, but populates error_info (if not NULL) when none of the regexps + // in the set matched. This can inform callers when DFA execution fails, for + // example, because they might wish to handle that case differently. + bool Match(const StringPiece& text, std::vector<int>* v, + ErrorInfo* error_info) const; + private: typedef std::pair<string, re2::Regexp*> Elem; diff --git a/re2/testing/set_test.cc b/re2/testing/set_test.cc index 54aa106f4..25c4f18fc 100644 --- a/re2/testing/set_test.cc +++ b/re2/testing/set_test.cc @@ -3,6 +3,7 @@ // license that can be found in the LICENSE file. #include <stddef.h> +#include <string> #include <vector> #include "util/test.h" @@ -200,4 +201,18 @@ TEST(Set, Prefix) { CHECK_EQ(v[0], 0); } +TEST(Set, OutOfMemory) { + RE2::Set s(RE2::DefaultOptions, RE2::UNANCHORED); + + string a(10000, 'a'); + CHECK_EQ(s.Add(a, NULL), 0); + CHECK_EQ(s.Compile(), true); + + std::vector<int> v; + RE2::Set::ErrorInfo ei; + CHECK_EQ(s.Match(a, &v, &ei), false); + CHECK_EQ(v.size(), 0); + CHECK_EQ(ei.kind, RE2::Set::kOutOfMemory); +} + } // namespace re2