Skip to content

Commit

Permalink
Allow RE2::Set::Match() to output error information.
Browse files Browse the repository at this point in the history
Change-Id: Id249fbd13ada10432516461bc2be6b7287baf889
Reviewed-on: https://code-review.googlesource.com/19670
Reviewed-by: Paul Wankadia <[email protected]>
  • Loading branch information
junyer committed Nov 16, 2017
1 parent 7fc014d commit ee52f03
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 14 deletions.
20 changes: 18 additions & 2 deletions re2/set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,15 @@ bool RE2::Set::Compile() {
}

bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
return Match(text, v, NULL);
}

bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
ErrorInfo* error_info) const {
if (!compiled_) {
LOG(DFATAL) << "RE2::Set::Match() called before compiling";
if (error_info != NULL)
error_info->kind = kNotCompiled;
return false;
}
bool dfa_failed = false;
Expand All @@ -121,17 +128,26 @@ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
<< "bytemap range " << prog_->bytemap_range() << ", "
<< "list count " << prog_->list_count();
if (error_info != NULL)
error_info->kind = kOutOfMemory;
return false;
}
if (ret == false)
if (ret == false) {
if (error_info != NULL)
error_info->kind = kNoError;
return false;
}
if (v != NULL) {
if (matches->empty()) {
LOG(DFATAL) << "RE2::Set::Match() matched, but matches unknown";
LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
if (error_info != NULL)
error_info->kind = kInconsistent;
return false;
}
v->assign(matches->begin(), matches->end());
}
if (error_info != NULL)
error_info->kind = kNoError;
return true;
}

Expand Down
40 changes: 28 additions & 12 deletions re2/set.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,29 +22,45 @@ namespace re2 {
// be searched for simultaneously.
class RE2::Set {
public:
// Classifies the outcome that Match() records in ErrorInfo::kind when the
// caller supplies a non-NULL ErrorInfo.
enum ErrorKind {
kNoError = 0, // No error: Match() matched, or ran cleanly and found no match.
kNotCompiled, // The set is not compiled.
kOutOfMemory, // The DFA ran out of memory.
kInconsistent, // The result is inconsistent. This should never happen.
};

// Out-parameter type for the three-argument Match() overload.
// Note: there is no default initializer, so a default-constructed ErrorInfo
// holds an indeterminate kind until Match() writes to it.
struct ErrorInfo {
ErrorKind kind; // Outcome category written by Match().
};

Set(const RE2::Options& options, RE2::Anchor anchor);
~Set();

// Add adds regexp pattern to the set, interpreted using the RE2 options.
// (The RE2 constructor's default options parameter is RE2::UTF8.)
// Add returns the regexp index that will be used to identify
// it in the result of Match, or -1 if the regexp cannot be parsed.
// Adds pattern to the set using the options passed to the constructor.
// Returns the index that will identify the regexp in the output of Match(),
// or -1 if the regexp cannot be parsed.
// Indices are assigned in sequential order starting from 0.
// Error returns do not increment the index.
// If an error occurs and error != NULL, *error will hold an error message.
// Errors do not increment the index; if error is not NULL, *error will hold
// the error message from the parser.
int Add(const StringPiece& pattern, string* error);

// Compile prepares the Set for matching.
// Add must not be called again after Compile.
// Compile must be called before Match.
// Compile may return false if it runs out of memory.
// Compiles the set in preparation for matching.
// Returns false if the compiler runs out of memory.
// Add() must not be called again after Compile().
// Compile() must be called before Match().
bool Compile();

// Match returns true if text matches any of the regexps in the set.
// If so, it fills v (if not NULL) with the indices of the matching regexps.
// Returns true if text matches at least one of the regexps in the set.
// Fills v (if not NULL) with the indices of the matching regexps.
// Callers must not expect v to be sorted.
bool Match(const StringPiece& text, std::vector<int>* v) const;

// As above, but populates error_info (if not NULL) when none of the regexps
// in the set matched. This can inform callers when DFA execution fails, for
// example, because they might wish to handle that case differently.
bool Match(const StringPiece& text, std::vector<int>* v,
ErrorInfo* error_info) const;

private:
typedef std::pair<string, re2::Regexp*> Elem;

Expand Down
15 changes: 15 additions & 0 deletions re2/testing/set_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// license that can be found in the LICENSE file.

#include <stddef.h>
#include <string>
#include <vector>

#include "util/test.h"
Expand Down Expand Up @@ -200,4 +201,18 @@ TEST(Set, Prefix) {
CHECK_EQ(v[0], 0);
}

// Checks that Match() reports failure through ErrorInfo: a set holding one
// 10000-character literal is expected to compile, then fail to match that
// same text with kind == kOutOfMemory and leave the output vector empty.
TEST(Set, OutOfMemory) {
  RE2::Set set(RE2::DefaultOptions, RE2::UNANCHORED);

  // The same long string serves as both the pattern and the text to search.
  const string long_literal(10000, 'a');
  CHECK_EQ(set.Add(long_literal, NULL), 0);
  CHECK_EQ(set.Compile(), true);

  RE2::Set::ErrorInfo error_info;
  std::vector<int> matched_indices;
  CHECK_EQ(set.Match(long_literal, &matched_indices, &error_info), false);
  CHECK_EQ(matched_indices.size(), 0);
  CHECK_EQ(error_info.kind, RE2::Set::kOutOfMemory);
}

} // namespace re2

0 comments on commit ee52f03

Please sign in to comment.