diff --git a/re2/compile.cc b/re2/compile.cc index 326ab15f0..b3e5bdca0 100644 --- a/re2/compile.cc +++ b/re2/compile.cc @@ -1186,6 +1186,7 @@ Prog* Compiler::Finish() { prog_->Optimize(); prog_->Flatten(); prog_->ComputeByteMap(); + prog_->ComputeFirstByte(); // Record remaining memory for DFA. if (max_mem_ <= 0) { diff --git a/re2/nfa.cc b/re2/nfa.cc index 77fb5fb18..75ca30654 100644 --- a/re2/nfa.cc +++ b/re2/nfa.cc @@ -629,10 +629,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, return false; } -// Computes whether all successful matches have a common first byte, -// and if so, returns that byte. If not, returns -1. -int Prog::ComputeFirstByte() { - int b = -1; +void Prog::ComputeFirstByte() { SparseSet q(size()); q.insert(start()); for (SparseSet::iterator it = q.begin(); it != q.end(); ++it) { @@ -645,23 +642,27 @@ int Prog::ComputeFirstByte() { case kInstMatch: // The empty string matches: no first byte. - return -1; + first_byte_ = -1; + return; case kInstByteRange: if (!ip->last()) q.insert(id+1); - // Must match only a single byte - if (ip->lo() != ip->hi()) - return -1; - if (ip->foldcase() && 'a' <= ip->lo() && ip->lo() <= 'z') - return -1; + // Must match only a single byte. + if (ip->lo() != ip->hi() || + (ip->foldcase() && 'a' <= ip->lo() && ip->lo() <= 'z')) { + first_byte_ = -1; + return; + } // If we haven't seen any bytes yet, record it; // otherwise must match the one we saw before. - if (b == -1) - b = ip->lo(); - else if (b != ip->lo()) - return -1; + if (first_byte_ == -1) { + first_byte_ = ip->lo(); + } else if (first_byte_ != ip->lo()) { + first_byte_ = -1; + return; + } break; case kInstNop: @@ -687,7 +688,6 @@ int Prog::ComputeFirstByte() { break; } } - return b; } bool diff --git a/re2/prog.cc b/re2/prog.cc index 9e91fafdf..9244319ec 100644 --- a/re2/prog.cc +++ b/re2/prog.cc @@ -184,14 +184,31 @@ std::string Prog::DumpByteMap() { return map; } -int Prog::first_byte() { - std::call_once(first_byte_once_, [](Prog* prog) { - prog->first_byte_ = prog->ComputeFirstByte(); - }, this); - return first_byte_; -} +// Is ip a guaranteed match at end of text, perhaps after some capturing? +static bool IsMatch(Prog* prog, Prog::Inst* ip) { + for (;;) { + switch (ip->opcode()) { + default: + LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode(); + return false; + + case kInstAlt: + case kInstAltMatch: + case kInstByteRange: + case kInstFail: + case kInstEmptyWidth: + return false; -static bool IsMatch(Prog*, Prog::Inst*); + case kInstCapture: + case kInstNop: + ip = prog->inst(ip->out()); + break; + + case kInstMatch: + return true; + } + } +} // Peep-hole optimizer. void Prog::Optimize() { @@ -257,32 +274,6 @@ void Prog::Optimize() { } } -// Is ip a guaranteed match at end of text, perhaps after some capturing? -static bool IsMatch(Prog* prog, Prog::Inst* ip) { - for (;;) { - switch (ip->opcode()) { - default: - LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode(); - return false; - - case kInstAlt: - case kInstAltMatch: - case kInstByteRange: - case kInstFail: - case kInstEmptyWidth: - return false; - - case kInstCapture: - case kInstNop: - ip = prog->inst(ip->out()); - break; - - case kInstMatch: - return true; - } - } -} - uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) { int flags = 0; diff --git a/re2/prog.h b/re2/prog.h index 4ba72c003..4306672e9 100644 --- a/re2/prog.h +++ b/re2/prog.h @@ -215,9 +215,7 @@ class Prog { void set_anchor_end(bool b) { anchor_end_ = b; } int bytemap_range() { return bytemap_range_; } const uint8_t* bytemap() { return bytemap_; } - - // Lazily computed. - int first_byte(); + int first_byte() { return first_byte_; } // Returns string representation of program for debugging. std::string Dump(); @@ -295,9 +293,8 @@ class Prog { // Compute bytemap. void ComputeByteMap(); - // Computes whether all matches must begin with the same first - // byte, and if so, returns that byte. If not, returns -1. - int ComputeFirstByte(); + // Computes whether all matches must begin with the same first byte. + void ComputeFirstByte(); // Run peep-hole optimizer on program. void Optimize(); @@ -416,7 +413,6 @@ class Prog { uint8_t bytemap_[256]; // map from input bytes to byte classes - std::once_flag first_byte_once_; std::once_flag dfa_first_once_; std::once_flag dfa_longest_once_;