diff --git a/re2/nfa.cc b/re2/nfa.cc index 1a3e0afe2..b2d5c0a63 100644 --- a/re2/nfa.cc +++ b/re2/nfa.cc @@ -95,20 +95,20 @@ class NFA { // Follows all empty arrows from id0 and enqueues all the states reached. // Enqueues only the ByteRange instructions that match byte c. - // The bits in flag (Bol, Eol, etc.) specify whether ^, $ and \b match. + // context is used (with p) for evaluating empty-width specials. // p is the current input position, and t0 is the current thread. - void AddToThreadq(Threadq* q, int id0, int c, int flag, + void AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context, const char* p, Thread* t0); // Run runq on byte c, appending new states to nextq. // Updates matched_ and match_ as new, better matches are found. + // context is used (with p) for evaluating empty-width specials. // p is the position of byte c in the input string for AddToThreadq; // p-1 will be used when processing Match instructions. - // flag is the bitwise OR of Bol, Eol, etc., specifying whether - // ^, $ and \b match the current input position (after c). // Frees all the threads on runq. // If there is a shortcut to the end, returns that shortcut. - inline int Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p); + int Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, + const char* p); // Returns text version of capture information, for debugging. string FormatCapture(const char** capture); @@ -204,9 +204,9 @@ void NFA::CopyCapture(const char** dst, const char** src) { // Follows all empty arrows from id0 and enqueues all the states reached. // Enqueues only the ByteRange instructions that match byte c. -// The bits in flag (Bol, Eol, etc.) specify whether ^, $ and \b match. +// context is used (with p) for evaluating empty-width specials. // p is the current input position, and t0 is the current thread. 
-void NFA::AddToThreadq(Threadq* q, int id0, int c, int flag, +void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context, const char* p, Thread* t0) { if (id0 == 0) return; @@ -318,7 +318,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, int flag, stk[nstk++] = AddState(id+1); // Continue on if we have all the right flag bits. - if (ip->empty() & ~flag) + if (ip->empty() & ~Prog::EmptyFlags(context, p)) break; a = AddState(ip->out()); goto Loop; @@ -328,13 +328,13 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, int flag, // Run runq on byte c, appending new states to nextq. // Updates matched_ and match_ as new, better matches are found. +// context is used (with p) for evaluating empty-width specials. // p is the position of byte c in the input string for AddToThreadq; // p-1 will be used when processing Match instructions. -// flag is the bitwise OR of Bol, Eol, etc., specifying whether -// ^, $ and \b match the current input position (after c). // Frees all the threads on runq. // If there is a shortcut to the end, returns that shortcut. -int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) { +int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context, + const char* p) { nextq->clear(); for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { @@ -360,7 +360,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) { break; case kInstByteRange: - AddToThreadq(nextq, ip->out(), c, flag, p, t); + AddToThreadq(nextq, ip->out(), c, context, p, t); break; case kInstAltMatch: @@ -500,38 +500,9 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, runq->clear(); nextq->clear(); memset(&match_[0], 0, ncapture_*sizeof match_[0]); - int wasword = 0; - - if (text.begin() > context.begin()) - wasword = Prog::IsWordChar(text.begin()[-1] & 0xFF); // Loop over the text, stepping the machine. 
for (const char* p = text.begin();; p++) { - // Check for empty-width specials. - int flag = 0; - - // ^ and \A - if (p == context.begin()) - flag |= kEmptyBeginText | kEmptyBeginLine; - else if (p <= context.end() && p[-1] == '\n') - flag |= kEmptyBeginLine; - - // $ and \z - if (p == context.end()) - flag |= kEmptyEndText | kEmptyEndLine; - else if (p < context.end() && p[0] == '\n') - flag |= kEmptyEndLine; - - // \b and \B - int isword = 0; - if (p < context.end()) - isword = Prog::IsWordChar(p[0] & 0xFF); - - if (isword != wasword) - flag |= kEmptyWordBoundary; - else - flag |= kEmptyNonWordBoundary; - if (ExtraDebug) { int c = 0; if (p == context.begin()) @@ -541,7 +512,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, else if (p < text.end()) c = p[0] & 0xFF; - fprintf(stderr, "%c[%#x/%d/%d]:", c, flag, isword, wasword); + fprintf(stderr, "%c:", c); for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { Thread* t = i->second; if (t == NULL) @@ -552,7 +523,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, } // This is a no-op the first time around the loop because runq is empty. - int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, flag, p); + int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, context, p); DCHECK_EQ(runq->size(), 0); using std::swap; swap(nextq, runq); @@ -604,17 +575,14 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p)); if (p == NULL) { p = text.end(); - isword = 0; - } else { - isword = Prog::IsWordChar(p[0] & 0xFF); } - flag = Prog::EmptyFlags(context, p); } Thread* t = AllocThread(); CopyCapture(t->capture, match_); t->capture[0] = p; - AddToThreadq(runq, start_, p < text.end() ? p[0] & 0xFF : -1, flag, p, t); + AddToThreadq(runq, start_, p < text.end() ? 
p[0] & 0xFF : -1, context, p, + t); Decref(t); } @@ -624,8 +592,6 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context, fprintf(stderr, "dead\n"); break; } - - wasword = isword; } for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i)