Skip to content

Commit

Permalink
Make the NFA execution engine use Prog::EmptyFlags().
Browse files: browse the repository at this point in the history
Change-Id: Iaab77132da28aaf1b43e45b1c72d2597397462c3
Reviewed-on: https://code-review.googlesource.com/c/35770
Reviewed-by: Paul Wankadia <[email protected]>
  • Loading branch information
junyer committed Nov 23, 2018
1 parent 2206f4f commit e860767
Showing 1 changed file with 16 additions and 50 deletions.
66 changes: 16 additions & 50 deletions re2/nfa.cc
Original file line number Diff line number Diff line change
@@ -95,20 +95,20 @@ class NFA {

// Follows all empty arrows from id0 and enqueues all the states reached.
// Enqueues only the ByteRange instructions that match byte c.
// The bits in flag (Bol, Eol, etc.) specify whether ^, $ and \b match.
// context is used (with p) for evaluating empty-width specials.
// p is the current input position, and t0 is the current thread.
void AddToThreadq(Threadq* q, int id0, int c, int flag,
void AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
const char* p, Thread* t0);

// Run runq on byte c, appending new states to nextq.
// Updates matched_ and match_ as new, better matches are found.
// context is used (with p) for evaluating empty-width specials.
// p is the position of byte c in the input string for AddToThreadq;
// p-1 will be used when processing Match instructions.
// flag is the bitwise OR of Bol, Eol, etc., specifying whether
// ^, $ and \b match the current input position (after c).
// Frees all the threads on runq.
// If there is a shortcut to the end, returns that shortcut.
inline int Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p);
int Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
const char* p);

// Returns text version of capture information, for debugging.
string FormatCapture(const char** capture);
@@ -204,9 +204,9 @@ void NFA::CopyCapture(const char** dst, const char** src) {

// Follows all empty arrows from id0 and enqueues all the states reached.
// Enqueues only the ByteRange instructions that match byte c.
// The bits in flag (Bol, Eol, etc.) specify whether ^, $ and \b match.
// context is used (with p) for evaluating empty-width specials.
// p is the current input position, and t0 is the current thread.
void NFA::AddToThreadq(Threadq* q, int id0, int c, int flag,
void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
const char* p, Thread* t0) {
if (id0 == 0)
return;
@@ -318,7 +318,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, int flag,
stk[nstk++] = AddState(id+1);

// Continue on if we have all the right flag bits.
if (ip->empty() & ~flag)
if (ip->empty() & ~Prog::EmptyFlags(context, p))
break;
a = AddState(ip->out());
goto Loop;
@@ -328,13 +328,13 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, int flag,

// Run runq on byte c, appending new states to nextq.
// Updates matched_ and match_ as new, better matches are found.
// context is used (with p) for evaluating empty-width specials.
// p is the position of byte c in the input string for AddToThreadq;
// p-1 will be used when processing Match instructions.
// flag is the bitwise OR of Bol, Eol, etc., specifying whether
// ^, $ and \b match the current input position (after c).
// Frees all the threads on runq.
// If there is a shortcut to the end, returns that shortcut.
int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) {
int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
const char* p) {
nextq->clear();

for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
@@ -360,7 +360,7 @@ int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) {
break;

case kInstByteRange:
AddToThreadq(nextq, ip->out(), c, flag, p, t);
AddToThreadq(nextq, ip->out(), c, context, p, t);
break;

case kInstAltMatch:
@@ -500,38 +500,9 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
runq->clear();
nextq->clear();
memset(&match_[0], 0, ncapture_*sizeof match_[0]);
int wasword = 0;

if (text.begin() > context.begin())
wasword = Prog::IsWordChar(text.begin()[-1] & 0xFF);

// Loop over the text, stepping the machine.
for (const char* p = text.begin();; p++) {
// Check for empty-width specials.
int flag = 0;

// ^ and \A
if (p == context.begin())
flag |= kEmptyBeginText | kEmptyBeginLine;
else if (p <= context.end() && p[-1] == '\n')
flag |= kEmptyBeginLine;

// $ and \z
if (p == context.end())
flag |= kEmptyEndText | kEmptyEndLine;
else if (p < context.end() && p[0] == '\n')
flag |= kEmptyEndLine;

// \b and \B
int isword = 0;
if (p < context.end())
isword = Prog::IsWordChar(p[0] & 0xFF);

if (isword != wasword)
flag |= kEmptyWordBoundary;
else
flag |= kEmptyNonWordBoundary;

if (ExtraDebug) {
int c = 0;
if (p == context.begin())
@@ -541,7 +512,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
else if (p < text.end())
c = p[0] & 0xFF;

fprintf(stderr, "%c[%#x/%d/%d]:", c, flag, isword, wasword);
fprintf(stderr, "%c:", c);
for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
Thread* t = i->second;
if (t == NULL)
@@ -552,7 +523,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
}

// This is a no-op the first time around the loop because runq is empty.
int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, flag, p);
int id = Step(runq, nextq, p < text.end() ? p[0] & 0xFF : -1, context, p);
DCHECK_EQ(runq->size(), 0);
using std::swap;
swap(nextq, runq);
@@ -604,17 +575,14 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p));
if (p == NULL) {
p = text.end();
isword = 0;
} else {
isword = Prog::IsWordChar(p[0] & 0xFF);
}
flag = Prog::EmptyFlags(context, p);
}

Thread* t = AllocThread();
CopyCapture(t->capture, match_);
t->capture[0] = p;
AddToThreadq(runq, start_, p < text.end() ? p[0] & 0xFF : -1, flag, p, t);
AddToThreadq(runq, start_, p < text.end() ? p[0] & 0xFF : -1, context, p,
t);
Decref(t);
}

@@ -624,8 +592,6 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
fprintf(stderr, "dead\n");
break;
}

wasword = isword;
}

for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i)
Expand Down

0 comments on commit e860767

Please sign in to comment.