Skip to content

Commit

Permalink
Compute first_byte_ eagerly.
Browse files Browse the repository at this point in the history
Change-Id: Id4ebc31a21914c0cfdde40037da1bec9ab538c76
Reviewed-on: https://code-review.googlesource.com/c/re2/+/55234
Reviewed-by: Paul Wankadia <[email protected]>
  • Loading branch information
junyer committed Apr 23, 2020
1 parent 0fadae0 commit 209319c
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 55 deletions.
1 change: 1 addition & 0 deletions re2/compile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1186,6 +1186,7 @@ Prog* Compiler::Finish() {
prog_->Optimize();
prog_->Flatten();
prog_->ComputeByteMap();
prog_->ComputeFirstByte();

// Record remaining memory for DFA.
if (max_mem_ <= 0) {
Expand Down
30 changes: 15 additions & 15 deletions re2/nfa.cc
Original file line number Diff line number Diff line change
Expand Up @@ -629,10 +629,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
return false;
}

// Computes whether all successful matches have a common first byte,
// and if so, returns that byte. If not, returns -1.
int Prog::ComputeFirstByte() {
int b = -1;
void Prog::ComputeFirstByte() {
SparseSet q(size());
q.insert(start());
for (SparseSet::iterator it = q.begin(); it != q.end(); ++it) {
Expand All @@ -645,23 +642,27 @@ int Prog::ComputeFirstByte() {

case kInstMatch:
// The empty string matches: no first byte.
return -1;
first_byte_ = -1;
return;

case kInstByteRange:
if (!ip->last())
q.insert(id+1);

// Must match only a single byte
if (ip->lo() != ip->hi())
return -1;
if (ip->foldcase() && 'a' <= ip->lo() && ip->lo() <= 'z')
return -1;
// Must match only a single byte.
if (ip->lo() != ip->hi() ||
(ip->foldcase() && 'a' <= ip->lo() && ip->lo() <= 'z')) {
first_byte_ = -1;
return;
}
// If we haven't seen any bytes yet, record it;
// otherwise must match the one we saw before.
if (b == -1)
b = ip->lo();
else if (b != ip->lo())
return -1;
if (first_byte_ == -1) {
first_byte_ = ip->lo();
} else if (first_byte_ != ip->lo()) {
first_byte_ = -1;
return;
}
break;

case kInstNop:
Expand All @@ -687,7 +688,6 @@ int Prog::ComputeFirstByte() {
break;
}
}
return b;
}

bool
Expand Down
57 changes: 24 additions & 33 deletions re2/prog.cc
Original file line number Diff line number Diff line change
Expand Up @@ -184,14 +184,31 @@ std::string Prog::DumpByteMap() {
return map;
}

int Prog::first_byte() {
std::call_once(first_byte_once_, [](Prog* prog) {
prog->first_byte_ = prog->ComputeFirstByte();
}, this);
return first_byte_;
}
// Is ip a guaranteed match at end of text, perhaps after some capturing?
static bool IsMatch(Prog* prog, Prog::Inst* ip) {
for (;;) {
switch (ip->opcode()) {
default:
LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
return false;

case kInstAlt:
case kInstAltMatch:
case kInstByteRange:
case kInstFail:
case kInstEmptyWidth:
return false;

static bool IsMatch(Prog*, Prog::Inst*);
case kInstCapture:
case kInstNop:
ip = prog->inst(ip->out());
break;

case kInstMatch:
return true;
}
}
}

// Peep-hole optimizer.
void Prog::Optimize() {
Expand Down Expand Up @@ -257,32 +274,6 @@ void Prog::Optimize() {
}
}

// Reports whether ip is a guaranteed match at end of text, possibly after
// passing through some capture or no-op instructions first.
static bool IsMatch(Prog* prog, Prog::Inst* ip) {
  while (true) {
    switch (ip->opcode()) {
      case kInstMatch:
        // Reached a match instruction.
        return true;

      case kInstCapture:
      case kInstNop:
        // These do not decide the outcome; step to the next instruction
        // and examine that one instead.
        ip = prog->inst(ip->out());
        continue;

      case kInstAlt:
      case kInstAltMatch:
      case kInstByteRange:
      case kInstFail:
      case kInstEmptyWidth:
        // Not an unconditional path to a match.
        return false;

      default:
        LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
        return false;
    }
  }
}

uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) {
int flags = 0;

Expand Down
10 changes: 3 additions & 7 deletions re2/prog.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,7 @@ class Prog {
void set_anchor_end(bool b) { anchor_end_ = b; }
int bytemap_range() { return bytemap_range_; }
const uint8_t* bytemap() { return bytemap_; }

// Lazily computed.
int first_byte();
int first_byte() { return first_byte_; }

// Returns string representation of program for debugging.
std::string Dump();
Expand Down Expand Up @@ -295,9 +293,8 @@ class Prog {
// Compute bytemap.
void ComputeByteMap();

// Computes whether all matches must begin with the same first
// byte, and if so, returns that byte. If not, returns -1.
int ComputeFirstByte();
// Computes whether all matches must begin with the same first byte.
void ComputeFirstByte();

// Run peep-hole optimizer on program.
void Optimize();
Expand Down Expand Up @@ -416,7 +413,6 @@ class Prog {

uint8_t bytemap_[256]; // map from input bytes to byte classes

std::once_flag first_byte_once_;
std::once_flag dfa_first_once_;
std::once_flag dfa_longest_once_;

Expand Down

0 comments on commit 209319c

Please sign in to comment.