Skip to content

Commit

Permalink
Replace StdIntMap with std::vector<int>.
Browse files Browse the repository at this point in the history
Change-Id: I877f3a8b363807cf96c66ba978c6c6cc6bd0688e
Reviewed-on: https://code-review.googlesource.com/c/re2/+/60010
Reviewed-by: Justin Lebar <[email protected]>
Reviewed-by: Paul Wankadia <[email protected]>
  • Loading branch information
junyer committed Mar 30, 2022
1 parent 248b65f commit 0c5616d
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 61 deletions.
85 changes: 27 additions & 58 deletions re2/prefilter_tree.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#include <algorithm>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
Expand Down Expand Up @@ -36,9 +35,6 @@ PrefilterTree::PrefilterTree(int min_atom_len)
PrefilterTree::~PrefilterTree() {
for (size_t i = 0; i < prefilter_vec_.size(); i++)
delete prefilter_vec_[i];

for (size_t i = 0; i < entries_.size(); i++)
delete entries_[i].parents;
}

void PrefilterTree::Add(Prefilter* prefilter) {
Expand Down Expand Up @@ -77,25 +73,21 @@ void PrefilterTree::Compile(std::vector<std::string>* atom_vec) {
// not miss out on any regexps triggering by getting rid of a
// prefilter node.
for (size_t i = 0; i < entries_.size(); i++) {
StdIntMap* parents = entries_[i].parents;
if (parents->size() > 8) {
std::vector<int>& parents = entries_[i].parents;
if (parents.size() > 8) {
// This one triggers too many things. If all the parents are AND
// nodes and have other things guarding them, then get rid of
// this trigger. TODO(vsri): Adjust the threshold appropriately,
// make it a function of total number of nodes?
bool have_other_guard = true;
for (StdIntMap::iterator it = parents->begin();
it != parents->end(); ++it) {
for (int parent : parents) {
have_other_guard = have_other_guard &&
(entries_[it->first].propagate_up_at_count > 1);
(entries_[parent].propagate_up_at_count > 1);
}

if (have_other_guard) {
for (StdIntMap::iterator it = parents->begin();
it != parents->end(); ++it)
entries_[it->first].propagate_up_at_count -= 1;

parents->clear(); // Forget the parents
for (int parent : parents)
entries_[parent].propagate_up_at_count -= 1;
parents.clear(); // Forget the parents
}
}
}
Expand Down Expand Up @@ -216,20 +208,7 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
node->set_unique_id(canonical->unique_id());
}
}
entries_.resize(nodes->size());

// Create parent StdIntMap for the entries.
for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
Prefilter* prefilter = v[i];
if (prefilter == NULL)
continue;

if (CanonicalNode(nodes, prefilter) != prefilter)
continue;

Entry* entry = &entries_[prefilter->unique_id()];
entry->parents = new StdIntMap();
}
entries_.resize(unique_id);

// Fill the entries.
for (int i = static_cast<int>(v.size()) - 1; i >= 0; i--) {
Expand All @@ -240,41 +219,34 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes,
if (CanonicalNode(nodes, prefilter) != prefilter)
continue;

Entry* entry = &entries_[prefilter->unique_id()];
int id = prefilter->unique_id();

switch (prefilter->op()) {
default:
case Prefilter::ALL:
LOG(DFATAL) << "Unexpected op: " << prefilter->op();
return;

case Prefilter::ATOM:
entry->propagate_up_at_count = 1;
entries_[id].propagate_up_at_count = 1;
break;

case Prefilter::OR:
case Prefilter::AND: {
std::set<int> uniq_child;
// For each child, we append our id to the child's list of
// parent ids... unless we happen to have done so already.
// The number of appends is the number of unique children,
// which allows correct upward propagation from AND nodes.
int up_count = 0;
for (size_t j = 0; j < prefilter->subs()->size(); j++) {
Prefilter* child = (*prefilter->subs())[j];
Prefilter* canonical = CanonicalNode(nodes, child);
if (canonical == NULL) {
LOG(DFATAL) << "Null canonical node";
return;
}
int child_id = canonical->unique_id();
uniq_child.insert(child_id);
// To the child, we want to add to parent indices.
Entry* child_entry = &entries_[child_id];
if (child_entry->parents->find(prefilter->unique_id()) ==
child_entry->parents->end()) {
(*child_entry->parents)[prefilter->unique_id()] = 1;
int child_id = (*prefilter->subs())[j]->unique_id();
std::vector<int>& parents = entries_[child_id].parents;
if (parents.empty() || parents.back() != id) {
parents.push_back(id);
up_count++;
}
}
entry->propagate_up_at_count = prefilter->op() == Prefilter::AND
? static_cast<int>(uniq_child.size())
: 1;

entries_[id].propagate_up_at_count =
prefilter->op() == Prefilter::AND ? up_count : 1;
break;
}
}
Expand Down Expand Up @@ -335,10 +307,7 @@ void PrefilterTree::PropagateMatch(const std::vector<int>& atom_ids,
regexps->set(entry.regexps[i], 1);
int c;
// Pass trigger up to parents.
for (StdIntMap::iterator it = entry.parents->begin();
it != entry.parents->end();
++it) {
int j = it->first;
for (int j : entry.parents) {
const Entry& parent = entries_[j];
// Delay until all the children have succeeded.
if (parent.propagate_up_at_count > 1) {
Expand Down Expand Up @@ -368,12 +337,12 @@ void PrefilterTree::PrintDebugInfo(NodeMap* nodes) {
LOG(ERROR) << "#Unique Nodes: " << entries_.size();

for (size_t i = 0; i < entries_.size(); i++) {
StdIntMap* parents = entries_[i].parents;
const std::vector<int>& parents = entries_[i].parents;
const std::vector<int>& regexps = entries_[i].regexps;
LOG(ERROR) << "EntryId: " << i
<< " N: " << parents->size() << " R: " << regexps.size();
for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it)
LOG(ERROR) << it->first;
<< " N: " << parents.size() << " R: " << regexps.size();
for (int parent : parents)
LOG(ERROR) << parent;
}
LOG(ERROR) << "Map:";
for (NodeMap::const_iterator iter = nodes->begin();
Expand Down
4 changes: 1 addition & 3 deletions re2/prefilter_tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

#include <map>
#include <string>
#include <unordered_map>
#include <vector>

#include "util/util.h"
Expand Down Expand Up @@ -60,7 +59,6 @@ class PrefilterTree {

private:
typedef SparseArray<int> IntMap;
typedef std::unordered_map<int, int> StdIntMap;
// TODO(junyer): Use std::unordered_set<Prefilter*> instead?
// It should be trivial to get rid of the stringification...
typedef std::map<std::string, Prefilter*> NodeMap;
Expand All @@ -80,7 +78,7 @@ class PrefilterTree {
// are two different nodes, but they share the atom 'def'. So when
// 'def' matches, it triggers two parents, corresponding to the two
// different OR nodes.
StdIntMap* parents;
std::vector<int> parents;

// When this node is ready to trigger the parent, what are the
// regexps that are triggered.
Expand Down

0 comments on commit 0c5616d

Please sign in to comment.