From 0c5616df9c0aaa44c9440d87422012423d91c7d1 Mon Sep 17 00:00:00 2001 From: Paul Wankadia Date: Wed, 30 Mar 2022 11:33:17 -0700 Subject: [PATCH] Replace `StdIntMap` with `std::vector`. Change-Id: I877f3a8b363807cf96c66ba978c6c6cc6bd0688e Reviewed-on: https://code-review.googlesource.com/c/re2/+/60010 Reviewed-by: Justin Lebar Reviewed-by: Paul Wankadia --- re2/prefilter_tree.cc | 85 ++++++++++++++----------------------------- re2/prefilter_tree.h | 4 +- 2 files changed, 28 insertions(+), 61 deletions(-) diff --git a/re2/prefilter_tree.cc b/re2/prefilter_tree.cc index b483b2ee8..1e4eaffca 100644 --- a/re2/prefilter_tree.cc +++ b/re2/prefilter_tree.cc @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -36,9 +35,6 @@ PrefilterTree::PrefilterTree(int min_atom_len) PrefilterTree::~PrefilterTree() { for (size_t i = 0; i < prefilter_vec_.size(); i++) delete prefilter_vec_[i]; - - for (size_t i = 0; i < entries_.size(); i++) - delete entries_[i].parents; } void PrefilterTree::Add(Prefilter* prefilter) { @@ -77,25 +73,21 @@ void PrefilterTree::Compile(std::vector* atom_vec) { // not miss out on any regexps triggering by getting rid of a // prefilter node. for (size_t i = 0; i < entries_.size(); i++) { - StdIntMap* parents = entries_[i].parents; - if (parents->size() > 8) { + std::vector& parents = entries_[i].parents; + if (parents.size() > 8) { // This one triggers too many things. If all the parents are AND // nodes and have other things guarding them, then get rid of // this trigger. TODO(vsri): Adjust the threshold appropriately, // make it a function of total number of nodes? bool have_other_guard = true; - for (StdIntMap::iterator it = parents->begin(); - it != parents->end(); ++it) { + for (int parent : parents) { have_other_guard = have_other_guard && - (entries_[it->first].propagate_up_at_count > 1); + (entries_[parent].propagate_up_at_count > 1); } - if (have_other_guard) { - for (StdIntMap::iterator it = parents->begin(); - it != parents->end(); ++it) - entries_[it->first].propagate_up_at_count -= 1; - - parents->clear(); // Forget the parents + for (int parent : parents) + entries_[parent].propagate_up_at_count -= 1; + parents.clear(); // Forget the parents } } } @@ -216,20 +208,7 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes, node->set_unique_id(canonical->unique_id()); } } - entries_.resize(nodes->size()); - - // Create parent StdIntMap for the entries. - for (int i = static_cast(v.size()) - 1; i >= 0; i--) { - Prefilter* prefilter = v[i]; - if (prefilter == NULL) - continue; - - if (CanonicalNode(nodes, prefilter) != prefilter) - continue; - - Entry* entry = &entries_[prefilter->unique_id()]; - entry->parents = new StdIntMap(); - } + entries_.resize(unique_id); // Fill the entries. for (int i = static_cast(v.size()) - 1; i >= 0; i--) { @@ -240,41 +219,34 @@ void PrefilterTree::AssignUniqueIds(NodeMap* nodes, if (CanonicalNode(nodes, prefilter) != prefilter) continue; - Entry* entry = &entries_[prefilter->unique_id()]; + int id = prefilter->unique_id(); switch (prefilter->op()) { default: - case Prefilter::ALL: LOG(DFATAL) << "Unexpected op: " << prefilter->op(); return; case Prefilter::ATOM: - entry->propagate_up_at_count = 1; + entries_[id].propagate_up_at_count = 1; break; case Prefilter::OR: case Prefilter::AND: { - std::set uniq_child; + // For each child, we append our id to the child's list of + // parent ids... unless we happen to have done so already. + // The number of appends is the number of unique children, + // which allows correct upward propagation from AND nodes. + int up_count = 0; for (size_t j = 0; j < prefilter->subs()->size(); j++) { - Prefilter* child = (*prefilter->subs())[j]; - Prefilter* canonical = CanonicalNode(nodes, child); - if (canonical == NULL) { - LOG(DFATAL) << "Null canonical node"; - return; - } - int child_id = canonical->unique_id(); - uniq_child.insert(child_id); - // To the child, we want to add to parent indices. - Entry* child_entry = &entries_[child_id]; - if (child_entry->parents->find(prefilter->unique_id()) == - child_entry->parents->end()) { - (*child_entry->parents)[prefilter->unique_id()] = 1; + int child_id = (*prefilter->subs())[j]->unique_id(); + std::vector& parents = entries_[child_id].parents; + if (parents.empty() || parents.back() != id) { + parents.push_back(id); + up_count++; } } - entry->propagate_up_at_count = prefilter->op() == Prefilter::AND - ? static_cast(uniq_child.size()) - : 1; - + entries_[id].propagate_up_at_count = + prefilter->op() == Prefilter::AND ? up_count : 1; break; } } @@ -335,10 +307,7 @@ void PrefilterTree::PropagateMatch(const std::vector& atom_ids, regexps->set(entry.regexps[i], 1); int c; // Pass trigger up to parents. - for (StdIntMap::iterator it = entry.parents->begin(); - it != entry.parents->end(); - ++it) { - int j = it->first; + for (int j : entry.parents) { const Entry& parent = entries_[j]; // Delay until all the children have succeeded. if (parent.propagate_up_at_count > 1) { @@ -368,12 +337,12 @@ void PrefilterTree::PrintDebugInfo(NodeMap* nodes) { LOG(ERROR) << "#Unique Nodes: " << entries_.size(); for (size_t i = 0; i < entries_.size(); i++) { - StdIntMap* parents = entries_[i].parents; + const std::vector& parents = entries_[i].parents; const std::vector& regexps = entries_[i].regexps; LOG(ERROR) << "EntryId: " << i - << " N: " << parents->size() << " R: " << regexps.size(); - for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it) - LOG(ERROR) << it->first; + << " N: " << parents.size() << " R: " << regexps.size(); + for (int parent : parents) + LOG(ERROR) << parent; } LOG(ERROR) << "Map:"; for (NodeMap::const_iterator iter = nodes->begin(); diff --git a/re2/prefilter_tree.h b/re2/prefilter_tree.h index aa8f9131b..6de1c38eb 100644 --- a/re2/prefilter_tree.h +++ b/re2/prefilter_tree.h @@ -18,7 +18,6 @@ #include #include -#include #include #include "util/util.h" @@ -60,7 +59,6 @@ class PrefilterTree { private: typedef SparseArray IntMap; - typedef std::unordered_map StdIntMap; // TODO(junyer): Use std::unordered_set instead? // It should be trivial to get rid of the stringification... typedef std::map NodeMap; @@ -80,7 +78,7 @@ class PrefilterTree { // are two different nodes, but they share the atom 'def'. So when // 'def' matches, it triggers two parents, corresponding to the two // different OR nodes. - StdIntMap* parents; + std::vector parents; // When this node is ready to trigger the parent, what are the // regexps that are triggered.