forked from ydb-platform/ydb
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[DPHyp] Impl. added (ydb-platform#3763)
Co-authored-by: Pavel Ivanov <[email protected]>
- Loading branch information
1 parent
62d05d2
commit b2c3e7d
Showing
13 changed files
with
1,357 additions
and
1,199 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
#pragma once | ||
|
||
#include <stdlib.h> | ||
|
||
/* | ||
* This header contains helper functions for working with bitsets. | ||
* They are templated by TNodeSet, which is a std::bitset<>. | ||
* We use the template for efficiency: for 64 bit nodesets we implement a faster next subset functionality | ||
*/ | ||
|
||
namespace NYql::NDq { | ||
|
||
/* Returns true when lhs and rhs share at least one set bit. */
template <typename TNodeSet>
inline bool Overlaps(const TNodeSet& lhs, const TNodeSet& rhs) {
    const TNodeSet common = lhs & rhs;
    return common.any();
}
|
||
/* Returns true when every bit set in lhs is also set in rhs (an empty lhs is a subset of anything). */
template <typename TNodeSet>
inline bool IsSubset(const TNodeSet& lhs, const TNodeSet& rhs) {
    // Bits of lhs that fall outside rhs; lhs is a subset exactly when there are none.
    const TNodeSet outsideRhs = lhs & ~rhs;
    return outsideRhs.none();
}
|
||
/* Returns true when exactly one bit of nodeSet is set. */
template <typename TNodeSet>
inline bool HasSingleBit(TNodeSet nodeSet) {
    const size_t setBits = nodeSet.count();
    return setBits == 1;
}
|
||
template <typename TNodeSet> | ||
inline size_t GetLowestSetBit(TNodeSet nodeSet) { | ||
for (size_t i = 0; i < nodeSet.size(); ++i) { | ||
if (nodeSet[i]) { | ||
return i; | ||
} | ||
} | ||
|
||
Y_ASSERT(false); | ||
return nodeSet.size(); | ||
} | ||
|
||
/*
 * Iterates over the indices of the set bits in a TNodeSet, in ascending order.
 *
 * Typical usage:
 *     TSetBitsIt<TNodeSet> it(nodeSet);
 *     while (it.HasNext()) { size_t bit = it.Next(); ... }
 */
template <typename TNodeSet>
class TSetBitsIt {
public:
    /* Stores a copy of nodeSet and positions the iterator on its lowest set bit, if there is one. */
    TSetBitsIt(TNodeSet nodeSet)
        : NodeSet_(nodeSet)
        , Size_(nodeSet.size())
        , BitId_(0)
    {
        SkipUnsetBits();
    }

    /* Returns true while there is still a set bit to hand out; does not advance the iterator. */
    bool HasNext() const {
        return BitId_ < Size_;
    }

    /*
     * Returns the index of the current set bit and advances to the next one.
     * Call only when HasNext() is true; otherwise the returned index is >= the nodeset size.
     */
    size_t Next() {
        const size_t bitId = BitId_++;
        SkipUnsetBits();

        return bitId;
    }

private:
    /* Moves BitId_ forward to the nearest set bit, or to Size_ when none is left. */
    void SkipUnsetBits() {
        while (BitId_ < Size_ && !NodeSet_[BitId_]) {
            ++BitId_;
        }
    }

private:
    TNodeSet NodeSet_;
    size_t Size_;
    size_t BitId_;
};
|
||
} // namespace NYql::NDq |
86 changes: 86 additions & 0 deletions
86
ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#include "dq_opt_conflict_rules_collector.h" | ||
#include <util/generic/hash_set.h> | ||
|
||
namespace NYql::NDq { | ||
|
||
/* To make ASSOC, RASSCOM, LASSCOM tables simplier */ | ||
EJoinKind GetEquivalentJoinByAlgebraicProperties(EJoinKind joinKind) { | ||
switch (joinKind) { | ||
case EJoinKind::Exclusion: | ||
return EJoinKind::InnerJoin; | ||
case EJoinKind::LeftOnly: | ||
return EJoinKind::LeftJoin; | ||
default: | ||
return joinKind; | ||
} | ||
} | ||
|
||
/*
 * Returns true when the join kind is commutative.
 * The kind is first normalized with GetEquivalentJoinByAlgebraicProperties;
 * InnerJoin, OuterJoin and Cross are the commutative kinds.
 */
bool OperatorIsCommutative(EJoinKind joinKind) {
    const EJoinKind normalized = GetEquivalentJoinByAlgebraicProperties(joinKind);

    return normalized == EJoinKind::InnerJoin
        || normalized == EJoinKind::OuterJoin
        || normalized == EJoinKind::Cross;
}
|
||
/*
 * Returns true when the pair (lhs, rhs) is listed in the associativity (ASSOC) table.
 * Both kinds are normalized with GetEquivalentJoinByAlgebraicProperties first.
 * The table is keyed by lhs; a kind without a row is associative with nothing.
 */
bool OperatorsAreAssociative(EJoinKind lhs, EJoinKind rhs) {
    lhs = GetEquivalentJoinByAlgebraicProperties(lhs);
    rhs = GetEquivalentJoinByAlgebraicProperties(rhs);

    // const: the table is read-only, so it must never be touched through the inserting operator[].
    static const THashMap<EJoinKind, THashSet<EJoinKind>> ASSOC_TABLE = {
        {EJoinKind::Cross, {EJoinKind::Cross, EJoinKind::InnerJoin, EJoinKind::LeftSemi, EJoinKind::LeftJoin}},
        {EJoinKind::InnerJoin, {EJoinKind::Cross, EJoinKind::InnerJoin, EJoinKind::LeftSemi, EJoinKind::LeftJoin}},
        {EJoinKind::LeftJoin, {EJoinKind::LeftJoin}},
        {EJoinKind::OuterJoin, {EJoinKind::LeftJoin, EJoinKind::OuterJoin}}
    };

    // Single lookup of the lhs row instead of contains() followed by operator[].
    const auto row = ASSOC_TABLE.find(lhs);
    return row != ASSOC_TABLE.end() && row->second.contains(rhs);
}
|
||
/*
 * Returns true when the pair (lhs, rhs) is listed in the left-asscom (LASSCOM) table.
 * Both kinds are normalized with GetEquivalentJoinByAlgebraicProperties first.
 * The table is keyed by lhs; a kind without a row is left-asscom with nothing.
 */
bool OperatorsAreLeftAsscom(EJoinKind lhs, EJoinKind rhs) {
    lhs = GetEquivalentJoinByAlgebraicProperties(lhs);
    rhs = GetEquivalentJoinByAlgebraicProperties(rhs);

    // const: the table is read-only, so it must never be touched through the inserting operator[].
    static const THashMap<EJoinKind, THashSet<EJoinKind>> LASSCOM_TABLE = {
        {EJoinKind::Cross, {EJoinKind::Cross, EJoinKind::InnerJoin, EJoinKind::LeftSemi, EJoinKind::LeftJoin}},
        {EJoinKind::InnerJoin, {EJoinKind::Cross, EJoinKind::InnerJoin, EJoinKind::LeftSemi, EJoinKind::LeftJoin}},
        {EJoinKind::LeftSemi, {EJoinKind::Cross, EJoinKind::InnerJoin, EJoinKind::LeftSemi, EJoinKind::LeftJoin}},
        {EJoinKind::LeftJoin, {EJoinKind::Cross, EJoinKind::InnerJoin, EJoinKind::LeftSemi, EJoinKind::LeftJoin, EJoinKind::OuterJoin}},
        {EJoinKind::OuterJoin, {EJoinKind::LeftJoin, EJoinKind::OuterJoin}}
    };

    // Single lookup of the lhs row instead of contains() followed by operator[].
    const auto row = LASSCOM_TABLE.find(lhs);
    return row != LASSCOM_TABLE.end() && row->second.contains(rhs);
}
|
||
/*
 * Returns true when the pair (lhs, rhs) is listed in the right-asscom (RASSCOM) table.
 * Both kinds are normalized with GetEquivalentJoinByAlgebraicProperties first.
 * The table is keyed by lhs; a kind without a row is right-asscom with nothing.
 */
bool OperatorsAreRightAsscom(EJoinKind lhs, EJoinKind rhs) {
    lhs = GetEquivalentJoinByAlgebraicProperties(lhs);
    rhs = GetEquivalentJoinByAlgebraicProperties(rhs);

    // const: the table is read-only, so it must never be touched through the inserting operator[].
    static const THashMap<EJoinKind, THashSet<EJoinKind>> RASSCOM_TABLE = {
        {EJoinKind::Cross, {EJoinKind::Cross, EJoinKind::InnerJoin}},
        {EJoinKind::InnerJoin, {EJoinKind::Cross, EJoinKind::InnerJoin}},
        {EJoinKind::OuterJoin, {EJoinKind::OuterJoin}}
    };

    // Single lookup of the lhs row instead of contains() followed by operator[].
    const auto row = RASSCOM_TABLE.find(lhs);
    return row != RASSCOM_TABLE.end() && row->second.contains(rhs);
}
|
||
} // namespace NYql::NDq |
148 changes: 148 additions & 0 deletions
148
ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
#pragma once | ||
|
||
#include <ydb/library/yql/core/cbo/cbo_optimizer_new.h> | ||
|
||
/* | ||
* This header contains an algorithm for resolving join conflicts with TConflictRulesCollector class | ||
* and ConvertConflictRulesIntoTES function, which are used to construct the hypergraph. | ||
*/ | ||
|
||
namespace NYql::NDq { | ||
|
||
bool OperatorIsCommutative(EJoinKind); | ||
|
||
bool OperatorsAreAssociative(EJoinKind, EJoinKind); | ||
|
||
/* (e1 o12 e2) o13 e3 == (e1 o13 e3) o12 e2 */ | ||
bool OperatorsAreLeftAsscom(EJoinKind, EJoinKind); | ||
|
||
/* e1 o13 (e2 o23 e3) == e2 o23 (e1 o13 e3) */ | ||
bool OperatorsAreRightAsscom(EJoinKind, EJoinKind); | ||
|
||
/*
 * A single conflict rule: a pair of node sets.
 * ConvertConflictRulesIntoTES applies a rule by adding RequiredNodes to the TES
 * as soon as the TES overlaps RuleActivationNodes.
 */
template <typename TNodeSet>
struct TConflictRule {
    // Nodes whose presence in the TES triggers the rule.
    TNodeSet RuleActivationNodes;
    // Nodes that are added to the TES once the rule is triggered.
    TNodeSet RequiredNodes;

    TConflictRule(const TNodeSet& ruleActivationNodes, const TNodeSet& requiredNodes)
        : RuleActivationNodes(ruleActivationNodes)
        , RequiredNodes(requiredNodes)
    {
    }
};
|
||
/* | ||
* This class finds and collect conflicts between root of subtree and its nodes. | ||
* It traverses both sides of root and checks algebraic join properties (ASSOC, LASSCOM, RASSCOM). | ||
* The name of algorithm is "CD-C", and details are described in white papper - | ||
* - "On the Correct and Complete Enumeration of the Core Search Space" in section "5.4 Approach CD-C". | ||
*/ | ||
template<typename TNodeSet> | ||
class TConflictRulesCollector { | ||
public: | ||
TConflictRulesCollector( | ||
std::shared_ptr<TJoinOptimizerNode> root, | ||
std::unordered_map<std::shared_ptr<IBaseOptimizerNode>, TNodeSet>& subtreeNodes | ||
) | ||
: Root_(root) | ||
, ConflictRules_({}) | ||
, SubtreeNodes_(subtreeNodes) | ||
{} | ||
|
||
TVector<TConflictRule<TNodeSet>> CollectConflicts() { | ||
VisitJoinTree(Root_->LeftArg, GetLeftConflictsVisitor()); | ||
VisitJoinTree(Root_->RightArg, GetRightConflictsVisitor()); | ||
return std::move(ConflictRules_); | ||
} | ||
|
||
private: | ||
auto GetLeftConflictsVisitor() { | ||
auto visitor = [this](const std::shared_ptr<TJoinOptimizerNode>& child) { | ||
if (!OperatorsAreAssociative(child->JoinType, Root_->JoinType) || !Root_->IsReorderable || !child->IsReorderable) { | ||
ConflictRules_.emplace_back( | ||
SubtreeNodes_[child->RightArg], | ||
SubtreeNodes_[child->LeftArg] | ||
); | ||
} | ||
|
||
if (!OperatorsAreLeftAsscom(child->JoinType, Root_->JoinType) || !Root_->IsReorderable || !child->IsReorderable) { | ||
ConflictRules_.emplace_back( | ||
SubtreeNodes_[child->LeftArg], | ||
SubtreeNodes_[child->RightArg] | ||
); | ||
} | ||
}; | ||
|
||
return visitor; | ||
} | ||
|
||
auto GetRightConflictsVisitor() { | ||
auto visitor = [this](const std::shared_ptr<TJoinOptimizerNode>& child) { | ||
if (!OperatorsAreAssociative(Root_->JoinType, child->JoinType) || !Root_->IsReorderable || !child->IsReorderable) { | ||
ConflictRules_.emplace_back( | ||
SubtreeNodes_[child->LeftArg], | ||
SubtreeNodes_[child->RightArg] | ||
); | ||
} | ||
|
||
if (!OperatorsAreRightAsscom(Root_->JoinType, child->JoinType) || !Root_->IsReorderable || !child->IsReorderable) { | ||
ConflictRules_.emplace_back( | ||
SubtreeNodes_[child->RightArg], | ||
SubtreeNodes_[child->LeftArg] | ||
); | ||
} | ||
}; | ||
|
||
return visitor; | ||
} | ||
|
||
private: | ||
template <typename TFunction> | ||
void VisitJoinTree(const std::shared_ptr<IBaseOptimizerNode>& child, TFunction visitor) { | ||
if (child->Kind == EOptimizerNodeKind::RelNodeType) { | ||
return; | ||
} | ||
|
||
auto childJoinNode = std::static_pointer_cast<TJoinOptimizerNode>(child); | ||
VisitJoinTree(childJoinNode->LeftArg, visitor); | ||
VisitJoinTree(childJoinNode->RightArg, visitor); | ||
|
||
visitor(childJoinNode); | ||
} | ||
|
||
private: | ||
std::shared_ptr<TJoinOptimizerNode> Root_; | ||
TVector<TConflictRule<TNodeSet>> ConflictRules_; | ||
std::unordered_map<std::shared_ptr<IBaseOptimizerNode>, TNodeSet>& SubtreeNodes_; | ||
}; | ||
|
||
/* | ||
* This function converts conflict rules into TES. | ||
* TES (Total Eligibility Set) captures reordering constraints and represents | ||
* set of table, that must present, before join expresion can be evaluated. | ||
* It is initialized with SES (Syntatic Eligibility Set) - condition used tables. | ||
*/ | ||
template <typename TNodeSet> | ||
TNodeSet ConvertConflictRulesIntoTES(const TNodeSet& SES, TVector<TConflictRule<TNodeSet>>& conflictRules) { | ||
auto TES = SES; | ||
|
||
while (true) { | ||
auto prevTES = TES; | ||
|
||
for (const auto& conflictRule: conflictRules) { | ||
if (Overlaps(conflictRule.RuleActivationNodes, TES)) { | ||
TES |= conflictRule.RequiredNodes; | ||
} | ||
} | ||
|
||
EraseIf( | ||
conflictRules, | ||
[&](const TConflictRule<TNodeSet>& conflictRule){ return IsSubset(conflictRule.RequiredNodes, TES); } | ||
); | ||
|
||
if (TES == prevTES || conflictRules.empty()) { | ||
return TES; | ||
} | ||
} | ||
} | ||
|
||
} // namespace NYql::NDq |
Oops, something went wrong.