Skip to content

Commit

Permalink
[DPHyp] Impl. added (ydb-platform#3763)
Browse files Browse the repository at this point in the history
Co-authored-by: Pavel Ivanov <[email protected]>
  • Loading branch information
pashandor789 and Pavel Ivanov authored Apr 17, 2024
1 parent 62d05d2 commit b2c3e7d
Show file tree
Hide file tree
Showing 13 changed files with 1,357 additions and 1,199 deletions.
3 changes: 3 additions & 0 deletions .github/config/muted_ya.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ ydb/core/viewer/ut Viewer.TabletMerging
ydb/core/viewer/ut Viewer.TabletMergingPacked
ydb/library/actors/http/ut HttpProxy.TooLongHeader
ydb/library/actors/http/ut sole*
ydb/library/yql/dq/opt/ut DQCBO.JoinSearch3Rels
ydb/library/yql/providers/generic/connector/tests* *
ydb/library/yql/providers/yt/provider/ut TYqlCBO.NonReordable
ydb/library/yql/tests/sql/dq_file/part17 *dq-join_cbo_native_3_tables--*
ydb/public/lib/ydb_cli/topic/ut TTopicReaderTests.TestRun_ReadOneMessage
ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/with_offset_ranges_mode_ut RetryPolicy.RetryWithBatching
ydb/public/sdk/cpp/client/ydb_persqueue_core/ut/with_offset_ranges_mode_ut RetryPolicy.TWriteSession_TestBrokenPolicy
Expand Down
2 changes: 1 addition & 1 deletion ydb/library/yql/core/cbo/cbo_optimizer_new.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ TJoinOptimizerNode::TJoinOptimizerNode(const std::shared_ptr<IBaseOptimizerNode>
JoinConditions(joinConditions),
JoinType(joinType),
JoinAlgo(joinAlgo) {
IsReorderable = (JoinType==EJoinKind::InnerJoin) && (nonReorderable==false);
IsReorderable = !nonReorderable;
for (auto [l,r] : joinConditions ) {
LeftJoinKeys.push_back(l.AttributeName);
RightJoinKeys.push_back(r.AttributeName);
Expand Down
76 changes: 76 additions & 0 deletions ydb/library/yql/dq/opt/bitset.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#pragma once

#include <stdlib.h>

/*
* This header contains helper functions for working with bitsets.
* They are templated by TNodeSet, which is a std::bitset<>.
* We use the template for efficiency: for 64 bit nodesets we implement a faster next subset functionality
*/

namespace NYql::NDq {

/* Returns true when lhs and rhs have at least one set bit in common. */
template <typename TNodeSet>
inline bool Overlaps(const TNodeSet& lhs, const TNodeSet& rhs) {
    const TNodeSet common = lhs & rhs;
    return common.any();
}

/* Returns true when every bit set in lhs is also set in rhs (an empty lhs is a subset of anything). */
template <typename TNodeSet>
inline bool IsSubset(const TNodeSet& lhs, const TNodeSet& rhs) {
    // lhs is contained in rhs exactly when lhs has no bit outside of rhs.
    const TNodeSet outsideRhs = lhs & ~rhs;
    return outsideRhs.none();
}

/* Returns true when exactly one bit of nodeSet is set. */
template <typename TNodeSet>
inline bool HasSingleBit(TNodeSet nodeSet) {
    const auto setBits = nodeSet.count();
    return setBits == 1;
}

/*
 * Returns the index of the lowest set bit of nodeSet.
 * If no bit is set, a debug assertion fires and nodeSet.size() is returned.
 */
template <typename TNodeSet>
inline size_t GetLowestSetBit(TNodeSet nodeSet) {
    const size_t bitCount = nodeSet.size();

    // Scan upwards from bit 0 until a set bit or the end of the set is reached.
    size_t pos = 0;
    while (pos < bitCount && !nodeSet[pos]) {
        ++pos;
    }

    if (pos == bitCount) {
        // Callers are expected to pass a non-empty set.
        Y_ASSERT(false);
    }

    return pos;
}

/*
 * Iterates the indices of the set bits in the TNodeSet, from the lowest index to the highest.
 * The iterator works on its own copy of the node set taken at construction.
 *
 * Usage:
 *     TSetBitsIt<TNodeSet> it(nodeSet);
 *     while (it.HasNext()) { size_t bit = it.Next(); ... }
 */
template <typename TNodeSet>
class TSetBitsIt {
public:
    TSetBitsIt(TNodeSet nodeSet)
        : NodeSet_(nodeSet)
        , Size_(nodeSet.size())
        , BitId_(0)
    {
        // Position on the first set bit (or on Size_ when the set is empty).
        SkipUnsetBits();
    }

    /* Returns true while there is still a set bit that Next() has not returned. */
    bool HasNext() const {
        return BitId_ < Size_;
    }

    /*
     * Returns the index of the current set bit and advances to the following one.
     * When HasNext() is false, the returned value is not a valid bit index (it is >= the set size).
     */
    size_t Next() {
        size_t bitId = BitId_++;
        SkipUnsetBits();

        return bitId;
    }

private:
    /* Moves BitId_ forward until it points at a set bit or reaches Size_. */
    void SkipUnsetBits() {
        while (BitId_ < Size_ && !NodeSet_[BitId_]) {
            ++BitId_;
        }
    }

private:
    TNodeSet NodeSet_;
    size_t Size_;
    size_t BitId_;
};

} // namespace NYql::NDq
86 changes: 86 additions & 0 deletions ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#include "dq_opt_conflict_rules_collector.h"
#include <util/generic/hash_set.h>

namespace NYql::NDq {

/*
 * Maps a join kind onto the kind it is treated as when looking up the
 * ASSOC, RASSCOM and LASSCOM tables, which keeps those tables simpler:
 * Exclusion is handled as InnerJoin and LeftOnly as LeftJoin.
 * Every other kind is returned unchanged.
 */
EJoinKind GetEquivalentJoinByAlgebraicProperties(EJoinKind joinKind) {
    if (joinKind == EJoinKind::Exclusion) {
        return EJoinKind::InnerJoin;
    }

    if (joinKind == EJoinKind::LeftOnly) {
        return EJoinKind::LeftJoin;
    }

    return joinKind;
}

/*
 * Returns true when the join kind is commutative.
 * The kind is first normalized with GetEquivalentJoinByAlgebraicProperties;
 * after that InnerJoin, OuterJoin and Cross are commutative, everything else is not.
 */
bool OperatorIsCommutative(EJoinKind joinKind) {
    // Every path of the switch returns, so nothing is needed after it.
    switch (GetEquivalentJoinByAlgebraicProperties(joinKind)) {
        case EJoinKind::InnerJoin:
        case EJoinKind::OuterJoin:
        case EJoinKind::Cross:
            return true;
        default:
            return false;
    }
}

/*
 * Returns true when the pair (lhs, rhs) is listed in the ASSOC table.
 * Both kinds are normalized with GetEquivalentJoinByAlgebraicProperties first.
 * A left operator with no row in the table is associative with nothing.
 */
bool OperatorsAreAssociative(EJoinKind lhs, EJoinKind rhs) {
    lhs = GetEquivalentJoinByAlgebraicProperties(lhs);
    rhs = GetEquivalentJoinByAlgebraicProperties(rhs);

    // Row key: left operator; row value: right operators it is associative with.
    // The table is read-only, so it is const and is queried with a single find().
    static const THashMap<EJoinKind, THashSet<EJoinKind>> ASSOC_TABLE = {
        {EJoinKind::Cross, {EJoinKind::Cross, EJoinKind::InnerJoin, EJoinKind::LeftSemi, EJoinKind::LeftJoin}},
        {EJoinKind::InnerJoin, {EJoinKind::Cross, EJoinKind::InnerJoin, EJoinKind::LeftSemi, EJoinKind::LeftJoin}},
        {EJoinKind::LeftJoin, {EJoinKind::LeftJoin}},
        {EJoinKind::OuterJoin, {EJoinKind::LeftJoin, EJoinKind::OuterJoin}}
    };

    const auto row = ASSOC_TABLE.find(lhs);
    return row != ASSOC_TABLE.end() && row->second.contains(rhs);
}

/*
 * Returns true when the pair (lhs, rhs) is listed in the LASSCOM (left asscom) table.
 * Both kinds are normalized with GetEquivalentJoinByAlgebraicProperties first.
 * A left operator with no row in the table is left-asscom with nothing.
 */
bool OperatorsAreLeftAsscom(EJoinKind lhs, EJoinKind rhs) {
    lhs = GetEquivalentJoinByAlgebraicProperties(lhs);
    rhs = GetEquivalentJoinByAlgebraicProperties(rhs);

    // Row key: left operator; row value: right operators it is left-asscom with.
    // The table is read-only, so it is const and is queried with a single find().
    static const THashMap<EJoinKind, THashSet<EJoinKind>> LASSCOM_TABLE = {
        {EJoinKind::Cross, {EJoinKind::Cross, EJoinKind::InnerJoin, EJoinKind::LeftSemi, EJoinKind::LeftJoin}},
        {EJoinKind::InnerJoin, {EJoinKind::Cross, EJoinKind::InnerJoin, EJoinKind::LeftSemi, EJoinKind::LeftJoin}},
        {EJoinKind::LeftSemi, {EJoinKind::Cross, EJoinKind::InnerJoin, EJoinKind::LeftSemi, EJoinKind::LeftJoin}},
        {EJoinKind::LeftJoin, {EJoinKind::Cross, EJoinKind::InnerJoin, EJoinKind::LeftSemi, EJoinKind::LeftJoin, EJoinKind::OuterJoin}},
        {EJoinKind::OuterJoin, {EJoinKind::LeftJoin, EJoinKind::OuterJoin}}
    };

    const auto row = LASSCOM_TABLE.find(lhs);
    return row != LASSCOM_TABLE.end() && row->second.contains(rhs);
}

/*
 * Returns true when the pair (lhs, rhs) is listed in the RASSCOM (right asscom) table.
 * Both kinds are normalized with GetEquivalentJoinByAlgebraicProperties first.
 * A left operator with no row in the table is right-asscom with nothing.
 */
bool OperatorsAreRightAsscom(EJoinKind lhs, EJoinKind rhs) {
    lhs = GetEquivalentJoinByAlgebraicProperties(lhs);
    rhs = GetEquivalentJoinByAlgebraicProperties(rhs);

    // Row key: left operator; row value: right operators it is right-asscom with.
    // The table is read-only, so it is const and is queried with a single find().
    static const THashMap<EJoinKind, THashSet<EJoinKind>> RASSCOM_TABLE = {
        {EJoinKind::Cross, {EJoinKind::Cross, EJoinKind::InnerJoin}},
        {EJoinKind::InnerJoin, {EJoinKind::Cross, EJoinKind::InnerJoin}},
        {EJoinKind::OuterJoin, {EJoinKind::OuterJoin}}
    };

    const auto row = RASSCOM_TABLE.find(lhs);
    return row != RASSCOM_TABLE.end() && row->second.contains(rhs);
}

} // namespace NYql::NDq
148 changes: 148 additions & 0 deletions ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
#pragma once

#include <ydb/library/yql/core/cbo/cbo_optimizer_new.h>

/*
* This header contains an algorithm for resolving join conflicts with TConflictRulesCollector class
* and ConvertConflictRulesIntoTES function, which are used to construct the hypergraph.
*/

namespace NYql::NDq {

bool OperatorIsCommutative(EJoinKind);

bool OperatorsAreAssociative(EJoinKind, EJoinKind);

/* (e1 o12 e2) o13 e3 == (e1 o13 e3) o12 e2 */
bool OperatorsAreLeftAsscom(EJoinKind, EJoinKind);

/* e1 o13 (e2 o23 e3) == e2 o23 (e1 o13 e3) */
bool OperatorsAreRightAsscom(EJoinKind, EJoinKind);

/*
 * A single reordering constraint expressed on node sets:
 * once a set overlaps RuleActivationNodes, RequiredNodes have to be added to it as well.
 */
template <typename TNodeSet>
struct TConflictRule {
    /* Nodes whose presence triggers the rule. */
    TNodeSet RuleActivationNodes;

    /* Nodes that become mandatory when the rule is triggered. */
    TNodeSet RequiredNodes;

    TConflictRule(const TNodeSet& ruleActivationNodes, const TNodeSet& requiredNodes)
        : RuleActivationNodes(ruleActivationNodes)
        , RequiredNodes(requiredNodes)
    {
    }
};

/*
* This class finds and collect conflicts between root of subtree and its nodes.
* It traverses both sides of root and checks algebraic join properties (ASSOC, LASSCOM, RASSCOM).
* The name of algorithm is "CD-C", and details are described in white papper -
* - "On the Correct and Complete Enumeration of the Core Search Space" in section "5.4 Approach CD-C".
*/
template<typename TNodeSet>
class TConflictRulesCollector {
public:
TConflictRulesCollector(
std::shared_ptr<TJoinOptimizerNode> root,
std::unordered_map<std::shared_ptr<IBaseOptimizerNode>, TNodeSet>& subtreeNodes
)
: Root_(root)
, ConflictRules_({})
, SubtreeNodes_(subtreeNodes)
{}

TVector<TConflictRule<TNodeSet>> CollectConflicts() {
VisitJoinTree(Root_->LeftArg, GetLeftConflictsVisitor());
VisitJoinTree(Root_->RightArg, GetRightConflictsVisitor());
return std::move(ConflictRules_);
}

private:
auto GetLeftConflictsVisitor() {
auto visitor = [this](const std::shared_ptr<TJoinOptimizerNode>& child) {
if (!OperatorsAreAssociative(child->JoinType, Root_->JoinType) || !Root_->IsReorderable || !child->IsReorderable) {
ConflictRules_.emplace_back(
SubtreeNodes_[child->RightArg],
SubtreeNodes_[child->LeftArg]
);
}

if (!OperatorsAreLeftAsscom(child->JoinType, Root_->JoinType) || !Root_->IsReorderable || !child->IsReorderable) {
ConflictRules_.emplace_back(
SubtreeNodes_[child->LeftArg],
SubtreeNodes_[child->RightArg]
);
}
};

return visitor;
}

auto GetRightConflictsVisitor() {
auto visitor = [this](const std::shared_ptr<TJoinOptimizerNode>& child) {
if (!OperatorsAreAssociative(Root_->JoinType, child->JoinType) || !Root_->IsReorderable || !child->IsReorderable) {
ConflictRules_.emplace_back(
SubtreeNodes_[child->LeftArg],
SubtreeNodes_[child->RightArg]
);
}

if (!OperatorsAreRightAsscom(Root_->JoinType, child->JoinType) || !Root_->IsReorderable || !child->IsReorderable) {
ConflictRules_.emplace_back(
SubtreeNodes_[child->RightArg],
SubtreeNodes_[child->LeftArg]
);
}
};

return visitor;
}

private:
template <typename TFunction>
void VisitJoinTree(const std::shared_ptr<IBaseOptimizerNode>& child, TFunction visitor) {
if (child->Kind == EOptimizerNodeKind::RelNodeType) {
return;
}

auto childJoinNode = std::static_pointer_cast<TJoinOptimizerNode>(child);
VisitJoinTree(childJoinNode->LeftArg, visitor);
VisitJoinTree(childJoinNode->RightArg, visitor);

visitor(childJoinNode);
}

private:
std::shared_ptr<TJoinOptimizerNode> Root_;
TVector<TConflictRule<TNodeSet>> ConflictRules_;
std::unordered_map<std::shared_ptr<IBaseOptimizerNode>, TNodeSet>& SubtreeNodes_;
};

/*
 * Converts conflict rules into the TES.
 * The TES (Total Eligibility Set) captures the reordering constraints: it is the set of tables
 * that must be present before the join expression can be evaluated.
 * It starts from the SES (Syntactic Eligibility Set), i.e. the tables used by the join condition.
 *
 * Each pass adds RequiredNodes of every rule whose RuleActivationNodes overlap the current TES,
 * then erases the rules whose RequiredNodes are already covered by the TES.
 * Passes repeat until the TES stops changing or no rules are left.
 *
 * Note: conflictRules is modified in place (covered rules are removed from it).
 * NOTE(review): Overlaps / IsSubset are not declared by the header this file includes directly -
 * confirm they are visible wherever this template is instantiated.
 */
template <typename TNodeSet>
TNodeSet ConvertConflictRulesIntoTES(const TNodeSet& SES, TVector<TConflictRule<TNodeSet>>& conflictRules) {
    TNodeSet TES = SES;
    bool grew = false;

    do {
        const TNodeSet before = TES;

        for (const auto& rule : conflictRules) {
            if (Overlaps(rule.RuleActivationNodes, TES)) {
                TES |= rule.RequiredNodes;
            }
        }

        // Rules that are already satisfied by the TES cannot extend it any further.
        EraseIf(
            conflictRules,
            [&](const TConflictRule<TNodeSet>& rule) { return IsSubset(rule.RequiredNodes, TES); }
        );

        grew = !(TES == before);
    } while (grew && !conflictRules.empty());

    return TES;
}

} // namespace NYql::NDq
Loading

0 comments on commit b2c3e7d

Please sign in to comment.