Skip to content

Commit

Permalink
Fix name clash for TColumnStatistics by moving it into the NYql::NGenericPushDown namespace (ydb-platform#5989)
Browse files: browse the repository at this point in the history
  • Loading branch information
dorooleg authored Jul 1, 2024
1 parent df559c9 commit 460d363
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ namespace {
return predicate;
}

NYql::TColumnStatistics BuildTimestampStats(const TInstant& from, const TInstant& to) {
NYql::TColumnStatistics statistics;
NYql::NGenericPushDown::TColumnStatistics BuildTimestampStats(const TInstant& from, const TInstant& to) {
NYql::NGenericPushDown::TColumnStatistics statistics;
statistics.ColumnType.set_type_id(::Ydb::Type::TIMESTAMP);
statistics.Timestamp.ConstructInPlace();
statistics.Timestamp->lowValue = from;
Expand All @@ -25,17 +25,17 @@ namespace {

Y_UNIT_TEST_SUITE(MatchPredicate) {
Y_UNIT_TEST(EmptyMatch) {
UNIT_ASSERT(MatchPredicate(TMap<TString, NYql::TColumnStatistics>{}, NYql::NConnector::NApi::TPredicate{}));
UNIT_ASSERT(MatchPredicate(TMap<TString, NYql::NGenericPushDown::TColumnStatistics>{}, NYql::NConnector::NApi::TPredicate{}));
}

Y_UNIT_TEST(EmptyWhere) {
UNIT_ASSERT(MatchPredicate(TMap<TString, NYql::TColumnStatistics>{{{"col1", NYql::TColumnStatistics{}},
{"col2", NYql::TColumnStatistics{}}}},
UNIT_ASSERT(MatchPredicate(TMap<TString, NYql::NGenericPushDown::TColumnStatistics>{{{"col1", NYql::NGenericPushDown::TColumnStatistics{}},
{"col2", NYql::NGenericPushDown::TColumnStatistics{}}}},
NYql::NConnector::NApi::TPredicate{}));
}

Y_UNIT_TEST(Between) {
UNIT_ASSERT(MatchPredicate(TMap<TString, NYql::TColumnStatistics>{{{"col1", BuildTimestampStats(TInstant::ParseIso8601("2024-03-01T00:00:00Z"), TInstant::ParseIso8601("2024-03-01T23:59:59Z"))}}},
UNIT_ASSERT(MatchPredicate(TMap<TString, NYql::NGenericPushDown::TColumnStatistics>{{{"col1", BuildTimestampStats(TInstant::ParseIso8601("2024-03-01T00:00:00Z"), TInstant::ParseIso8601("2024-03-01T23:59:59Z"))}}},
BuildPredicate(
R"proto(
between {
Expand Down Expand Up @@ -67,7 +67,7 @@ Y_UNIT_TEST_SUITE(MatchPredicate) {
}

Y_UNIT_TEST(Less) {
UNIT_ASSERT(MatchPredicate(TMap<TString, NYql::TColumnStatistics>{{{"col1", BuildTimestampStats(TInstant::ParseIso8601("2024-03-01T00:00:00Z"), TInstant::ParseIso8601("2024-03-01T23:59:59Z"))}}},
UNIT_ASSERT(MatchPredicate(TMap<TString, NYql::NGenericPushDown::TColumnStatistics>{{{"col1", BuildTimestampStats(TInstant::ParseIso8601("2024-03-01T00:00:00Z"), TInstant::ParseIso8601("2024-03-01T23:59:59Z"))}}},
BuildPredicate(
R"proto(
comparison {
Expand All @@ -90,7 +90,7 @@ Y_UNIT_TEST_SUITE(MatchPredicate) {
}

Y_UNIT_TEST(NotLess) {
UNIT_ASSERT(!MatchPredicate(TMap<TString, NYql::TColumnStatistics>{{{"col1", BuildTimestampStats(TInstant::ParseIso8601("2024-03-02T00:00:00Z"), TInstant::ParseIso8601("2024-03-02T23:59:59Z"))}}},
UNIT_ASSERT(!MatchPredicate(TMap<TString, NYql::NGenericPushDown::TColumnStatistics>{{{"col1", BuildTimestampStats(TInstant::ParseIso8601("2024-03-02T00:00:00Z"), TInstant::ParseIso8601("2024-03-02T23:59:59Z"))}}},
BuildPredicate(
R"proto(
comparison {
Expand All @@ -113,7 +113,7 @@ Y_UNIT_TEST_SUITE(MatchPredicate) {
}

Y_UNIT_TEST(RightColumn) {
UNIT_ASSERT(MatchPredicate(TMap<TString, NYql::TColumnStatistics>{{{"col1", BuildTimestampStats(TInstant::ParseIso8601("2024-03-01T00:00:00Z"), TInstant::ParseIso8601("2024-03-01T23:59:59Z"))}}},
UNIT_ASSERT(MatchPredicate(TMap<TString, NYql::NGenericPushDown::TColumnStatistics>{{{"col1", BuildTimestampStats(TInstant::ParseIso8601("2024-03-01T00:00:00Z"), TInstant::ParseIso8601("2024-03-01T23:59:59Z"))}}},
BuildPredicate(
R"proto(
comparison {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include <util/generic/string.h>
#include <util/datetime/base.h>

namespace NYql {
namespace NYql::NGenericPushDown {

struct TBooleanColumnStatsData {
TMaybe<i64> numTrues;
Expand Down Expand Up @@ -83,4 +83,4 @@ namespace NYql {
TMaybe<TTimestampColumnStatsData> Timestamp;
};

} // namespace NYql
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "yql_generic_match_predicate.h"

namespace NYql {
namespace NYql::NGenericPushDown {

namespace {

Expand Down Expand Up @@ -323,4 +323,4 @@ namespace NYql {
return MatchPredicateImpl(columns, predicate) != Triple::False;
}

} // namespace NYql
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

#include <util/generic/map.h>

namespace NYql {
namespace NYql::NGenericPushDown {

bool MatchPredicate(const TMap<TString, TColumnStatistics>& columns, const NYql::NConnector::NApi::TPredicate& predicate);

} // namespace NYql
}
4 changes: 4 additions & 0 deletions ydb/library/yql/providers/s3/actors/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,7 @@ IF (CLANG AND NOT WITH_VALGRIND)
ENDIF()

END()

RECURSE_FOR_TESTS(
ut
)
20 changes: 10 additions & 10 deletions ydb/library/yql/providers/s3/actors/yql_arrow_push_down.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@

namespace {

TMaybe<NYql::TTimestampColumnStatsData> GetDateStatistics(parquet::Type::type physicalType, std::shared_ptr<parquet::Statistics> statistics) {
TMaybe<NYql::NGenericPushDown::TTimestampColumnStatsData> GetDateStatistics(parquet::Type::type physicalType, std::shared_ptr<parquet::Statistics> statistics) {
switch (physicalType) {
case parquet::Type::type::INT32: {
const parquet::TypedStatistics<arrow::Int32Type>* typedStatistics = static_cast<const parquet::TypedStatistics<arrow::Int32Type>*>(statistics.get());
int64_t minValue = typedStatistics->min();
int64_t maxValue = typedStatistics->max();
NYql::TTimestampColumnStatsData stats;
NYql::NGenericPushDown::TTimestampColumnStatsData stats;
stats.lowValue = TInstant::Days(minValue);
stats.highValue = TInstant::Days(maxValue);
return stats;
Expand All @@ -21,7 +21,7 @@ TMaybe<NYql::TTimestampColumnStatsData> GetDateStatistics(parquet::Type::type ph
const parquet::TypedStatistics<arrow::Int64Type>* typedStatistics = static_cast<const parquet::TypedStatistics<arrow::Int64Type>*>(statistics.get());
int64_t minValue = typedStatistics->min();
int64_t maxValue = typedStatistics->max();
NYql::TTimestampColumnStatsData stats;
NYql::NGenericPushDown::TTimestampColumnStatsData stats;
stats.lowValue = TInstant::Days(minValue);
stats.highValue = TInstant::Days(maxValue);
return stats;
Expand All @@ -37,7 +37,7 @@ TMaybe<NYql::TTimestampColumnStatsData> GetDateStatistics(parquet::Type::type ph
}
}

TMaybe<NYql::TTimestampColumnStatsData> GetTimestampStatistics(const parquet::TimestampLogicalType* typestampLogicalType, parquet::Type::type physicalType, std::shared_ptr<parquet::Statistics> statistics) {
TMaybe<NYql::NGenericPushDown::TTimestampColumnStatsData> GetTimestampStatistics(const parquet::TimestampLogicalType* typestampLogicalType, parquet::Type::type physicalType, std::shared_ptr<parquet::Statistics> statistics) {
int64_t multiplier = 1;
switch (typestampLogicalType->time_unit()) {
case parquet::LogicalType::TimeUnit::unit::UNKNOWN:
Expand All @@ -54,7 +54,7 @@ TMaybe<NYql::TTimestampColumnStatsData> GetTimestampStatistics(const parquet::Ti
const parquet::TypedStatistics<arrow::Int32Type>* typedStatistics = static_cast<const parquet::TypedStatistics<arrow::Int32Type>*>(statistics.get());
int64_t minValue = typedStatistics->min();
int64_t maxValue = typedStatistics->max();
NYql::TTimestampColumnStatsData stats;
NYql::NGenericPushDown::TTimestampColumnStatsData stats;
stats.lowValue = TInstant::FromValue(minValue * multiplier);
stats.highValue = TInstant::FromValue(maxValue * multiplier);
return stats;
Expand All @@ -63,7 +63,7 @@ TMaybe<NYql::TTimestampColumnStatsData> GetTimestampStatistics(const parquet::Ti
const parquet::TypedStatistics<arrow::Int64Type>* typedStatistics = static_cast<const parquet::TypedStatistics<arrow::Int64Type>*>(statistics.get());
int64_t minValue = typedStatistics->min();
int64_t maxValue = typedStatistics->max();
NYql::TTimestampColumnStatsData stats;
NYql::NGenericPushDown::TTimestampColumnStatsData stats;
stats.lowValue = TInstant::FromValue(minValue * multiplier);
stats.highValue = TInstant::FromValue(maxValue * multiplier);
return stats;
Expand All @@ -79,16 +79,16 @@ TMaybe<NYql::TTimestampColumnStatsData> GetTimestampStatistics(const parquet::Ti
}
}

NYql::TColumnStatistics MakeTimestampStatistics(const TString& name, ::Ydb::Type::PrimitiveTypeId type, const TMaybe<NYql::TTimestampColumnStatsData>& statistics) {
NYql::TColumnStatistics columnStatistics;
NYql::NGenericPushDown::TColumnStatistics MakeTimestampStatistics(const TString& name, ::Ydb::Type::PrimitiveTypeId type, const TMaybe<NYql::NGenericPushDown::TTimestampColumnStatsData>& statistics) {
NYql::NGenericPushDown::TColumnStatistics columnStatistics;
columnStatistics.ColumnName = name;
columnStatistics.ColumnType.set_type_id(type);
columnStatistics.Timestamp = statistics;
return columnStatistics;
}

bool MatchRowGroup(std::unique_ptr<parquet::RowGroupMetaData> rowGroupMetadata, const NYql::NConnector::NApi::TPredicate& predicate) {
TMap<TString, NYql::TColumnStatistics> columns;
TMap<TString, NYql::NGenericPushDown::TColumnStatistics> columns;
for (int i = 0; i < rowGroupMetadata->schema()->num_columns(); i++) {
auto columnChunkMetadata = rowGroupMetadata->ColumnChunk(i);
if (!columnChunkMetadata->is_stats_set()) {
Expand Down Expand Up @@ -132,7 +132,7 @@ bool MatchRowGroup(std::unique_ptr<parquet::RowGroupMetaData> rowGroupMetadata,
break;
}
}
return NYql::MatchPredicate(columns, predicate);
return NYql::NGenericPushDown::MatchPredicate(columns, predicate);
}

TVector<ui64> MatchedRowGroupsImpl(parquet::FileMetaData* fileMetadata, const NYql::NConnector::NApi::TPredicate& predicate) {
Expand Down

0 comments on commit 460d363

Please sign in to comment.