Skip to content

Commit 1c44848

Browse files
committed
Kusto-phase3: rebase v24.2.1.2248-stable
Kusto-phase3: rebase v24.2.1.2248-stable fixing tests. Kusto-phase3: rebase v24.2.1.2248-stable fix has any ip functions. Kusto-phase3: rebase v24.2.1.2248-stable provide additional protection on ip functions.
1 parent 84dc585 commit 1c44848

22 files changed

+84
-80
lines changed

src/Common/IntervalKind.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ struct IntervalKind
4242
Float64 toSeconds() const;
4343

4444
/// Chooses an interval kind based on number of seconds.
45-
/// For example, `IntervalKind::fromAvgSeconds(3600)` returns `IntervalKind::Hour`.
45+
/// For example, `IntervalKind::fromAvgSeconds(3600)` returns `IntervalKind::Kind::Hour`.
4646
static IntervalKind fromAvgSeconds(Int64 num_seconds);
4747

4848
/// Returns whether IntervalKind has a fixed number of seconds (e.g. Day) or non-fixed (e.g. Month)
@@ -54,16 +54,16 @@ struct IntervalKind
5454
const char * toLowercasedKeyword() const;
5555

5656
/// Returns the string which can be passed to the `unit` parameter of the dateDiff() function.
57-
/// For example, `IntervalKind{IntervalKind::Day}.getDateDiffParameter()` returns "day".
57+
/// For example, `IntervalKind{IntervalKind::Kind::Day}.toDateDiffUnit()` returns "day".
5858
const char * toDateDiffUnit() const;
5959

6060
/// Returns the name of the function converting a number to the interval data type.
61-
/// For example, `IntervalKind{IntervalKind::Day}.getToIntervalDataTypeFunctionName()`
61+
/// For example, `IntervalKind{IntervalKind::Kind::Day}.toNameOfFunctionToIntervalDataType()`
6262
/// returns "toIntervalDay".
6363
const char * toNameOfFunctionToIntervalDataType() const;
6464

6565
/// Returns the name of the function extracting time part from a date or a time.
66-
/// For example, `IntervalKind{IntervalKind::Day}.getExtractTimePartFunctionName()`
66+
/// For example, `IntervalKind{IntervalKind::Kind::Day}.toNameOfFunctionExtractTimePart()`
6767
/// returns "toDayOfMonth".
6868
const char * toNameOfFunctionExtractTimePart() const;
6969

src/Core/SettingsChangesHistory.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,9 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
141141
{"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."},
142142
{"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"},
143143
{"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"},
144-
{"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}},
144+
{"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"},
145+
{"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"},
146+
{"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}},
145147
{"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"},
146148
{"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"},
147149
{"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"},

src/Functions/FunctionBinaryArithmetic.h

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1723,29 +1723,7 @@ class FunctionBinaryArithmetic : public IFunction
17231723

17241724
if constexpr (!std::is_same_v<ResultDataType, InvalidType>)
17251725
{
1726-
if constexpr (is_div_int || is_div_int_or_zero)
1727-
type_res = std::make_shared<ResultDataType>();
1728-
else if constexpr (IsDataTypeDecimal<LeftDataType> && IsDataTypeDecimal<RightDataType>)
1729-
{
1730-
if constexpr (is_division)
1731-
{
1732-
if (context->getSettingsRef().decimal_check_overflow)
1733-
{
1734-
/// Check overflow by using operands scale (based on big decimal division implementation details):
1735-
/// big decimal arithmetic is based on big integers, decimal operands are converted to big integers
1736-
/// i.e. int_operand = decimal_operand*10^scale
1737-
/// For division, left operand will be scaled by right operand scale also to do big integer division,
1738-
/// BigInt result = left*10^(left_scale + right_scale) / right * 10^right_scale
1739-
/// So, we can check upfront possible overflow just by checking max scale used for left operand
1740-
/// Note: it doesn't detect all possible overflow during big decimal division
1741-
if (left.getScale() + right.getScale() > ResultDataType::maxPrecision())
1742-
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Overflow during decimal division");
1743-
}
1744-
}
1745-
ResultDataType result_type = decimalResultType<is_multiply, is_division>(left, right);
1746-
type_res = std::make_shared<ResultDataType>(result_type.getPrecision(), result_type.getScale());
1747-
}
1748-
else if constexpr (((IsDataTypeDecimal<LeftDataType> && IsFloatingPoint<RightDataType>) ||
1726+
if constexpr (((IsDataTypeDecimal<LeftDataType> && IsFloatingPoint<RightDataType>) ||
17491727
(IsDataTypeDecimal<RightDataType> && IsFloatingPoint<LeftDataType>)))
17501728
{
17511729
type_res = std::make_shared<DataTypeFloat64>();
@@ -1807,6 +1785,28 @@ class FunctionBinaryArithmetic : public IFunction
18071785

18081786
return static_cast<bool>(nested_type);
18091787
}
1788+
else if constexpr (IsDataTypeDecimal<LeftDataType> && IsDataTypeDecimal<RightDataType>)
1789+
{
1790+
if constexpr (is_division)
1791+
{
1792+
if (context->getSettingsRef().decimal_check_overflow)
1793+
{
1794+
/// Check overflow by using operands scale (based on big decimal division implementation details):
1795+
/// big decimal arithmetic is based on big integers, decimal operands are converted to big integers
1796+
/// i.e. int_operand = decimal_operand*10^scale
1797+
/// For division, left operand will be scaled by right operand scale also to do big integer division,
1798+
/// BigInt result = left*10^(left_scale + right_scale) / right * 10^right_scale
1799+
/// So, we can check upfront possible overflow just by checking max scale used for left operand
1800+
/// Note: it doesn't detect all possible overflow during big decimal division
1801+
if (left.getScale() + right.getScale() > ResultDataType::maxPrecision())
1802+
throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Overflow during decimal division");
1803+
}
1804+
}
1805+
ResultDataType result_type = decimalResultType<is_multiply, is_division>(left, right);
1806+
type_res = std::make_shared<ResultDataType>(result_type.getPrecision(), result_type.getScale());
1807+
}
1808+
else if constexpr (is_div_int || is_div_int_or_zero)
1809+
type_res = std::make_shared<ResultDataType>();
18101810
else
18111811
type_res = std::make_shared<ResultDataType>();
18121812
return true;

src/Functions/FunctionsConversion.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3316,7 +3316,7 @@ class FunctionCast final : public FunctionCastBase
33163316
switch (interval_kind)
33173317
{
33183318
#define DECLARE_CASE(NAME) \
3319-
case IntervalKind::NAME: \
3319+
case IntervalKind::Kind::NAME: \
33203320
return FunctionToInterval##NAME::create();
33213321
FOR_EACH_INTERVAL_KIND(DECLARE_CASE)
33223322
#undef DECLARE_CASE

src/Functions/Kusto/KqlRange.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -809,7 +809,7 @@ class FunctionKqlRange : public IFunction
809809
total_values = total_elements;
810810
}
811811

812-
auto out = ColumnArray::create(std::make_shared<DataTypeInterval>(IntervalKind::Nanosecond)->createColumn());
812+
auto out = ColumnArray::create(std::make_shared<DataTypeInterval>(IntervalKind::Kind::Nanosecond)->createColumn());
813813
IColumn & out_data = out->getData();
814814
IColumn::Offsets & out_offsets = out->getOffsets();
815815

src/Functions/Kusto/kqlDateTime.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,11 @@ ColumnPtr FunctionKqlDateTime<input_policy>::executeImpl(
9595

9696
const ColumnsWithTypeAndName addition_args{
9797
asArgument(converted, "converted"),
98-
createConstColumnWithTypeAndName<DataTypeInterval>(50, "interval_50", IntervalKind::Nanosecond)};
98+
createConstColumnWithTypeAndName<DataTypeInterval>(50, "interval_50", IntervalKind::Kind::Nanosecond)};
9999
const auto sum = executeFunctionCall(context, "plus", addition_args, input_rows_count);
100100

101101
const ColumnsWithTypeAndName to_start_of_interval_args{
102-
asArgument(sum, "sum"), createConstColumnWithTypeAndName<DataTypeInterval>(100, "interval_100", IntervalKind::Nanosecond)};
102+
asArgument(sum, "sum"), createConstColumnWithTypeAndName<DataTypeInterval>(100, "interval_100", IntervalKind::Kind::Nanosecond)};
103103
const auto [rounded_column, _] = executeFunctionCall(context, "toStartOfInterval", to_start_of_interval_args, input_rows_count);
104104

105105
return wrapInNullable(rounded_column, conversion_args, result_type, input_rows_count);

src/Functions/Kusto/kqlHasAnyIp.cpp

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
#include <optional>
22
#include <ranges>
3-
#include <regex>
43
#include <Columns/ColumnArray.h>
54
#include <Columns/ColumnDecimal.h>
65
#include <Columns/ColumnString.h>
6+
#include <Common/OptimizedRegularExpression.h>
77
#include <DataTypes/DataTypeString.h>
88
#include <DataTypes/DataTypesNumber.h>
99
#include <Functions/FunctionFactory.h>
@@ -397,17 +397,26 @@ class FunctionKqlHasIpGeneric : public IFunction
397397
const auto ips = extractIpsFromArguments(arguments, result_type, context, i);
398398

399399
std::string source = arguments[0].column->getDataAt(i).toString();
400-
const std::regex ip_finder(Func::regex);
401-
std::smatch matches;
402400

403-
while (!res && std::regex_search(source, matches, ip_finder))
401+
OptimizedRegularExpression ip_finder(Func::regex);
402+
OptimizedRegularExpression::MatchVec matches;
403+
404+
unsigned num_matches;
405+
while (!res && (num_matches = ip_finder.match(source, matches)))
404406
{
405-
res = Func::checkRegexMatch(matches[2].str(), result_type, context, ips);
407+
if (num_matches <= 3)
408+
continue;
406409

407-
source = matches.suffix().str();
410+
res = Func::checkRegexMatch(source.substr(matches[2].offset, matches[2].length), result_type, context, ips);
411+
412+
if (matches[3].length == 0)
413+
source = "";
414+
else
415+
source = source.substr(matches[3].offset, source.length());
408416
}
409417
result_column.push_back(static_cast<UInt8>(res));
410418
}
419+
411420
return result;
412421
}
413422

src/Functions/Kusto/kqlIndexOfRegex.cpp

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,13 @@
11
#include <Columns/ColumnNullable.h>
22
#include <Columns/ColumnString.h>
33
#include <Columns/ColumnsNumber.h>
4+
#include <Common/re2.h>
45
#include <DataTypes/DataTypeString.h>
56
#include <DataTypes/DataTypesNumber.h>
67
#include <Functions/FunctionFactory.h>
78
#include <Functions/FunctionHelpers.h>
89
#include <Functions/IFunction.h>
910

10-
#ifdef __clang__
11-
# pragma clang diagnostic push
12-
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
13-
#endif
14-
#include <re2/re2.h>
15-
#ifdef __clang__
16-
# pragma clang diagnostic pop
17-
#endif
18-
1911
namespace DB::ErrorCodes
2012
{
2113
extern const int CANNOT_COMPILE_REGEXP;

src/Functions/Kusto/kqlToTimespan.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ ColumnPtr FunctionKqlToTimespan::executeImpl(
7979

8080
DataTypePtr FunctionKqlToTimespan::getReturnTypeImpl(const DataTypes &) const
8181
{
82-
return makeNullable(std::make_shared<DataTypeInterval>(IntervalKind::Nanosecond));
82+
return makeNullable(std::make_shared<DataTypeInterval>(IntervalKind::Kind::Nanosecond));
8383
}
8484

8585
REGISTER_FUNCTION(KqlToTimespan)

src/Functions/dateTime64Diff.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ DataTypePtr FunctionDateTime64Diff::getReturnTypeImpl(const DataTypes & argument
9292
rhs->getName(),
9393
getName());
9494

95-
return std::make_shared<DataTypeInterval>(IntervalKind::Nanosecond);
95+
return std::make_shared<DataTypeInterval>(IntervalKind::Kind::Nanosecond);
9696
}
9797

9898
REGISTER_FUNCTION(DateTime64Diff)

0 commit comments

Comments
 (0)