diff --git a/.github/workflows/clibmouse_pr.yml b/.github/workflows/clibmouse_pr.yml new file mode 100644 index 000000000000..b91a29237a61 --- /dev/null +++ b/.github/workflows/clibmouse_pr.yml @@ -0,0 +1,343 @@ +name: PR Sanity +run-name: ${{ github.actor }} is running PR sanity check 🚀 +env: + # Force the stdout and stderr streams to be unbuffered + PYTHONUNBUFFERED: 1 + +on: # yamllint disable-line rule:truthy + pull_request: + types: + - synchronize + - reopened + - opened + branches: + - Kusto-phase3 + paths-ignore: + - 'docker/docs/**' + - 'docs/**' + - 'website/**' +jobs: + CheckLabels: + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Labels check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 run_check.py + PythonUnitTests: + needs: CheckLabels + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Python unit tests + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 -m unittest discover -s . 
-p '*_test.py' + DockerHubPushAarch64: + needs: CheckLabels + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_images_check.py --suffix aarch64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json + DockerHubPushAmd64: + needs: CheckLabels + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_images_check.py --suffix amd64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json + DockerHubPush: + needs: [DockerHubPushAmd64, DockerHubPushAarch64, PythonUnitTests] + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Download changed aarch64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }} + - name: Download changed amd64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }} + - name: Images check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp 
}}/changed_images.json + StyleCheck: + needs: DockerHubPush + runs-on: [self-hosted, style-checker] + if: ${{ success() || failure() }} + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{ runner.temp }}/style_check + ROBOT_CLICKHOUSE_SSH_KEY<> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=package_release + EOF + - name: Download changed images + uses: actions/download-artifact@v3 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + fetch-depth: 0 # for performance artifact + submodules: true + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + BuilderBinRelease: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_release + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - 
name: Check out repository code + uses: actions/checkout@v2 + - name: Build + run: | + git -C "$GITHUB_WORKSPACE" submodule sync --recursive + git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + # shellcheck disable=SC2046 + docker kill $(docker ps -q) ||: + # shellcheck disable=SC2046 + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinClangTidy: + needs: [DockerHubPush, StyleCheck] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_tidy + EOF + - name: Download changed images + uses: actions/download-artifact@v3 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + 
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + UnitTestsReleaseClang: + needs: [BuilderBinRelease] + runs-on: [self-hosted, fuzzer-unit-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/unit_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Unit tests (release-clang) + REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Unit test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 unit_tests_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + # shellcheck disable=SC2046 + docker kill $(docker ps -q) ||: + # shellcheck disable=SC2046 + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestRelease: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (release) + REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse + KILL_TIMEOUT=10800 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all 
--quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" diff --git a/base/poco/Foundation/include/Poco/URI.h b/base/poco/Foundation/include/Poco/URI.h index eba8109253db..30654504e0bd 100644 --- a/base/poco/Foundation/include/Poco/URI.h +++ b/base/poco/Foundation/include/Poco/URI.h @@ -57,7 +57,7 @@ class Foundation_API URI URI(); /// Creates an empty URI. - explicit URI(const std::string & uri, bool disable_url_encoding = false); + explicit URI(const std::string & uri, bool enable_url_encoding = true); /// Parses an URI from the given string. Throws a /// SyntaxException if the uri is not valid. @@ -362,7 +362,7 @@ class Foundation_API URI std::string _query; std::string _fragment; - bool _disable_url_encoding = false; + bool _enable_url_encoding = true; }; diff --git a/base/poco/Foundation/src/URI.cpp b/base/poco/Foundation/src/URI.cpp index 3354c69d1883..41e331bb0800 100644 --- a/base/poco/Foundation/src/URI.cpp +++ b/base/poco/Foundation/src/URI.cpp @@ -36,8 +36,8 @@ URI::URI(): } -URI::URI(const std::string& uri, bool decode_and_encode_path): - _port(0), _disable_url_encoding(decode_and_encode_path) +URI::URI(const std::string& uri, bool enable_url_encoding): + _port(0), _enable_url_encoding(enable_url_encoding) { parse(uri); } @@ -108,7 +108,7 @@ URI::URI(const URI& uri): _path(uri._path), _query(uri._query), _fragment(uri._fragment), - _disable_url_encoding(uri._disable_url_encoding) + _enable_url_encoding(uri._enable_url_encoding) { } @@ -121,7 +121,7 @@ URI::URI(const URI& baseURI, const std::string& relativeURI): _path(baseURI._path), _query(baseURI._query), _fragment(baseURI._fragment), - _disable_url_encoding(baseURI._disable_url_encoding) + _enable_url_encoding(baseURI._enable_url_encoding) { resolve(relativeURI); } @@ -153,7 +153,7 @@ URI& URI::operator = (const URI& uri) _path = uri._path; _query = uri._query; _fragment = uri._fragment; - _disable_url_encoding = uri._disable_url_encoding; + _enable_url_encoding = 
uri._enable_url_encoding; } return *this; } @@ -184,7 +184,7 @@ void URI::swap(URI& uri) std::swap(_path, uri._path); std::swap(_query, uri._query); std::swap(_fragment, uri._fragment); - std::swap(_disable_url_encoding, uri._disable_url_encoding); + std::swap(_enable_url_encoding, uri._enable_url_encoding); } @@ -687,18 +687,18 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa void URI::encodePath(std::string & encodedStr) const { - if (_disable_url_encoding) - encodedStr = _path; - else + if (_enable_url_encoding) encode(_path, RESERVED_PATH, encodedStr); + else + encodedStr = _path; } void URI::decodePath(const std::string & encodedStr) { - if (_disable_url_encoding) - _path = encodedStr; - else + if (_enable_url_encoding) decode(encodedStr, _path); + else + _path = encodedStr; } bool URI::isWellKnownPort() const diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 821b7b468553..1c7b756b3bce 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -5,8 +5,8 @@ SET(VERSION_REVISION 54476) SET(VERSION_MAJOR 23) SET(VERSION_MINOR 7) -SET(VERSION_PATCH 1) -SET(VERSION_GITHASH d1c7e13d08868cb04d3562dcced704dd577cb1df) -SET(VERSION_DESCRIBE v23.7.1.1-testing) -SET(VERSION_STRING 23.7.1.1) +SET(VERSION_PATCH 2) +SET(VERSION_GITHASH a70127baecc451f1f7073bad7b6198f6703441d8) +SET(VERSION_DESCRIBE v23.7.2.1-stable) +SET(VERSION_STRING 23.7.2.1) # end of autochange diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 8e95d94b6dc7..064538a04488 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -95,6 +95,7 @@ RUN python3 -m pip install --no-cache-dir \ pytest-timeout \ pytest-xdist \ pytz \ + pyyaml==5.3.1 \ redis \ requests-kerberos \ tzlocal==2.1 \ diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index 
f556df0a0887..5a5e15641804 100644 --- a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -106,4 +106,4 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. -- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) -allows to disable decoding/encoding path in uri. Disabled by default. +- [enable_url_encoding](/docs/en/operations/settings/settings.md#enable_url_encoding) - allows to enable/disable decoding/encoding path in uri. Enabled by default. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 8dfb6c0d2250..db25ef10c910 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3468,11 +3468,11 @@ Possible values: Default value: `0`. -## disable_url_encoding {#disable_url_encoding} +## enable_url_encoding {#enable_url_encoding} -Allows to disable decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables. +Allows to enable/disable decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables. -Disabled by default. +Enabled by default. ## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously} diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index 677ed0119608..859de86f0190 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -56,7 +56,7 @@ Character `|` inside patterns is used to specify failover addresses. 
They are it ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. -- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) - allows to disable decoding/encoding path in uri. Disabled by default. +- [enable_url_encoding](/docs/en/operations/settings/settings.md#enable_url_encoding) - allows to enable/disable decoding/encoding path in uri. Enabled by default. **See Also** diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml index 018e88ef8285..66299fddd4aa 100644 --- a/packages/clickhouse-server.yaml +++ b/packages/clickhouse-server.yaml @@ -55,6 +55,9 @@ contents: - src: clickhouse dst: /usr/bin/clickhouse-keeper type: symlink +- src: clickhouse + dst: /usr/bin/clickhouse-keeper-client + type: symlink - src: root/usr/bin/clickhouse-report dst: /usr/bin/clickhouse-report - src: root/usr/bin/clickhouse-server diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 43a8d84b5131..f1a269becafd 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -46,7 +46,8 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/WriteBufferFromNuraftBuffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ZooKeeperDataReader.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsFields.cpp + # this will need to be uncommented once dbms is no longer a dependency of standalone clickhouse-keeper + # ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsFields.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BaseSettings.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/ServerSettings.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/Field.cpp @@ -198,7 +199,7 @@ if (BUILD_STANDALONE_KEEPER) clickhouse_common_config_no_zookeeper_log loggers_no_text_log clickhouse_common_io - clickhouse_parsers # Otherwise 
compression will not built. FIXME. + clickhouse_parsers dbms # Otherwise compression will not built. FIXME. ${LINK_RESOURCE_LIB_STANDALONE_KEEPER} ) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index a38467c3369f..40288fb14b86 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -67,15 +67,17 @@ int mainEntryClickHouseKeeper(int argc, char ** argv) } } -#ifdef CLICKHOUSE_PROGRAM_STANDALONE_BUILD +// this will need to be uncommented once dbms is no longer a dependency of standalone clickhouse-keeper +// +// #ifdef CLICKHOUSE_PROGRAM_STANDALONE_BUILD -// Weak symbols don't work correctly on Darwin -// so we have a stub implementation to avoid linker errors -void collectCrashLog( - Int32, UInt64, const String &, const StackTrace &) -{} +// // Weak symbols don't work correctly on Darwin +// // so we have a stub implementation to avoid linker errors +// void collectCrashLog( +// Int32, UInt64, const String &, const StackTrace &) +// {} -#endif +// #endif namespace DB { diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index dce52ecdb125..adec13a5199e 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -2031,27 +2031,26 @@ void Server::createServers( for (const auto & protocol : protocols) { - if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol)) + std::string prefix = "protocols." + protocol + "."; + std::string port_name = prefix + "port"; + std::string description {" protocol"}; + if (config.has(prefix + "description")) + description = config.getString(prefix + "description"); + + if (!config.has(prefix + "port")) + continue; + + if (!server_type.shouldStart(ServerType::Type::CUSTOM, port_name)) continue; std::vector hosts; - if (config.has("protocols." + protocol + ".host")) - hosts.push_back(config.getString("protocols." 
+ protocol + ".host")); + if (config.has(prefix + "host")) + hosts.push_back(config.getString(prefix + "host")); else hosts = listen_hosts; for (const auto & host : hosts) { - std::string conf_name = "protocols." + protocol; - std::string prefix = conf_name + "."; - - if (!config.has(prefix + "port")) - continue; - - std::string description {" protocol"}; - if (config.has(prefix + "description")) - description = config.getString(prefix + "description"); - std::string port_name = prefix + "port"; bool is_secure = false; auto stack = buildProtocolStackFromConfig(config, protocol, http_params, async_metrics, is_secure); diff --git a/programs/server/users.xml b/programs/server/users.xml index 5e2ff51bf4de..9810feb9a53c 100644 --- a/programs/server/users.xml +++ b/programs/server/users.xml @@ -5,6 +5,15 @@ + + random diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5c66c7e94950..b5e606fa1a8b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -616,6 +616,7 @@ if (ENABLE_TESTS) ) target_link_libraries(unit_tests_dbms PRIVATE + ch_contrib::gmock_all ch_contrib::gtest_all ch_contrib::gmock_all clickhouse_functions diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 3e964d5c6a36..a266eb76b14c 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -339,7 +339,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu const Dialect & dialect = settings.dialect; if (dialect == Dialect::kusto) - parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); + parser = std::make_unique(); else if (dialect == Dialect::prql) parser = std::make_unique(max_length, settings.max_parser_depth); else @@ -2161,21 +2161,9 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) bool ClientBase::processQueryText(const String & text) { - auto trimmed_input = trim(text, [](char c) { return isWhitespaceASCII(c) || c == ';'; }); - - if (exit_strings.end() != 
exit_strings.find(trimmed_input)) + if (exit_strings.end() != exit_strings.find(trim(text, [](char c) { return isWhitespaceASCII(c) || c == ';'; }))) return false; - if (trimmed_input.starts_with("\\i")) - { - size_t skip_prefix_size = std::strlen("\\i"); - auto file_name = trim( - trimmed_input.substr(skip_prefix_size, trimmed_input.size() - skip_prefix_size), - [](char c) { return isWhitespaceASCII(c); }); - - return processMultiQueryFromFile(file_name); - } - if (!is_multiquery) { assert(!query_fuzzer_runs); @@ -2428,17 +2416,6 @@ void ClientBase::runInteractive() } -bool ClientBase::processMultiQueryFromFile(const String & file_name) -{ - String queries_from_file; - - ReadBufferFromFile in(file_name); - readStringUntilEOF(queries_from_file, in); - - return executeMultiQuery(queries_from_file); -} - - void ClientBase::runNonInteractive() { if (delayed_interactive) @@ -2446,13 +2423,23 @@ void ClientBase::runNonInteractive() if (!queries_files.empty()) { + auto process_multi_query_from_file = [&](const String & file) + { + String queries_from_file; + + ReadBufferFromFile in(file); + readStringUntilEOF(queries_from_file, in); + + return executeMultiQuery(queries_from_file); + }; + for (const auto & queries_file : queries_files) { for (const auto & interleave_file : interleave_queries_files) - if (!processMultiQueryFromFile(interleave_file)) + if (!process_multi_query_from_file(interleave_file)) return; - if (!processMultiQueryFromFile(queries_file)) + if (!process_multi_query_from_file(queries_file)) return; } diff --git a/src/Common/CaresPTRResolver.cpp b/src/Common/CaresPTRResolver.cpp index fee4d01eb379..bf61e940745d 100644 --- a/src/Common/CaresPTRResolver.cpp +++ b/src/Common/CaresPTRResolver.cpp @@ -41,9 +41,25 @@ namespace DB } } - std::mutex CaresPTRResolver::mutex; + struct AresChannelRAII + { + AresChannelRAII() + { + if (ares_init(&channel) != ARES_SUCCESS) + { + throw DB::Exception(DB::ErrorCodes::DNS_ERROR, "Failed to initialize c-ares channel"); + } 
+ } + + ~AresChannelRAII() + { + ares_destroy(channel); + } + + ares_channel channel; + }; - CaresPTRResolver::CaresPTRResolver(CaresPTRResolver::provider_token) : channel(nullptr) + CaresPTRResolver::CaresPTRResolver(CaresPTRResolver::provider_token) { /* * ares_library_init is not thread safe. Currently, the only other usage of c-ares seems to be in grpc. @@ -57,34 +73,22 @@ namespace DB * */ static const auto library_init_result = ares_library_init(ARES_LIB_INIT_ALL); - if (library_init_result != ARES_SUCCESS || ares_init(&channel) != ARES_SUCCESS) + if (library_init_result != ARES_SUCCESS) { throw DB::Exception(DB::ErrorCodes::DNS_ERROR, "Failed to initialize c-ares"); } } - CaresPTRResolver::~CaresPTRResolver() - { - ares_destroy(channel); - /* - * Library initialization is currently done only once in the constructor. Multiple instances of CaresPTRResolver - * will be used in the lifetime of ClickHouse, thus it's problematic to have de-init here. - * In a practical view, it makes little to no sense to de-init a DNS library since DNS requests will happen - * until the end of the program. Hence, ares_library_cleanup() will not be called. 
- * */ - } - std::unordered_set CaresPTRResolver::resolve(const std::string & ip) { - std::lock_guard guard(mutex); + AresChannelRAII channel_raii; std::unordered_set ptr_records; - resolve(ip, ptr_records); + resolve(ip, ptr_records, channel_raii.channel); - if (!wait_and_process()) + if (!wait_and_process(channel_raii.channel)) { - cancel_requests(); throw DB::Exception(DB::ErrorCodes::DNS_ERROR, "Failed to complete reverse DNS query for IP {}", ip); } @@ -93,22 +97,21 @@ namespace DB std::unordered_set CaresPTRResolver::resolve_v6(const std::string & ip) { - std::lock_guard guard(mutex); + AresChannelRAII channel_raii; std::unordered_set ptr_records; - resolve_v6(ip, ptr_records); + resolve_v6(ip, ptr_records, channel_raii.channel); - if (!wait_and_process()) + if (!wait_and_process(channel_raii.channel)) { - cancel_requests(); throw DB::Exception(DB::ErrorCodes::DNS_ERROR, "Failed to complete reverse DNS query for IP {}", ip); } return ptr_records; } - void CaresPTRResolver::resolve(const std::string & ip, std::unordered_set & response) + void CaresPTRResolver::resolve(const std::string & ip, std::unordered_set & response, ares_channel channel) { in_addr addr; @@ -117,7 +120,7 @@ namespace DB ares_gethostbyaddr(channel, reinterpret_cast(&addr), sizeof(addr), AF_INET, callback, &response); } - void CaresPTRResolver::resolve_v6(const std::string & ip, std::unordered_set & response) + void CaresPTRResolver::resolve_v6(const std::string & ip, std::unordered_set & response, ares_channel channel) { in6_addr addr; inet_pton(AF_INET6, ip.c_str(), &addr); @@ -125,15 +128,15 @@ namespace DB ares_gethostbyaddr(channel, reinterpret_cast(&addr), sizeof(addr), AF_INET6, callback, &response); } - bool CaresPTRResolver::wait_and_process() + bool CaresPTRResolver::wait_and_process(ares_channel channel) { int sockets[ARES_GETSOCK_MAXNUM]; pollfd pollfd[ARES_GETSOCK_MAXNUM]; while (true) { - auto readable_sockets = get_readable_sockets(sockets, pollfd); - auto timeout = 
calculate_timeout(); + auto readable_sockets = get_readable_sockets(sockets, pollfd, channel); + auto timeout = calculate_timeout(channel); int number_of_fds_ready = 0; if (!readable_sockets.empty()) @@ -158,11 +161,11 @@ namespace DB if (number_of_fds_ready > 0) { - process_readable_sockets(readable_sockets); + process_readable_sockets(readable_sockets, channel); } else { - process_possible_timeout(); + process_possible_timeout(channel); break; } } @@ -170,12 +173,12 @@ namespace DB return true; } - void CaresPTRResolver::cancel_requests() + void CaresPTRResolver::cancel_requests(ares_channel channel) { ares_cancel(channel); } - std::span CaresPTRResolver::get_readable_sockets(int * sockets, pollfd * pollfd) + std::span CaresPTRResolver::get_readable_sockets(int * sockets, pollfd * pollfd, ares_channel channel) { int sockets_bitmask = ares_getsock(channel, sockets, ARES_GETSOCK_MAXNUM); @@ -205,7 +208,7 @@ namespace DB return std::span(pollfd, number_of_sockets_to_poll); } - int64_t CaresPTRResolver::calculate_timeout() + int64_t CaresPTRResolver::calculate_timeout(ares_channel channel) { timeval tv; if (auto * tvp = ares_timeout(channel, nullptr, &tv)) @@ -218,14 +221,14 @@ namespace DB return 0; } - void CaresPTRResolver::process_possible_timeout() + void CaresPTRResolver::process_possible_timeout(ares_channel channel) { /* Call ares_process() unconditonally here, even if we simply timed out above, as otherwise the ares name resolve won't timeout! 
*/ ares_process_fd(channel, ARES_SOCKET_BAD, ARES_SOCKET_BAD); } - void CaresPTRResolver::process_readable_sockets(std::span readable_sockets) + void CaresPTRResolver::process_readable_sockets(std::span readable_sockets, ares_channel channel) { for (auto readable_socket : readable_sockets) { diff --git a/src/Common/CaresPTRResolver.h b/src/Common/CaresPTRResolver.h index 454509ae43c3..24a5e422ca8b 100644 --- a/src/Common/CaresPTRResolver.h +++ b/src/Common/CaresPTRResolver.h @@ -28,32 +28,35 @@ namespace DB public: explicit CaresPTRResolver(provider_token); - ~CaresPTRResolver() override; + + /* + * Library initialization is currently done only once in the constructor. Multiple instances of CaresPTRResolver + * will be used in the lifetime of ClickHouse, thus it's problematic to have de-init here. + * In a practical view, it makes little to no sense to de-init a DNS library since DNS requests will happen + * until the end of the program. Hence, ares_library_cleanup() will not be called. 
+ * */ + ~CaresPTRResolver() override = default; std::unordered_set resolve(const std::string & ip) override; std::unordered_set resolve_v6(const std::string & ip) override; private: - bool wait_and_process(); - - void cancel_requests(); - - void resolve(const std::string & ip, std::unordered_set & response); + bool wait_and_process(ares_channel channel); - void resolve_v6(const std::string & ip, std::unordered_set & response); + void cancel_requests(ares_channel channel); - std::span get_readable_sockets(int * sockets, pollfd * pollfd); + void resolve(const std::string & ip, std::unordered_set & response, ares_channel channel); - int64_t calculate_timeout(); + void resolve_v6(const std::string & ip, std::unordered_set & response, ares_channel channel); - void process_possible_timeout(); + std::span get_readable_sockets(int * sockets, pollfd * pollfd, ares_channel channel); - void process_readable_sockets(std::span readable_sockets); + int64_t calculate_timeout(ares_channel channel); - ares_channel channel; + void process_possible_timeout(ares_channel channel); - static std::mutex mutex; + void process_readable_sockets(std::span readable_sockets, ares_channel channel); }; } diff --git a/src/Common/tests/gtest_dns_reverse_resolve.cpp b/src/Common/tests/gtest_dns_reverse_resolve.cpp index 08351564eaf4..de33deddac3a 100644 --- a/src/Common/tests/gtest_dns_reverse_resolve.cpp +++ b/src/Common/tests/gtest_dns_reverse_resolve.cpp @@ -9,34 +9,35 @@ namespace DB { TEST(Common, ReverseDNS) { - auto addresses = std::vector({ - "8.8.8.8", "2001:4860:4860::8888", // dns.google - "142.250.219.35", // google.com - "157.240.12.35", // facebook - "208.84.244.116", "2600:1419:c400::214:c410", //www.terra.com.br, - "127.0.0.1", "::1" - }); - auto func = [&]() { // Good random seed, good engine auto rnd1 = std::mt19937(std::random_device{}()); - for (int i = 0; i < 50; ++i) + for (int i = 0; i < 10; ++i) { auto & dns_resolver_instance = DNSResolver::instance(); -// unfortunately, DNS 
cache can't be disabled because we might end up causing a DDoS attack -// dns_resolver_instance.setDisableCacheFlag(); - - auto addr_index = rnd1() % addresses.size(); - - [[maybe_unused]] auto result = dns_resolver_instance.reverseResolve(Poco::Net::IPAddress{ addresses[addr_index] }); - -// will not assert either because some of the IP addresses might change in the future and -// this test will become flaky -// ASSERT_TRUE(!result.empty()); + dns_resolver_instance.setDisableCacheFlag(); + + auto val1 = rnd1() % static_cast((pow(2, 31) - 1)); + auto val2 = rnd1() % static_cast((pow(2, 31) - 1)); + auto val3 = rnd1() % static_cast((pow(2, 31) - 1)); + auto val4 = rnd1() % static_cast((pow(2, 31) - 1)); + + uint32_t ipv4_buffer[1] = { + static_cast(val1) + }; + + uint32_t ipv6_buffer[4] = { + static_cast(val1), + static_cast(val2), + static_cast(val3), + static_cast(val4) + }; + + dns_resolver_instance.reverseResolve(Poco::Net::IPAddress{ ipv4_buffer, sizeof(ipv4_buffer)}); + dns_resolver_instance.reverseResolve(Poco::Net::IPAddress{ ipv6_buffer, sizeof(ipv6_buffer)}); } - }; auto number_of_threads = 200u; diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index a4c3d91e1c9e..88b9f1cedb42 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -794,8 +794,14 @@ bool KeeperServer::applyConfigUpdate(const ClusterUpdateAction & action) std::lock_guard _{server_write_mutex}; if (const auto * add = std::get_if(&action)) - return raft_instance->get_srv_config(add->id) != nullptr - || raft_instance->add_srv(static_cast(*add))->get_accepted(); + { + if (raft_instance->get_srv_config(add->id) != nullptr) + return true; + + auto resp = raft_instance->add_srv(static_cast(*add)); + resp->get(); + return resp->get_accepted(); + } else if (const auto * remove = std::get_if(&action)) { if (remove->id == raft_instance->get_leader()) @@ -807,8 +813,12 @@ bool KeeperServer::applyConfigUpdate(const ClusterUpdateAction & 
action) return false; } - return raft_instance->get_srv_config(remove->id) == nullptr - || raft_instance->remove_srv(remove->id)->get_accepted(); + if (raft_instance->get_srv_config(remove->id) == nullptr) + return true; + + auto resp = raft_instance->remove_srv(remove->id); + resp->get(); + return resp->get_accepted(); } else if (const auto * update = std::get_if(&action)) { diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index dbe13d9502a1..9897f314aa83 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -46,15 +46,6 @@ #define DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION 54454 -/// Version of ClickHouse TCP protocol. -/// -/// Should be incremented manually on protocol changes. -/// -/// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, -/// later is just a number for server version (one number instead of commit SHA) -/// for simplicity (sometimes it may be more convenient in some use cases). -#define DBMS_TCP_PROTOCOL_VERSION 54464 - #define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449 #define DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS_IN_INSERT 54456 @@ -77,3 +68,14 @@ #define DBMS_MIN_PROTOCOL_VERSION_WITH_TOTAL_BYTES_IN_PROGRESS 54463 #define DBMS_MIN_PROTOCOL_VERSION_WITH_TIMEZONE_UPDATES 54464 + +#define DBMS_MIN_REVISION_WITH_SPARSE_SERIALIZATION 54465 + +/// Version of ClickHouse TCP protocol. +/// +/// Should be incremented manually on protocol changes. +/// +/// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, +/// later is just a number for server version (one number instead of commit SHA) +/// for simplicity (sometimes it may be more convenient in some use cases). 
+#define DBMS_TCP_PROTOCOL_VERSION 54465 diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c69d132ea253..fbe805cdfa5b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -622,7 +622,7 @@ class IColumn; M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \ M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \ M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \ - M(Bool, disable_url_encoding, false, " Allows to disable decoding/encoding path in uri in URL table engine", 0) \ + M(Bool, enable_url_encoding, true, " Allows to enable/disable decoding/encoding path in uri in URL table engine", 0) \ M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \ M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \ M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). Not recommended to enable these settings.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 86400954e2fd..603e0fbf5241 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -138,10 +138,8 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS, IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, {{"clickhouse", Dialect::clickhouse}, - {"kusto", Dialect::kusto}, {"kusto", Dialect::kusto}, {"prql", Dialect::prql}}) - // FIXME: do not add 'kusto_auto' to the list. Maybe remove it from code completely? 
IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGUMENTS, {{"default", ParallelReplicasCustomKeyFilterType::DEFAULT}, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index c61afbd2bbfd..f64ed697421b 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -206,7 +206,6 @@ enum class Dialect { clickhouse, kusto, - kusto_auto, prql, }; diff --git a/src/Core/SortCursor.h b/src/Core/SortCursor.h index 3c412fa1f17d..f0724b6dc7e6 100644 --- a/src/Core/SortCursor.h +++ b/src/Core/SortCursor.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Core/callOnTypeIndex.h b/src/Core/callOnTypeIndex.h index 39ce37c4c139..d1d4dd39a9d0 100644 --- a/src/Core/callOnTypeIndex.h +++ b/src/Core/callOnTypeIndex.h @@ -157,6 +157,7 @@ inline bool callOnBasicTypes(TypeIndex type_num1, TypeIndex type_num2, F && f) class DataTypeDate; class DataTypeDate32; +class DataTypeInterval; class DataTypeString; class DataTypeFixedString; class DataTypeUUID; @@ -201,6 +202,8 @@ bool callOnIndexAndDataType(TypeIndex number, F && f, ExtraArgs && ... 
args) case TypeIndex::DateTime: return f(TypePair(), std::forward(args)...); case TypeIndex::DateTime64: return f(TypePair(), std::forward(args)...); + case TypeIndex::Interval: return f(TypePair(), std::forward(args)...); + case TypeIndex::String: return f(TypePair(), std::forward(args)...); case TypeIndex::FixedString: return f(TypePair(), std::forward(args)...); diff --git a/src/DataTypes/DataTypeInterval.h b/src/DataTypes/DataTypeInterval.h index c398a54268e5..92e2e5914bc2 100644 --- a/src/DataTypes/DataTypeInterval.h +++ b/src/DataTypes/DataTypeInterval.h @@ -22,7 +22,7 @@ class DataTypeInterval final : public DataTypeNumberBase IntervalKind getKind() const { return kind; } - explicit DataTypeInterval(IntervalKind kind_) : kind(kind_) {} + explicit DataTypeInterval(IntervalKind kind_ = {}) : kind(kind_) {} SerializationPtr doGetDefaultSerialization() const override; std::string doGetName() const override { return fmt::format("Interval{}", kind.toString()); } diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index ea05e6ae59bc..0bf3f3ac8b30 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -37,6 +37,7 @@ class DataTypeTuple final : public IDataType bool canBeInsideNullable() const override { return false; } bool supportsSparseSerialization() const override { return true; } + bool canBeInsideSparseColumns() const override { return false; } MutableColumnPtr createColumn() const override; MutableColumnPtr createColumn(const ISerialization & serialization) const override; diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 4adafe5d2129..5086376b4be5 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -110,6 +110,7 @@ class IDataType : private boost::noncopyable, public std::enable_shared_from_thi /// TODO: support more types. 
virtual bool supportsSparseSerialization() const { return !haveSubtypes(); } + virtual bool canBeInsideSparseColumns() const { return supportsSparseSerialization(); } SerializationPtr getDefaultSerialization() const; SerializationPtr getSparseSerialization() const; @@ -373,6 +374,7 @@ struct WhichDataType constexpr bool isDateTime() const { return idx == TypeIndex::DateTime; } constexpr bool isDateTime64() const { return idx == TypeIndex::DateTime64; } constexpr bool isDateOrDate32() const { return isDate() || isDate32(); } + constexpr bool isDateOrDate32OrDateTimeOrDateTime64() const { return isDate() || isDate32() || isDateTime() || isDateTime64(); } constexpr bool isString() const { return idx == TypeIndex::String; } constexpr bool isFixedString() const { return idx == TypeIndex::FixedString; } @@ -562,6 +564,7 @@ template constexpr bool IsDataTypeDecimal = false; template constexpr bool IsDataTypeNumber = false; template constexpr bool IsDataTypeDateOrDateTime = false; template constexpr bool IsDataTypeDate = false; +template constexpr bool IsDataTypeInterval = false; template constexpr bool IsDataTypeEnum = false; template constexpr bool IsDataTypeDecimalOrNumber = IsDataTypeDecimal || IsDataTypeNumber; @@ -577,6 +580,8 @@ class DataTypeDate32; class DataTypeDateTime; class DataTypeDateTime64; +class DataTypeInterval; + template constexpr bool IsDataTypeDecimal> = true; template <> inline constexpr bool IsDataTypeDecimal = true; @@ -590,6 +595,8 @@ template <> inline constexpr bool IsDataTypeDateOrDateTime = tru template <> inline constexpr bool IsDataTypeDateOrDateTime = true; template <> inline constexpr bool IsDataTypeDateOrDateTime = true; +template <> inline constexpr bool IsDataTypeInterval = true; + template class DataTypeEnum; diff --git a/src/DataTypes/registerDataTypeDateTime.cpp b/src/DataTypes/registerDataTypeDateTime.cpp index 8080179ad476..ab9ddc370ab6 100644 --- a/src/DataTypes/registerDataTypeDateTime.cpp +++ 
b/src/DataTypes/registerDataTypeDateTime.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -32,7 +31,7 @@ String getExceptionMessage( template std::conditional_t, T> -getArgument(const ASTPtr & arguments, size_t argument_index, const char * argument_name [[maybe_unused]], const std::string context_data_type_name) +getArgument(const ASTPtr & arguments, size_t argument_index, const char * argument_name [[maybe_unused]], const std::string & context_data_type_name) { using NearestResultType = NearestFieldType; const auto field_type = Field::TypeToEnum::value; diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h index f726a8a2a465..55060b1592f2 100644 --- a/src/Dictionaries/DictionaryStructure.h +++ b/src/Dictionaries/DictionaryStructure.h @@ -34,7 +34,9 @@ enum class AttributeUnderlyingType : TypeIndexUnderlying map_item(Decimal32), map_item(Decimal64), map_item(Decimal128), map_item(Decimal256), map_item(DateTime64), - map_item(UUID), map_item(String), map_item(Array) + map_item(UUID), map_item(String), map_item(Array), + + map_item(IPv4), map_item(IPv6) }; #undef map_item diff --git a/src/Disks/getDiskConfigurationFromAST.cpp b/src/Disks/getDiskConfigurationFromAST.cpp index 4b1323b4db82..76a257d3b52f 100644 --- a/src/Disks/getDiskConfigurationFromAST.cpp +++ b/src/Disks/getDiskConfigurationFromAST.cpp @@ -31,7 +31,7 @@ namespace ErrorCodes message.empty() ? 
"" : ": " + message); } -Poco::AutoPtr getDiskConfigurationFromASTImpl(const std::string & root_name, const ASTs & disk_args, ContextPtr context) +Poco::AutoPtr getDiskConfigurationFromASTImpl(const ASTs & disk_args, ContextPtr context) { if (disk_args.empty()) throwBadConfiguration("expected non-empty list of arguments"); @@ -39,8 +39,6 @@ Poco::AutoPtr getDiskConfigurationFromASTImpl(const std::st Poco::AutoPtr xml_document(new Poco::XML::Document()); Poco::AutoPtr root(xml_document->createElement("disk")); xml_document->appendChild(root); - Poco::AutoPtr disk_configuration(xml_document->createElement(root_name)); - root->appendChild(disk_configuration); for (const auto & arg : disk_args) { @@ -62,7 +60,7 @@ Poco::AutoPtr getDiskConfigurationFromASTImpl(const std::st const std::string & key = key_identifier->name(); Poco::AutoPtr key_element(xml_document->createElement(key)); - disk_configuration->appendChild(key_element); + root->appendChild(key_element); if (!function_args[1]->as() && !function_args[1]->as()) throwBadConfiguration("expected values to be literals or identifiers"); @@ -75,9 +73,9 @@ Poco::AutoPtr getDiskConfigurationFromASTImpl(const std::st return xml_document; } -DiskConfigurationPtr getDiskConfigurationFromAST(const std::string & root_name, const ASTs & disk_args, ContextPtr context) +DiskConfigurationPtr getDiskConfigurationFromAST(const ASTs & disk_args, ContextPtr context) { - auto xml_document = getDiskConfigurationFromASTImpl(root_name, disk_args, context); + auto xml_document = getDiskConfigurationFromASTImpl(disk_args, context); Poco::AutoPtr conf(new Poco::Util::XMLConfiguration()); conf->load(xml_document); return conf; diff --git a/src/Disks/getDiskConfigurationFromAST.h b/src/Disks/getDiskConfigurationFromAST.h index 5697955e9146..f23fb37b9dc5 100644 --- a/src/Disks/getDiskConfigurationFromAST.h +++ b/src/Disks/getDiskConfigurationFromAST.h @@ -14,19 +14,19 @@ using DiskConfigurationPtr = Poco::AutoPtr; /** * Transform a list of 
pairs ( key1=value1, key2=value2, ... ), where keys and values are ASTLiteral or ASTIdentifier * into - * + * * value1 * value2 * ... - * + * * * Used in case disk configuration is passed via AST when creating * a disk object on-the-fly without any configuration file. */ -DiskConfigurationPtr getDiskConfigurationFromAST(const std::string & root_name, const ASTs & disk_args, ContextPtr context); +DiskConfigurationPtr getDiskConfigurationFromAST(const ASTs & disk_args, ContextPtr context); /// The same as above function, but return XML::Document for easier modification of result configuration. -[[ maybe_unused ]] Poco::AutoPtr getDiskConfigurationFromASTImpl(const std::string & root_name, const ASTs & disk_args, ContextPtr context); +[[ maybe_unused ]] Poco::AutoPtr getDiskConfigurationFromASTImpl(const ASTs & disk_args, ContextPtr context); /* * A reverse function. diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index a9a0e972bd16..da318303f624 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -26,8 +26,16 @@ namespace { std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context) { + const auto * function_args_expr = assert_cast(function.arguments.get()); + const auto & function_args = function_args_expr->children; + auto config = getDiskConfigurationFromAST(function_args, context); + std::string disk_name; - if (function.name == "disk") + if (config->has("name")) + { + disk_name = config->getString("name"); + } + else { /// We need a unique name for a created custom disk, but it needs to be the same /// after table is reattached or server is restarted, so take a hash of the disk @@ -36,21 +44,9 @@ namespace disk_name = DiskSelector::TMP_INTERNAL_DISK_PREFIX + toString(sipHash128(disk_setting_string.data(), disk_setting_string.size())); } - else - { - static constexpr std::string_view custom_disk_prefix = "disk_"; - - if (function.name.size() <= 
custom_disk_prefix.size() || !function.name.starts_with(custom_disk_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid disk name: {}", function.name); - - disk_name = function.name.substr(custom_disk_prefix.size()); - } auto result_disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr { - const auto * function_args_expr = assert_cast(function.arguments.get()); - const auto & function_args = function_args_expr->children; - auto config = getDiskConfigurationFromAST(disk_name, function_args, context); - auto disk = DiskFactory::instance().create(disk_name, *config, disk_name, context, disks_map); + auto disk = DiskFactory::instance().create(disk_name, *config, "", context, disks_map); /// Mark that disk can be used without storage policy. disk->markDiskAsCustom(); return disk; diff --git a/src/Formats/NativeWriter.cpp b/src/Formats/NativeWriter.cpp index 8100a3868e64..70d5b7914a7a 100644 --- a/src/Formats/NativeWriter.cpp +++ b/src/Formats/NativeWriter.cpp @@ -135,9 +135,19 @@ size_t NativeWriter::write(const Block & block) if (client_revision >= DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION) { auto info = column.type->getSerializationInfo(*column.column); - serialization = column.type->getSerialization(*info); + bool has_custom = false; + + if (client_revision >= DBMS_MIN_REVISION_WITH_SPARSE_SERIALIZATION) + { + serialization = column.type->getSerialization(*info); + has_custom = info->hasCustomSerialization(); + } + else + { + serialization = column.type->getDefaultSerialization(); + column.column = recursiveRemoveSparse(column.column); + } - bool has_custom = info->hasCustomSerialization(); writeBinary(static_cast(has_custom), ostr); if (has_custom) info->serialializeKindBinary(ostr); diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 06436488050b..6c255946f00f 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -105,6 +105,9 @@ list (APPEND PRIVATE_LIBS 
clickhouse_functions_jsonpath) add_subdirectory(keyvaluepair) list (APPEND OBJECT_LIBS $) +add_subdirectory(Kusto) +list (APPEND OBJECT_LIBS $) + # Signed integer overflow on user-provided data inside boost::geometry - ignore. set_source_files_properties("pointInPolygon.cpp" PROPERTIES COMPILE_FLAGS -fno-sanitize=signed-integer-overflow) diff --git a/src/Functions/DivisionUtils.h b/src/Functions/DivisionUtils.h index ff07309e248d..e693318c15a8 100644 --- a/src/Functions/DivisionUtils.h +++ b/src/Functions/DivisionUtils.h @@ -65,6 +65,7 @@ struct DivideIntegralImpl { using ResultType = typename NumberTraits::ResultOfIntegerDivision::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template @@ -117,6 +118,7 @@ struct ModuloImpl using IntegerBType = typename NumberTraits::ToInteger::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index c699da4eaf60..27df4620339e 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -98,6 +98,9 @@ template constexpr bool IsIntegralOrExtendedOrDecimal = IsIntegralOrExtended || IsDataTypeDecimal; +template constexpr bool IsInterval = false; +template <> inline constexpr bool IsInterval = true; + template constexpr bool IsFloatingPoint = false; template <> inline constexpr bool IsFloatingPoint = true; template <> inline constexpr bool IsFloatingPoint = true; @@ -128,6 +131,7 @@ struct BinaryOperationTraits { using T0 = typename LeftDataType::FieldType; using T1 = typename RightDataType::FieldType; + private: /// it's not correct for Decimal using Op = Operation; @@ -209,8 +213,6 @@ template static void NO_INLINE process(const A * __restrict 
a, const B * __restrict b, ResultType * __restrict c, size_t size, const NullMap * right_nullmap = nullptr) @@ -739,6 +741,8 @@ class FunctionBinaryArithmetic : public IFunction { static constexpr bool is_plus = IsOperation::plus; static constexpr bool is_minus = IsOperation::minus; + static constexpr bool is_modulo = IsOperation::modulo; + static constexpr bool is_modulo_or_zero = IsOperation::modulo_or_zero; static constexpr bool is_multiply = IsOperation::multiply; static constexpr bool is_division = IsOperation::division; static constexpr bool is_bit_hamming_distance = IsOperation::bit_hamming_distance; @@ -777,8 +781,31 @@ class FunctionBinaryArithmetic : public IFunction }); } + static ColumnsWithTypeAndName switchArgumentOrder(const ColumnsWithTypeAndName & arguments) + { + auto new_arguments = arguments; + + /// Interval argument must be second. + if (isDateOrDate32(arguments[1].type) || isDateTime(arguments[1].type) || isDateTime64(arguments[1].type)) + std::swap(new_arguments[0], new_arguments[1]); + + /// Change interval argument type to its representation + if (WhichDataType(new_arguments[1].type).isInterval()) + new_arguments[1].type = std::make_shared>(); + + return new_arguments; + } + + static FunctionOverloadResolverPtr getFunctionForDateTimeArithmetic(const DataTypePtr & type0, const DataTypePtr & type1, ContextPtr context) + { + if (isDateTime64(type0) && isDateTime64(type1) && is_minus) + return FunctionFactory::instance().get("dateTime64Diff", context); + + return {}; + } + static FunctionOverloadResolverPtr - getFunctionForIntervalArithmetic(const DataTypePtr & type0, const DataTypePtr & type1, ContextPtr context) + getFunctionForDateTimeIntervalArithmetic(const DataTypePtr & type0, const DataTypePtr & type1, ContextPtr context) { bool first_is_date_or_datetime = isDateOrDate32(type0) || isDateTime(type0) || isDateTime64(type0); bool second_is_date_or_datetime = isDateOrDate32(type1) || isDateTime(type1) || isDateTime64(type1); @@ -1089,18 
+1116,37 @@ class FunctionBinaryArithmetic : public IFunction ColumnPtr executeDateTimeIntervalPlusMinus(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, const FunctionOverloadResolverPtr & function_builder) const { - ColumnsWithTypeAndName new_arguments = arguments; + const auto new_arguments = switchArgumentOrder(arguments); + auto function = function_builder->build(new_arguments); + return function->execute(new_arguments, result_type, input_rows_count); + } - /// Interval argument must be second. - if (isDateOrDate32(arguments[1].type) || isDateTime(arguments[1].type) || isDateTime64(arguments[1].type)) - std::swap(new_arguments[0], new_arguments[1]); + ColumnPtr + executeInterval(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const + { + const auto convert_argument = [this, &input_rows_count](const ColumnWithTypeAndName & argument) + { + if (const WhichDataType which_data_type(*argument.type); which_data_type.isInterval()) + { + const ColumnsWithTypeAndName conversion_args{ + argument, + createConstColumnWithTypeAndName( + DataTypeNumber().getName(), "target_type")}; - /// Change interval argument type to its representation - if (WhichDataType(new_arguments[1].type).isInterval()) - new_arguments[1].type = std::make_shared>(); + const auto converted = executeFunctionCall(context, "cast", conversion_args, input_rows_count); + return asArgument(converted, argument.name); + } - auto function = function_builder->build(new_arguments); - return function->execute(new_arguments, result_type, input_rows_count); + return argument; + }; + + const ColumnsWithTypeAndName adjusted_args{convert_argument(arguments.front()), convert_argument(arguments.back())}; + const auto intermediate = executeFunctionCall(context, name, adjusted_args, input_rows_count); + + const ColumnsWithTypeAndName conversion_args + = {asArgument(intermediate, "intermediate"), + 
createConstColumnWithTypeAndName(result_type->getName(), "target_type")}; + return executeFunctionCall(context, "accurateCastOrNull", conversion_args, input_rows_count).first; } ColumnPtr executeDateTimeTupleOfIntervalsPlusMinus(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, @@ -1326,21 +1372,23 @@ class FunctionBinaryArithmetic : public IFunction return getReturnTypeImplStatic(new_arguments, context); } - /// Special case when the function is plus or minus, one of arguments is Date/DateTime and another is Interval. - if (auto function_builder = getFunctionForIntervalArithmetic(arguments[0], arguments[1], context)) + if (auto function_builder = getFunctionForDateTimeArithmetic(arguments[0], arguments[1], context)) { ColumnsWithTypeAndName new_arguments(2); - for (size_t i = 0; i < 2; ++i) new_arguments[i].type = arguments[i]; - /// Interval argument must be second. - if (isDateOrDate32(new_arguments[1].type) || isDateTime(new_arguments[1].type) || isDateTime64(new_arguments[1].type)) - std::swap(new_arguments[0], new_arguments[1]); + return function_builder->build(new_arguments)->getResultType(); + } - /// Change interval argument to its representation - new_arguments[1].type = std::make_shared>(); + /// Special case when the function is plus or minus, one of arguments is Date/DateTime and another is Interval. 
+ if (auto function_builder = getFunctionForDateTimeIntervalArithmetic(arguments[0], arguments[1], context)) + { + ColumnsWithTypeAndName new_arguments(2); + for (size_t i = 0; i < 2; ++i) + new_arguments[i].type = arguments[i]; + new_arguments = switchArgumentOrder(new_arguments); auto function = function_builder->build(new_arguments); return function->getResultType(); } @@ -1407,6 +1455,7 @@ class FunctionBinaryArithmetic : public IFunction { using LeftDataType = std::decay_t; using RightDataType = std::decay_t; + using ConcreteOp = Op; if constexpr ((std::is_same_v || std::is_same_v) || (std::is_same_v || std::is_same_v)) @@ -1414,7 +1463,7 @@ class FunctionBinaryArithmetic : public IFunction if constexpr (std::is_same_v && std::is_same_v) { - if constexpr (!Op::allow_fixed_string) + if constexpr (!ConcreteOp::allow_fixed_string) return false; else { @@ -1443,21 +1492,6 @@ class FunctionBinaryArithmetic : public IFunction type_res = std::make_shared(); return true; } - else if constexpr (std::is_same_v || std::is_same_v) - { - if constexpr (std::is_same_v && - std::is_same_v) - { - if constexpr (is_plus || is_minus) - { - if (left.getKind() == right.getKind()) - { - type_res = std::make_shared(left.getKind()); - return true; - } - } - } - } else { using ResultDataType = typename BinaryOperationTraits::ResultDataType; @@ -1504,6 +1538,42 @@ class FunctionBinaryArithmetic : public IFunction tz = &left; type_res = std::make_shared(*tz); } + else if constexpr (IsInterval || IsInterval) + { + if constexpr (!ConcreteOp::allow_interval) + return false; + + const auto nested_type = std::invoke( + [&]() -> std::shared_ptr + { + static constexpr auto is_left_interval = IsInterval; + static constexpr auto is_right_interval = IsInterval; + if constexpr ( + is_left_interval && !is_right_interval + && (is_division || is_modulo || is_modulo_or_zero || is_multiply)) + return std::make_shared(left.getKind()); + else if constexpr (!is_left_interval && is_right_interval && 
is_multiply) + return std::make_shared(right.getKind()); + else if constexpr ( + is_left_interval && is_right_interval + && (is_division || is_minus || is_modulo || is_modulo_or_zero || is_plus)) + { + if (left.getKind() != right.getKind()) + return {}; + else if constexpr (!is_division) + return std::make_shared(left.getKind()); + else + return std::make_shared(); + } + else + return {}; + }); + + if (nested_type) + type_res = makeNullable(nested_type); + + return static_cast(nested_type); + } else type_res = std::make_shared(); return true; @@ -1919,27 +1989,11 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A return executeAggregateAddition(arguments, result_type, input_rows_count); } - /// Special case - one or both arguments are IPv4 - if (isIPv4(arguments[0].type) || isIPv4(arguments[1].type)) - { - ColumnsWithTypeAndName new_arguments { - { - isIPv4(arguments[0].type) ? castColumn(arguments[0], std::make_shared()) : arguments[0].column, - isIPv4(arguments[0].type) ? std::make_shared() : arguments[0].type, - arguments[0].name, - }, - { - isIPv4(arguments[1].type) ? castColumn(arguments[1], std::make_shared()) : arguments[1].column, - isIPv4(arguments[1].type) ? std::make_shared() : arguments[1].type, - arguments[1].name - } - }; - - return executeImpl(new_arguments, result_type, input_rows_count); - } + if (auto function_builder = getFunctionForDateTimeArithmetic(arguments[0].type, arguments[1].type, context)) + return function_builder->build(arguments)->execute(arguments, result_type, input_rows_count); /// Special case when the function is plus or minus, one of arguments is Date/DateTime and another is Interval. 
- if (auto function_builder = getFunctionForIntervalArithmetic(arguments[0].type, arguments[1].type, context)) + if (auto function_builder = getFunctionForDateTimeIntervalArithmetic(arguments[0].type, arguments[1].type, context)) { return executeDateTimeIntervalPlusMinus(arguments, result_type, input_rows_count, function_builder); } @@ -1991,6 +2045,25 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A return wrapInNullable(res, arguments, result_type, input_rows_count); } + /// Special case - one or both arguments are IPv4 + if (isIPv4(arguments[0].type) || isIPv4(arguments[1].type)) + { + ColumnsWithTypeAndName new_arguments { + { + isIPv4(arguments[0].type) ? castColumn(arguments[0], std::make_shared()) : arguments[0].column, + isIPv4(arguments[0].type) ? std::make_shared() : arguments[0].type, + arguments[0].name, + }, + { + isIPv4(arguments[1].type) ? castColumn(arguments[1], std::make_shared()) : arguments[1].column, + isIPv4(arguments[1].type) ? std::make_shared() : arguments[1].type, + arguments[1].name + } + }; + + return executeImpl2(new_arguments, result_type, input_rows_count, right_nullmap); + } + const auto * const left_generic = left_argument.type.get(); const auto * const right_generic = right_argument.type.get(); ColumnPtr res; @@ -1999,6 +2072,7 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A { using LeftDataType = std::decay_t; using RightDataType = std::decay_t; + using ConcreteOp = Op; if constexpr ((std::is_same_v || std::is_same_v) || (std::is_same_v || std::is_same_v)) @@ -2006,7 +2080,7 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A if constexpr (std::is_same_v && std::is_same_v) { - if constexpr (!Op::allow_fixed_string) + if constexpr (!ConcreteOp::allow_fixed_string) return false; else return (res = executeFixedString(arguments)) != nullptr; @@ -2027,6 +2101,13 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & 
arguments, const A else if constexpr (std::is_same_v) return (res = executeStringInteger(arguments, left, right)) != nullptr; } + else if constexpr (IsInterval || IsInterval) + { + if constexpr (!ConcreteOp::allow_interval) + return false; + + return (res = executeInterval(arguments, result_type, input_rows_count)) != nullptr; + } else return (res = executeNumeric(arguments, left, right, right_nullmap)) != nullptr; }); diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index ff09274d9076..d8a2fd4197a2 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -1,3 +1,5 @@ +#include "FunctionFactory.h" + #include #include #include @@ -312,4 +314,16 @@ bool isDecimalOrNullableDecimal(const DataTypePtr & type) return isDecimal(assert_cast(type.get())->getNestedType()); } +std::pair executeFunctionCall( + const ContextPtr & context, const std::string & name, const ColumnsWithTypeAndName & arguments, const size_t input_rows_count) +{ + const auto function = FunctionFactory::instance().get(name, context)->build(arguments); + const auto & result_data_type = function->getResultType(); + return {function->execute(arguments, result_data_type, input_rows_count), result_data_type}; +} + +ColumnWithTypeAndName asArgument(const std::pair & column_with_type, const std::string_view name) +{ + return {column_with_type.first, column_with_type.second, std::string(name)}; +} } diff --git a/src/Functions/FunctionHelpers.h b/src/Functions/FunctionHelpers.h index 77affe8488d0..dca6f3a35562 100644 --- a/src/Functions/FunctionHelpers.h +++ b/src/Functions/FunctionHelpers.h @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB @@ -174,4 +175,15 @@ struct NullPresence NullPresence getNullPresense(const ColumnsWithTypeAndName & args); bool isDecimalOrNullableDecimal(const DataTypePtr & type); + +template +ColumnWithTypeAndName createConstColumnWithTypeAndName(const typename T::FieldType & value, const std::string & 
name, Args&&... args) +{ + return {T().createColumnConst(1, toField(value)), std::make_shared(std::forward(args)...), name}; +} + +std::pair executeFunctionCall( + const ContextPtr & context, const std::string & name, const ColumnsWithTypeAndName & arguments, size_t input_rows_count); + +ColumnWithTypeAndName asArgument(const std::pair & column_with_type, std::string_view name); } diff --git a/src/Functions/FunctionUnaryArithmetic.h b/src/Functions/FunctionUnaryArithmetic.h index 259dc1c42ba5..61b11d1c511b 100644 --- a/src/Functions/FunctionUnaryArithmetic.h +++ b/src/Functions/FunctionUnaryArithmetic.h @@ -307,11 +307,12 @@ class FunctionUnaryArithmetic : public IFunction result = std::make_shared(); } } - else if constexpr (std::is_same_v) + else if constexpr (std::is_same_v && !is_sign_function) { - if constexpr (!IsUnaryOperation::negate) + if constexpr (!Op::allow_interval) return false; - result = std::make_shared(type.getKind()); + + result = std::make_shared(type.getKind()); } else { diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 01e057e19a1e..5ed7728cdab4 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -117,17 +117,17 @@ REGISTER_FUNCTION(Conversion) factory.registerFunction(); factory.registerFunction(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); - factory.registerFunction>(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + 
factory.registerFunction(); } } diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index b272e88d17d0..e2477c177006 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -2552,6 +2552,17 @@ using FunctionToDate32 = FunctionConvert; using FunctionToDateTime32 = FunctionConvert; using FunctionToDateTime64 = FunctionConvert; +using FunctionToIntervalNanosecond = FunctionConvert; +using FunctionToIntervalMicrosecond = FunctionConvert; +using FunctionToIntervalMillisecond = FunctionConvert; +using FunctionToIntervalSecond = FunctionConvert; +using FunctionToIntervalMinute = FunctionConvert; +using FunctionToIntervalHour = FunctionConvert; +using FunctionToIntervalDay = FunctionConvert; +using FunctionToIntervalWeek = FunctionConvert; +using FunctionToIntervalMonth = FunctionConvert; +using FunctionToIntervalQuarter = FunctionConvert; +using FunctionToIntervalYear = FunctionConvert; using FunctionToUUID = FunctionConvert>; using FunctionToIPv4 = FunctionConvert>; using FunctionToIPv6 = FunctionConvert>; @@ -2914,8 +2925,8 @@ class FunctionCast final : public FunctionCastBase { TypeIndex from_type_index = from_type->getTypeId(); WhichDataType which(from_type_index); - bool can_apply_accurate_cast = (cast_type == CastType::accurate || cast_type == CastType::accurateOrNull) - && (which.isInt() || which.isUInt() || which.isFloat()); + const bool can_apply_accurate_cast = (cast_type == CastType::accurate || cast_type == CastType::accurateOrNull) + && (which.isInt() || which.isUInt() || which.isFloat() || which.isInterval()); if (requested_result_is_nullable && checkAndGetDataType(from_type.get())) { @@ -2927,8 +2938,30 @@ class FunctionCast final : public FunctionCastBase } else if (!can_apply_accurate_cast) { - FunctionPtr function = FunctionTo::Type::create(context); - return createFunctionAdaptor(function, from_type); + if constexpr (std::is_same_v) + { + const auto to_interval_function = 
std::invoke( + [interval_kind = to_type->getKind()] + { + switch (interval_kind) + { +#define DECLARE_CASE(NAME) \ + case IntervalKind::NAME: \ + return FunctionToInterval##NAME::create(); + FOR_EACH_INTERVAL_KIND(DECLARE_CASE) +#undef DECLARE_CASE + } + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected interval kind"); + }); + + return createFunctionAdaptor(to_interval_function, from_type); + } + else + { + FunctionPtr function = FunctionTo::Type::create(context); + return createFunctionAdaptor(function, from_type); + } } auto wrapper_cast_type = cast_type; @@ -2942,9 +2975,9 @@ class FunctionCast final : public FunctionCastBase using LeftDataType = typename Types::LeftType; using RightDataType = typename Types::RightType; - if constexpr (IsDataTypeNumber) + if constexpr (IsDataTypeNumber || IsDataTypeInterval) { - if constexpr (IsDataTypeNumber) + if constexpr (IsDataTypeNumber || IsDataTypeInterval) { if (wrapper_cast_type == CastType::accurate) { @@ -3984,6 +4017,7 @@ class FunctionCast final : public FunctionCastBase std::is_same_v || std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v) diff --git a/src/Functions/FunctionsStringSearchToString.h b/src/Functions/FunctionsStringSearchToString.h index 978a84de472d..6f0e9b2a6449 100644 --- a/src/Functions/FunctionsStringSearchToString.h +++ b/src/Functions/FunctionsStringSearchToString.h @@ -85,4 +85,63 @@ class FunctionsStringSearchToString : public IFunction } }; + +template +class KqlStringSearchToString : public IFunction +{ +public: + static constexpr auto name = Name::name; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 3; } + + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + + bool 
isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isString(arguments[0])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); + + if (!isString(arguments[1])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[1]->getName(), getName()); + + if (!isUnsignedInteger(arguments[2])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[2]->getName(), getName()); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const ColumnPtr column = arguments[0].column; + const ColumnPtr column_needle = arguments[1].column; + const auto capture = arguments[2].column->getUInt(0); + + const ColumnConst * col_needle = typeid_cast(&*column_needle); + if (!col_needle) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument of function {} must be constant string", getName()); + + if (const ColumnString * col = checkAndGetColumn(column.get())) + { + auto col_res = ColumnString::create(); + + ColumnString::Chars & vec_res = col_res->getChars(); + ColumnString::Offsets & offsets_res = col_res->getOffsets(); + Impl::vector(col->getChars(), col->getOffsets(), col_needle->getValue(), static_cast(capture), vec_res, offsets_res); + + return col_res; + } + else + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()); + } +}; } diff --git a/src/Functions/GCDLCMImpl.h b/src/Functions/GCDLCMImpl.h index df531363c31f..567853bd62c0 100644 --- a/src/Functions/GCDLCMImpl.h +++ b/src/Functions/GCDLCMImpl.h @@ -23,6 +23,7 @@ 
struct GCDLCMImpl { using ResultType = typename NumberTraits::ResultOfAdditionMultiplication::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/HasTokenImpl.h b/src/Functions/HasTokenImpl.h index ab6b6399486b..661b8ae97536 100644 --- a/src/Functions/HasTokenImpl.h +++ b/src/Functions/HasTokenImpl.h @@ -39,9 +39,6 @@ struct HasTokenImpl if (start_pos != nullptr) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' does not support start_pos argument", name); - if (pattern.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle cannot be empty, because empty string isn't a token"); - if (haystack_offsets.empty()) return; @@ -49,7 +46,7 @@ struct HasTokenImpl const UInt8 * const end = haystack_data.data() + haystack_data.size(); const UInt8 * pos = begin; - if (!std::none_of(pattern.begin(), pattern.end(), isTokenSeparator)) + if (const auto has_separator = std::any_of(pattern.cbegin(), pattern.cend(), isTokenSeparator); has_separator || pattern.empty()) { if (res_null) { @@ -57,8 +54,12 @@ struct HasTokenImpl std::ranges::fill(res_null->getData(), true); return; } - else + else if (has_separator) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle must not contain whitespace or separator characters"); + else if (pattern.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle cannot be empty, because empty string isn't a token"); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected internal state"); } size_t pattern_size = pattern.size(); diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 650b54d9a377..d119b15733ba 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -361,7 +361,7 @@ ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments, /// If default of sparse column is changed after execution of 
function, convert to full column. /// If there are any default in non-zero position after execution of function, convert to full column. /// Currently there is no easy way to rebuild sparse column with new offsets. - if (!result_type->supportsSparseSerialization() || !res->isDefaultAt(0) || res->getNumberOfDefaultRows() != 1) + if (!result_type->canBeInsideSparseColumns() || !res->isDefaultAt(0) || res->getNumberOfDefaultRows() != 1) { const auto & offsets_data = assert_cast &>(*sparse_offsets).getData(); return res->createWithOffsets(offsets_data, (*res)[0], input_rows_count, /*shift=*/ 1); diff --git a/src/Functions/IsOperation.h b/src/Functions/IsOperation.h index 0c54901579e5..0ccc66bb1b9e 100644 --- a/src/Functions/IsOperation.h +++ b/src/Functions/IsOperation.h @@ -17,6 +17,7 @@ template struct DivideIntegralOrZeroImpl; template struct LeastBaseImpl; template struct GreatestBaseImpl; template struct ModuloImpl; +template struct ModuloOrZeroImpl; template struct PositiveModuloImpl; template struct EqualsOp; template struct NotEqualsOp; @@ -54,6 +55,7 @@ struct IsOperation static constexpr bool div_int = IsSameOperation::value; static constexpr bool div_int_or_zero = IsSameOperation::value; static constexpr bool modulo = IsSameOperation::value; + static constexpr bool modulo_or_zero = IsSameOperation::value; static constexpr bool positive_modulo = IsSameOperation::value; static constexpr bool least = IsSameOperation::value; static constexpr bool greatest = IsSameOperation::value; diff --git a/src/Functions/Kusto/CMakeLists.txt b/src/Functions/Kusto/CMakeLists.txt new file mode 100644 index 000000000000..3c534905d227 --- /dev/null +++ b/src/Functions/Kusto/CMakeLists.txt @@ -0,0 +1,8 @@ +include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") +add_headers_and_sources(clickhouse_functions_kusto .) 
+add_library(clickhouse_functions_kusto OBJECT ${clickhouse_functions_kusto_sources} ${clickhouse_functions_kusto_headers}) +target_link_libraries(clickhouse_functions_kusto PRIVATE dbms clickhouse_functions_gatherutils) + +if (OMIT_HEAVY_DEBUG_SYMBOLS) + target_compile_options(clickhouse_functions_kusto PRIVATE "-g0") +endif() diff --git a/src/Functions/Kusto/KqlArrayIif.cpp b/src/Functions/Kusto/KqlArrayIif.cpp new file mode 100644 index 000000000000..bbee6b64bdc9 --- /dev/null +++ b/src/Functions/Kusto/KqlArrayIif.cpp @@ -0,0 +1,147 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionKqlArrayIif : public KqlFunctionBase +{ +public: + static constexpr auto name = "kql_ArrayIif"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + explicit FunctionKqlArrayIif(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlArrayIif() override = default; + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 3; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + bool isVariadic() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } + + static bool isDataTypeBoolORBoolConvertible(std::string_view datatype_name); + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + +private: + ContextPtr context; +}; + +bool 
FunctionKqlArrayIif::isDataTypeBoolORBoolConvertible(std::string_view datatype_name) +{ + if (datatype_name.find("Int") != datatype_name.npos || + datatype_name.find("Float") != datatype_name.npos || + datatype_name.find("Decimal") != datatype_name.npos || + datatype_name.find("Bool") != datatype_name.npos) + return true; + return false; +} + +DataTypePtr FunctionKqlArrayIif::getReturnTypeImpl(const DataTypes & arguments) const +{ + const auto * array_type0 = typeid_cast(arguments[0].get()); + if (!array_type0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be an array but it has type {}", getName(), + arguments[0]->getName()); + + DataTypePtr nested_type1, nested_type2; + + const auto * array_type1 = typeid_cast(arguments[1].get()); + if (!array_type1) + nested_type1 = makeNullable(arguments[1]->getPtr()); + else + nested_type1 = makeNullable(array_type1->getNestedType()); + + const auto * array_type2 = typeid_cast(arguments[2].get()); + if (!array_type2) + nested_type2 = makeNullable(arguments[2]->getPtr()); + else + nested_type2 = makeNullable(array_type2->getNestedType()); + + if (nested_type1->getName() != nested_type2->getName()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Last two arguments for function {} must have same nested data type", getName()); + + DataTypes types = {nested_type1, nested_type2}; + + return std::make_shared(getLeastSupertype(types)); +} + +ColumnPtr FunctionKqlArrayIif::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const +{ + const DataTypePtr & elem_type = static_cast(*result_type).getNestedType(); + auto out = ColumnArray::create(elem_type->createColumn()); + + if (input_rows_count == 0) + return out; + + IColumn & out_data = out->getData(); + IColumn::Offsets & out_offsets = out->getOffsets(); + size_t total_length = 0; + for (size_t i = 0; i < input_rows_count; i++) + { + Field array0; + 
arguments[0].column->get(i, array0); + total_length += array0.get().size(); + } + + out_data.reserve(total_length); + out_offsets.resize(input_rows_count); + IColumn::Offset current_offset = 0; + + for (size_t i = 0; i < input_rows_count; i++) + { + Field array0; + arguments[0].column->get(i, array0); + size_t len0 = array0.get().size(); + for (size_t k = 0; k < len0; k++) + { + if (!isDataTypeBoolORBoolConvertible(array0.get().at(k).getTypeName())) + out_data.insert(Field()); + else + { + Field temp; + std::string dump = array0.get().at(k).dump(); + dump = dump.substr(dump.find('_') + 1); + if (dump == "0" || dump == "-0") + arguments[2].column->get(i, temp); + else + arguments[1].column->get(i, temp); + if (temp.getTypeName() == "Array") + { + if (k < temp.get().size()) + out_data.insert(temp.get().at(k)); + else + out_data.insert(Field()); + } + else + out_data.insert(temp); + } + } + current_offset += len0; + out_offsets[i] = current_offset; + } + return out; +} + +REGISTER_FUNCTION(KqlArrayIif) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/Kusto/KqlArraySort.cpp b/src/Functions/Kusto/KqlArraySort.cpp new file mode 100644 index 000000000000..5be36328cc37 --- /dev/null +++ b/src/Functions/Kusto/KqlArraySort.cpp @@ -0,0 +1,264 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; +} + +template +class FunctionKqlArraySort : public KqlFunctionBase +{ +public: + static constexpr auto name = Name::name; + explicit FunctionKqlArraySort(ContextPtr context_) : context(context_) { } + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool 
useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.empty()) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} needs at least one argument; passed {}.", + getName(), + arguments.size()); + + auto array_count = arguments.size(); + + if (!isArray(arguments.at(array_count - 1).type)) + --array_count; + + DataTypes nested_types; + for (size_t index = 0; index < array_count; ++index) + { + const DataTypeArray * array_type = checkAndGetDataType(arguments[index].type.get()); + if (!array_type) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument {} of function {} must be array. Found {} instead.", + index + 1, + getName(), + arguments[0].type->getName()); + + nested_types.emplace_back(array_type->getNestedType()); + } + + DataTypes data_types(array_count); + + for (size_t i = 0; i < array_count; ++i) + data_types[i] = std::make_shared(makeNullable(nested_types[i])); + + return std::make_shared(data_types); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + size_t array_count = arguments.size(); + const auto & last_arg = arguments[array_count - 1]; + + size_t input_rows_count_local = input_rows_count; + + bool null_last = true; + if (!isArray(last_arg.type)) + { + --array_count; + null_last = check_condition(last_arg, context, input_rows_count_local); + } + + ColumnsWithTypeAndName new_args; + ColumnPtr first_array_column; + std::unordered_set null_indices; + DataTypes nested_types; + + String sort_function = is_desc ? 
"arrayReverseSort" : "arraySort"; + + for (size_t i = 0; i < array_count; ++i) + { + ColumnPtr holder = arguments[i].column->convertToFullColumnIfConst(); + + const ColumnArray * column_array = checkAndGetColumn(holder.get()); + const DataTypeArray * array_type = checkAndGetDataType(arguments[i].type.get()); + + if (!column_array) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Argument {} of function {} must be array. Found column {} instead.", + i + 1, + getName(), + holder->getName()); + + nested_types.emplace_back(makeNullable(array_type->getNestedType())); + if (i == 0) + { + first_array_column = holder; + new_args.push_back(arguments[i]); + } + else if (!column_array->hasEqualOffsets(static_cast(*first_array_column))) + { + null_indices.insert(i); + } + else + new_args.push_back(arguments[i]); + } + + auto zipped + = FunctionFactory::instance().get("arrayZip", context)->build(new_args)->execute(new_args, result_type, input_rows_count_local); + + ColumnsWithTypeAndName sort_arg({{zipped, std::make_shared(result_type), "zipped"}}); + auto sorted_tuple + = FunctionFactory::instance().get(sort_function, context)->build(sort_arg)->execute(sort_arg, result_type, input_rows_count_local); + + auto null_type = std::make_shared(std::make_shared()); + + Columns tuple_columns(array_count); + size_t sorted_index = 0; + for (size_t i = 0; i < array_count; ++i) + { + if (null_indices.contains(i)) + { + auto fun_array = FunctionFactory::instance().get("array", context); + + DataTypePtr arg_type + = std::make_shared(makeNullable(nested_types[i])); + + ColumnsWithTypeAndName null_array_arg({ + {null_type->createColumnConstWithDefaultValue(input_rows_count_local), null_type, "NULL"}, + }); + + tuple_columns[i] = fun_array->build(null_array_arg)->execute(null_array_arg, arg_type, input_rows_count_local); + tuple_columns[i] = tuple_columns[i]->convertToFullColumnIfConst(); + } + else + { + ColumnsWithTypeAndName untuple_args( + {{ColumnWithTypeAndName(sorted_tuple, 
std::make_shared(result_type), "sorted")}, + {DataTypeUInt8().createColumnConst(1, toField(UInt8(sorted_index + 1))), std::make_shared(), ""}}); + auto tuple_coulmn = FunctionFactory::instance() + .get("tupleElement", context) + ->build(untuple_args) + ->execute(untuple_args, result_type, input_rows_count_local); + + auto out_tmp = ColumnArray::create(nested_types[i]->createColumn()); + + size_t array_size = tuple_coulmn->size(); + const auto * arr = checkAndGetColumn(tuple_coulmn.get()); + + for (size_t j = 0; j < array_size; ++j) + { + Field arr_field; + arr->get(j, arr_field); + out_tmp->insert(arr_field); + } + + tuple_columns[i] = std::move(out_tmp); + + ++sorted_index; + } + } + + if (!null_last) + { + Columns adjusted_columns(array_count); + + ColumnWithTypeAndName arg_of_index{nullptr, std::make_shared(nested_types[0]), "array"}; + arg_of_index.column = tuple_columns[0]; + + auto inside_null_type = nested_types[0]; + ColumnsWithTypeAndName indexof_args({ + arg_of_index, + {inside_null_type->createColumnConstWithDefaultValue(input_rows_count_local), inside_null_type, "NULL"}, + }); + + auto null_index_datetype = std::make_shared(); + + ColumnWithTypeAndName slice_index{nullptr, null_index_datetype, ""}; + slice_index.column = FunctionFactory::instance() + .get("indexOf", context) + ->build(indexof_args) + ->execute(indexof_args, result_type, input_rows_count_local); + + auto null_index_in_array = slice_index.column->get64(0); + if (null_index_in_array > 0) + { + ColumnWithTypeAndName slice_index_len{nullptr, null_index_datetype, ""}; + slice_index_len.column = DataTypeUInt64().createColumnConst(1, toField(UInt64(null_index_in_array - 1))); + + auto fun_slice = FunctionFactory::instance().get("arraySlice", context); + + for (size_t i = 0; i < array_count; ++i) + { + if (null_indices.contains(i)) + { + adjusted_columns[i] = std::move(tuple_columns[i]); + } + else + { + DataTypePtr arg_type = std::make_shared(nested_types[i]); + + ColumnsWithTypeAndName 
slice_args_left( + {{ColumnWithTypeAndName(tuple_columns[i], arg_type, "array")}, + {DataTypeUInt8().createColumnConst(1, toField(UInt8(1))), std::make_shared(), ""}, + slice_index_len}); + + ColumnsWithTypeAndName slice_args_right( + {{ColumnWithTypeAndName(tuple_columns[i], arg_type, "array")}, slice_index}); + ColumnWithTypeAndName arr_left{ + fun_slice->build(slice_args_left)->execute(slice_args_left, arg_type, input_rows_count_local), arg_type, ""}; + ColumnWithTypeAndName arr_right{ + fun_slice->build(slice_args_right)->execute(slice_args_right, arg_type, input_rows_count_local), arg_type, ""}; + + ColumnsWithTypeAndName arr_cancat({arr_right, arr_left}); + auto out_tmp = FunctionFactory::instance() + .get("arrayConcat", context) + ->build(arr_cancat) + ->execute(arr_cancat, arg_type, input_rows_count_local); + adjusted_columns[i] = std::move(out_tmp); + } + } + return ColumnTuple::create(adjusted_columns); + } + } + return ColumnTuple::create(tuple_columns); + } + +private: + ContextPtr context; +}; + +struct NameKqlArraySortAsc +{ + static constexpr auto name = "kql_array_sort_asc"; +}; + +struct NameKqlArraySortDesc +{ + static constexpr auto name = "kql_array_sort_desc"; +}; + +using FunctionKqlArraySortAsc = FunctionKqlArraySort; +using FunctionKqlArraySortDesc = FunctionKqlArraySort; + +REGISTER_FUNCTION(KqlArraySort) +{ + factory.registerFunction(); + factory.registerFunction(); +} + +} diff --git a/src/Functions/Kusto/KqlFunctionBase.h b/src/Functions/Kusto/KqlFunctionBase.h new file mode 100644 index 000000000000..efdf8982f4b1 --- /dev/null +++ b/src/Functions/Kusto/KqlFunctionBase.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "Functions/array/FunctionArrayMapped.h" + +namespace DB +{ + +class KqlFunctionBase : public IFunction +{ +public: + static bool check_condition (const ColumnWithTypeAndName & condition, ContextPtr context, size_t input_rows_count) + { + ColumnsWithTypeAndName 
if_columns( + { + condition, + {DataTypeUInt8().createColumnConst(1, toField(UInt8(1))), std::make_shared(), ""}, + {DataTypeUInt8().createColumnConst(1, toField(UInt8(2))), std::make_shared(), ""} + }); + auto if_res = FunctionFactory::instance().get("if", context)->build(if_columns)->execute(if_columns, std::make_shared(), input_rows_count); + auto result = if_res->getUInt(0); + return (result == 1); + } +}; + +} diff --git a/src/Functions/Kusto/KqlGetType.cpp b/src/Functions/Kusto/KqlGetType.cpp new file mode 100644 index 000000000000..670edc912526 --- /dev/null +++ b/src/Functions/Kusto/KqlGetType.cpp @@ -0,0 +1,43 @@ +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +class FunctionKqlGetType : public IFunction +{ +public: + static constexpr auto name = "kql_gettype"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlGetType(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlGetType() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return std::make_shared(); } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + +private: + ContextPtr context; +}; + +ColumnPtr +FunctionKqlGetType::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, const size_t input_rows_count) const +{ + const auto & argument = arguments.front(); + return DataTypeString().createColumnConst(input_rows_count, toString(toKQLDataType(argument.type->getTypeId(), KQLScope::Row))); +} + + +REGISTER_FUNCTION(KqlGetType) +{ + factory.registerFunction(); +} +} diff --git 
a/src/Functions/Kusto/KqlIndexOf.cpp b/src/Functions/Kusto/KqlIndexOf.cpp new file mode 100644 index 000000000000..57c97100aab8 --- /dev/null +++ b/src/Functions/Kusto/KqlIndexOf.cpp @@ -0,0 +1,151 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionKqlIndexOf : public KqlFunctionBase +{ +public: + static constexpr auto name = "kql_indexof"; + explicit FunctionKqlIndexOf(ContextPtr context_) : context(context_) { } + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() < 2 || 5 < arguments.size()) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2 to 5.", + getName(), + arguments.size()); + + if (arguments.size() >= 3) + { + for (size_t i = 3; i < arguments.size(); ++i) + if (!isInteger(arguments.at(i).type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of argument of function {}", getName()); + } + + return std::make_shared(std::make_shared()); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + int64_t occurrence = 1; + + auto null_type = std::make_shared(std::make_shared()); + auto null_column = 
null_type->createColumnConstWithDefaultValue(1); + auto not_found_column = DataTypeUInt64().createColumnConst(1, toField(UInt64(0))); + + ColumnPtr column_source = arguments[0].column; + ColumnPtr column_lookup = arguments[1].column; + ColumnPtr column_start_pos = DataTypeUInt64().createColumnConst(input_rows_count, toField(UInt64(1))); + ColumnPtr column_length = DataTypeInt64().createColumnConst(input_rows_count, toField(Int64(-1))); + + if (!isString(arguments[0].type)) + column_source = FunctionFactory::instance() + .get("toString", context) + ->build({arguments[0]}) + ->execute({arguments[0]}, std::make_shared(), input_rows_count); + + if (!isString(arguments[1].type)) + column_lookup = FunctionFactory::instance() + .get("toString", context) + ->build({arguments[1]}) + ->execute({arguments[1]}, std::make_shared(), input_rows_count); + + if (arguments.size() >= 3) + { + auto input_start_column = ColumnUInt64::create(); + for (size_t j = 0; j < input_rows_count; ++j) + { + StringRef source = column_source->getDataAt(j); + auto start_pos = arguments[2].column->getInt(j); + if (start_pos < 0) + { + start_pos = source.size + start_pos; + if (start_pos < 0) + start_pos = 0; + } + ++start_pos; + input_start_column->insertValue(start_pos); + } + column_start_pos = std::move(input_start_column); + } + + if (arguments.size() >= 4) + column_length = arguments[3].column; + + if (arguments.size() == 5) + occurrence = arguments[4].column->getInt(0); //must be a constant + + if (occurrence < 0) + return null_column; + + ColumnPtr last_pos = not_found_column; + for (auto i = 0; i < occurrence; ++i) + { + ColumnsWithTypeAndName position_args( + {{ColumnWithTypeAndName(column_source, std::make_shared(), "source")}, + {ColumnWithTypeAndName(column_lookup, std::make_shared(), "lookup")}, + {ColumnWithTypeAndName(column_start_pos, std::make_shared(), "start_pos")}}); + auto pos = FunctionFactory::instance() + .get("position", context) + ->build(position_args) + 
->execute(position_args, result_type, input_rows_count); + last_pos = pos; + + auto new_pos_column = ColumnUInt64::create(); + for (size_t j = 0; j < input_rows_count; ++j) + { + new_pos_column->insertValue(pos->getInt(j) + 1); + } + column_start_pos = std::move(new_pos_column); + } + + auto null_map = ColumnUInt8::create(input_rows_count); + auto result_column = ColumnInt64::create(); + for (size_t i = 0; i < input_rows_count; ++i) + { + auto length = column_length->getInt(i); + null_map->getData()[i] = length < -1; + + auto pos_val = last_pos->get64(i); + if (length > -1 && last_pos->get64(i) > UInt64(length) + 1) + pos_val = 0; + result_column->insertValue(Int64(pos_val) -1); // used for kql, so returned index is 0 based + } + return ColumnNullable::create(std::move(result_column), std::move(null_map)); + } + +private: + ContextPtr context; +}; + +REGISTER_FUNCTION(KqlIndexOf) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/Kusto/KqlRange.cpp b/src/Functions/Kusto/KqlRange.cpp new file mode 100644 index 000000000000..25cffd9c2258 --- /dev/null +++ b/src/Functions/Kusto/KqlRange.cpp @@ -0,0 +1,865 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace DB +{ +namespace ErrorCodes +{ + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +static constexpr size_t max_array_size_as_field = 1000000; // the value from ColumnArray.cpp + +class FunctionKqlRange : public IFunction +{ +public: + static constexpr auto name = "kql_range"; + + const size_t max_elements; + static FunctionPtr create(ContextPtr context_) { return std::make_shared(std::move(context_)); } + explicit FunctionKqlRange(ContextPtr context) : max_elements(context->getSettingsRef().function_range_max_elements_in_block) { } + +private: + String 
getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() < 2 || 3 < arguments.size()) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3.", + getName(), + arguments.size()); + + const auto & start = arguments[0]; + const auto & end = arguments[1]; + + WhichDataType start_type(*start); + WhichDataType end_type(*end); + + auto return_type = start; + if (start_type.isNullable()) + { + const auto * nullable_type = checkAndGetDataType(start.get()); + if (nullable_type) + { + return_type = nullable_type->getNestedType(); + start_type = WhichDataType(nullable_type->getNestedType()); + } + } + if (end_type.isNullable()) + { + const auto * nullable_type = checkAndGetDataType(end.get()); + if (nullable_type) + { + end_type = WhichDataType(nullable_type->getNestedType()); + } + } + if ((!start_type.isDateTime64()) && !start_type.isInterval() && !isNumber(start_type)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type of first argument of function {}, expected DateTime64, Interval or Number", + getName()); + + if ((start_type.isDateTime64() && !end_type.isDateTime64()) || (!start_type.isDateTime64() && end_type.isDateTime64())) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Type not match of first and second argument of function {}", getName()); + + if ((start_type.isInterval() && !end_type.isInterval()) || (!start_type.isInterval() && end_type.isInterval())) + throw 
Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Type not match of first and second argument of function {}", getName()); + + if (arguments.size() == 3) + { + const auto & step = arguments[2]; + const WhichDataType step_type(*step); + if (!isNumber(step_type) && !step_type.isInterval()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type of third argument of function {}, expected Interval or Number", + getName()); + + if ((start_type.isInterval() || start_type.isDateTime64()) && !step_type.isInterval()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Type not match of first argument and step of function {}", getName()); + } + + if (start_type.isDateTime64() || start_type.isInterval()) + return std::make_shared(return_type); + DataTypePtr common_type = getLeastSupertype(arguments); + return std::make_shared(common_type); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + DataTypePtr elem_type = checkAndGetDataType(result_type.get())->getNestedType(); + WhichDataType which(elem_type); + ColumnPtr res; + const auto & start = arguments[0]; + WhichDataType start_type(*start.type); + + ColumnsWithTypeAndName new_args; + + for (size_t i = 0; i < arguments.size(); ++i) + { + const auto & arg_col = arguments[i]; + WhichDataType arg_type(*arg_col.type); + + if (arg_type.isNullable()) + { + const auto * nullable_type = checkAndGetDataType(arg_col.type.get()); + const auto & nested_type = nullable_type->getNestedType(); + const auto * nullable_column = checkAndGetColumn(*arguments[i].column); + ColumnPtr nested_column = nullable_column->getNestedColumnPtr(); + ColumnWithTypeAndName new_arg{nullptr, nested_type, "new_arg"}; + new_arg.column = nested_column; + new_args.push_back(new_arg); + + if (i == 0) + start_type = nested_type; + } + else + new_args.push_back(arguments[i]); + } + + if (start_type.isDateTime64()) + { + return 
executeDateTime64(new_args, result_type, input_rows_count); + } + + if (start_type.isInterval()) + { + return executeInterval(new_args, result_type, input_rows_count); + } + + Columns columns_holder(3); + ColumnRawPtrs column_ptrs(3); + + for (size_t i = 0; i < new_args.size(); ++i) + { + if (i <= 1) + columns_holder[i] = castColumn(new_args[i], elem_type)->convertToFullColumnIfConst(); + else + columns_holder[i] = castColumn(new_args[i], elem_type); + + column_ptrs[i] = columns_holder[i].get(); + } + + /// Step is one by default. + if (new_args.size() == 2) + { + /// Convert a column with constant 1 to the result type. + if (start_type.isFloat32()) + columns_holder[2] = castColumn( + {DataTypeFloat32().createColumnConst(input_rows_count, 1.0), std::make_shared(), {}}, elem_type); + else if (start_type.isFloat64()) + columns_holder[2] = castColumn( + {DataTypeFloat64().createColumnConst(input_rows_count, 1.0), std::make_shared(), {}}, elem_type); + else if (start_type.isUInt8() || start_type.isUInt16() || start_type.isUInt32() || start_type.isUInt64()) + columns_holder[2] = castColumn( + {DataTypeUInt8().createColumnConst(input_rows_count, 1), std::make_shared(), {}}, elem_type); + else + columns_holder[2] + = castColumn({DataTypeInt8().createColumnConst(input_rows_count, 1), std::make_shared(), {}}, elem_type); + + column_ptrs[2] = columns_holder[2].get(); + } + + bool is_start_const = isColumnConst(*column_ptrs[0]); + bool is_step_const = isColumnConst(*column_ptrs[2]); + + if (is_start_const && is_step_const) + { + UInt64 start_uint = assert_cast(*column_ptrs[0]).getUInt(0); + UInt64 step_uint = assert_cast(*column_ptrs[2]).getUInt(0); + Int64 start_int = assert_cast(*column_ptrs[0]).getInt(0); + Int64 step_int = assert_cast(*column_ptrs[2]).getInt(0); + Float32 start_float32 = assert_cast(*column_ptrs[0]).getFloat32(0); + Float32 step_float32 = assert_cast(*column_ptrs[2]).getFloat32(0); + Float64 start_float64 = assert_cast(*column_ptrs[0]).getFloat64(0); + 
Float64 step_float64 = assert_cast(*column_ptrs[2]).getFloat64(0); + + if ((res = executeConstStartStep(column_ptrs[1], start_uint, step_uint, input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_uint, step_uint, input_rows_count)) + || (res = executeConstStartStep( + column_ptrs[1], static_cast(start_uint), static_cast(step_uint), input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_uint, step_uint, input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_int, step_int, input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_int, step_int, input_rows_count)) + || (res = executeConstStartStep( + column_ptrs[1], static_cast(start_uint), static_cast(step_uint), input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_int, step_int, input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_float32, step_float32, input_rows_count)) + || (res = executeConstStartStep(column_ptrs[1], start_float64, step_float64, input_rows_count))) + { + } + } + else if (is_start_const && !is_step_const) + { + UInt64 start_uint = assert_cast(*column_ptrs[0]).getUInt(0); + Int64 start_int = assert_cast(*column_ptrs[0]).getInt(0); + Float32 start_float32 = assert_cast(*column_ptrs[0]).getFloat32(0); + Float64 start_float64 = assert_cast(*column_ptrs[0]).getFloat64(0); + + if ((res = executeConstStart(column_ptrs[1], column_ptrs[2], start_uint, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_uint, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], static_cast(start_uint), input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_uint, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_int, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_int, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], 
column_ptrs[2], static_cast(start_uint), input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_int, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_float32, input_rows_count)) + || (res = executeConstStart(column_ptrs[1], column_ptrs[2], start_float64, input_rows_count))) + { + } + } + else if (!is_start_const && is_step_const) + { + UInt64 step_uint = assert_cast(*column_ptrs[2]).getUInt(0); + Int64 step_int = assert_cast(*column_ptrs[2]).getInt(0); + Float32 step_float32 = assert_cast(*column_ptrs[2]).getFloat32(0); + Float64 step_float64 = assert_cast(*column_ptrs[2]).getFloat64(0); + + if ((res = executeConstStep(column_ptrs[0], column_ptrs[1], step_uint, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_uint, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_uint, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], static_cast(step_uint), input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_int, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_int, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], static_cast(step_uint), input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_int, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_float32, input_rows_count)) + || (res = executeConstStep(column_ptrs[0], column_ptrs[1], step_float64, input_rows_count))) + { + } + } + else + { + if ((res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], 
input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count)) + || (res = executeGeneric(column_ptrs[0], column_ptrs[1], column_ptrs[2], input_rows_count))) + { + } + } + + if (!res) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, "Illegal columns {} of argument of function {}", column_ptrs[0]->getName(), getName()); + + return res; + } + + template + ColumnPtr executeConstStartStep(const IColumn * end_arg, const T start, const T step, const size_t input_rows_count) const + { + const double epsilon = 0.00000001; + auto end_column = checkAndGetColumn>(end_arg); + if (!end_column) + return nullptr; + using actual_type = typename std::remove_cv::type>::type; + bool is_float64 = std::is_same::value; + + const auto & end_data = end_column->getData(); + + //The maximum number of values in KQL is 1,048,576 (2^20). + size_t total_elements = max_elements < 1048576 ? max_elements : 1048576; + if (total_elements > max_array_size_as_field) + total_elements = max_array_size_as_field; + + size_t total_values = 0; + size_t pre_values = 0; + + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + size_t count_in_range = 0; + if ((start < end_data[row_idx] && step > 0) || (start > end_data[row_idx] && step < 0)) + { + auto st = step > 0 ? start : end_data[row_idx]; + auto ed = step > 0 ? end_data[row_idx] : start; + auto new_step = step > 0 ? 
step : -1 * step; + if (is_float64 && step != 0) + { + while (st < ed || st - ed < epsilon) + { + ++count_in_range; + st += new_step; + } + } + else + { + count_in_range = static_cast((ed - st) / new_step) + 1; + } + } + pre_values += count_in_range; + if (pre_values < total_values) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + + total_values = pre_values; + if (total_values > total_elements) + total_values = total_elements; + } + + auto data_col = ColumnVector::create(total_values); + auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); + + auto & out_data = data_col->getData(); + auto & out_offsets = offsets_col->getData(); + + IColumn::Offset offset{}; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + auto st = start; + auto ed = end_data[row_idx]; + if (step > 0) + while (st < ed || st - ed < epsilon) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st > st + step) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + st += step; + } + else if (step < 0) + while (st > ed || ed - st < epsilon) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st < st + step) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + st += step; + } + out_offsets[row_idx] = offset; + } + + return ColumnArray::create(std::move(data_col), std::move(offsets_col)); + } + + template + ColumnPtr executeConstStep(const IColumn * start_arg, const IColumn * end_arg, const T step, const size_t input_rows_count) const + { + const double epsilon = 0.00000001; + auto start_column = checkAndGetColumn>(start_arg); + auto end_column = checkAndGetColumn>(end_arg); + if 
(!end_column || !start_column) + return nullptr; + using actual_type = typename std::remove_cv::type>::type; + bool is_float64 = std::is_same::value; + + const auto & start_data = start_column->getData(); + const auto & end_data = end_column->getData(); + + //The maximum number of values in KQL is 1,048,576 (2^20). + size_t total_elements = max_elements < 1048576 ? max_elements : 1048576; + if (total_elements > max_array_size_as_field) + total_elements = max_array_size_as_field; + + size_t total_values = 0; + size_t pre_values = 0; + + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + size_t count_in_range = 0; + if ((start_data[row_idx] < end_data[row_idx] && step > 0) || (start_data[row_idx] > end_data[row_idx] && step < 0)) + { + auto st = step > 0 ? start_data[row_idx] : end_data[row_idx]; + auto ed = step > 0 ? end_data[row_idx] : start_data[row_idx]; + auto new_step = step > 0 ? step : -1 * step; + if (is_float64 && step != 0) + { + while (st < ed || st - ed < epsilon) + { + ++count_in_range; + st += new_step; + } + } + else + { + count_in_range = static_cast((ed - st) / new_step) + 1; + } + } + pre_values += count_in_range; + if (pre_values < total_values) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + + total_values = pre_values; + if (total_values > total_elements) + total_values = total_elements; + } + + auto data_col = ColumnVector::create(total_values); + auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); + + auto & out_data = data_col->getData(); + auto & out_offsets = offsets_col->getData(); + + IColumn::Offset offset{}; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + auto st = start_data[row_idx]; + auto ed = end_data[row_idx]; + if (step > 0) + while (st < ed || st - ed < epsilon) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st > st + 
step) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + st += step; + } + else if (step < 0) + while (st > ed || ed - st < epsilon) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st < st + step) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + st += step; + } + out_offsets[row_idx] = offset; + } + + return ColumnArray::create(std::move(data_col), std::move(offsets_col)); + } + + template + ColumnPtr executeConstStart(const IColumn * end_arg, const IColumn * step_arg, const T start, const size_t input_rows_count) const + { + const double epsilon = 0.00000001; + auto end_column = checkAndGetColumn>(end_arg); + auto step_column = checkAndGetColumn>(step_arg); + if (!end_column || !step_column) + return nullptr; + using actual_type = typename std::remove_cv::type>::type; + bool is_float64 = std::is_same::value; + + const auto & end_data = end_column->getData(); + const auto & step_data = step_column->getData(); + + //The maximum number of values in KQL is 1,048,576 (2^20). + size_t total_elements = max_elements < 1048576 ? max_elements : 1048576; + if (total_elements > max_array_size_as_field) + total_elements = max_array_size_as_field; + + size_t total_values = 0; + size_t pre_values = 0; + + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + size_t count_in_range = 0; + if ((start < end_data[row_idx] && step_data[row_idx] > 0) || (start > end_data[row_idx] && step_data[row_idx] < 0)) + { + auto st = step_data[row_idx] > 0 ? start : end_data[row_idx]; + auto ed = step_data[row_idx] > 0 ? end_data[row_idx] : start; + auto step = step_data[row_idx] > 0 ? 
step_data[row_idx] : -1 * step_data[row_idx]; + if (is_float64 && step_data[row_idx] != 0) + { + while (st < ed || st - ed < epsilon) + { + ++count_in_range; + st += step; + } + } + else + { + count_in_range = static_cast((ed - st) / step) + 1; + } + } + pre_values += count_in_range; + + if (pre_values < total_values) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + + total_values = pre_values; + if (total_values > total_elements) + total_values = total_elements; + } + + auto data_col = ColumnVector::create(total_values); + auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); + + auto & out_data = data_col->getData(); + auto & out_offsets = offsets_col->getData(); + + IColumn::Offset offset{}; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + auto st = start; + auto ed = end_data[row_idx]; + if (step_data[row_idx] > 0) + while (st < ed || st - ed < epsilon) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st > st + step_data[row_idx]) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + st += step_data[row_idx]; + } + else if (step_data[row_idx] < 0) + while (st > ed || ed - st < epsilon) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st < st + step_data[row_idx]) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + st += step_data[row_idx]; + } + out_offsets[row_idx] = offset; + } + + return ColumnArray::create(std::move(data_col), std::move(offsets_col)); + } + + template + ColumnPtr + executeGeneric(const IColumn * start_col, const IColumn * end_col, const IColumn * step_col, const size_t input_rows_count) const + { + const double epsilon 
= 0.00000001; + auto start_column = checkAndGetColumn>(start_col); + auto end_column = checkAndGetColumn>(end_col); + auto step_column = checkAndGetColumn>(step_col); + + if (!start_column || !end_column || !step_column) + return nullptr; + using actual_type = typename std::remove_cv::type>::type; + bool is_float64 = std::is_same::value; + + //The maximum number of values in KQL is 1,048,576 (2^20). + size_t total_elements = max_elements < 1048576 ? max_elements : 1048576; + if (total_elements > max_array_size_as_field) + total_elements = max_array_size_as_field; + + const auto & start_data = start_column->getData(); + const auto & end_start = end_column->getData(); + const auto & step_data = step_column->getData(); + + size_t total_values = 0; + size_t pre_values = 0; + + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + size_t count_in_range = 0; + if ((start_data[row_idx] < end_start[row_idx] && step_data[row_idx] > 0) + || (start_data[row_idx] > end_start[row_idx] && step_data[row_idx] < 0)) + { + auto st = step_data[row_idx] > 0 ? start_data[row_idx] : end_start[row_idx]; + auto ed = step_data[row_idx] > 0 ? end_start[row_idx] : start_data[row_idx]; + auto step = step_data[row_idx] > 0 ? 
step_data[row_idx] : -1 * step_data[row_idx]; + if (is_float64 && step_data[row_idx] != 0) + { + while (st < ed || st - ed < epsilon) + { + ++count_in_range; + st += step; + } + } + else + { + count_in_range = static_cast((ed - st) / step) + 1; + } + } + pre_values += count_in_range; + + if (pre_values < total_values) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + + total_values = pre_values; + if (total_values > total_elements) + total_values = total_elements; + } + + auto data_col = ColumnVector::create(total_values); + auto offsets_col = ColumnArray::ColumnOffsets::create(end_column->size()); + + auto & out_data = data_col->getData(); + auto & out_offsets = offsets_col->getData(); + + IColumn::Offset offset{}; + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) + { + auto st = start_data[row_idx]; + auto ed = end_start[row_idx]; + if (step_data[row_idx] > 0) + while (st < ed || st - ed < epsilon) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st > st + step_data[row_idx]) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + st += step_data[row_idx]; + } + else if (step_data[row_idx] < 0) + while (st > ed || ed - st < epsilon) + { + out_data[offset++] = st; + if (offset >= total_values) + break; + if (st < st + step_data[row_idx]) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values of arguments you are passing", + getName()); + st += step_data[row_idx]; + } + out_offsets[row_idx] = offset; + } + + return ColumnArray::create(std::move(data_col), std::move(offsets_col)); + }
+
+    /// Builds, for every row, the array start, start + step, start + 2 * step, ... of DateTime64 values
+    /// that do not pass `end`. Both bounds are inclusive, so start == end yields a single value.
+    ///
+    /// arguments[0] / arguments[1] are the start / end columns; arguments[2], when present, supplies the
+    /// per-row step read through IColumn::getInt. A zero step, or a step pointing away from `end`,
+    /// yields an empty array for that row.
+    ///
+    /// The number of values produced for the whole block is capped (see `total_elements` below): rows are
+    /// filled in order, the row that reaches the cap is truncated and all later rows get empty arrays.
+    ColumnPtr executeDateTime64(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const
+    {
+        /// Step used when no third argument is given: one hour in nanoseconds (the output column has scale 9).
+        static constexpr Int64 default_step = 3600000000000;
+
+        /// NOTE(review): the template arguments of the two casts below and of ColumnDecimal::create further
+        /// down were missing from the text under review; ColumnDecimal<DateTime64> is reconstructed from the
+        /// function name and the scale argument 9 - confirm against the real source file.
+        const auto & start_data = typeid_cast<const ColumnDecimal<DateTime64> &>(*arguments[0].column).getData();
+        const auto & end_data = typeid_cast<const ColumnDecimal<DateTime64> &>(*arguments[1].column).getData();
+        const IColumn * step_col = arguments.size() > 2 ? arguments[2].column.get() : nullptr;
+
+        /// The maximum number of values in KQL is 1,048,576 (2^20).
+        size_t total_elements = max_elements < 1048576 ? max_elements : 1048576;
+        if (total_elements > max_array_size_as_field)
+            total_elements = max_array_size_as_field;
+
+        /// Number of values row `row_idx` contributes, never more than `room`.
+        const auto values_in_row = [&](size_t row_idx, size_t room) -> size_t
+        {
+            const Int64 step = step_col ? step_col->getInt(row_idx) : default_step;
+            if (room == 0 || step == 0)
+                return 0;
+
+            /// Ticks are signed: values before the epoch are negative and must be compared as such.
+            const Int64 first = start_data[row_idx];
+            const Int64 last = end_data[row_idx];
+            if (step > 0 ? first > last : first < last)
+                return 0;
+
+            /// Unsigned arithmetic yields the exact distance even when it does not fit into Int64
+            /// and makes negating the step safe for the smallest Int64.
+            const UInt64 span = step > 0 ? static_cast<UInt64>(last) - static_cast<UInt64>(first)
+                                         : static_cast<UInt64>(first) - static_cast<UInt64>(last);
+            const UInt64 stride = step > 0 ? static_cast<UInt64>(step) : UInt64{0} - static_cast<UInt64>(step);
+            const UInt64 after_first = span / stride;
+            return after_first < room ? static_cast<size_t>(after_first) + 1 : room;
+        };
+
+        /// First pass: fix every row's end offset, so the data column can be sized exactly once.
+        auto offsets_col = ColumnArray::ColumnOffsets::create(input_rows_count);
+        auto & out_offsets = offsets_col->getData();
+        size_t total_values = 0;
+        for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx)
+        {
+            total_values += values_in_row(row_idx, total_elements - total_values);
+            out_offsets[row_idx] = total_values;
+        }
+
+        auto data_col = ColumnDecimal<DateTime64>::create(total_values, 9);
+        auto & out_data = data_col->getData();
+
+        /// Second pass: write exactly the number of values counted above, so no write can land outside
+        /// `out_data`, whatever the cap did to the row.
+        size_t written = 0;
+        for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx)
+        {
+            const size_t row_end = out_offsets[row_idx];
+            if (written == row_end)
+                continue;
+
+            const Int64 step = step_col ? step_col->getInt(row_idx) : default_step;
+            Int64 value = start_data[row_idx];
+            while (true)
+            {
+                out_data[written++] = value;
+                if (written == row_end)
+                    break;
+                /// Only reached when another counted value follows, i.e. the sum still lies between
+                /// start and end, so this addition cannot overflow.
+                value += step;
+            }
+        }
+
+        return ColumnArray::create(std::move(data_col), std::move(offsets_col));
+    }
+
+ ColumnPtr executeInterval(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + { + DataTypePtr elem_type = checkAndGetDataType(result_type.get())->getNestedType(); + const auto & start_col = arguments[0].column; + const auto & end_col = arguments[1].column; +
static constexpr Int64 default_step = 3600000000000; /// Step used when no third argument is given: one hour in nanoseconds.
+
+        /// Produces, for every row, the interval values start, start + step, ... that do not pass `end`.
+        /// Both bounds are inclusive, so start == end yields a single value. A zero step, or a step pointing
+        /// away from `end`, yields an empty array. The number of values produced for the whole block is
+        /// capped: rows are filled in order, the row that reaches the cap is truncated and later rows are empty.
+
+        /// NOTE(review): the template arguments of the two casts below and of std::make_shared further down
+        /// were missing from the text under review; ColumnVector<Int64> and DataTypeInterval are reconstructed
+        /// from the Int64 arithmetic and the IntervalKind argument - confirm against the real source file.
+        const auto & start_data = typeid_cast<const ColumnVector<Int64> &>(*start_col).getData();
+        const auto & end_data = typeid_cast<const ColumnVector<Int64> &>(*end_col).getData();
+        const IColumn * step_col = arguments.size() > 2 ? arguments[2].column.get() : nullptr;
+
+        /// The maximum number of values in KQL is 1,048,576 (2^20).
+        size_t total_elements = max_elements < 1048576 ? max_elements : 1048576;
+        if (total_elements > max_array_size_as_field)
+            total_elements = max_array_size_as_field;
+
+        /// Number of values row `row_idx` contributes, never more than `room`.
+        const auto values_in_row = [&](size_t row_idx, size_t room) -> size_t
+        {
+            const Int64 step = step_col ? step_col->getInt(row_idx) : default_step;
+            if (room == 0 || step == 0)
+                return 0;
+
+            const Int64 first = start_data[row_idx];
+            const Int64 last = end_data[row_idx];
+            if (step > 0 ? first > last : first < last)
+                return 0;
+
+            /// Unsigned arithmetic yields the exact distance even when it does not fit into Int64
+            /// and makes negating the step safe for the smallest Int64.
+            const UInt64 span = step > 0 ? static_cast<UInt64>(last) - static_cast<UInt64>(first)
+                                         : static_cast<UInt64>(first) - static_cast<UInt64>(last);
+            const UInt64 stride = step > 0 ? static_cast<UInt64>(step) : UInt64{0} - static_cast<UInt64>(step);
+            const UInt64 after_first = span / stride;
+            return after_first < room ? static_cast<size_t>(after_first) + 1 : room;
+        };
+
+        auto out = ColumnArray::create(std::make_shared<DataTypeInterval>(IntervalKind::Nanosecond)->createColumn());
+        IColumn & out_data = out->getData();
+        IColumn::Offsets & out_offsets = out->getOffsets();
+        out_offsets.resize(input_rows_count);
+
+        /// First pass: fix every row's end offset and learn the exact number of values to reserve.
+        size_t total_values = 0;
+        for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx)
+        {
+            total_values += values_in_row(row_idx, total_elements - total_values);
+            out_offsets[row_idx] = total_values;
+        }
+
+        /// `total_values` already covers the whole block, so it must not be multiplied by the row count.
+        out_data.reserve(total_values);
+
+        /// Second pass: insert exactly the number of values counted above.
+        size_t written = 0;
+        for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx)
+        {
+            const size_t row_end = out_offsets[row_idx];
+            if (written == row_end)
+                continue;
+
+            const Int64 step = step_col ? step_col->getInt(row_idx) : default_step;
+            Int64 value = start_data[row_idx];
+            while (true)
+            {
+                out_data.insert(Field(value));
+                if (++written == row_end)
+                    break;
+                /// Only reached when another counted value follows, i.e. the sum still lies between
+                /// start and end, so this addition cannot overflow.
+                value += step;
+            }
+        }
+
+        return out;
+    }
+}; + +REGISTER_FUNCTION(KqlRange) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/Kusto/kqlBetween.cpp b/src/Functions/Kusto/kqlBetween.cpp new file mode 100644 index 000000000000..11455b454719 --- /dev/null +++ b/src/Functions/Kusto/kqlBetween.cpp @@ -0,0 +1,110 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionKqlBetween : public IFunction +{ +public: + static constexpr auto name = "kql_between"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlBetween(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlBetween() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 3; } + DataTypePtr getReturnTypeImpl(const DataTypes &) const override; + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &
/*arguments*/) const override { return false; } + void checkTypeCompatibility(const DB::DataTypePtr arg1, const DB::DataTypePtr arg2) const; + +private: + ContextPtr context; +}; + +void FunctionKqlBetween::checkTypeCompatibility(const DB::DataTypePtr arg1, const DB::DataTypePtr arg2) const +{ + if (!arg1->getPtr()->equals(*arg2) + && !(WhichDataType(arg1).isDateTime64() && (WhichDataType(arg2).isInterval() || WhichDataType(arg2).isDateTime64())) + && !( + (WhichDataType(arg1).isInt() || WhichDataType(arg1).isUInt() || WhichDataType(arg1).isFloat()) + && (WhichDataType(arg2).isInt() || WhichDataType(arg2).isUInt() || WhichDataType(arg2).isFloat()))) + throw DB::Exception( + DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Arguments for function {} doesn't match: all arguments should be of same type", + getName()); +} + +DataTypePtr FunctionKqlBetween::getReturnTypeImpl(const DataTypes & arguments) const +{ + checkTypeCompatibility(arguments[0], arguments[1]); + checkTypeCompatibility(arguments[1], arguments[2]); + + const auto arg_it = std::ranges::find_if(arguments, [](const auto & argument) { + return !WhichDataType(argument).isUInt() && !WhichDataType(argument).isInt() && !WhichDataType(argument).isFloat() + && !WhichDataType(argument).isInterval() && !WhichDataType(argument).isDateTime64(); + }); + + if (arg_it != arguments.cend()) + throw DB::Exception( + DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Arguments type argument # {} for function {} doesn't match: arguments should be integer, long, real or datetime", + std::distance(arguments.cbegin(), arg_it), + getName()); + return DataTypeFactory::instance().get("Bool"); +} + +ColumnPtr +FunctionKqlBetween::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, const size_t input_rows_count) const +{ + const auto & base_arg = arguments[0]; + const auto & comparable_arg1 = arguments[1]; + const auto & comparable_arg2 = arguments[2]; + + const ColumnsWithTypeAndName lhs_and_value{comparable_arg1, 
base_arg}; + const auto lhs_and_value_compared = executeFunctionCall(context, "lessOrEquals", lhs_and_value, input_rows_count); + + if (!WhichDataType(*comparable_arg2.type).isInterval() || !WhichDataType(*comparable_arg1.type).isDateTime64()) + { + const ColumnsWithTypeAndName value_and_rhs{base_arg, comparable_arg2}; + const auto value_and_rhs_compared = executeFunctionCall(context, "lessOrEquals", value_and_rhs, input_rows_count); + const ColumnsWithTypeAndName comparisons{ + asArgument(lhs_and_value_compared, "lhs_and_value_compared"), asArgument(value_and_rhs_compared, "value_and_rhs_compared")}; + return executeFunctionCall(context, "and", comparisons, input_rows_count).first; + } + else + { + const ColumnsWithTypeAndName lhs_and_rhs{comparable_arg1, comparable_arg2}; + const auto lhs_and_rhs_sum = executeFunctionCall(context, "plus", lhs_and_rhs, input_rows_count); + + const ColumnsWithTypeAndName value_and_rhs{base_arg, asArgument(lhs_and_rhs_sum, "lhs_and_rhs_sum")}; + + const auto value_and_rhs_compared = executeFunctionCall(context, "lessOrEquals", value_and_rhs, input_rows_count); + const ColumnsWithTypeAndName comparisons{ + asArgument(lhs_and_value_compared, "lhs_and_value_compared"), asArgument(value_and_rhs_compared, "value_and_rhs_compared")}; + return executeFunctionCall(context, "and", comparisons, input_rows_count).first; + } +} + +REGISTER_FUNCTION(KqlBetween) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlBin.cpp b/src/Functions/Kusto/kqlBin.cpp new file mode 100644 index 000000000000..400ab106441e --- /dev/null +++ b/src/Functions/Kusto/kqlBin.cpp @@ -0,0 +1,135 @@ +#include +#include +#include +#include +#include +#include + +namespace +{ +DB::ColumnWithTypeAndName +interpretAsInterval(const DB::ContextPtr & context, const DB::ColumnWithTypeAndName & argument, const size_t input_rows_count) +{ + static constexpr auto NANOSECONDS_PER_SECOND = 1'000'000'000U; + + const DB::ColumnsWithTypeAndName multiply_args{ + 
argument, DB::createConstColumnWithTypeAndName(NANOSECONDS_PER_SECOND, argument.name)}; + const auto product = executeFunctionCall(context, "multiply", multiply_args, input_rows_count); + + const DB::ColumnsWithTypeAndName to_interval_args{asArgument(product, argument.name)}; + const auto interval = executeFunctionCall(context, "toIntervalNanosecond", to_interval_args, input_rows_count); + + return asArgument(interval, argument.name); +} +} + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionKqlBin : public IFunction +{ +public: + static constexpr auto name = "kql_bin"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlBin(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlBin() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 2; } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + +private: + ContextPtr context; +}; + +ColumnPtr +FunctionKqlBin::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const +{ + const auto intermediate = std::invoke( + [this, &arguments, &input_rows_count] + { + const auto & round_to_argument = arguments.back(); + const auto & value_argument = arguments.front(); + const WhichDataType round_to_which_data_type(*round_to_argument.type); + const WhichDataType value_which_data_type(*value_argument.type); + + const auto & adjusted_round_to + = (value_which_data_type.isDateOrDate32OrDateTimeOrDateTime64() || value_which_data_type.isInterval()) && 
!round_to_which_data_type.isInterval() + ? interpretAsInterval(context, round_to_argument, input_rows_count) + : round_to_argument; + + if (value_which_data_type.isDateOrDate32OrDateTimeOrDateTime64()) + { + const auto adjusted_args = std::invoke( + [this, &adjusted_round_to, &input_rows_count, &value_argument, &value_which_data_type]() -> ColumnsWithTypeAndName + { + if (value_which_data_type.isDateTime64()) + return {value_argument, adjusted_round_to}; + + const ColumnsWithTypeAndName to_datetime64_args{ + value_argument, + createConstColumnWithTypeAndName(9, "scale"), + createConstColumnWithTypeAndName("UTC", "timezone")}; + + const auto as_datetime64 = executeFunctionCall(context, "toDateTime64", to_datetime64_args, input_rows_count); + return {asArgument(as_datetime64, "as_datetime64"), adjusted_round_to}; + }); + + return executeFunctionCall(context, "toStartOfIntervalOrNull", adjusted_args, input_rows_count); + } + + const ColumnsWithTypeAndName adjusted_args{value_argument, adjusted_round_to}; + const auto quotient = executeFunctionCall(context, "divide", adjusted_args, input_rows_count); + + const ColumnsWithTypeAndName floor_args{asArgument(quotient, adjusted_round_to.name)}; + const auto floored = executeFunctionCall(context, "floor", floor_args, input_rows_count); + + const ColumnsWithTypeAndName multiply_args{asArgument(floored, adjusted_round_to.name), adjusted_round_to}; + return executeFunctionCall(context, "multiply", multiply_args, input_rows_count); + }); + + const ColumnsWithTypeAndName conversion_args{ + asArgument(intermediate, "intermediate"), createConstColumnWithTypeAndName(result_type->getName(), "target_type")}; + return executeFunctionCall(context, "accurateCastOrNull", conversion_args, input_rows_count).first; +} + +DataTypePtr FunctionKqlBin::getReturnTypeImpl(const DataTypes & arguments) const +{ + const auto nested_type = std::invoke( + [this, &arguments]() -> DataTypePtr + { + const auto & value_argument = arguments.front(); + const 
auto & round_to_argument = arguments.back(); + if (const WhichDataType value_which_data_type(*value_argument); value_which_data_type.isInterval() || isNumber(value_which_data_type)) + { + const WhichDataType round_to_which_data_type(*round_to_argument); + return isNumber(value_which_data_type) && (round_to_which_data_type.isFloat() || round_to_which_data_type.isDecimal()) + ? round_to_argument + : value_argument; + } + else if (value_which_data_type.isDateOrDate32OrDateTimeOrDateTime64()) + return std::make_shared(9, "UTC"); + + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, expected Date, Date32, DateTime, DateTime64, Interval or Number", + value_argument->getName(), + getName()); + }); + + return makeNullable(nested_type); +} + +REGISTER_FUNCTION(KqlBin) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlBinAt.cpp b/src/Functions/Kusto/kqlBinAt.cpp new file mode 100644 index 000000000000..55c386e3429e --- /dev/null +++ b/src/Functions/Kusto/kqlBinAt.cpp @@ -0,0 +1,98 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionKqlBinAt : public IFunction +{ +public: + static constexpr auto name = "kql_bin_at"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlBinAt(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlBinAt() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 3; } + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override; + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) 
const override { return false; } + +private: + ContextPtr context; +}; + +ColumnPtr FunctionKqlBinAt::executeImpl( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const +{ + const auto get_or_convert_argument = [this, &input_rows_count](const ColumnWithTypeAndName & argument) + { + if (const WhichDataType which_data_type(*argument.type); which_data_type.isDateOrDate32() || which_data_type.isDateTime()) + { + const ColumnsWithTypeAndName to_datetime64_args{ + argument, + createConstColumnWithTypeAndName(9, "scale"), + createConstColumnWithTypeAndName("UTC", "timezone")}; + + return asArgument(executeFunctionCall(context, "toDateTime64", to_datetime64_args, input_rows_count), argument.name); + } + + return argument; + }; + + const auto & value_argument = get_or_convert_argument(arguments.front()); + const auto & round_to_argument = arguments[1]; + const auto & offset_argument = get_or_convert_argument(arguments.back()); + + const ColumnsWithTypeAndName subtraction_args{value_argument, offset_argument}; + const auto difference = executeFunctionCall(context, "minus", subtraction_args, input_rows_count); + + const ColumnsWithTypeAndName bin_args{asArgument(difference, "difference"), round_to_argument}; + const auto bin_result = executeFunctionCall(context, "kql_bin", bin_args, input_rows_count); + + const ColumnsWithTypeAndName addition_args{offset_argument, asArgument(bin_result, "bin_result")}; + const auto sum = executeFunctionCall(context, "plus", addition_args, input_rows_count); + + const ColumnsWithTypeAndName cast_args{ + asArgument(sum, "sum"), createConstColumnWithTypeAndName(result_type->getName(), "type")}; + return executeFunctionCall(context, "cast", cast_args, input_rows_count).first; +} + +DataTypePtr FunctionKqlBinAt::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const +{ + const auto & value_type = *arguments.front().type; + const auto & offset_type = *arguments.back().type; + + 
WhichDataType value_which_data_type(value_type); + WhichDataType offset_which_data_type(offset_type); + if ((value_which_data_type.isDateOrDate32OrDateTimeOrDateTime64() && offset_which_data_type.isDateOrDate32OrDateTimeOrDateTime64()) + || (value_which_data_type.isInterval() && offset_which_data_type.isInterval()) + || (isNumber(value_which_data_type) && isNumber(offset_which_data_type))) + { + const ColumnsWithTypeAndName bin_args{arguments.front(), arguments[1]}; + return FunctionFactory::instance().get("kql_bin", context)->build(bin_args)->getResultType(); + } + + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of third argument of function {}, expected {}", + offset_type.getName(), + getName(), + value_type.getFamilyName()); +} + +REGISTER_FUNCTION(KqlBinAt) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlCountOverlappingSubstrings.cpp b/src/Functions/Kusto/kqlCountOverlappingSubstrings.cpp new file mode 100644 index 000000000000..5608003726ba --- /dev/null +++ b/src/Functions/Kusto/kqlCountOverlappingSubstrings.cpp @@ -0,0 +1,87 @@ +#include +#include + +namespace DB +{ +namespace ErrorCodes /* opened inside DB, so this is DB::ErrorCodes - the scope the class below reads its codes from */ +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionKqlCountOverlappingSubstrings : public IFunction +{ +public: + static constexpr auto name = "kql_count_overlapping_substrings"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlCountOverlappingSubstrings(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlCountOverlappingSubstrings() override = default; + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 2; } + bool isVariadic() const override { return false; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + +
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + const auto args_length = arguments.size(); + + if (args_length != 2) + { + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2.", + getName(), + toString(arguments.size())); + } + + if (!isString(arguments.at(0).type)) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of argument of function {}", getName()); + } + + if (!isString(arguments.at(1).type)) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of argument of function {}", getName()); + } + + return std::make_shared(); + } + + ColumnPtr executeImpl( + const ColumnsWithTypeAndName & arguments, + [[maybe_unused]] const DataTypePtr & result_type, + const size_t input_rows_count) const override + { + auto result = ColumnUInt32::create(); + auto & result_column = result->getData(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + uint32_t res = 0; + + const auto source = arguments[0].column->getDataAt(i).toString(); + const auto search = arguments[1].column->getDataAt(i).toString(); + std::size_t found = 0; + + while ((found = source.find(search, found)) != std::string::npos) + { + ++res; + ++found; + } + result_column.push_back(static_cast(res)); + } + return result; + } + +private: + ContextPtr context; +}; + +REGISTER_FUNCTION(KqlCountOverlappingSubstrings) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlDateTime.cpp b/src/Functions/Kusto/kqlDateTime.cpp new file mode 100644 index 000000000000..a4b7beb5ec8a --- /dev/null +++ b/src/Functions/Kusto/kqlDateTime.cpp @@ -0,0 +1,113 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB::ErrorCodes +{ +extern const int LOGICAL_ERROR; +extern const int BAD_ARGUMENTS; +} + +namespace +{ + +enum class InputPolicy +{ + Arbitrary, + Constant +}; + +constexpr const 
char * getDateTimeParsingFunction(const InputPolicy input_policy) +{ + if (input_policy == InputPolicy::Arbitrary) + return "parseDateTime64BestEffortOrNull"; + else if (input_policy == InputPolicy::Constant) + return "parseDateTime64BestEffort"; + + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unhandled input policy: {}", magic_enum::enum_name(input_policy)); +} + +constexpr const char * getFunctionName(const InputPolicy input_policy) +{ + if (input_policy == InputPolicy::Arbitrary) + return "kql_todatetime"; + else if (input_policy == InputPolicy::Constant) + return "kql_datetime"; + + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unhandled input policy: {}", magic_enum::enum_name(input_policy)); +} +} + +namespace DB +{ +template +class FunctionKqlDateTime : public IFunction +{ +public: + static constexpr auto name = getFunctionName(input_policy); + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlDateTime(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlDateTime() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return makeNullable(std::make_shared(9, "UTC")); } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + +private: + ContextPtr context; +}; + +template +ColumnPtr FunctionKqlDateTime::executeImpl( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const +{ + const auto & argument = arguments.front(); + const ColumnsWithTypeAndName conversion_args{ + argument, + createConstColumnWithTypeAndName(9, "scale"), + 
createConstColumnWithTypeAndName("UTC", "timezone")}; + + const auto * const conversion_function + = WhichDataType(*argument.type).isStringOrFixedString() ? getDateTimeParsingFunction(input_policy) : "toDateTime64"; + + std::pair converted; + try + { + converted = executeFunctionCall(context, conversion_function, conversion_args, input_rows_count); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::DECIMAL_OVERFLOW) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Datetime out of range"); + + throw; + } + + const ColumnsWithTypeAndName addition_args{ + asArgument(converted, "converted"), + createConstColumnWithTypeAndName(50, "interval_50", IntervalKind::Nanosecond)}; + const auto sum = executeFunctionCall(context, "plus", addition_args, input_rows_count); + + const ColumnsWithTypeAndName to_start_of_interval_args{ + asArgument(sum, "sum"), createConstColumnWithTypeAndName(100, "interval_100", IntervalKind::Nanosecond)}; + const auto [rounded_column, _] = executeFunctionCall(context, "toStartOfInterval", to_start_of_interval_args, input_rows_count); + + return wrapInNullable(rounded_column, conversion_args, result_type, input_rows_count); +} + +REGISTER_FUNCTION(KqlDateTime) +{ + factory.registerFunction>(); + factory.registerFunction>(); +} +} diff --git a/src/Functions/Kusto/kqlHasAnyIp.cpp b/src/Functions/Kusto/kqlHasAnyIp.cpp new file mode 100644 index 000000000000..acf3c5e4c270 --- /dev/null +++ b/src/Functions/Kusto/kqlHasAnyIp.cpp @@ -0,0 +1,463 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + + +struct HasAnyIpv4 +{ + static constexpr auto name = "kql_has_any_ipv4"; + static constexpr auto variadic = true; + static constexpr auto regex = 
"([^[:alnum:]]|^)([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3})([^[:alnum:]]|$)"; + static std::string transformStringArgument( + const ColumnWithTypeAndName & arg, + [[maybe_unused]] const DataTypePtr & result_type, + [[maybe_unused]] const ContextPtr & context, + size_t row) + { + return arg.column->getDataAt(row).toString(); + } + static bool + checkStringArgument(const std::string & arg, const DataTypePtr & result_type, const ContextPtr & context, [[maybe_unused]] size_t row) + { + const auto is_ipv4_string = [&, result_type](const DB::ColumnsWithTypeAndName & args) + { return DB::FunctionFactory::instance().get("isIPv4String", context)->build(args)->execute(args, result_type, 1); }; + + const ColumnsWithTypeAndName is_ipv4_string_args = {createConstColumnWithTypeAndName(arg, "ip")}; + const auto is_ipv4 = is_ipv4_string(is_ipv4_string_args); + return is_ipv4->getUInt(0); + } + + static void + insertFromArrayElement(const Field & value, const DataTypePtr & result_type, const ContextPtr & context, std::vector & ips) + { + const auto is_ipv4_string = [&, result_type](const DB::ColumnsWithTypeAndName & args) + { return DB::FunctionFactory::instance().get("isIPv4String", context)->build(args)->execute(args, result_type, 1); }; + const auto value_as_string = toString(value); + const ColumnsWithTypeAndName is_ipv4_string_args = {createConstColumnWithTypeAndName(value_as_string, "ip")}; + const auto is_ipv4 = is_ipv4_string(is_ipv4_string_args); + if (is_ipv4->getUInt(0)) + { + ips.push_back(value_as_string); + } + } + + static bool checkRegexMatch( + const std::string & s, const DataTypePtr & result_type, const ContextPtr & context, const std::vector & ips) + { + const ColumnsWithTypeAndName is_ipv4_string_args = {createConstColumnWithTypeAndName(s, "ip")}; + + const auto is_ipv4 = FunctionFactory::instance() + .get("isIPv4String", context) + ->build(is_ipv4_string_args) + ->execute(is_ipv4_string_args, result_type, 1); + + if (is_ipv4->getUInt(0) == 1) + { + 
return std::ranges::any_of(ips, std::bind_front(std::equal_to(), std::cref(s))); + } + return false; + } +}; + +struct HasAnyIpv4Prefix /* traits for kql_has_any_ipv4_prefix: accepts full dotted quads or dot-terminated prefixes and matches them against IPv4 addresses found by the regex */ +{ + static constexpr auto name = "kql_has_any_ipv4_prefix"; + static constexpr auto variadic = true; + static constexpr auto regex = "([^[:alnum:]]|^)([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3})([^[:alnum:]]|$)"; + static std::string + transformStringArgument(const ColumnWithTypeAndName & arg, const DataTypePtr & result_type, const ContextPtr & context, size_t row) + { + return HasAnyIpv4::transformStringArgument(arg, result_type, context, row); + } + static bool checkStringArgument( + const std::string & arg, + [[maybe_unused]] const DataTypePtr & result_type, + [[maybe_unused]] const ContextPtr & context, + [[maybe_unused]] size_t row) + { + const auto n = std::ranges::count(arg, '.'); + return n == 3 || (arg.ends_with('.') && n <= 2); /* ends_with() is well-defined for an empty string, back() is not */ + } + static void insertFromArrayElement( + const Field & value, + [[maybe_unused]] const DataTypePtr & result_type, + [[maybe_unused]] const ContextPtr & context, + std::vector & ips) + { + const auto value_as_string = toString(value); + + const auto n = std::ranges::count(value_as_string, '.'); + if (n == 3 || (value_as_string.ends_with('.')
&& n <= 2)) /* same empty-safe acceptance rule as checkStringArgument */ + { + ips.push_back(value_as_string); + } + } + static bool checkRegexMatch( + const std::string & s, const DataTypePtr & result_type, const ContextPtr & context, const std::vector & ips) + { + const ColumnsWithTypeAndName is_ipv4_string_args = {createConstColumnWithTypeAndName(s, "ip")}; + + const auto is_ipv4 = FunctionFactory::instance() + .get("isIPv4String", context) + ->build(is_ipv4_string_args) + ->execute(is_ipv4_string_args, result_type, 1); + + if (is_ipv4->getUInt(0)) + { + return std::ranges::any_of( + ips, + [&s](const std::string & str) -> bool { return s.starts_with(str); /* the address must begin with the whole prefix; comparing only the shorter length accepted prefixes longer than the address */ }); + } + return false; + } +}; + +struct HasIpv4 : HasAnyIpv4 +{ + static constexpr auto name = "kql_has_ipv4"; + static constexpr auto variadic = false; +}; + +struct HasIpv4Prefix : HasAnyIpv4Prefix +{ + static constexpr auto name = "kql_has_ipv4_prefix"; + static constexpr auto variadic = false; +}; + +struct HasAnyIpv6 +{ + static constexpr auto name = "kql_has_any_ipv6"; + static constexpr auto variadic = true; + static constexpr auto regex = "([^a-zA-Z0-9:.]|^)([0-9a-fA-F:.]{3,})([^a-zA-Z0-9:.]|$)"; + static std::optional ipv6ToHex(const std::string & str, const DataTypePtr & result_type, const ContextPtr & context) + { + const ColumnsWithTypeAndName ipv6_string = {createConstColumnWithTypeAndName(str, "ipv6")}; + const auto is_ipv6 + = FunctionFactory::instance().get("isIPv6String", context)->build(ipv6_string)->execute(ipv6_string, result_type, 1); + if (is_ipv6->getUInt(0)) + { + const auto ipv6_string_to_num = executeFunctionCall(context, "IPv6StringToNum", ipv6_string, 1); + const ColumnsWithTypeAndName hex_args{asArgument(ipv6_string_to_num, "")}; + const auto [hex_string, _] = executeFunctionCall(context, "hex", hex_args, 1); + return hex_string->getDataAt(0).toString(); + } + return std::nullopt; + } + static std::string + transformStringArgument(const ColumnWithTypeAndName & arg, const DataTypePtr &
result_type, const ContextPtr & context, size_t row) + { + return ipv6ToHex(arg.column->getDataAt(row).toString(), result_type, context).value_or(""); + } + static bool checkStringArgument( + const std::string & arg, + [[maybe_unused]] const DataTypePtr & result_type, + [[maybe_unused]] const ContextPtr & context, + [[maybe_unused]] size_t row) + { + return !arg.empty(); + } + static void + insertFromArrayElement(const Field & value, const DataTypePtr & result_type, const ContextPtr & context, std::vector & ips) + { + const auto ipv6_string = ipv6ToHex(toString(value), result_type, context).value_or(""); + if (!ipv6_string.empty()) + { + ips.push_back(ipv6_string); + } + } + static bool checkRegexMatch( + const std::string & s, const DataTypePtr & result_type, const ContextPtr & context, const std::vector & ips) + { + const auto m = ipv6ToHex(s, result_type, context).value_or(""); + + return std::ranges::any_of(ips, std::bind_front(std::equal_to(), std::cref(m))); + } +}; + +struct HasAnyIpv6Prefix +{ + static constexpr auto name = "kql_has_any_ipv6_prefix"; + static constexpr auto variadic = true; + static constexpr auto regex = "([^a-zA-Z0-9:.]|^)([0-9a-fA-F:.]{3,})([^a-zA-Z0-9:.]|$)"; + static std::optional ipv6ToHex(const std::string & str, const DataTypePtr & result_type, const ContextPtr & context) + { + return HasAnyIpv6::ipv6ToHex(str, result_type, context); + } + + static std::optional> parsePrefix(const std::string & str) + { + std::vector result; + + auto iter = str.cbegin(); + auto iter_end = str.cend(); + const auto ipv6 = boost::spirit::x3::repeat(1, 7)[boost::spirit::x3::hex >> ':']; + const auto r = boost::spirit::x3::parse(iter, iter_end, ipv6, result); + if (!r || iter != iter_end) + { + return std::nullopt; + } + return result; + } + + static std::optional> parsePrefixEmbeddedIpv4(const std::string & str) + { + std::vector result; + const auto ipv4_embedded = boost::spirit::x3::repeat(6)[boost::spirit::x3::hex >> ':'] + >> 
boost::spirit::x3::repeat(1, 3)[boost::spirit::x3::uint_ >> '.']; + + auto iter = str.begin(); + auto iter_end = str.end(); + + auto r = boost::spirit::x3::parse(iter, iter_end, ipv4_embedded, result); + if (!r || iter != iter_end) + { + return std::nullopt; + } + return result; + } + + static std::optional ipv6PrefixToHex(const std::string & str, const DataTypePtr & result_type, const ContextPtr & context) /* hex form of a ':'-terminated prefix, a '.'-terminated embedded-IPv4 prefix, or a full address; nullopt when parsing fails or a group is out of range */ + { + std::vector vec_v6; + std::vector vec_v4; + if (str.ends_with(':')) /* ends_with() is safe on an empty string (back() is not); an empty input falls through to ipv6ToHex below */ + { + const auto result = parsePrefix(str); + + if (!result.has_value()) + { + return std::nullopt; + } + vec_v6 = result.value(); + } + else if (str.ends_with('.')) + { + const auto result = parsePrefixEmbeddedIpv4(str); + if (!result.has_value()) + { + return std::nullopt; + } + + std::copy(result.value().cbegin(), result.value().cbegin() + 6, std::back_inserter(vec_v6)); + std::copy(result.value().cbegin() + 6, result.value().cend(), std::back_inserter(vec_v4)); + } + else + { + return ipv6ToHex(str, result_type, context); + } + + if (std::ranges::any_of(vec_v6, [](const auto & x) { return x > std::numeric_limits::max(); }) + || std::ranges::any_of(vec_v4, [](const auto & x) { return x > std::numeric_limits::max(); })) + { + return std::nullopt; + } + auto ipv6_hex = std::accumulate( + vec_v6.cbegin(), + vec_v6.cend(), + std::string(), + [](const auto & x, const auto & y) { return x + std::format("{:04X}", y); }); + if (!vec_v4.empty()) + { + ipv6_hex += std::accumulate( + vec_v4.cbegin(), + vec_v4.cend(), + std::string(), + [](const auto & x, const auto & y) { return x + std::format("{:02X}", y); }); + } + return ipv6_hex; + } + static std::string + transformStringArgument(const ColumnWithTypeAndName & arg, const DataTypePtr & result_type, const ContextPtr & context, size_t row) + { + return ipv6PrefixToHex(arg.column->getDataAt(row).toString(), result_type, context).value_or(""); + } + static bool checkStringArgument(const std::string & arg, const
DataTypePtr & result_type, const ContextPtr & context, size_t row) + { + return HasAnyIpv6::checkStringArgument(arg, result_type, context, row); + } + static void + insertFromArrayElement(const Field & value, const DataTypePtr & result_type, const ContextPtr & context, std::vector & ips) + { + const auto ipv6_string = ipv6PrefixToHex(toString(value), result_type, context).value_or(""); + if (!ipv6_string.empty()) + { + ips.push_back(ipv6_string); + } + } + static bool checkRegexMatch( + const std::string & s, const DataTypePtr & result_type, const ContextPtr & context, const std::vector & ips) + { + const auto m = ipv6ToHex(s, result_type, context).value_or(""); /* empty when the regex hit is not a valid IPv6 address */ + + return std::ranges::any_of( + ips, + [&m](const std::string & str) { return m.starts_with(str); /* the address hex must begin with the whole prefix hex; the old min-length memcmp compared 0 bytes for an empty m and matched every prefix */ }); + } +}; + +struct HasIpv6 : HasAnyIpv6 +{ + static constexpr auto name = "kql_has_ipv6"; + static constexpr auto variadic = false; +}; + +struct HasIpv6Prefix : HasAnyIpv6Prefix +{ + static constexpr auto name = "kql_has_ipv6_prefix"; + static constexpr auto variadic = false; +}; + +template +class FunctionKqlHasIpGeneric : public IFunction +{ +public: + static constexpr auto name = Func::name; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlHasIpGeneric(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlHasIpGeneric() override = default; + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return Func::variadic ?
0 : 2; } + bool isVariadic() const override { return Func::variadic; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + const auto args_length = arguments.size(); + + if (args_length < 2) + { + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2 or more.", + getName(), + toString(arguments.size())); + } + + if (!isStringOrFixedString(arguments.at(0).type)) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of argument of function {}", getName()); + } + + if (!isStringOrFixedString(arguments.at(1).type) && !isArray(arguments.at(1).type)) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of argument of function {}", getName()); + } + + if (isStringOrFixedString(arguments.at(1).type)) + { + if (isVariadic()) + { + const auto are_arguments_valid = std::ranges::all_of( + arguments | std::views::drop(2), [](const auto & argument) { return isStringOrFixedString(argument.type); }); + if (!are_arguments_valid) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of argument of function {}", getName()); + } + } + + else if (!isVariadic() || !isArray(arguments.at(1).type)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal type of argument of function {}", getName()); + } + + return std::make_shared(); + } + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const override + { + auto result = ColumnUInt8::create(); + auto & result_column = result->getData(); + + for (size_t i = 0; i < input_rows_count; ++i) + { + bool res = false; + const auto ips = extractIpsFromArguments(arguments, result_type, context, i); + + std::string source = 
arguments[0].column->getDataAt(i).toString(); + const std::regex ip_finder(Func::regex); + std::smatch matches; + + while (!res && std::regex_search(source, matches, ip_finder)) + { + res = Func::checkRegexMatch(matches[2].str(), result_type, context, ips); + + source = matches.suffix().str(); + } + result_column.push_back(static_cast(res)); + } + return result; + } + +private: + ContextPtr context; + + static std::vector extractIpsFromArguments( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ContextPtr & context, size_t row) + { + std::vector ips; + + if (isStringOrFixedString(arguments.at(1).type)) + { + std::ranges::copy_if( + arguments | std::views::drop(1) + | std::views::transform([&row, &result_type, &context](const ColumnWithTypeAndName & arg) + { return Func::transformStringArgument(arg, result_type, context, row); }), + std::back_inserter(ips), + [&row, &result_type, &context](const std::string & arg) + { return Func::checkStringArgument(arg, result_type, context, row); }); + } + + else if (isArray(arguments.at(1).type)) + { + Field array0; + arguments[1].column->get(row, array0); + const auto len0 = array0.get().size(); + + for (size_t j = 0; j < len0; ++j) + { + if (const auto & value = array0.get().at(j); value.getType() == Field::Types::String) + { + Func::insertFromArrayElement(value, result_type, context, ips); + } + } + } + return ips; + } +}; + +REGISTER_FUNCTION(FunctionKqlHasIpGeneric) +{ + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); + + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); +} +} diff --git a/src/Functions/Kusto/kqlHash.cpp b/src/Functions/Kusto/kqlHash.cpp new file mode 100644 index 000000000000..16502bc70ca0 --- /dev/null +++ b/src/Functions/Kusto/kqlHash.cpp @@ -0,0 +1,98 @@ +#include +#include +#include +#include + +namespace DB +{ + +namespace 
ErrorCodes +{ + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionKqlHash : public IFunction +{ +public: + static constexpr auto name = "kql_hash"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlHash(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlHash() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override; + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + +private: + ContextPtr context; +}; + +ColumnPtr FunctionKqlHash::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, const size_t input_rows_count) const +{ + const auto & argument = arguments.front(); + const ColumnsWithTypeAndName firstarg{argument}; + WhichDataType which(*argument.type); + ColumnsWithTypeAndName args = firstarg; + /* Int32 is left out for the time being so that an explicit KQL int cast will still function. + * ClickHouse will put numbers in the smallest data type they will fit in.
+ * Uncast numbers that ClickHouse interprets as Int32 will not match KQL (int64) results. + */ + if (which.isInt8() || which.isInt16() || which.isUInt8() || which.isUInt16() || which.isUInt32() || which.isUInt64()) + { + const auto tocast = executeFunctionCall(context, "toInt64", firstarg, input_rows_count); + args = {asArgument(tocast, name)}; + } + else if (which.isDateTime64()) + { + const auto tocast = executeFunctionCall(context, "kql_tolong", firstarg, input_rows_count); + args = {asArgument(tocast, name)}; + } + else if (which.isFloat32()) + { + const auto tocast = executeFunctionCall(context, "toFloat64", firstarg, input_rows_count); + args = {asArgument(tocast, name)}; + } + const auto tohash = executeFunctionCall(context, "xxHash64", args, input_rows_count); /* args is the first argument, converted above when its type matched one of the branches */ + if (arguments.size() == 1) /* no second argument: return the hash itself converted with toInt64 */ + { + const ColumnsWithTypeAndName hashargs{asArgument(tohash, "tohash")}; + return executeFunctionCall(context, "toInt64", hashargs, input_rows_count).first; + } + else + { + for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) /* reject any row whose second argument is below 1 */ + { + if (arguments[1].column->getInt(row_idx) < 1) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "hash(): argument 2 must be a constant positive long value"); + } + const ColumnsWithTypeAndName modargs{asArgument(tohash, "tohash"), arguments.back()}; + const auto tomod = executeFunctionCall(context, "moduloOrZero", modargs, input_rows_count); + const ColumnsWithTypeAndName touint{asArgument(tomod, "tomod")}; + return executeFunctionCall(context, "toInt64", touint, input_rows_count).first; + } +} + +DataTypePtr FunctionKqlHash::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const +{ + if (arguments.empty() || arguments.size() > 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "hash(): function expects [1..2] argument(s)."); + + if (arguments.size() == 2 && !isNativeInteger(arguments[1].type)) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "hash(): argument 2 must be a constant positive
long value"); + } + return std::make_shared(); +} + +REGISTER_FUNCTION(KqlHash) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlIndexOfRegex.cpp b/src/Functions/Kusto/kqlIndexOfRegex.cpp new file mode 100644 index 000000000000..63d9e88f03aa --- /dev/null +++ b/src/Functions/Kusto/kqlIndexOfRegex.cpp @@ -0,0 +1,135 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB::ErrorCodes +{ +extern const int CANNOT_COMPILE_REGEXP; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +namespace DB +{ +class FunctionKqlIndexOfRegex : public IFunction +{ +public: + static constexpr auto name = "kql_indexof_regex"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlIndexOfRegex(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlIndexOfRegex() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 4}; } + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool isVariadic() const override { return true; } + +private: + ColumnPtr extractArgumentColumnAsString(const ColumnWithTypeAndName & argument, const size_t input_rows_count) const + { + if (isString(argument.type)) + return argument.column; + + const ColumnsWithTypeAndName kql_to_string_args{argument}; + return executeFunctionCall(context, "kql_tostring", kql_to_string_args, input_rows_count).first; + } + + ColumnPtr extractIntegerArgumentColumn(const ColumnsWithTypeAndName & arguments, const int index, 
const int default_value) const + { + if (index >= std::ssize(arguments)) + return DataTypeInt32().createColumnConst(1, toField(default_value)); + + const auto & argument = arguments[index]; + if (!isInteger(argument.type)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument #{} of function {}, expected integral type", + argument.type->getName(), + index, + getName()); + + return argument.column; + } + + ContextPtr context; +}; + +ColumnPtr +FunctionKqlIndexOfRegex::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, const size_t input_rows_count) const +{ + const auto in_column_haystack = extractArgumentColumnAsString(arguments[0], input_rows_count); + const auto in_column_pattern = extractArgumentColumnAsString(arguments[1], input_rows_count); + const auto in_column_start = extractIntegerArgumentColumn(arguments, 2, 0); + const auto in_column_length = extractIntegerArgumentColumn(arguments, 3, -1); + const auto in_column_occurrence = extractIntegerArgumentColumn(arguments, 4, 1); + + const auto pattern = in_column_pattern->getDataAt(0).toView(); + const RE2 precompiled_pattern(pattern, RE2::Quiet); + if (!precompiled_pattern.ok()) + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, "{}: {}", getName(), precompiled_pattern.error()); + + auto out_column = ColumnInt64::create(input_rows_count); + auto out_null_map = ColumnUInt8::create(input_rows_count); + + auto & out_column_data = out_column->getData(); + auto & out_null_map_data = out_null_map->getData(); + for (size_t i = 0; i < input_rows_count; ++i) + { + const auto start = in_column_start->getInt(i); + const auto length = in_column_length->getInt(i); + const auto occurrence = in_column_occurrence->getInt(i); + + const auto is_invalid = start < 0 || length < -1 || occurrence <= 0; + out_null_map_data[i] = is_invalid; + + if (is_invalid) + continue; + + const auto haystack = in_column_haystack->getDataAt(i).toView(); + const auto bounded_start = 
std::min(start, std::max(std::ssize(haystack) - 1, Int64(0))); + const auto shortened_haystack = haystack.substr(bounded_start, length == -1 ? std::string_view::npos : length); + + size_t offset = 0; + re2::StringPiece partial_match; + int pass = 0; + while (pass < occurrence + && precompiled_pattern.Match(shortened_haystack, offset, shortened_haystack.length(), RE2::UNANCHORED, &partial_match, 1)) + { + offset = std::distance(shortened_haystack.data(), partial_match.data()) + partial_match.length(); + ++pass; + } + + out_column_data[i] = pass == occurrence ? std::distance(haystack.data(), partial_match.data()) : -1; + } + + return ColumnNullable::create(std::move(out_column), std::move(out_null_map)); +} + +DataTypePtr FunctionKqlIndexOfRegex::getReturnTypeImpl(const DataTypes & arguments) const +{ + if (const auto argument_count = std::ssize(arguments); argument_count < 2 || 5 < argument_count) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be between 2 and 5.", + getName(), + argument_count); + + return makeNullable(std::make_shared()); +} + +REGISTER_FUNCTION(KqlIndexOfRegex) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlMakeString.cpp b/src/Functions/Kusto/kqlMakeString.cpp new file mode 100644 index 000000000000..b878b3684d97 --- /dev/null +++ b/src/Functions/Kusto/kqlMakeString.cpp @@ -0,0 +1,128 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ARGUMENT_OUT_OF_BOUND; +} + +class FunctionKqlMakeString : public IFunction +{ +public: + static constexpr auto name = "kql_make_string"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlMakeString(ContextPtr context_) : context(std::move(context_)) { } + 
~FunctionKqlMakeString() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + String getName() const override { return name; } + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + +private: + void convertAndAppendCodePoint(int code_point, String & row_str) const; + ContextPtr context; +}; + +/// Appends the UTF-8 encoding of code_point to row_str; throws ARGUMENT_OUT_OF_BOUND when code_point is outside [0, 1114111] (U+0000..U+10FFFF). +void FunctionKqlMakeString::convertAndAppendCodePoint(const int code_point, String & row_str) const +{ + if (code_point < 0 || code_point > 1114111) + throw DB::Exception( + DB::ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Argument in function {} is out of range, should be between 0 and 1114111", + getName()); + + std::array buff; + const auto num_chars = UTF8::convertCodePointToUTF8(code_point, buff.data(), buff.size()); + row_str.append(buff.data(), num_chars); +} + +DataTypePtr FunctionKqlMakeString::getReturnTypeImpl(const DataTypes & arguments) const +{ + if (const auto argument_count = std::ssize(arguments); argument_count < 1 || argument_count > 64) + throw DB::Exception( + DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be between 1 and 64", + getName(), + argument_count); + + const auto arg_it = std::ranges::find_if(arguments, [](const auto & argument) { + if (const auto * array_type = typeid_cast(argument.get())) + { + WhichDataType which(array_type->getNestedType()->getPtr()); + + return !which.isUInt() && !which.isInt() && !which.isNothing(); + } + return !WhichDataType(argument).isUInt() && !WhichDataType(argument).isInt(); + }); + + if (arg_it != arguments.cend()) + throw DB::Exception( + 
DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Arguments type argument # {} for function {} doesn't match: arguments should be integers int,long or a dynamic value holding " + "an array of " + "integral numbers", + std::distance(arguments.cbegin(), arg_it), + getName()); + + return std::make_shared(); +} + +ColumnPtr +FunctionKqlMakeString::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, const size_t input_rows_count) const +{ + auto out_col = ColumnString::create(); + + + for (size_t j = 0; j < input_rows_count; ++j) + { + String row_str; + for (size_t i = 0; i < arguments.size(); ++i) + { + if (WhichDataType(arguments[i].type).isArray()) + { + Field arr_field; + arguments[i].column->get(j, arr_field); + const auto len = arr_field.get().size(); + for (size_t k = 0; k < len; ++k) + { + const auto & val = arr_field.get().at(k); + const auto code_point = static_cast(val.get()); + convertAndAppendCodePoint(code_point, row_str); + } + } + else + { + const auto code_point = static_cast(arguments[i].column->getInt(j)); + convertAndAppendCodePoint(code_point, row_str); + } + } + out_col->insertData(row_str.c_str(), row_str.size()); + } + return out_col; +} + +REGISTER_FUNCTION(KqlMakeString) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlNot.cpp b/src/Functions/Kusto/kqlNot.cpp new file mode 100644 index 000000000000..abe56fef0a63 --- /dev/null +++ b/src/Functions/Kusto/kqlNot.cpp @@ -0,0 +1,73 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionKqlNot : public IFunction +{ +public: + static constexpr auto name = "kql_not"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlNot(ContextPtr context_) : context(std::move(context_)) { } + 
~FunctionKqlNot() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + +private: + ContextPtr context; +}; + +DataTypePtr FunctionKqlNot::getReturnTypeImpl(const DataTypes & arguments) const +{ + if (WhichDataType(*arguments[0]).isString() || WhichDataType(*arguments[0]).isArray()) + return makeNullable(std::make_shared()); + + if (!WhichDataType(*arguments[0]).isInt() && !WhichDataType(*arguments[0]).isUInt()) + throw DB::Exception( + DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument type for function {} doesn't match: argument expected to be a boolean, integer or dynamic expression", + getName()); + + return DataTypeFactory::instance().get("Bool"); +} + +ColumnPtr FunctionKqlNot::executeImpl( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, const size_t input_rows_count) const +{ + const auto & elem = arguments[0]; + + WhichDataType arg_type(*elem.type); + + if (arg_type.isString() || arg_type.isArray()) + return makeNullable(DataTypeString().createColumnConst(input_rows_count, "NULL")); + + const ColumnsWithTypeAndName arg{arguments[0]}; + return executeFunctionCall(context, "not", arg, input_rows_count).first; +} + +REGISTER_FUNCTION(KqlNot) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlParseURL.cpp b/src/Functions/Kusto/kqlParseURL.cpp new file mode 100644 index 000000000000..ddb943dcd14c --- /dev/null +++ b/src/Functions/Kusto/kqlParseURL.cpp @@ -0,0 +1,154 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace x3 = 
boost::spirit::x3; + +namespace +{ +using x3::char_; +using x3::lexeme; +using x3::lit; + +struct KQLURLstate +{ + std::string schema; + std::string user; + std::string pass; + std::string host; + std::string port; + std::string path; + std::string frag; + std::vector> args; +}; + +const auto endschema = lit("://"); +const auto colon = lit(":"); +const auto at = lit("@"); +const auto slash = lit("/"); +const auto equals = lit("="); +const auto fragmark = lit("#"); +const auto openbracket = lit("["); +const auto closebracket = lit("]"); +const auto question = lit("?"); +const auto ampersand = lit("&"); + +const auto endhost = char_("/:?#"); +const auto endport = char_("/?#"); +const auto endauth = char_("@:/?#"); +const auto endpath = char_("?#"); +const auto endarg = char_("#&"); + +const auto set_schema = [](auto & ctx) { _val(ctx).schema = _attr(ctx); }; +const auto set_auth = [](auto & ctx) +{ + const auto & auth = _attr(ctx); + _val(ctx).user = at_c<0>(auth); + _val(ctx).pass = at_c<1>(auth); +}; +const auto set_host = [](auto & ctx) { _val(ctx).host = _attr(ctx); }; +const auto set_port = [](auto & ctx) { _val(ctx).port = _attr(ctx); }; +const auto set_path = [](auto & ctx) { _val(ctx).path = _attr(ctx); }; +const auto set_arg = [](auto & ctx) +{ + const auto & arg = _attr(ctx); + _val(ctx).args.emplace_back(at_c<0>(arg), at_c<1>(arg)); +}; +const auto set_frag = [](auto & ctx) { _val(ctx).frag = _attr(ctx); }; + +template +auto as = [](auto p) { return x3::rule{} = as_parser(p); }; + +const auto KQL_URL_SCHEMA_def = lexeme[+(char_ - endschema) >> endschema][set_schema]; +const auto KQL_URL_AUTH_def = lexeme[+(char_ - endauth) >> colon >> +(char_ - endauth) >> at][set_auth]; +const auto KQL_URL_HOST_def + = lexeme[as((openbracket >> +(char_ - closebracket) >> closebracket) | (+(char_ - endhost)))][set_host]; +const auto KQL_URL_PORT_def = lexeme[colon >> +(char_ - endport)][set_port]; +const auto KQL_URL_PATH_def = lexeme[&slash >> +(char_ - 
endpath)][set_path]; +const auto KQL_URL_ARG_def = lexeme[(question | ampersand) >> +(char_ - equals) >> equals >> +(char_ - endarg)][set_arg]; +const auto KQL_URL_FRAG_def = lexeme[fragmark >> +char_][set_frag]; + +const x3::rule KQL_URL = "KQL URL"; +const auto KQL_URL_def = KQL_URL_SCHEMA_def >> -KQL_URL_AUTH_def >> -KQL_URL_HOST_def >> -KQL_URL_PORT_def >> -KQL_URL_PATH_def + >> *KQL_URL_ARG_def >> -KQL_URL_FRAG_def; + +BOOST_SPIRIT_DEFINE(KQL_URL); +} + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionKqlParseURL : public IFunction +{ +public: + static constexpr auto name = "kql_parseurl"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlParseURL(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlParseURL() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return std::make_shared(); } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + +private: + ContextPtr context; +}; + +ColumnPtr +FunctionKqlParseURL::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, const size_t input_rows_count) const +{ + auto result = ColumnString::create(); + + if (!isStringOrFixedString(arguments.at(0).type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "parse_url(): argument #1 - invalid data type: string"); + + for (size_t i = 0; i < input_rows_count; ++i) + { + const auto in_str = arguments[0].column->getDataAt(i).toView(); + KQLURLstate url; + parse(in_str.begin(), in_str.end(), KQL_URL, url); + bool first = false; + std::string 
args = "{"; + for (auto q_iter = url.args.begin(); q_iter < url.args.end(); ++q_iter) + { + args.append((first ? ",\"" : "\"") + q_iter->first + "\":\"" + q_iter->second + "\""); + first = true; + } + args.append("}"); + const auto out_str = std::format( + "{}\"Scheme\":\"{}\",\"Host\":\"{}\",\"Port\":\"{}\",\"Path\":\"{}\",\"Username\":\"{}\",\"Password\":\"{}\",\"Query " + "Parameters\":{},\"Fragment\":\"{}\"{}", + "{", + url.schema, + url.host, + url.port, + url.path, + url.user, + url.pass, + args, + url.frag, + "}"); + result->insertData(out_str.c_str(), out_str.size()); + } + return result; +} + +REGISTER_FUNCTION(KqlParseURL) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlToLong.cpp b/src/Functions/Kusto/kqlToLong.cpp new file mode 100644 index 000000000000..648eca3f57df --- /dev/null +++ b/src/Functions/Kusto/kqlToLong.cpp @@ -0,0 +1,69 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ +class FunctionKqlToLong : public IFunction +{ +public: + static constexpr auto name = "kql_tolong"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlToLong(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlToLong() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + DataTypePtr getReturnTypeImpl(const DataTypes &) const override + { + return std::make_shared(std::make_shared()); + } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + +private: + ContextPtr context; +}; + +ColumnPtr FunctionKqlToLong::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, const size_t input_rows_count) const +{ + const auto 
& argument = arguments.front(); + ColumnsWithTypeAndName firstarg{argument}; + WhichDataType which(*argument.type); + if (which.isDateTime64()) + { + const ColumnsWithTypeAndName todec_args{argument, createConstColumnWithTypeAndName(9, "precision")}; + const auto todecimal128 = executeFunctionCall(context, "toDecimal128", todec_args, input_rows_count); + //Seconds to KQL ticks of 100 ns each (1 s = 10000000 ticks), not microseconds + const ColumnsWithTypeAndName multiplier_args{ + asArgument(todecimal128, name), createConstColumnWithTypeAndName(10000000, "multplier")}; + const auto multiplied = executeFunctionCall(context, "multiply", multiplier_args, input_rows_count); + const ColumnsWithTypeAndName int_args{asArgument(multiplied, name)}; + const auto toint64 = executeFunctionCall(context, "toInt64", int_args, input_rows_count); + //ClickHouse counts from the Unix epoch, KQL ticks count from 0001-01-01, so add the tick value of 1970-01-01: print tolong(datetime('1970-01-01')); + const ColumnsWithTypeAndName plus_args{ + asArgument(toint64, name), createConstColumnWithTypeAndName(621355968000000000, "plus")}; + const auto plused = executeFunctionCall(context, "plus", plus_args, input_rows_count); + firstarg = {asArgument(plused, name)}; + } + const auto tostring = executeFunctionCall(context, "toString", firstarg, input_rows_count); + ColumnsWithTypeAndName toint_args{asArgument(tostring, "tostring")}; + const auto out = executeFunctionCall(context, "toInt64OrNull", toint_args, input_rows_count); + if (which.isInterval()) + { + const ColumnsWithTypeAndName div_args{asArgument(out, "int64"), createConstColumnWithTypeAndName(100, "intdiv")}; + return executeFunctionCall(context, "intDiv", div_args, input_rows_count).first; + } + return out.first; +} + +REGISTER_FUNCTION(KqlToLong) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlToString.cpp b/src/Functions/Kusto/kqlToString.cpp new file mode 100644 index 000000000000..c1eddc93de9d --- /dev/null +++ b/src/Functions/Kusto/kqlToString.cpp @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include 
+#include +#include + +#include + +namespace DB +{ +class FunctionKqlToString : public IFunction +{ +public: + static constexpr auto name = "kql_tostring"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlToString(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlToString() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return std::make_shared(); } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + +private: + ContextPtr context; +}; + +ColumnPtr +FunctionKqlToString::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, const size_t input_rows_count) const +{ + const auto & argument = arguments.front(); + if (WhichDataType which_data_type(*argument.type); which_data_type.isInterval()) + { + static const auto TICKS_PER_DAY = ParserKQLTimespan::parse("1d").value(); + static const auto TICKS_PER_HOUR = ParserKQLTimespan::parse("1h").value(); + static const auto TICKS_PER_MINUTE = ParserKQLTimespan::parse("1m").value(); + static const auto TICKS_PER_SECOND = ParserKQLTimespan::parse("1s").value(); + + const auto & in_column = *argument.column; + auto out_column = ColumnString::create(); + auto & chars = out_column->getChars(); + auto & offsets = out_column->getOffsets(); + for (size_t i = 0; i < input_rows_count; ++i) + { + const auto value = in_column.getInt(i); + const auto abs_ticks = std::abs(value / 100); + + std::string timespan_as_string = value < 0 ? 
"-" : ""; + if (abs_ticks >= TICKS_PER_DAY) + timespan_as_string.append(std::format("{}.", abs_ticks / TICKS_PER_DAY)); + + timespan_as_string.append(std::format( + "{:02}:{:02}:{:02}", + (abs_ticks / TICKS_PER_HOUR) % 24, + (abs_ticks / TICKS_PER_MINUTE) % 60, + (abs_ticks / TICKS_PER_SECOND) % 60)); + + if (const auto fractional_second = abs_ticks % TICKS_PER_SECOND) + timespan_as_string.append(std::format(".{:07}", fractional_second)); + + const auto chars_old_length = chars.size(); + const auto str_length_with_terminator = timespan_as_string.length() + 1; + chars.resize(chars.size() + str_length_with_terminator); + std::copy(timespan_as_string.c_str(), timespan_as_string.c_str() + str_length_with_terminator, chars.data() + chars_old_length); + offsets.push_back(chars.size()); + } + + return out_column; + } + else if (which_data_type.isDateOrDate32() || which_data_type.isDateTime() || which_data_type.isDateTime64()) + { + const ColumnsWithTypeAndName to_datetime64_args{argument, createConstColumnWithTypeAndName(7, "scale")}; + const auto as_datetime64 = executeFunctionCall(context, "toDateTime64", to_datetime64_args, input_rows_count); + + const ColumnsWithTypeAndName format_datetime_args + { + asArgument(as_datetime64, "as_datetime64"), createConstColumnWithTypeAndName("%FT%T.%fZ", "format_string") + }; + return executeFunctionCall(context, "formatDateTime", format_datetime_args, input_rows_count).first; + } + + return executeFunctionCall(context, "toString", arguments, input_rows_count).first; +} + +REGISTER_FUNCTION(KqlToString) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/Kusto/kqlToTimespan.cpp b/src/Functions/Kusto/kqlToTimespan.cpp new file mode 100644 index 000000000000..39923de9d187 --- /dev/null +++ b/src/Functions/Kusto/kqlToTimespan.cpp @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class 
FunctionKqlToTimespan : public IFunction +{ +public: + static constexpr auto name = "kql_totimespan"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionKqlToTimespan(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionKqlToTimespan() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + DataTypePtr getReturnTypeImpl(const DataTypes &) const override; + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + +private: + ContextPtr context; +}; + +ColumnPtr FunctionKqlToTimespan::executeImpl( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const +{ + const auto & argument = arguments.front(); + if (WhichDataType(*argument.type).isInterval()) + return wrapInNullable(argument.column, arguments, result_type, input_rows_count); + + const auto * in_column = typeid_cast(argument.column.get()); + if (!in_column) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, expected String", + argument.type->getName(), + getName()); + + auto out_column = result_type->createColumn(); + auto & out_column_as_nullable = assert_cast(*out_column); + auto & out_nested_column = assert_cast(out_column_as_nullable.getNestedColumn()); + + const auto size = in_column->size(); + auto & out_data = out_nested_column.getData(); + auto & out_null_map = out_column_as_nullable.getNullMapData(); + out_data.resize(size); + out_null_map.resize(size); + + const auto & in_chars = in_column->getChars(); + const auto & in_offsets = 
in_column->getOffsets(); + const auto * in_chars_data = reinterpret_cast(in_chars.data()); + size_t start = 0; + for (size_t i = 0; i < size; ++i) + { + const auto & offset = in_offsets[i]; + std::optional ticks; + const auto success = ParserKQLTimespan::tryParse({in_chars_data + start, offset - start - 1}, ticks); + out_data[i] = ticks.value_or(0) * 100; + out_null_map[i] = !ticks.has_value() || !success; + + start = offset; + } + + return out_column; +} + +DataTypePtr FunctionKqlToTimespan::getReturnTypeImpl(const DataTypes &) const +{ + return makeNullable(std::make_shared(IntervalKind::Nanosecond)); +} + +REGISTER_FUNCTION(KqlToTimespan) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/abs.cpp b/src/Functions/abs.cpp index 25ff6cc93d0a..5148bedd8c8c 100644 --- a/src/Functions/abs.cpp +++ b/src/Functions/abs.cpp @@ -10,6 +10,7 @@ template struct AbsImpl { using ResultType = std::conditional_t, A, typename NumberTraits::ResultOfAbs::Type>; + static constexpr bool allow_interval = false; static constexpr bool allow_string_or_fixed_string = false; static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) diff --git a/src/Functions/bitAnd.cpp b/src/Functions/bitAnd.cpp index 8efc51819197..fec84df41fa9 100644 --- a/src/Functions/bitAnd.cpp +++ b/src/Functions/bitAnd.cpp @@ -17,6 +17,7 @@ struct BitAndImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; static constexpr bool allow_fixed_string = true; + static constexpr bool allow_interval = false; static constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitBoolMaskAnd.cpp b/src/Functions/bitBoolMaskAnd.cpp index 11c0c1d1b7d6..4e354b5e64ac 100644 --- a/src/Functions/bitBoolMaskAnd.cpp +++ b/src/Functions/bitBoolMaskAnd.cpp @@ -22,6 +22,7 @@ struct BitBoolMaskAndImpl { using ResultType = UInt8; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = 
false; template diff --git a/src/Functions/bitBoolMaskOr.cpp b/src/Functions/bitBoolMaskOr.cpp index 7940bf3e2caa..42a69a4dc5eb 100644 --- a/src/Functions/bitBoolMaskOr.cpp +++ b/src/Functions/bitBoolMaskOr.cpp @@ -22,6 +22,7 @@ struct BitBoolMaskOrImpl { using ResultType = UInt8; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitCount.cpp b/src/Functions/bitCount.cpp index 566a11481be9..d0548a86d48e 100644 --- a/src/Functions/bitCount.cpp +++ b/src/Functions/bitCount.cpp @@ -10,6 +10,7 @@ template struct BitCountImpl { using ResultType = std::conditional_t<(sizeof(A) * 8 >= 256), UInt16, UInt8>; + static constexpr bool allow_interval = false; static constexpr bool allow_string_or_fixed_string = true; static inline ResultType apply(A a) diff --git a/src/Functions/bitHammingDistance.cpp b/src/Functions/bitHammingDistance.cpp index 2eaa397dd041..7de71bfe339b 100644 --- a/src/Functions/bitHammingDistance.cpp +++ b/src/Functions/bitHammingDistance.cpp @@ -9,6 +9,7 @@ struct BitHammingDistanceImpl { using ResultType = UInt8; static constexpr bool allow_fixed_string = true; + static constexpr bool allow_interval = false; static constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitNot.cpp b/src/Functions/bitNot.cpp index 62ebdc7c52ad..573d7c71be14 100644 --- a/src/Functions/bitNot.cpp +++ b/src/Functions/bitNot.cpp @@ -17,6 +17,7 @@ template struct BitNotImpl { using ResultType = typename NumberTraits::ResultOfBitNot::Type; + static constexpr bool allow_interval = false; static constexpr bool allow_string_or_fixed_string = true; static inline ResultType NO_SANITIZE_UNDEFINED apply(A a) diff --git a/src/Functions/bitOr.cpp b/src/Functions/bitOr.cpp index 9e19fc552190..9021cc5613bd 100644 --- a/src/Functions/bitOr.cpp +++ b/src/Functions/bitOr.cpp @@ -16,6 +16,7 @@ struct BitOrImpl { using 
ResultType = typename NumberTraits::ResultOfBit::Type; static constexpr bool allow_fixed_string = true; + static constexpr bool allow_interval = false; static constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitRotateLeft.cpp b/src/Functions/bitRotateLeft.cpp index c72466b8d495..eb165dd73e23 100644 --- a/src/Functions/bitRotateLeft.cpp +++ b/src/Functions/bitRotateLeft.cpp @@ -17,6 +17,7 @@ struct BitRotateLeftImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitRotateRight.cpp b/src/Functions/bitRotateRight.cpp index 045758f9a311..d04d386ffea8 100644 --- a/src/Functions/bitRotateRight.cpp +++ b/src/Functions/bitRotateRight.cpp @@ -17,6 +17,7 @@ struct BitRotateRightImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index 7b3748edb5c9..13686a7ae5b3 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -17,6 +17,7 @@ struct BitShiftLeftImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = true; template diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 21a0f7584aa0..ccbd51d485dc 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -18,6 +18,7 @@ struct BitShiftRightImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; static const constexpr bool allow_fixed_string = 
false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = true; template diff --git a/src/Functions/bitSwapLastTwo.cpp b/src/Functions/bitSwapLastTwo.cpp index d8957598c624..e3bf4aa5b256 100644 --- a/src/Functions/bitSwapLastTwo.cpp +++ b/src/Functions/bitSwapLastTwo.cpp @@ -19,7 +19,8 @@ template struct BitSwapLastTwoImpl { using ResultType = UInt8; - static constexpr const bool allow_string_or_fixed_string = false; + static constexpr bool allow_interval = false; + static constexpr bool allow_string_or_fixed_string = false; static inline ResultType NO_SANITIZE_UNDEFINED apply([[maybe_unused]] A a) { diff --git a/src/Functions/bitTest.cpp b/src/Functions/bitTest.cpp index 4c9c6aa2dfb0..c7341323ba34 100644 --- a/src/Functions/bitTest.cpp +++ b/src/Functions/bitTest.cpp @@ -18,6 +18,7 @@ struct BitTestImpl { using ResultType = UInt8; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/bitWrapperFunc.cpp b/src/Functions/bitWrapperFunc.cpp index 99c06172c307..d8ecfbcfc046 100644 --- a/src/Functions/bitWrapperFunc.cpp +++ b/src/Functions/bitWrapperFunc.cpp @@ -19,7 +19,8 @@ template struct BitWrapperFuncImpl { using ResultType = UInt8; - static constexpr const bool allow_string_or_fixed_string = false; + static constexpr bool allow_interval = false; + static constexpr bool allow_string_or_fixed_string = false; static inline ResultType NO_SANITIZE_UNDEFINED apply(A a [[maybe_unused]]) { diff --git a/src/Functions/bitXor.cpp b/src/Functions/bitXor.cpp index 78c4c64d06ec..984754d06051 100644 --- a/src/Functions/bitXor.cpp +++ b/src/Functions/bitXor.cpp @@ -15,7 +15,8 @@ template struct BitXorImpl { using ResultType = typename NumberTraits::ResultOfBit::Type; - static constexpr bool allow_fixed_string = true; + static const constexpr bool allow_fixed_string = true; + 
static const constexpr bool allow_interval = false; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/dateTime64Diff.cpp b/src/Functions/dateTime64Diff.cpp new file mode 100644 index 000000000000..f03bd189d042 --- /dev/null +++ b/src/Functions/dateTime64Diff.cpp @@ -0,0 +1,102 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionDateTime64Diff : public IFunction +{ + using ColumnDateTime64 = ColumnDecimal; + +public: + static constexpr auto name = "dateTime64Diff"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(std::move(context)); } + + explicit FunctionDateTime64Diff(ContextPtr context_) : context(std::move(context_)) { } + ~FunctionDateTime64Diff() override = default; + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override; + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 2; } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + +private: + ContextPtr context; +}; + +ColumnPtr FunctionDateTime64Diff::executeImpl( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t input_rows_count) const +{ + const auto & lhs_arg = arguments.front(); + const auto & rhs_arg = arguments.back(); + const auto * lhs_type = checkAndGetDataType(lhs_arg.type.get()); + const auto * rhs_type = checkAndGetDataType(rhs_arg.type.get()); + if (!lhs_type || !rhs_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Unexpected arguments of function {}", getName()); + + const auto common_scale = std::max(lhs_type->getScale(), rhs_type->getScale()); + const 
auto scale_arg = createConstColumnWithTypeAndName(common_scale, "scale"); + const auto convert_to_decimal = [this, &input_rows_count, &scale_arg](const ColumnWithTypeAndName & argument) + { + const ColumnsWithTypeAndName cast_args{argument, scale_arg}; + return executeFunctionCall(context, "toDecimal64", cast_args, input_rows_count); + }; + + const auto lhs_arg_as_decimal = convert_to_decimal(lhs_arg); + const auto rhs_arg_as_decimal = convert_to_decimal(rhs_arg); + const ColumnsWithTypeAndName subtraction_args{asArgument(lhs_arg_as_decimal, "lhs"), asArgument(rhs_arg_as_decimal, "rhs")}; + const auto difference = executeFunctionCall(context, "minus", subtraction_args, input_rows_count); + + const ColumnsWithTypeAndName to_decimal128_args{asArgument(difference, "difference"), scale_arg}; + const auto as_decimal128 = executeFunctionCall(context, "toDecimal128", to_decimal128_args, input_rows_count); + + const ColumnsWithTypeAndName scale_args{ + asArgument(as_decimal128, "difference"), createConstColumnWithTypeAndName(1'000'000'000, "multiplier")}; + const auto scaled = executeFunctionCall(context, "multiply", scale_args, input_rows_count); + + const ColumnsWithTypeAndName to_int64_args{asArgument(scaled, "scaled")}; + const auto as_int64 = executeFunctionCall(context, "toInt64", to_int64_args, input_rows_count); + + const ColumnsWithTypeAndName cast_args{ + asArgument(as_int64, "as_int64"), createConstColumnWithTypeAndName(result_type->getName(), "target_type")}; + return executeFunctionCall(context, "cast", cast_args, input_rows_count).first; +} + +DataTypePtr FunctionDateTime64Diff::getReturnTypeImpl(const DataTypes & arguments) const +{ + const auto & lhs = arguments.front(); + const auto & rhs = arguments.back(); + if (!WhichDataType(*lhs).isDateTime64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, expected DateTime64", + lhs->getName(), + getName()); + + if 
(!WhichDataType(*rhs).isDateTime64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of second argument of function {}, expected DateTime64", + rhs->getName(), + getName()); + + return std::make_shared(IntervalKind::Nanosecond); +} + +REGISTER_FUNCTION(DateTime64Diff) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/divide.cpp b/src/Functions/divide.cpp index ca552256cd16..2d35e9017f36 100644 --- a/src/Functions/divide.cpp +++ b/src/Functions/divide.cpp @@ -13,6 +13,7 @@ struct DivideFloatingImpl { using ResultType = typename NumberTraits::ResultOfFloatingPointDivision::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/extract.cpp b/src/Functions/extract.cpp index 74c5a2fdd366..24aa95d2b127 100644 --- a/src/Functions/extract.cpp +++ b/src/Functions/extract.cpp @@ -70,4 +70,63 @@ REGISTER_FUNCTION(Extract) factory.registerFunction(); } +struct KqlExtractImpl +{ + static void vector( + const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + const std::string & pattern, + unsigned capture, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + res_data.reserve(data.size() / 5); + res_offsets.resize(offsets.size()); + + const Regexps::Regexp regexp = Regexps::createRegexp(pattern); + + OptimizedRegularExpression::MatchVec matches; + matches.reserve(capture + 1); + size_t prev_offset = 0; + size_t res_offset = 0; + + for (size_t i = 0; i < offsets.size(); ++i) + { + size_t cur_offset = offsets[i]; + + unsigned count + = regexp.match(reinterpret_cast(&data[prev_offset]), cur_offset - prev_offset - 1, matches, capture + 1); + if (count > capture && matches[capture].offset != std::string::npos) + { + const auto & match = matches[capture]; + res_data.resize(res_offset + match.length + 1); + 
memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &data[prev_offset + match.offset], match.length); + res_offset += match.length; + } + else + { + res_data.resize(res_offset + 1); + } + + res_data[res_offset] = 0; + ++res_offset; + res_offsets[i] = res_offset; + + prev_offset = cur_offset; + } + } +}; + +struct NameKqlExtract +{ + static constexpr auto name = "kql_extract"; +}; + +using FunctionKqlExtract = KqlStringSearchToString; + +REGISTER_FUNCTION(KqlExtract) +{ + factory.registerFunction(); +} + } diff --git a/src/Functions/factorial.cpp b/src/Functions/factorial.cpp index b814e8198e68..467dfc8b7d96 100644 --- a/src/Functions/factorial.cpp +++ b/src/Functions/factorial.cpp @@ -17,6 +17,7 @@ struct FactorialImpl { using ResultType = UInt64; static const constexpr bool allow_decimal = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_or_fixed_string = false; static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) diff --git a/src/Functions/greatest.cpp b/src/Functions/greatest.cpp index 93fd7e24853f..a9d4637884af 100644 --- a/src/Functions/greatest.cpp +++ b/src/Functions/greatest.cpp @@ -12,6 +12,7 @@ struct GreatestBaseImpl { using ResultType = NumberTraits::ResultOfGreatest; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template @@ -43,6 +44,7 @@ struct GreatestSpecialImpl { using ResultType = make_unsigned_t; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/intDivOrZero.cpp b/src/Functions/intDivOrZero.cpp index 96ff6ea80fc4..708186dadfb4 100644 --- a/src/Functions/intDivOrZero.cpp +++ b/src/Functions/intDivOrZero.cpp @@ -10,6 +10,7 @@ struct DivideIntegralOrZeroImpl { using ResultType = typename 
NumberTraits::ResultOfIntegerDivision::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/intExp10.cpp b/src/Functions/intExp10.cpp index 6944c4701bcb..7611ead5213b 100644 --- a/src/Functions/intExp10.cpp +++ b/src/Functions/intExp10.cpp @@ -17,7 +17,8 @@ template struct IntExp10Impl { using ResultType = UInt64; - static constexpr const bool allow_string_or_fixed_string = false; + static constexpr bool allow_interval = false; + static constexpr bool allow_string_or_fixed_string = false; static inline ResultType apply([[maybe_unused]] A a) { diff --git a/src/Functions/intExp2.cpp b/src/Functions/intExp2.cpp index 4e5cc60a731c..dd3fff4ee3c5 100644 --- a/src/Functions/intExp2.cpp +++ b/src/Functions/intExp2.cpp @@ -18,6 +18,7 @@ template struct IntExp2Impl { using ResultType = UInt64; + static constexpr bool allow_interval = false; static constexpr bool allow_string_or_fixed_string = false; static inline ResultType apply([[maybe_unused]] A a) diff --git a/src/Functions/least.cpp b/src/Functions/least.cpp index f5680d4d468f..2de1805dc2cc 100644 --- a/src/Functions/least.cpp +++ b/src/Functions/least.cpp @@ -12,6 +12,7 @@ struct LeastBaseImpl { using ResultType = NumberTraits::ResultOfLeast; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template @@ -43,6 +44,7 @@ struct LeastSpecialImpl { using ResultType = std::make_signed_t; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/minus.cpp b/src/Functions/minus.cpp index 04877a42b18d..32e8e053df67 100644 --- a/src/Functions/minus.cpp +++ b/src/Functions/minus.cpp @@ -10,6 +10,7 @@ struct MinusImpl { 
using ResultType = typename NumberTraits::ResultOfSubtraction::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/moduloOrZero.cpp b/src/Functions/moduloOrZero.cpp index 3551ae74c5f1..dae58a7f7dd1 100644 --- a/src/Functions/moduloOrZero.cpp +++ b/src/Functions/moduloOrZero.cpp @@ -4,14 +4,12 @@ namespace DB { -namespace -{ - template struct ModuloOrZeroImpl { using ResultType = typename NumberTraits::ResultOfModulo::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template @@ -39,8 +37,6 @@ struct ModuloOrZeroImpl struct NameModuloOrZero { static constexpr auto name = "moduloOrZero"; }; using FunctionModuloOrZero = BinaryArithmeticOverloadResolver; -} - REGISTER_FUNCTION(ModuloOrZero) { factory.registerFunction(); diff --git a/src/Functions/multiply.cpp b/src/Functions/multiply.cpp index 4dc8cd10f317..535c0a1758de 100644 --- a/src/Functions/multiply.cpp +++ b/src/Functions/multiply.cpp @@ -11,6 +11,7 @@ struct MultiplyImpl { using ResultType = typename NumberTraits::ResultOfAdditionMultiplication::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; template diff --git a/src/Functions/negate.cpp b/src/Functions/negate.cpp index bd47780dea8f..d0b8b351a378 100644 --- a/src/Functions/negate.cpp +++ b/src/Functions/negate.cpp @@ -9,7 +9,8 @@ template struct NegateImpl { using ResultType = std::conditional_t, A, typename NumberTraits::ResultOfNegate::Type>; - static constexpr const bool allow_string_or_fixed_string = false; + static constexpr bool allow_interval = true; + static constexpr bool allow_string_or_fixed_string = false; static inline NO_SANITIZE_UNDEFINED 
ResultType apply(A a) { diff --git a/src/Functions/plus.cpp b/src/Functions/plus.cpp index cd9cf6cec5c3..4f792e569ce8 100644 --- a/src/Functions/plus.cpp +++ b/src/Functions/plus.cpp @@ -10,6 +10,7 @@ struct PlusImpl { using ResultType = typename NumberTraits::ResultOfAdditionMultiplication::Type; static const constexpr bool allow_fixed_string = false; + static const constexpr bool allow_interval = true; static const constexpr bool allow_string_integer = false; static const constexpr bool is_commutative = true; diff --git a/src/Functions/roundAge.cpp b/src/Functions/roundAge.cpp index cca92c19b0cb..bb71f3173cce 100644 --- a/src/Functions/roundAge.cpp +++ b/src/Functions/roundAge.cpp @@ -10,7 +10,8 @@ template struct RoundAgeImpl { using ResultType = UInt8; - static constexpr const bool allow_string_or_fixed_string = false; + static constexpr bool allow_interval = false; + static constexpr bool allow_string_or_fixed_string = false; static inline ResultType apply(A x) { diff --git a/src/Functions/roundDuration.cpp b/src/Functions/roundDuration.cpp index 918f0b3425db..9c24231fec59 100644 --- a/src/Functions/roundDuration.cpp +++ b/src/Functions/roundDuration.cpp @@ -10,6 +10,7 @@ template struct RoundDurationImpl { using ResultType = UInt16; + static constexpr bool allow_interval = false; static constexpr bool allow_string_or_fixed_string = false; static inline ResultType apply(A x) diff --git a/src/Functions/roundToExp2.cpp b/src/Functions/roundToExp2.cpp index 607c67b742e5..0ed740e20c09 100644 --- a/src/Functions/roundToExp2.cpp +++ b/src/Functions/roundToExp2.cpp @@ -63,7 +63,8 @@ template struct RoundToExp2Impl { using ResultType = T; - static constexpr const bool allow_string_or_fixed_string = false; + static constexpr bool allow_interval = false; + static constexpr bool allow_string_or_fixed_string = false; static inline T apply(T x) { diff --git a/src/Functions/sign.cpp b/src/Functions/sign.cpp index 6c849760eed0..e8d1a937f8d9 100644 --- 
a/src/Functions/sign.cpp +++ b/src/Functions/sign.cpp @@ -9,6 +9,7 @@ template struct SignImpl { using ResultType = Int8; + static constexpr bool allow_interval = true; static constexpr bool allow_string_or_fixed_string = false; static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 48bf88cb14c3..5803d520de94 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -22,12 +23,17 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; extern const int DECIMAL_OVERFLOW; + extern const int LOGICAL_ERROR; } namespace { - constexpr auto function_name = "toStartOfInterval"; + enum class ExecutionErrorPolicy + { + Null, + Throw + }; template struct Transform; @@ -35,22 +41,22 @@ namespace template <> struct Transform { - static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfYearInterval(DayNum(d), years); } - static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years); } - static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); } - static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return 
time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); } @@ -59,22 +65,22 @@ namespace template <> struct Transform { - static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfQuarterInterval(DayNum(d), quarters); } - static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters); } - static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); } - static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); } @@ -83,22 +89,22 @@ namespace template <> struct Transform { - static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfMonthInterval(DayNum(d), months); } - static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months); } - static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64, const 
char*) { return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); } - static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); } @@ -107,22 +113,22 @@ namespace template <> struct Transform { - static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfWeekInterval(DayNum(d), weeks); } - static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks); } - static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); } - static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); } @@ -131,22 +137,22 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64, const char*) { return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); } - static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & 
time_zone, Int64, const char*) { return static_cast(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days)); } - static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64, const char*) { return static_cast(time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days)); } - static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days); } @@ -155,16 +161,16 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfHourInterval(t, hours); } - static Int64 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfHourInterval(t / scale_multiplier, hours); } @@ -173,16 +179,16 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, 
Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfMinuteInterval(t, minutes); } - static Int64 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes); } @@ -191,16 +197,16 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64, const char*) { return time_zone.toStartOfSecondInterval(t, seconds); } - static Int64 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier, const char*) { return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds); } @@ -209,13 +215,13 @@ namespace 
template <> struct Transform { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateTimeIsNotSupported(function_name); } - static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier, const char*) { if (scale_multiplier < 1000) { @@ -246,13 +252,13 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateTimeIsNotSupported(function_name); } - static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 microseconds, const 
DateLUTImpl &, Int64 scale_multiplier, const char*) { if (scale_multiplier < 1000000) { @@ -283,13 +289,13 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { throwDateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateIsNotSupported(function_name); } - static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { throwDateTimeIsNotSupported(function_name); } + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64, const char* function_name) { throwDateTimeIsNotSupported(function_name); } - static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier, const char*) { if (scale_multiplier < 1000000000) { @@ -309,12 +315,23 @@ namespace } }; +template class FunctionToStartOfInterval : public IFunction { public: static FunctionPtr create(ContextPtr) { return std::make_shared(); } - static constexpr auto name = function_name; + static constexpr auto name = std::invoke( + [] + { + if (execution_error_policy == ExecutionErrorPolicy::Null) + return "toStartOfIntervalOrNull"; + else if (execution_error_policy == ExecutionErrorPolicy::Throw) + return "toStartOfInterval"; + + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unhandled execution policy"); + }); + String getName() const override { return name; } bool isVariadic() const override { return true; } @@ -380,36 +397,43 @@ class FunctionToStartOfInterval : public IFunction getName(), arguments.size()); } - if (result_type_is_date) - return std::make_shared(); - else if 
(result_type_is_datetime) - return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); - else - { - auto scale = 0; + auto return_type = std::invoke( + [&arguments, &interval_type, &result_type_is_date, &result_type_is_datetime]() -> std::shared_ptr + { + if (result_type_is_date) + return std::make_shared(); + else if (result_type_is_datetime) + return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); + else + { + auto scale = 0; - if (interval_type->getKind() == IntervalKind::Nanosecond) - scale = 9; - else if (interval_type->getKind() == IntervalKind::Microsecond) - scale = 6; - else if (interval_type->getKind() == IntervalKind::Millisecond) - scale = 3; + if (interval_type->getKind() == IntervalKind::Nanosecond) + scale = 9; + else if (interval_type->getKind() == IntervalKind::Microsecond) + scale = 6; + else if (interval_type->getKind() == IntervalKind::Millisecond) + scale = 3; - return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); - } + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false)); + } + }); + + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + return makeNullable(return_type); + return return_type; } bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const size_t) const override { const auto & time_column = arguments[0]; const auto & interval_column = arguments[1]; const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0); - auto result_column = dispatchForColumns(time_column, interval_column, result_type, 
time_zone); - return result_column; + return dispatchForColumns(time_column, interval_column, result_type, time_zone); } bool hasInformationAboutMonotonicity() const override @@ -469,13 +493,12 @@ class FunctionToStartOfInterval : public IFunction throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be an interval of time.", getName()); const auto * interval_column_const_int64 = checkAndGetColumnConst(interval_column.column.get()); if (!interval_column_const_int64) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column for second argument of function {}, must be a const interval of time.", - getName()); - Int64 num_units = interval_column_const_int64->getValue(); - if (num_units <= 0) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value for second argument of function {} must be positive.", getName()); + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column for second argument of function {}, must be a const interval of time.", + getName()); + const auto num_units = interval_column_const_int64->getValue(); switch (interval_type->getKind()) { case IntervalKind::Nanosecond: @@ -512,18 +535,54 @@ class FunctionToStartOfInterval : public IFunction using ToFieldType = typename ToDataType::FieldType; const auto & time_data = time_column_type.getData(); - size_t size = time_data.size(); + const auto size = time_data.size(); auto result_col = result_type->createColumn(); - auto *col_to = assert_cast(result_col.get()); - auto & result_data = col_to->getData(); - result_data.resize(size); + auto [result_null_map_data, result_value_data] = std::invoke( + [&result_col]() -> std::pair + { + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + { + auto & nullable_column = assert_cast(*result_col); + auto & nested_column = assert_cast(nullable_column.getNestedColumn()); + return {&nullable_column.getNullMapData(), nested_column.getData()}; + } + else if constexpr (execution_error_policy == 
ExecutionErrorPolicy::Throw) + { + auto & target_column = assert_cast(*result_col); + return {nullptr, target_column.getData()}; + } + }); + + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + result_null_map_data->resize(size, true); + + result_value_data.resize(size); + if (num_units <= 0) + { + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + return result_col; + else if constexpr (execution_error_policy == ExecutionErrorPolicy::Throw) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value for second argument of function {} must be positive.", getName()); + } - Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); + const auto scale_multiplier = DecimalUtils::scaleMultiplier(scale); for (size_t i = 0; i != size; ++i) - result_data[i] = static_cast( - Transform::execute(time_data[i], num_units, time_zone, scale_multiplier)); + { + try + { + result_value_data[i] + = static_cast(Transform::execute(time_data[i], num_units, time_zone, scale_multiplier, name)); + if constexpr (execution_error_policy == ExecutionErrorPolicy::Null) + (*result_null_map_data)[i] = false; + } + catch (...) 
+ { + if constexpr (execution_error_policy == ExecutionErrorPolicy::Throw) + throw; + } + } return result_col; } @@ -533,7 +592,8 @@ class FunctionToStartOfInterval : public IFunction REGISTER_FUNCTION(ToStartOfInterval) { - factory.registerFunction(); + factory.registerFunction>(); + factory.registerFunction>(); } } diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 56b81b3d224e..9e5232de78ea 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -196,8 +196,8 @@ void QueryNormalizer::visitChildren(IAST * node, Data & data) { if (func_node->tryGetQueryArgument()) { - if (func_node->name != "view") - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Query argument can only be used in the `view` TableFunction"); + if (func_node->name != "getschema" && func_node->name != "view") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Query argument can only be used in the `getschema` and `view` table functions"); /// Don't go into query argument. return; } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 688d3b9967d4..13a57aa6a65a 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -697,8 +697,7 @@ static std::tuple executeQueryImpl( { if (settings.dialect == Dialect::kusto && !internal) { - ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); - + ParserKQLStatement parser; /// TODO: parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); } diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index d74137f8a914..501596b3a3c6 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -4,9 +4,10 @@ add_headers_and_sources(clickhouse_parsers .) 
add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) add_headers_and_sources(clickhouse_parsers ./Kusto) +add_headers_and_sources(clickhouse_parsers ./Kusto/KustoFunctions) add_headers_and_sources(clickhouse_parsers ./PRQL) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) -target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) +target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access ch_contrib::roaring string_utils) if (TARGET ch_rust::prql) target_link_libraries(clickhouse_parsers PRIVATE ch_rust::prql) endif () diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 0149526da79c..a77ba7cb60e8 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -42,7 +42,7 @@ #include #include - +#include namespace DB { @@ -105,62 +105,70 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserSelectWithUnionQuery select; ParserExplainQuery explain; - - if (pos->type != TokenType::OpeningRoundBracket) - return false; - ++pos; - ASTPtr result_node = nullptr; + ParserKeyword s_kql("KQL"); - if (ASTPtr select_node; select.parse(pos, select_node, expected)) + if (s_kql.ignore(pos, expected)) { - result_node = std::move(select_node); + if (KQLContext kql_context; !ParserKQLTableFunction(kql_context).parse(pos, result_node, expected)) + return false; } - else if (ASTPtr explain_node; explain.parse(pos, explain_node, expected)) + else { - const auto & explain_query = explain_node->as(); + if (pos->type != TokenType::OpeningRoundBracket) + return false; + ++pos; - if (explain_query.getTableFunction() || explain_query.getTableOverride()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "EXPLAIN in a subquery cannot have a table function or table override"); + if (ASTPtr 
select_node; select.parse(pos, select_node, expected)) + { + result_node = std::move(select_node); + } + else if (ASTPtr explain_node; explain.parse(pos, explain_node, expected)) + { + const auto & explain_query = explain_node->as(); - /// Replace subquery `(EXPLAIN SELECT ...)` - /// with `(SELECT * FROM viewExplain("", "", SELECT ...))` + if (explain_query.getTableFunction() || explain_query.getTableOverride()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "EXPLAIN in a subquery cannot have a table function or table override"); - String kind_str = ASTExplainQuery::toString(explain_query.getKind()); + /// Replace subquery `(EXPLAIN SELECT ...)` + /// with `(SELECT * FROM viewExplain("", "", SELECT ...))` - String settings_str; - if (ASTPtr settings_ast = explain_query.getSettings()) - { - if (!settings_ast->as()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "EXPLAIN settings must be a SET query"); - settings_str = queryToString(settings_ast); - } + String kind_str = ASTExplainQuery::toString(explain_query.getKind()); - const ASTPtr & explained_ast = explain_query.getExplainedQuery(); - if (explained_ast) - { - auto view_explain = makeASTFunction("viewExplain", - std::make_shared(kind_str), - std::make_shared(settings_str), - explained_ast); - result_node = buildSelectFromTableFunction(view_explain); + String settings_str; + if (ASTPtr settings_ast = explain_query.getSettings()) + { + if (!settings_ast->as()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "EXPLAIN settings must be a SET query"); + settings_str = queryToString(settings_ast); + } + + const ASTPtr & explained_ast = explain_query.getExplainedQuery(); + if (explained_ast) + { + auto view_explain = makeASTFunction("viewExplain", + std::make_shared(kind_str), + std::make_shared(settings_str), + explained_ast); + result_node = buildSelectFromTableFunction(view_explain); + } + else + { + auto view_explain = makeASTFunction("viewExplain", + std::make_shared(kind_str), + std::make_shared(settings_str)); + 
result_node = buildSelectFromTableFunction(view_explain); + } } else { - auto view_explain = makeASTFunction("viewExplain", - std::make_shared(kind_str), - std::make_shared(settings_str)); - result_node = buildSelectFromTableFunction(view_explain); + return false; } - } - else - { - return false; - } - if (pos->type != TokenType::ClosingRoundBracket) - return false; - ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + return false; + ++pos; + } node = std::make_shared(); node->children.push_back(result_node); @@ -170,6 +178,16 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + /// 'kql(' is used for subuquery in Kusto, should not be treated as an identifier if kql followed by ( + ParserKeyword s_kql("KQL"); + if (s_kql.ignore(pos, expected)) + { + if (pos->type == TokenType::OpeningRoundBracket) + { --pos; + return false; + } + --pos; + } /// Identifier in backquotes or in double quotes if (pos->type == TokenType::QuotedIdentifier) { diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index cd3995310647..12221868f131 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -662,6 +662,26 @@ class Layer } else { + /// enable using subscript operator for kql_array_sort + if (cur_op.function_name == "arrayElement" && !operands.empty()) + { + auto* first_arg_as_node = operands.front()->as(); + if (first_arg_as_node) + { + if (first_arg_as_node->name == "kql_array_sort_asc" || first_arg_as_node->name == "kql_array_sort_desc") + { + cur_op.function_name = "tupleElement"; + cur_op.type = OperatorType::TupleElement; + } + else if (first_arg_as_node->name == "arrayElement" && !first_arg_as_node->arguments->children.empty()) + { + auto *arg_inside = first_arg_as_node->arguments->children[0]->as(); + if (arg_inside && (arg_inside->name == "kql_array_sort_asc" || 
arg_inside->name == "kql_array_sort_desc")) + first_arg_as_node->name = "tupleElement"; + } + } + } + function = makeASTFunction(cur_op); if (!popLastNOperands(function->children[0]->children, cur_op.arity)) @@ -2082,11 +2102,11 @@ class CaseLayer : public Layer bool has_case_expr; }; -/// Layer for table function 'view' and 'viewIfPermitted' +/// Layer for table function `schema`, 'view' and 'viewIfPermitted' class ViewLayer : public Layer { public: - explicit ViewLayer(bool if_permitted_) : if_permitted(if_permitted_) {} + explicit ViewLayer(std::string function_name_lowercase_) : function_name_lowercase(std::move(function_name_lowercase_)) {} bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override { @@ -2114,7 +2134,7 @@ class ViewLayer : public Layer pushResult(query); - if (!if_permitted) + if (function_name_lowercase != "viewifpermitted") { if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) return false; @@ -2147,16 +2167,13 @@ class ViewLayer : public Layer protected: bool getResultImpl(ASTPtr & node) override { - if (if_permitted) - node = makeASTFunction("viewIfPermitted", std::move(elements)); - else - node = makeASTFunction("view", std::move(elements)); - + const auto function_name = function_name_lowercase == "viewifpermitted" ? 
"viewIfPermitted" : function_name_lowercase; + node = makeASTFunction(function_name, std::move(elements)); return true; } private: - bool if_permitted; + std::string function_name_lowercase; }; @@ -2186,16 +2203,12 @@ std::unique_ptr getFunctionLayer(ASTPtr identifier, bool is_table_functio /// SUBSTRING(x FROM a) /// SUBSTRING(x FROM a FOR b) - String function_name = getIdentifierName(identifier); - String function_name_lowercase = Poco::toLower(function_name); + const auto function_name = getIdentifierName(identifier); + const auto function_name_lowercase = Poco::toLower(function_name); - if (is_table_function) - { - if (function_name_lowercase == "view") - return std::make_unique(false); - else if (function_name_lowercase == "viewifpermitted") - return std::make_unique(true); - } + if (is_table_function + && (function_name_lowercase == "getschema" || function_name_lowercase == "view" || function_name_lowercase == "viewifpermitted")) + return std::make_unique(function_name_lowercase); if (function_name == "tuple") return std::make_unique(); @@ -2584,8 +2597,9 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos { layers.back()->pushOperand(std::move(tmp)); } - else if (pos->type == TokenType::OpeningRoundBracket) + else if (pos->type == TokenType::OpeningRoundBracket || String(pos->begin , pos->end) == "kql") { + if (subquery_parser.parse(pos, tmp, expected)) { layers.back()->pushOperand(std::move(tmp)); diff --git a/src/Parsers/IParserBase.h b/src/Parsers/IParserBase.h index 46f0e672e0d8..14a0b8d43fc9 100644 --- a/src/Parsers/IParserBase.h +++ b/src/Parsers/IParserBase.h @@ -11,6 +11,8 @@ namespace DB class IParserBase : public IParser { public: + ~IParserBase() override = default; + template ALWAYS_INLINE static bool wrapParseImpl(Pos & pos, const F & func) { diff --git a/src/Parsers/Kusto/KQLContext.cpp b/src/Parsers/Kusto/KQLContext.cpp new file mode 100644 index 000000000000..95d015091ab9 --- /dev/null +++ 
b/src/Parsers/Kusto/KQLContext.cpp @@ -0,0 +1,43 @@ +#include "KQLContext.h" + +#include + +namespace +{ +constexpr std::string_view ANONYMOUS_COLUMN_NAME = "Column"; +} + +namespace DB +{ +void KQLContext::checkForDefaultColumnName(const std::string & column_name) +{ + if (!column_name.starts_with(ANONYMOUS_COLUMN_NAME)) + return; + + try + { + size_t pos; + const auto column_ordinal = std::stoi(column_name.substr(ANONYMOUS_COLUMN_NAME.length()), &pos); + if (column_ordinal >= next_column_ordinal && column_name.length() == pos + ANONYMOUS_COLUMN_NAME.length()) + taken_upcoming_column_ordinals.insert(column_ordinal); + } + catch (...) + { + // if conversion to integer failed, we can just carry on + } +} + +std::string KQLContext::nextDefaultColumnName() +{ + while (taken_upcoming_column_ordinals.contains(next_column_ordinal)) + { + taken_upcoming_column_ordinals.erase(next_column_ordinal); + ++next_column_ordinal; + } + + const auto column_ordinal = next_column_ordinal; + ++next_column_ordinal; + + return std::format("{}{}", ANONYMOUS_COLUMN_NAME, column_ordinal); +} +} diff --git a/src/Parsers/Kusto/KQLContext.h b/src/Parsers/Kusto/KQLContext.h new file mode 100644 index 000000000000..dce543687cbc --- /dev/null +++ b/src/Parsers/Kusto/KQLContext.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +namespace DB +{ +class KQLContext +{ +public: + void checkForDefaultColumnName(const std::string & column_name); + std::string nextDefaultColumnName(); + +private: + int next_column_ordinal = 1; + std::unordered_set taken_upcoming_column_ordinals; +}; +} diff --git a/src/Parsers/Kusto/KQLDataType.cpp b/src/Parsers/Kusto/KQLDataType.cpp new file mode 100644 index 000000000000..97f76b1c39af --- /dev/null +++ b/src/Parsers/Kusto/KQLDataType.cpp @@ -0,0 +1,104 @@ +#include "KQLDataType.h" + +#include + +#include + +#include + +namespace DB::ErrorCodes +{ +extern const int LOGICAL_ERROR; +extern const int UNKNOWN_TYPE; +} + +namespace +{ +class KQLScopeToType +{ +public: 
+ explicit KQLScopeToType(DB::KQLDataType column_type_) : KQLScopeToType(column_type_, column_type_) { } + KQLScopeToType(DB::KQLDataType column_type_, DB::KQLDataType row_type_) : column_type(column_type_), row_type(row_type_) { } + + DB::KQLDataType getType(DB::KQLScope scope) const; + +private: + DB::KQLDataType column_type; + DB::KQLDataType row_type; +}; + +DB::KQLDataType KQLScopeToType::getType(const DB::KQLScope scope) const +{ + if (scope == DB::KQLScope::Column) + return column_type; + else if (scope == DB::KQLScope::Row) + return row_type; + + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unexpected KQL scope: {}", magic_enum::enum_name(scope)); +} + +const std::unordered_map CLICKHOUSE_TO_KQL_TYPE{ + {DB::TypeIndex::AggregateFunction, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::Array, KQLScopeToType(DB::KQLDataType::Dynamic, DB::KQLDataType::Array)}, + {DB::TypeIndex::Date, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::Date32, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::DateTime, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::DateTime64, KQLScopeToType(DB::KQLDataType::DateTime)}, + {DB::TypeIndex::Decimal32, KQLScopeToType(DB::KQLDataType::Decimal)}, + {DB::TypeIndex::Decimal64, KQLScopeToType(DB::KQLDataType::Decimal)}, + {DB::TypeIndex::Decimal128, KQLScopeToType(DB::KQLDataType::Decimal)}, + {DB::TypeIndex::Decimal256, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::Enum16, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::Enum8, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::FixedString, KQLScopeToType(DB::KQLDataType::String)}, + {DB::TypeIndex::Float32, KQLScopeToType(DB::KQLDataType::Real)}, + {DB::TypeIndex::Float64, KQLScopeToType(DB::KQLDataType::Real)}, + {DB::TypeIndex::Function, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::Int8, KQLScopeToType(DB::KQLDataType::Int)}, + {DB::TypeIndex::Int16, 
KQLScopeToType(DB::KQLDataType::Int)}, + {DB::TypeIndex::Int32, KQLScopeToType(DB::KQLDataType::Int)}, + {DB::TypeIndex::Int64, KQLScopeToType(DB::KQLDataType::Long)}, + {DB::TypeIndex::Int128, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::Int256, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::Interval, KQLScopeToType(DB::KQLDataType::Timespan)}, + {DB::TypeIndex::IPv4, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::IPv6, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::LowCardinality, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::Map, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::Nothing, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::Nullable, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::Object, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::Set, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::String, KQLScopeToType(DB::KQLDataType::String)}, + {DB::TypeIndex::Tuple, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::UInt8, KQLScopeToType(DB::KQLDataType::Int)}, + {DB::TypeIndex::UInt16, KQLScopeToType(DB::KQLDataType::Int)}, + {DB::TypeIndex::UInt32, KQLScopeToType(DB::KQLDataType::Int)}, + {DB::TypeIndex::UInt64, KQLScopeToType(DB::KQLDataType::Long)}, + {DB::TypeIndex::UInt128, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::UInt256, KQLScopeToType(DB::KQLDataType::Invalid)}, + {DB::TypeIndex::UUID, KQLScopeToType(DB::KQLDataType::Guid)}}; +} + +namespace DB +{ +KQLDataType toKQLDataType(const TypeIndex type_id, const KQLScope scope) +{ + const auto it = CLICKHOUSE_TO_KQL_TYPE.find(type_id); + if (it == CLICKHOUSE_TO_KQL_TYPE.cend()) + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Unable to map {} to a KQL type", magic_enum::enum_name(type_id)); + + return it->second.getType(scope); +} + +std::string toString(const KQLDataType data_type) +{ + if (data_type == KQLDataType::Invalid) + return "n/a"; + + 
const auto data_type_name = magic_enum::enum_name(data_type); + std::string str(data_type_name.data(), data_type_name.length()); + std::ranges::transform(str, str.begin(), [](const unsigned char c) { return static_cast(std::tolower(c)); }); + return str; +} +} diff --git a/src/Parsers/Kusto/KQLDataType.h b/src/Parsers/Kusto/KQLDataType.h new file mode 100644 index 000000000000..b100fbbaaba2 --- /dev/null +++ b/src/Parsers/Kusto/KQLDataType.h @@ -0,0 +1,34 @@ +#pragma once + +#include + +#include + +namespace DB +{ +enum class KQLDataType +{ + Array, + Bool, + DateTime, + Decimal, + Dictionary, + Dynamic, + Guid, + Int, + Invalid, + Long, + Real, + String, + Timespan +}; + +enum class KQLScope +{ + Column, + Row +}; + +KQLDataType toKQLDataType(TypeIndex type_id, KQLScope scope); +std::string toString(KQLDataType data_type); +} diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md new file mode 100644 index 000000000000..56d195fb1842 --- /dev/null +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -0,0 +1,1700 @@ +## KQL implemented features +# July, 2023 +## Features +- Enable kql table function support both heredoc and regular format +## Bugfixes + - Fix printing datetime throws exceptions + - Fix dcount, dcountif aren't using HyperLogLog + - Fix base64 decode to string should return null for invalid encoded values + - Fix wildcard to Regex is not correct in the project-away operator +# June, 2023 +## Bugfixes + - Fix core dump when table is missing in pipeline + - Corrected issues related to fractional seconds in tolong, datetime_add, datetime_part and datetime_diff. + - Corrected an issue with timezone conversion in make_datetime. + - Corrected an issue with hash_sha256 when an empty string is used. + - Corrected an issue with extract_all not accepting the capture group argument. 
+ +# May, 2023 +## New Features +- Save intervals to tables + ``` + INSERT INTO t1 WITH toDateTime64('2023-01-01 00:00:00.000000001', 9, 'US/Eastern') AS c + SELECT toMinute(c + toIntervalSecond(number * 60)) + FROM numbers(2) + so, enabled the kql timespan to be stored: + CREATE TABLE kql_table1 ENGINE = Memory AS select *, now() as new_column From kql(print days=2d*3 | take 1) + ``` +- Add ability to ignore skipping indexes for a query: + ``` + SELECT * FROM data WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx'; + ``` +## Bugfixes + - Fix core dump when table is missing in pipeline +- Corrected an issue with `countof` operator where plain string matches were not correctly counting overlapping strings. + - produce an exception if `arg_max` or `arg_min` have fewer than 2 arguments + - core dump if there is no closing parenthesis in kql function + + +# April, 2023 +## Bugfixes + - Corrected an issue with parse_url in which hostnames and port numbers were not correctly parsed. + ``` + parse_url follows the following structure. + + Scheme://Username:Password@Host:Port/Path?QueryParameters#Fragment + + '://' is required for further parsing. + All other fields are optional and are parsed from left to right. + Username and Password are parsed together, require ':' and '@', and will not match if either contains '/', '?', or '#'. + IPv6 addresses are required to be encapsulated in brackets. + Host ends with '/', ':', '?' or '#'. + Port starts with ':' and ends with '/', '?' or '#'. + Path requires to start with '/' and ends with '?' or '#'. + Query Parameters is recursive, starts with '?', ends with '#', expected to be in the form of argument=value, and separated by '&'. + Fragment must start with '#'. + + Notes on differences between ADX and ClickHouse: + + ClickHouse will return a formatted string. 'extract_json' can be used to convert the string.
+ print x = parse_url("http://[2001:0db8:0000:0000:0000:ff00:0042:8329]?asd=qwe&qwe=asd") | project extract_json("$.Scheme", x); + ClickHouse includes Path as '/' where ADX requires anything after '/' to populate Path. + print parse_url("http://host:1234/"); + ClickHouse includes Port where ADX requires '/' for Port and without '/' will treat Port as part of Host. + print parse_url("http://host:1234?arg=value") + ClickHouse includes arg value in Query parameters where ADX treats this as host. + print parse_url("http://?arg=value"); + ClickHouse will not parse IPv6 addresses not encapsulated in brackets [RFC 3986](https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2) + Correct IPv6 + print parse_url("http://[2001:db8:3333:4444:5555:6666:7777:8888]:1234/filepath/index.htm") + Incorrect IPv6 + print parse_url("http://2001:db8:3333:4444:5555:6666:7777:8888:1234/filepath/index.htm"); + print parse_url("http://2001:db8:3333:4444:5555:6666:7777:8888/filepath/index.htm"); + ADX will incorrectly consume part of encapsulated IPv6 Host as Port from last colon to '/'. + print parse_url("http://[2001:db8:3333:4444:5555:6666:7777:8888]/filepath/index.htm") + ``` + +## Aggregate Functions +- [hll](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/hll-aggfunction) + `Customers | summarize x = hll(Education), y = hll(Occupation);` + + Current implementation of this function seeks to reuse ClickHouse's [`uniqCombined64`](https://clickhouse.com/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64), which has different intermediate values when compared to KQL. + + Please note that only accuracy level 4 is implemented, which becomes the default instead of 0. 
+- [hll_merge](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/hll-merge-aggfunction) + `Customers | summarize x = hll(Education), y = hll(Occupation) | summarize xy = hll_merge(x, y);` + + Current implementation of this function seeks to reuse ClickHouse's [`uniqCombined64`](https://clickhouse.com/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64), which has different intermediate values when compared to KQL. + + Please note that only accuracy level 4 is implemented, which becomes the default instead of 0. + +## Functions +- [dcount_hll](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/dcount-hllfunction) + `Customers | summarize x = hll(Education), y = hll(Occupation) | project xy = hll_merge(x, y) | project dcount_hll(xy);` + + Current implementation of this function seeks to reuse ClickHouse's [`uniqCombined64`](https://clickhouse.com/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64), which has different intermediate values when compared to KQL. + + Please note that only accuracy level 4 is implemented, which becomes the default instead of 0. +- [hll_merge](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/hllmergefunction) + `Customers | summarize x = hll(Education), y = hll(Occupation) | project xy = hll_merge(x, y);` + + Current implementation of this function seeks to reuse ClickHouse's [`uniqCombined64`](https://clickhouse.com/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64), which has different intermediate values when compared to KQL. + + Please note that only accuracy level 4 is implemented, which becomes the default instead of 0. 
+- [isascii()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/isascii) + `print str = isascii('ab১২ufghi🐂🐇🐒')` + +## Operator +- [project-rename](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/projectrenameoperator) + `print x='First Name', y='Last Name', z=20 | project-rename FirstName=x, LastName=y, Age=z` + Note: The output table doesn't preserve the order of the columns. + +# March 29, 2023 +## Bugfixes +- [arg_max()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/arg-max-aggfunction) and [arg_min()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/arg-min-aggfunction) + support multiple arguments now. + `Customers | arg_max(Age, FirstName, LastName)` + Note: The wildcard parameter (`*`) does not currently work, and will be implemented in a future build. Additionally, the parameter to maximize or minimize is always the last parameter in the output. +- Corrected an issue with tolong when used with datetime. +- Corrected an issue with hash when used with datetime. +## Functions +- [isutf8](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/isutf8) + `print t = isutf8('؄');` + *Note* The function is correctly implemented, but in some cases its output is different from ADX. + For example, for invalid code points, ADX returns `true` which should be `false` instead. For example, `\x80` is an invalid utf8 (with integer value 128), but ADX returns `true` for this. Our implementation of this function uses ClickHouse's isValidUTF8() function which returns `false` for invalid input like this.
+- [indexof_regex](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/indexofregexfunction) + `print idx1 = indexof_regex("abcabc", "a.c");` +- [make_string()](https://github.com/microsoft/Kusto-Query-Language/blob/master/doc/makestringfunction.md) + `print str = make_string(75, 117, 115, 116, 111)` + +# March 19, 2023 +## Functions +- [hash()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/hashfunction) + `print hash('World')` + Note: ClickHouse will attempt to fit a number within the smallest data type possible. As a result + Int32 data types not cast with KQL int() may not match ADX results. +- [hash_sha256()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/sha256hashfunction) + `print hash_sha256('World')` +- [not()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/notfunction) + `print not(1)` +## Special Functions +- [toscalar](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/toscalarfunction) + `range z from toscalar(print x=1) to toscalar(range x from 1 to 9 step 1 | count) step toscalar(2);` + +## Operator + - [between, !between](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/betweenoperator) + ``` + select * from kql(TableWithVariousDataTypes | project Age | where Age between (10 .. 12)); + select * from kql(TableWithVariousDataTypes | project Age | where Age !between (10 .. 30)); + select * from kql(TableWithVariousDataTypes | project Height | where Height between (5.2 .. 6.6)); + select * from kql(TableWithVariousDataTypes | project Height | where Height !between (5.3 .. 7.6)); + select * from kql(TableWithVariousDataTypes | project JoinDate | where JoinDate between (datetime('2020-01-01') .2d)); + select * from kql(TableWithVariousDataTypes | project JoinDate | where JoinDate !between (datetime('2020-01-01') .. 2d)); + select * from kql(TableWithVariousDataTypes | project JoinDate | where JoinDate between (datetime('2020-06-30') .. 
datetime('2025-06-30'))); + select * from kql(TableWithVariousDataTypes | project JoinDate | where JoinDate !between (datetime('2020-06-30') .. datetime('2025-06-30'))); + ``` +# March 15, 2023 +## Feature + - KQL - improve timespan textual representation in the CLI + The textual representation of `timespan` will now be identical to ADX, whenever the `dialect` setting is `kusto` or `kusto_auto`. The internal representation shall remain unchanged as `IntervalNanosecond`. In essence, any `Interval` type will also be represented this way even when running regular SQL queries as long as the `dialect` option is `kusto_auto`. + `print a = timespan(2d), b = timespan(4h), c = timespan(8m), d = timespan(16s), e = timespan(123millis), f = timespan(456micros), g = timespan(789nanos) | extend x = a + b + c + d + e + f + g;` +## Operator +- [getschema](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/getschemaoperator) + `print x = 'asd' | extend strlen(x) | getschema` + +## Bugfixes +## Functions +- [has_ipv4()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/has-ipv4-function) + `print has_ipv4('10:00:00 192.168.1.1 GET /index.html 404', '192.168.1.1')` +- [has_any_ipv4()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/has-any-ipv4-function) + `print has_any_ipv4('10:00:00 192.168.1.1 GET /index.html 404', '127.0.0.1', '192.168.1.1')` +- [has_ipv4_prefix()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/has-ipv4-prefix-function) + `print has_ipv4_prefix('10:00:00 192.168.1.1 GET /index.html 404', '192.168.')` +- [has_any_ipv4_prefix()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/has-any-ipv4-prefix-function) + `print has_any_ipv4_prefix('10:00:00 192.168.1.1 GET /index.html 404', '127.', '192.168.1.')` +- [string_size()](https://github.com/microsoft/Kusto-Query-Language/blob/master/doc/stringsizefunction.md) + `print string_size('⒦⒰⒮⒯⒪')` +- 
[to_utf8()](https://github.com/microsoft/Kusto-Query-Language/blob/master/doc/toutf8function.md) + `print arr = to_utf8('⒦⒰⒮⒯⒪')` +- [new_guid()](https://github.com/microsoft/Kusto-Query-Language/blob/master/doc/newguidfunction.md) + `print g = new_guid()` +- [gettype()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/gettypefunction) + `print t = gettype(1)` +- has_ipv6() + `Note: Not part of Microsoft's Kusto. Functions similar to has_ipv4()` + `print has_ipv6('09:46:00 2600:1404:6400:1695::1e89 GET /favicon.ico 404', '2600:1404:6400:1695::1e89')` +- has_any_ipv6() + ``` + Note: Not part of Microsoft's Kusto. Functions similar to has_any_ipv4() + print has_any_ipv6('09:46:00 2600:1404:6400:1695::1e89', '::1', '2600:1404:6400:1695::1e89') + ``` +- has_ipv6_prefix() + ``` + Note: Not part of Microsoft's Kusto. Functions similar to has_ipv4_prefix() although the prefixes can't contain :: compression syntax. + print has_ipv6_prefix('09:46:00 2600:1404:6400:1695::1e89', '2600:1404:6400:1695:0:0:0:') + ``` +- has_any_ipv6_prefix() + ``` + Note: Not part of Microsoft's Kusto. Functions similar to has_any_ipv4_prefix() although the prefixes can't contain :: compression syntax. 
+ print has_any_ipv6_prefix('09:46:00 2600:1404:6400:1695::1e89', '0:0:0:0:0:ffff:127.', '2600:1404:6400:1695:') + ``` + +# February 28, 2023 +## Operator +- [project-away](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/projectawayoperator) + ``` + print '1-- remove one column'; + Customers | project-away FirstName; + print '2-- remove two columns'; + Customers | project-away FirstName, LastName; + print '3-- remove columns by one wildcard'; + Customers | project-away *Name; + print '4-- remove columns by two wildcards'; + Customers | project-away *Name, *tion; + print '5-- remove columns by one wildcard, one regular column'; + Customers | project-away *Name, Age; + print '6-- remove columns by one wildcard, two regular column'; + Customers | project-away *Name, Age, Education; + print '7-- remove columns by two wildcard, two regular column'; + Customers | project-away *irstName, Age, *astName, Education; + print '8-- remove one column from previous piple result'; + Customers | where Age< 30 | limit 2 | project-away FirstName; + print '9-- remove one column from summized piple result'; + Customers|summarize sum(Age), avg(Age) by FirstName | project-away sum_Age; + print '10-- remove columns after extend'; + Customers|extend FullName = strcat(FirstName,' ',LastName) | project-away FirstName, LastName; + ``` + + +## Bugfixes +- Fixed count operator issue (2112): + ``` + Customers|project FirstName|where FirstName != 'Peter'|sort by FirstName asc nulls first|count + ``` +- Fixed KQL sub-query issues: + - Multiple columns in sub-query. + Multiple columns in a sub-query work in KQL ADX, where only the first column is effective, but did not work in ClickHouse. This issue is fixed, e.g. + ``` + Customers | where FirstName in ((Customers|project FirstName, LastName)) + ``` + Limitation: `select *` does not work in a sub-query, because the columns are not listed individually.
+ - Negative operators in sub-query + Fixed the issue where negative operators did not work in a KQL sub-query, e.g. + ``` + Customers | where FirstName in ((Customers|project FirstName, LastName|where FirstName !has 'Peter')) + ``` + - Case-insensitive compare in sub-query + Fixed the case-insensitive comparison issue for multiple pipes in a sub-query, e.g. + ``` + Customers | where FirstName in~ ((Customers|where FirstName !has 'Peter'|project FirstName, LastName)) + ``` + - Check functional test `tests/queries/0_stateless/02366_kql_test_subquery.sql` for details. + +- KQL - has operator fails to return result when needle has separator character +- strcat_delim fails when encountered with escaped double quotes (2159) +- summarize throw exception if Aggregation is missing (2113) +- todecimal() doesn't work with column arguments (1413) +- extract_json value cast to boolean causes exception (1490) +- isempty() and isnotempty() not accepting non-quoted strings + +## Functions +- [abs()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/abs-function) + `print abs(-5)` +- [acos()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/acosfunction) +- [asin()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/asinfunction) + `print asin(0.5)` +- [atan()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/atanfunction) + `print atan(0.5)` +- [atan2()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/atan2function) + `print atan2(1,1)` +- [ceiling()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/ceilingfunction) + `print c1 = ceiling(-1.1), c2 = ceiling(0), c3 = ceiling(0.9)` +- [cos()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/cosfunction) + `print cos(1)` +- [cot()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/cotfunction) + `print cot(1)` +- [degrees()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/degreesfunction) + `print
degrees(pi()/4)` +- [exp()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/exp-function) + `print exp(2)` +- [exp2()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/exp2-function) + `print exp2(2)` +- [exp10()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/exp10-function) + `print exp10(3)` +- [gamma()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/gammafunction) +- [isfinite()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/isfinitefunction) + `print isfinite(1.0/0.0)` +- [isinf()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/isinffunction) + `print isinf(1.0/0.0)` +- [log()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/log-function) + `print log(5)` +- [log2()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/log2-function) + `print log2(5)` +- [log10()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/log10-function) + `print log10(5)` +- [loggamma()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/loggammafunction) + `print loggamma(5)` +- [max_of()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/max-offunction) + `print result = max_of(10, 1, -3, 17)` +- [min_of()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/min-offunction) + `print result = min_of(10, 1, -3, 17)` +- [pi()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/pifunction) + `print pi()` +- [pow()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/powfunction) + `print pow(2, 3)` +- [radians()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/radiansfunction) + `print radians0 = radians(90), radians1 = radians(180), radians2 = radians(360)` +- [rand()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/randfunction) + `print rand(1000)` +- 
[round()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/roundfunction) + `print round(2.15, 1)` +- [sign()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/signfunction) + `print s1 = sign(-42), s2 = sign(0), s3 = sign(11.2)` +- [sin()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/sinfunction) +- [sqrt()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/sqrtfunction) + `print sqrt(256)` +- [tan()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/tanfunction) +- [variance()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/variance-aggfunction) + `Customers | summarize variance(Age);` +- [variancep()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/variancep-aggfunction) + `Customers | summarize variancep(Age);` +- [varianceif()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/varianceif-aggfunction) + `Customers | summarize varianceif(Age, Age < 30)` +- [lookup()](Not a KQL function, it's an IBM-specific suggested implementation. +Supports simple keys only. Does not support RANGE_HASHED keys.) 
+ `print lookup('dictionary_table', 'value', '1')` + `print lookup('dictionary_table', 'value', '100', 'default')` + +# January 26, 2023 +## Functions +- [range()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/rangefunction) +Difference from ADX: + Returns an empty array [] if the range is empty, while ADX returns NULL + The maximum number of array elements is 1000000 (a limitation of ClickHouse), versus 1,048,576 in ADX +``` +print '-- range function int, int, int --'; +print range(1, 10, 2); +print '-- range function int, int --'; +print range(1, 10); +print '-- range function float, float, float --'; +print range(1.2, 10.3, 2.2); +print '-- range function positive float, float, int --'; +print range(1.2, 10.3, 2); +print '-- range function positive float, int, float --'; +print range(1.2, 10, 2.2); +print '-- range function positive integer, int, float --'; +print range(1, 10, 2.2); +print '-- range function positive integer, float, float --'; +print range(1, 10.5, 2.2); +print '-- range function positive float, int, int --'; +print range(1.2, 10, 2); +print '-- range function positive int, int, negative int --'; +print range(12, 3, -2); +print '-- range function positive float, int, negative float --'; +print range(12.8, 3, -2.3); +print '-- range function datetime, datetime, timespan --'; +print range(datetime('2001-01-01'), datetime('2001-01-02'), 5h); +print '-- range function datetime, datetime, negative timespan --'; +print range(datetime('2001-01-03'), datetime('2001-01-02'), -5h); +print '-- range function datetime, datetime --'; +print range(datetime('2001-01-01'), datetime('2001-01-02')); +print '-- range function timespan, timespan, timespan --'; +print range(1h, 5h, 2h); +print '-- range function timespan, timespan --'; +print range(1h, 5h); +print '-- range function timespan, timespan, negative timespan --'; +print range(11h, 5h, -2h); +print '-- range function float timespan, timespan, timespan --'; +print range(1.5h, 5h, 2h); +print '-- range 
function endofday, endofday, timespan --'; +print range(endofday(datetime(2017-01-01 10:10:17)), endofday(datetime(2017-01-03 10:10:17)), 1d); +``` + +## Improvement +- [dcount()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/dcount-aggfunction) and [dcountif()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/dcountif-aggfunction) + dcount and dcountif now accept the additional accuracy parameter, which is the base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). +## Case Insensitive Operators +- [in~](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/inoperator) + `print t = 'a' in~ ('A', 'b', 'c')` + `Customers | where FirstName in~ ((Customers | project FirstName | where FirstName == 'Peter'))` +- [!in~](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/not-in-operator) + `print t = 'a' !in~ (dynamic(['A', 'b', 'c']))` + `Customers | where FirstName !in~ ('peter', 'apple')` +- [=~](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/equals-operator) + `Customers | where FirstName =~ 'peter' and LastName =~ 'naRA'` +- [!~](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/not-equals-operator) + `Customers | where FirstName !~ 'nEyMaR' and LastName =~ 'naRA'` +## Aggregate Functions +- [take_any()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/take-any-aggfunction) + ``` + Note: * is not currently a supported argument. 
+ ``` +- [take_anyif()](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/take-anyif-aggfunction) +- [dcount() and dcountif()] +## Operator +- [range](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/rangeoperator) + `range LastWeek from ago(7d) to now() step 1d` + `range Steps from 1 to 8 step 3` +- [top-nested](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/topnestedoperator) + + ``` + CREATE TABLE sales + (salesdate String,salesperson String,region String,amount UInt32) ENGINE = Memory; + + INSERT INTO sales VALUES ( '12/31/1995','Robert','ON-Ontario',1); + INSERT INTO sales VALUES ( '12/31/1995','Joseph','ON-Ontario',2); + INSERT INTO sales VALUES ( '12/31/1995','Joseph','QC-Quebec',3); + INSERT INTO sales VALUES ( '12/31/1995','Joseph','MA-Manitoba',4); + INSERT INTO sales VALUES ( '12/31/1995','Steven','QC-Quebec',5); + INSERT INTO sales VALUES ( '03/29/1996','Joseph','ON-Ontario',6); + INSERT INTO sales VALUES ( '03/29/1996','Robert','QC-Quebec',7); + INSERT INTO sales VALUES ( '03/29/1996','Joseph','ON-Ontario',8); + INSERT INTO sales VALUES ( '03/29/1996','Joseph','BC-British Columbia',9); + INSERT INTO sales VALUES ( '03/29/1996','Joseph','QC-Quebec',10); + INSERT INTO sales VALUES ( '03/29/1996','Joseph','MA-Manitoba',11); + INSERT INTO sales VALUES ( '03/29/1996','Steven','ON-Ontario',12); + INSERT INTO sales VALUES ( '03/29/1996','Steven','QC-Quebec',13); + INSERT INTO sales VALUES ( '03/29/1996','Steven','MA-Manitoba',14); + INSERT INTO sales VALUES ( '03/30/1996','Robert','ON-Ontario',15); + INSERT INTO sales VALUES ( '03/30/1996','Robert','QC-Quebec',16); + INSERT INTO sales VALUES ( '03/30/1996','Robert','MA-Manitoba',17); + INSERT INTO sales VALUES ( '03/30/1996','Joseph','ON-Ontario',18); + INSERT INTO sales VALUES ( '03/30/1996','Joseph','BC-British Columbia',19); + INSERT INTO sales VALUES ( '03/30/1996','Joseph','QC-Quebec',20); + INSERT INTO sales VALUES ( 
'03/30/1996','Joseph','MA-Manitoba',21); + INSERT INTO sales VALUES ( '03/30/1996','Steven','ON-Ontario',22); + INSERT INTO sales VALUES ( '03/30/1996','Steven','QC-Quebec',23); + INSERT INTO sales VALUES ( '03/30/1996','Steven','MA-Manitoba',24); + INSERT INTO sales VALUES ( '03/31/1996','Robert','MA-Manitoba',25); + INSERT INTO sales VALUES ( '03/31/1996','Thomas','ON-Ontario',26); + INSERT INTO sales VALUES ( '03/31/1996','Thomas','BC-British Columbia',27); + INSERT INTO sales VALUES ( '03/31/1996','Thomas','QC-Quebec',28); + INSERT INTO sales VALUES ( '03/31/1996','Thomas','MA-Manitoba',29); + INSERT INTO sales VALUES ( '03/31/1996','Steven','ON-Ontario',30); + + print '-- top 3 regions by sales--'; + sales | top-nested 3 of region by sum(amount); + + print '-- top 2 salespeople in each of these regions?--'; + sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson by sum(amount); + + print '--top 3 and other regions by sales--'; + sales | top-nested 3 of region with others = 'all other region' by sum(amount); + + print '--top 3 and other regions by sales and top 2 and other salespeople in each of these regions--'; + sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount); + + print '--top 3 and other regions by sales and top 2 salespeople in each of these regions--'; + sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson by sum(amount) + + print '--top 3 regions by sales and top 2 and other salespeople in each of these regions--'; + sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount); + + print '--top 3 regions by difference between max sales and min sales--'; + sales | top-nested 3 of region by sum(amount) - min(amount); + + print '-- top 3 regions using abbreviations by sales--'; + sales | top-nested 3 of 
substring(region, 0, 2) by sum(amount); + + print '-- all top regions by sales--'; + sales | top-nested of region by sum(amount); + ``` + +## Bugs +- [KQL Phase 2 - base64_encode_fromguid encodes strings as opposed to binary] +- [KQL Phase 2: summarize with bin and format_datetime] +- [make_datetime creates wrong date time] +- [KQL Phase 2: summarize using bin has different result than Azure Data Explorer using the same sample data] +- [KQL Phase 3: datetime should be rounded in certain cases] +- [kql_bin does not accept DateTime type] +- [KQL Phase 2 - totimespan should return null when conversion fails.] +- [reverse() with datetime and timespan arguments needs to be improved.] +- [String operator has throws exception when needle has white space or separator characters] + + +# December 7, 2022 + +## Functions +- [count_distinct](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/count-distinct-aggfunction) + `Customers | summarize count_distinct(Education);` +- [count_distinctif](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/count-distinctif-aggfunction) + `Customers | summarize count_distinctif(Education, Age > 30);` +- [iff](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/ifffunction) + `Customers | extend t = iff(Age <= 10, "smaller", "bigger");` +- [iif](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/iiffunction) + `Customers | extend t = iif(Age <= 10, "smaller", "bigger");` +## bug fixed +- [indexOf function doesn't work for extended parameters] +- [Create generic function for time arithmetic] +- [KQL Phase 2: tolong should return the number of ticks when supplied with a timespan] + +# November 23, 2022 + +## Operator +- [join](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/joinoperator?pivots=azuredataexplorer) + ``` + CREATE TABLE X (Key String, Value1 Int64) ENGINE = Memory; + INSERT INTO X VALUES ('a',1); + INSERT INTO X VALUES ('b',2); + INSERT INTO X VALUES 
('b',3); + INSERT INTO X VALUES ('c',4); + + CREATE TABLE Y (Key String, Value2 Int64) ENGINE = Memory; + INSERT INTO Y VALUES ('b',10); + INSERT INTO Y VALUES ('c',20); + INSERT INTO Y VALUES ('c',30); + INSERT INTO Y VALUES ('d',40); + + Join flavor : + + Default join is innerunique + X | join Y on $left.Key == $right.Key ; + X | join kind=innerunique Y on Key ; + + Inner-join + X | join kind=inner Y on Key ; + + Left outer-join + X | join kind=leftouter Y on Key ; + + Right outer-join + X | join kind=rightouter Y on Key ; + + Full outer-join + X | join kind=fullouter Y on Key ; + + Left anti-join + X | join kind=leftanti Y on Key ; + + Right anti-join + X | join kind=rightanti Y on Key ; + + Left semi-join + X | join kind=leftsemi Y on Key ; + + Right semi-join + X | join kind=rightsemi Y on Key ; + ``` + **Deviation from ADX** + Because of the limitations between KQL and SQL, the result may differ from ADX. (KQL-CH takes the result of ClickHouse) + - columns + ADX : common columns are duplicated in output + KQL-CH : only one column for common columns + - column name + ADX : column with same name (not common) ->column1 + KQL-CH : column with same name (not common) -> right_.column + - filters + ADX: Kusto is optimized to push filters that come after the join, towards the appropriate join side, left or right, when possible + KQL-CH: because the KQL domain does not know the schema of the tables, the push needs to be done manually by the user, like: + ``` + t1|join kind = innerunique t2 on key | where value == 'val1.2' + ``` + needs to be changed to the following by the user (if the user wants) : + ``` + t1| where value == 'val1.2' | join kind = innerunique t2 on key + ``` + - semi join flavor + ADX : only returns left side or right side columns + KQL-CH : returns columns from both sides + - Join hints : not supported yet +- [lookup](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/lookupoperator) + lookup is a subset of join, only support : kind=leftouter and 
kind=inner. if kind unspecified, kind=leftouter + ``` + DROP TABLE IF EXISTS FactTable; + CREATE TABLE FactTable (Row String, Personal String, Family String) ENGINE = Memory; + INSERT INTO FactTable VALUES ('1', 'Bill', 'Gates'); + INSERT INTO FactTable VALUES ('2', 'Bill', 'Clinton'); + INSERT INTO FactTable VALUES ('3', 'Bill', 'Clinton'); + INSERT INTO FactTable VALUES ('4', 'Steve', 'Ballmer'); + INSERT INTO FactTable VALUES ('5', 'Tim', 'Cook'); + + DROP TABLE IF EXISTS DimTable; + CREATE TABLE DimTable (Personal String, Family String, Alias String) ENGINE = Memory; + INSERT INTO DimTable VALUES ('Bill', 'Gates', 'billg'); + INSERT INTO DimTable VALUES ('Bill', 'Clinton', 'billc'); + INSERT INTO DimTable VALUES ('Steve', 'Ballmer', 'steveb'); + INSERT INTO DimTable VALUES ('Tim', 'Cook', 'timc'); + + FactTable | lookup kind=leftouter DimTable on Personal, Family + + FactTable | lookup kind=inner DimTable on Personal, Family + ``` + +## Bugs fixed + - [Incorrect Regx conversion] + - [KQL phase 2 - timespan calculation results in exception] + - [KQL phase 2 - format_timespan returns incorrect results] + - [Bin function should support time intervals less than 1 second] + - [KQL Phase 2: datetime subtraction results in exception] + - [Timespan() doesn't parse bareword arguments.] + - [KQL-phase2 distinct operator does not support alias] + +# November 7, 2022 +## Improvement +- [array_sort_asc](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysortascfunction) and [array_sort_desc](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysortdescfunction) + ``` + Returns the same number of arrays as in the input, with the first array sorted in ascending order, and the remaining arrays ordered to match the reordered first array. + + null will be returned for every array that differs in length from the first one. 
+ ``` + Because array in ClickHouse is not nullable, so an array with a single NULL ( `[NULL]`) is returned instead of a null if array that differs in length from the first one: + ``` + array_sort_asc(dynamic([2, 1, 3]), dynamic([20, 40, 30]), dynamic([100, 200])) -> [1,2,3,NULL],[10,20,30,40],[NULL] + ``` + the result can be used as a condition + ``` + DROP TABLE IF EXISTS visit; + CREATE TABLE visit(pageid UInt8, ip_country Array(Nullable(String)), hit Array(Int64),duration Array(Int64)) ENGINE = Memory; + INSERT INTO visit VALUES (1,['CA', 'US','FR','Eng'], [11,16,12,20],[100,500,300,200]); + INSERT INTO visit VALUES (2,['Japan', 'Gem','FR','Eng'], [31,22,33,10],[510,410,310,210]); + INSERT INTO visit VALUES (3,['CA', 'Gem','Japan','Eng'], [25,10,23,11],[120,110,130]); + INSERT INTO visit VALUES (4,['CA', 'Gem',null,'Eng'], [5,10,3,2],[220,320,310,150]); + INSERT INTO visit VALUES (5,['FR', null,'US','Eng'], [16,12,23,10],[210,250,110,260]); + + visit | project *, array_sort_asc(ip_country, hit, duration) + ┌─pageid─┬─ip_country─────────────────┬─hit───────────┬─duration──────────┬─kql_array_sort_asc(ip_country, hit, duration)────────────────┐ + │ 2 │ ['Japan','Gem','FR','Eng'] │ [31,22,33,10] │ [510,410,310,210] │ (['Eng','FR','Gem','Japan'],[10,33,22,31],[210,310,410,510]) │ + └────────┴────────────────────────────┴───────────────┴───────────────────┴──────────────────────────────────────────────────────────────┘ + ┌─pageid─┬─ip_country─────────────┬─hit───────────┬─duration──────────┬─kql_array_sort_asc(ip_country, hit, duration)────────────┐ + │ 1 │ ['CA','US','FR','Eng'] │ [11,16,12,20] │ [100,500,300,200] │ (['CA','Eng','FR','US'],[11,20,12,16],[100,200,300,500]) │ + └────────┴────────────────────────┴───────────────┴───────────────────┴──────────────────────────────────────────────────────────┘ + ┌─pageid─┬─ip_country──────────────┬─hit────────┬─duration──────────┬─kql_array_sort_asc(ip_country, hit, duration)──────────┐ + │ 4 │ ['CA','Gem',NULL,'Eng'] │ 
[5,10,3,2] │ [220,320,310,150] │ (['CA','Eng','Gem',NULL],[5,2,10,3],[220,150,320,310]) │ + └────────┴─────────────────────────┴────────────┴───────────────────┴────────────────────────────────────────────────────────┘ + ┌─pageid─┬─ip_country─────────────────┬─hit───────────┬─duration──────┬─kql_array_sort_asc(ip_country, hit, duration)─────┐ + │ 3 │ ['CA','Gem','Japan','Eng'] │ [25,10,23,11] │ [120,110,130] │ (['CA','Eng','Gem','Japan'],[25,11,10,23],[NULL]) │ + └────────┴────────────────────────────┴───────────────┴───────────────┴───────────────────────────────────────────────────┘ + ┌─pageid─┬─ip_country─────────────┬─hit───────────┬─duration──────────┬─kql_array_sort_asc(ip_country, hit, duration)────────────┐ + │ 5 │ ['FR',NULL,'US','Eng'] │ [16,12,23,10] │ [210,250,110,260] │ (['Eng','FR','US',NULL],[10,16,23,12],[260,210,110,250]) │ + └────────┴────────────────────────┴───────────────┴───────────────────┴──────────────────────────────────────────────────────────┘ + + visit | where isnull((array_sort_asc(ip_country, hit, duration))[2][0]) + ┌─pageid─┬─ip_country─────────────────┬─hit───────────┬─duration──────┐ + │ 3 │ ['CA','Gem','Japan','Eng'] │ [25,10,23,11] │ [120,110,130] │ + └────────┴────────────────────────────┴───────────────┴───────────────┘ + ``` + + the following behaviours are same as Azure Data Explorer + if no alias specified, the functions return a single tuple includes arrays. can use array sbscripon to access the element inside. 
for example: + ``` + print array_sort_asc(dynamic([2, 1, 3]), dynamic([20, 40, 30]), dynamic([100, 200]))[0] -> [1,2,3] + ``` + if a single alias is used, the first array is returned as a column : + ``` + print t = array_sort_asc(dynamic([2, 1, 3]), dynamic([20, 40, 30]), dynamic([100, 200])) + ┌─t───────┐ + │ [1,2,3] │ + └─────────┘ + ``` + if n aliases are used, the first n arrays are returned as columns : + ``` + print 5, (t,w) = array_sort_asc(dynamic([2, 1, 3]), dynamic([20, 40, 30]), dynamic([100, 200])) + ┌─5─┬─t───────┬─w──────────┐ + │ 5 │ [1,2,3] │ [40,20,30] │ + └───┴─────────┴────────────┘ + ``` +## New Functions +- [case](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/casefunction) + `Customers | extend t = case(Age <= 10, "A", Age <= 20, "B", Age <= 30, "C", "D");` +## Bug fixed +- [summarize crash if aggregation function is missing] + ``` + fixed with throw exception: + + Exception on client: + Code: 62. DB::Exception: Syntax error near keyword "by". 
(SYNTAX_ERROR) + ``` +- [make_datetime creates wrong date time] + +- [todecimal() doesn't work with column arguments] + + + +# October 25, 2022 +## New Operators +- [count](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/countoperator) +`Customers | count;` +`Customers | where Age< 30 | count;` +`Customers | where Age< 30 | limit 2 | count;` +`Customers | where Age< 30 | limit 2 | count | project Count;` + +- [top](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/topoperator) +`Customers | top 3 by Age;` +`Customers | top 3 by Age desc;` +`Customers | top 3 by Age asc | order by FirstName;` +`Customers | top 3 by FirstName desc nulls first;` +`Customers | top 3 by FirstName desc nulls last;` +`Customers | top 3 by Age | top 2 by FirstName;` + +- [top-hitters](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/tophittersoperator) +`Customers | top-hitters a = 2 of Age by extra;` +`Customers | top-hitters 2 of Age;` +`Customers | top-hitters 2 of Age by extra | top-hitters 2 of Age | order by Age;` +`Customers | top-hitters 2 of Age by extra | where Age > 30;` +`Customers | top-hitters 2 of Age by extra | where approximate_sum_extra < 200;` +`Customers | top-hitters 2 of Age | where approximate_count_Age > 2;` + +## Bugs fixed +- [parse_version needs to return null when parameter is empty string] +- [Different expressions with the same alias in function substring] +- [parse_version needs to return null when parameter is empty string] +- [parse_url() output mismatch for empty string] +- [array_sum and array_length return incorrect results] + +# October 9, 2022 + +## operator +- [distinct](https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/distinctoperator) + `Customers | distinct *` + `Customers | distinct Occupation` + `Customers | distinct Occupation, Education` + `Customers | where Age <30 | distinct Occupation, Education` + `Customers | where Age <30 | order by Age| distinct Occupation, Education` + 
+## String functions +- [reverse](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/reversefunction) + `print reverse(123)` + `print reverse(123.34)` + `print reverse('clickhouse')` + `print reverse(3h)` + `print reverse(datetime(2017-1-1 12:23:34))` + +- [parse_command_line](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-command-line) + `print parse_command_line('echo \"hello world!\" print$?', \"Windows\")` + +- [parse_csv](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parsecsvfunction) + `print result=parse_csv('aa,b,cc')` + `print result_multi_record=parse_csv('record1,a,b,c\nrecord2,x,y,z')` + +- [parse_json](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parsejsonfunction) + `print parse_json( dynamic([1, 2, 3]))` + `print parse_json('{"a":123.5, "b":"{\\"c\\":456}"}')` + +- [extract_json](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractjsonfunction) + `print extract_json( "$.a" , '{"a":123, "b":"{\\"c\\":456}"}' , typeof(int))` + +- [parse_version](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-versionfunction) + `print parse_version('1')` + `print parse_version('1.2.3.40')` + +## Bug fixed +- [correct array index in expression] + array index should start with 0 +- [Summarize should generate alias or use correct columns] + - if bin is used , the column should be in select list if no alias include + - if no column included in aggregate functions, ( like count() ), should has alias with fun name + '_',e.g count_ + - if column name included in aggregate functions, should have fun name + "_" + column name , like count(Age) -> count_Age + - if argument of an aggregate functions is an exprision, Columns1 ... 
Columnsn should be used as alias + ``` + Customers | summarize count() by bin(Age, 10) + ┌─Age─┬─count_─┐ + │ 40 │ 2 │ + │ 20 │ 6 │ + │ 30 │ 4 │ + └─────┴────────┘ + Customers | summarize count(Age) by bin(Age, 10) + ┌─Age─┬─count_Age─┐ + │ 40 │ 2 │ + │ 20 │ 6 │ + │ 30 │ 4 │ + └─────┴───────────┘ + Customers | summarize count(Age+1) by bin(Age+1, 10) + ┌─Columns1─┬─count_─┐ + │ 40 │ 2 │ + │ 20 │ 6 │ + │ 30 │ 4 │ + └──────────┴────────┘ + ``` +- [extend doesn't replace existing columns] + +- [throw exception if use quoted string as alias] + +- [repeat() doesn't work with count argument as negative value] + +- [substring() doesn't work right with negative offsets] +- [endofmonth() doesn't return correct result] + +- [split() outputs array instead of string] + +- [split() returns empty string when arg goes out of bound] + +- [split() doesn't work with negative index] + + +# September 26, 2022 +## Bug fixed : +["select * from kql" results in syntax error] +[Parsing ipv4 with arrayStringConcat throws exception] +[CH Client crashes on invalid function name] +[extract() doesn't work right with 4th argument i.e typeof()] +[parse_ipv6_mask return incorrect results] +[timespan returns wrong output in seconds] +[timespan doesn't work for nanoseconds and tick] +[totimespan() doesn't work for nanoseconds and tick timespan unit] +[data types should throw exception in certain cases] +[decimal does not support scientific notation] +[extend statement causes client core dumping] +[extend crashes with array sorting] +[Core dump happens when WHERE keyword doesn't follow field name] +[Null values are missing in the result of `make_list_with_nulls'] +[trim functions use non-unique aliases] +[format_ipv4_mask returns incorrect mask value] + +# September 12, 2022 +## Extend operator +https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extendoperator +`T | extend T | extend duration = endTime - startTime` +`T | project endTime, startTime | extend duration = endTime - 
startTime` +## Array functions +- [array_reverse](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array-reverse-function) + `print array_reverse(dynamic(["this", "is", "an", "example"])) == dynamic(["example","an","is","this"])` + +- [array_rotate_left](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array_rotate_leftfunction) + `print array_rotate_left(dynamic([1,2,3,4,5]), 2) == dynamic([3,4,5,1,2])` + `print array_rotate_left(dynamic([1,2,3,4,5]), -2) == dynamic([4,5,1,2,3])` + +- [array_rotate_right](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array_rotate_rightfunction) + `print array_rotate_right(dynamic([1,2,3,4,5]), -2) == dynamic([3,4,5,1,2])` + `print array_rotate_right(dynamic([1,2,3,4,5]), 2) == dynamic([4,5,1,2,3])` + +- [array_shift_left](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array_shift_leftfunction) + `print array_shift_left(dynamic([1,2,3,4,5]), 2) == dynamic([3,4,5,null,null])` + `print array_shift_left(dynamic([1,2,3,4,5]), -2) == dynamic([null,null,1,2,3])` + `print array_shift_left(dynamic([1,2,3,4,5]), 2, -1) == dynamic([3,4,5,-1,-1])` + `print array_shift_left(dynamic(['a', 'b', 'c']), 2) == dynamic(['c','',''])` + +- [array_shift_right](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array_shift_rightfunction) + `print array_shift_right(dynamic([1,2,3,4,5]), -2) == dynamic([3,4,5,null,null])` + `print array_shift_right(dynamic([1,2,3,4,5]), 2) == dynamic([null,null,1,2,3])` + `print array_shift_right(dynamic([1,2,3,4,5]), -2, -1) == dynamic([3,4,5,-1,-1])` + `print array_shift_right(dynamic(['a', 'b', 'c']), -2) == dynamic(['c','',''])` + +- [pack_array](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/packarrayfunction) + `print x = 1, y = x * 2, z = y * 2, pack_array(x,y,z)` + + Please note that only arrays of elements of the same type may be created at this time. 
The underlying reasons are explained under the release note section of the `dynamic` data type. + +- [repeat](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/repeatfunction) + `print repeat(1, 0) == dynamic([])` + `print repeat(1, 3) == dynamic([1, 1, 1])` + `print repeat("asd", 3) == dynamic(['asd', 'asd', 'asd'])` + `print repeat(timespan(1d), 3) == dynamic([86400, 86400, 86400])` + `print repeat(true, 3) == dynamic([true, true, true])` + +- [zip](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/zipfunction) + `print zip(dynamic([1,3,5]), dynamic([2,4,6]))` + + Please note that only arrays of the same type are supported in our current implementation. The underlying reasons are explained under the release note section of the `dynamic` data type. + +## Data types + - [dynamic](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic) + `print isnull(dynamic(null))` + `print dynamic(1) == 1` + `print dynamic(timespan(1d)) == 86400` + `print dynamic([1, 2, 3])` + `print dynamic([[1], [2], [3]])` + `print dynamic(['a', "b", 'c'])` + + According to the KQL specifications `dynamic` is a literal, which means that no function calls are permitted. Expressions producing literals such as `datetime` and `timespan` and their aliases (ie. `date` and `time`, respectively) along with nested `dynamic` literals are allowed. + + Please note that our current implementation supports only scalars and arrays made up of elements of the same type. Support for mixed types and property bags is deferred for now, based on our understanding of the required effort and discussion with representatives of the QRadar team. 
+ +## Mathematical functions + - [isnan](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/isnanfunction) + `print isnan(double(nan)) == true` + `print isnan(4.2) == false` + `print isnan(4) == false` + `print isnan(real(+inf)) == false` + +## Set functions +Please note that functions returning arrays with set semantics may return them in any particular order, which may be subject to change in the future. + + - [jaccard_index](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/jaccard-index-function) + `print jaccard_index(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3, 4, 4, 4])) == 0.75` + `print jaccard_index(dynamic([1, 2, 3]), dynamic([])) == 0` + `print jaccard_index(dynamic([]), dynamic([1, 2, 3, 4])) == 0` + `print isnan(jaccard_index(dynamic([]), dynamic([])))` + `print jaccard_index(dynamic([1, 2, 3]), dynamic([4, 5, 6, 7])) == 0` + `print jaccard_index(dynamic(['a', 's', 'd']), dynamic(['f', 'd', 's', 'a'])) == 0.75` + `print jaccard_index(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])) == 0.25` + + - [set_difference](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/setdifferencefunction) + `print set_difference(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])) == dynamic([])` + `print array_sort_asc(set_difference(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[1] == dynamic([4, 5, 6])` + `print set_difference(dynamic([4]), dynamic([1, 2, 3])) == dynamic([4])` + `print array_sort_asc(set_difference(dynamic([1, 2, 3, 4, 5]), dynamic([5]), dynamic([2, 4])))[1] == dynamic([1, 3])` + `print array_sort_asc(set_difference(dynamic([1, 2, 3]), dynamic([])))[1] == dynamic([1, 2, 3])` + `print array_sort_asc(set_difference(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[1] == dynamic(['d', 's'])` + `print array_sort_asc(set_difference(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[1] == dynamic(['Chewbacca', 'Han Solo'])` + 
+ - [set_has_element](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/sethaselementfunction) + `print set_has_element(dynamic(["this", "is", "an", "example"]), "example") == true` + `print set_has_element(dynamic(["this", "is", "an", "example"]), "examples") == false` + `print set_has_element(dynamic([1, 2, 3]), 2) == true` + `print set_has_element(dynamic([1, 2, 3, 4.2]), 4) == false` + + - [set_intersect](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/setintersectfunction) + `print array_sort_asc(set_intersect(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3])` + `print array_sort_asc(set_intersect(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3])` + `print set_intersect(dynamic([4]), dynamic([1, 2, 3])) == dynamic([])` + `print set_intersect(dynamic([1, 2, 3, 4, 5]), dynamic([1, 3, 5]), dynamic([2, 5])) == dynamic([5])` + `print set_intersect(dynamic([1, 2, 3]), dynamic([])) == dynamic([])` + `print set_intersect(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])) == dynamic(['a'])` + `print set_intersect(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])) == dynamic(['Darth Vader'])` + + - [set_union](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/setunionfunction) + `print array_sort_asc(set_union(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3])` + `print array_sort_asc(set_union(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3, 4, 5, 6])` + `print array_sort_asc(set_union(dynamic([4]), dynamic([1, 2, 3])))[1] == dynamic([1, 2, 3, 4])` + `print array_sort_asc(set_union(dynamic([1, 3, 4]), dynamic([5]), dynamic([2, 4])))[1] == dynamic([1, 2, 3, 4, 5])` + `print array_sort_asc(set_union(dynamic([1, 2, 3]), dynamic([])))[1] == dynamic([1, 2, 3])` + `print array_sort_asc(set_union(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[1] == dynamic(['a', 'd', 'f', 's'])` + `print 
array_sort_asc(set_union(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[1] == dynamic(['Chewbacca', 'Darth Sidious', 'Darth Vader', 'Han Solo'])` + +# August 29, 2022 + +## **mv-expand operator** +https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/mvexpandoperator +Note: **expand on array columns only** +- test cases + ``` + CREATE TABLE T + ( + a UInt8, + b Array(String), + c Array(Int8), + d Array(Int8) + ) ENGINE = Memory; + + INSERT INTO T VALUES (1, ['Salmon', 'Steak','Chicken'],[1,2,3,4],[5,6,7,8]) + + T | mv-expand c + T | mv-expand c, d + T | mv-expand b | mv-expand c + T | mv-expand c to typeof(bool) + T | mv-expand with_itemindex=index b, c, d + T | mv-expand array_concat(c,d) + T | mv-expand x = c, y = d + T | mv-expand xy = array_concat(c, d) + T | mv-expand with_itemindex=index c,d to typeof(bool) + ``` + +## **make-series operator** +https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-seriesoperator + +- test case make-series on datetime column + ``` + CREATE TABLE T + ( + Supplier Nullable(String), + Fruit String , + Price Float64, + Purchase Date + ) ENGINE = Memory; + + INSERT INTO T VALUES ('Aldi','Apple',4,'2016-09-10'); + INSERT INTO T VALUES ('Costco','Apple',2,'2016-09-11'); + INSERT INTO T VALUES ('Aldi','Apple',6,'2016-09-10'); + INSERT INTO T VALUES ('Costco','Snargaluff',100,'2016-09-12'); + INSERT INTO T VALUES ('Aldi','Apple',7,'2016-09-12'); + INSERT INTO T VALUES ('Aldi','Snargaluff',400,'2016-09-11'); + INSERT INTO T VALUES ('Costco','Snargaluff',104,'2016-09-12'); + INSERT INTO T VALUES ('Aldi','Apple',5,'2016-09-12'); + INSERT INTO T VALUES ('Aldi','Snargaluff',600,'2016-09-11'); + INSERT INTO T VALUES ('Costco','Snargaluff',200,'2016-09-10'); + ``` + Have from and to + ``` + T | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit + ``` + Has from , without to + ``` + T | 
make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) step 1d by Supplier, Fruit + ``` + Without from , has to + ``` + T | make-series PriceAvg = avg(Price) default=0 on Purchase to datetime(2016-09-13) step 1d by Supplier, Fruit + ``` + Without from , without to + ``` + T | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d by Supplier, Fruit + ``` + Without by clause + ``` + T | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d + ``` + Without aggregation alias + ``` + T | make-series avg(Price) default=0 on Purchase step 1d by Supplier, Fruit + ``` + Has group expression alias + ``` + T | make-series avg(Price) default=0 on Purchase step 1d by Supplier_Name = Supplier, Fruit + ``` + Use different step value + ``` + T | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 3d by Supplier, Fruit + ``` +- test case make-series on numeric column + ``` + CREATE TABLE T2 + ( + Supplier Nullable(String), + Fruit String , + Price Int32, + Purchase Int32 + ) ENGINE = Memory; + + INSERT INTO T2 VALUES ('Aldi','Apple',4,10); + INSERT INTO T2 VALUES ('Costco','Apple',2,11); + INSERT INTO T2 VALUES ('Aldi','Apple',6,10); + INSERT INTO T2 VALUES ('Costco','Snargaluff',100,12); + INSERT INTO T2 VALUES ('Aldi','Apple',7,12); + INSERT INTO T2 VALUES ('Aldi','Snargaluff',400,11); + INSERT INTO T2 VALUES ('Costco','Snargaluff',104,12); + INSERT INTO T2 VALUES ('Aldi','Apple',5,12); + INSERT INTO T2 VALUES ('Aldi','Snargaluff',600,11); + INSERT INTO T2 VALUES ('Costco','Snargaluff',200,10); + ``` + Have from and to + ``` + T2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 to 15 step 1.0 by Supplier, Fruit; + ``` + Has from , without to + ``` + T2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 step 1.0 by Supplier, Fruit; + ``` + Without from , has to + ``` + T2 | make-series PriceAvg=avg(Price) default=0 on Purchase to 18 step 4.0 by 
Supplier, Fruit; + ``` + Without from , without to + ``` + T2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0 by Supplier, Fruit; + ``` + Without by clause + ``` + T2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0; + ``` + +## Aggregate Functions +- [bin](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binfunction) + `print bin(4.5, 1)` + `print bin(time(16d), 7d)` + `print bin(datetime(1970-05-11 13:45:07), 1d)` +- [stdev](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdev-aggfunction) + `Customers | summarize t = stdev(Age) by FirstName` + +- [stdevif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/stdevif-aggfunction) + `Customers | summarize t = stdevif(Age, Age < 10) by FirstName` + +- [binary_all_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-and-aggfunction) + `Customers | summarize t = binary_all_and(Age) by FirstName` + +- [binary_all_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-or-aggfunction) + `Customers | summarize t = binary_all_or(Age) by FirstName` + +- [binary_all_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-all-xor-aggfunction) + `Customers | summarize t = binary_all_xor(Age) by FirstName` + +- [percentiles](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName` + +- [percentilesw](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `DataTable | summarize t = percentilesw(Bucket, Frequency, 50, 75, 99.9)` + +- [percentile](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `Customers | summarize t = percentile(Age, 50) by FirstName` + +- [percentilew](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/percentiles-aggfunction) + `DataTable | summarize t = 
percentilew(Bucket, Frequency, 50)` + +## Dynamic functions +- [array_sort_asc](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysortascfunction) + **Only support the constant dynamic array.** + **Returns an array. So, each element of the input has to be of same datatype.** + `print t = array_sort_asc(dynamic([null, 'd', 'a', 'c', 'c']))` + `print t = array_sort_asc(dynamic([4, 1, 3, 2]))` + `print t = array_sort_asc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))` + `print t = array_sort_asc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world']))` + `print t = array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , false)` + `print t = array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , 1 > 2)` + `print t = array_sort_asc( dynamic([null, 'd', null, null, 'a', 'c', 'c', null, null, null]) , false)` + `print t = array_sort_asc( dynamic([null, null, null]) , false)` + `print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]), 1 > 2)` + `print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30, 50, 3]), 1 > 2)` + +- [array_sort_desc](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysortdescfunction) **(only support the constant dynamic array)** + + `print t = array_sort_desc(dynamic([null, 'd', 'a', 'c', 'c']))` + `print t = array_sort_desc(dynamic([4, 1, 3, 2]))` + `print t = array_sort_desc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))` + `print t = array_sort_desc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world']))` + `print t = array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , false)` + `print t = array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , 1 > 2)` + `print t = array_sort_desc( dynamic([null, 'd', null, null, 'a', 'c', 'c', null, null, null]) , false)` + `print t = array_sort_desc( dynamic([null, null, null]) , false)` + `print t = array_sort_desc(dynamic([2, 1, null, 3]), dynamic([20, 10, 40, 30]), 1 > 2)` + `print t = 
array_sort_desc(dynamic([2, 1, null,3, null]), dynamic([20, 10, 40, 30, 50, 3]), 1 > 2)` + +- [array_concat](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayconcatfunction) + `print array_concat(dynamic([1, 2, 3]), dynamic([4, 5]), dynamic([6, 7, 8, 9])) == dynamic([1, 2, 3, 4, 5, 6, 7, 8, 9])` + +- [array_iff / array_iif](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayifffunction) + `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` + `print array_iif(dynamic([true, false, true]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3])` + `print array_iif(dynamic([true, false, true, false]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, null])` + `print array_iif(dynamic([1, 0, -1, 44, 0]), dynamic([1, 2, 3, 4]), dynamic([4, 5, 6])) == dynamic([1, 5, 3, 4, null])` + `print t = array_iif(dynamic([true, false, true, false, true]), dynamic([1.1, 2.2, 3.3, 4.4, 5.5]), 999.99);` + `print t = array_iif(dynamic([true, false, true, false, true]), 90, dynamic([1, 3]));` + +- [array_slice](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayslicefunction) + `print array_slice(dynamic([1,2,3]), 1, 2) == dynamic([2, 3])` + `print array_slice(dynamic([1,2,3,4,5]), 2, -1) == dynamic([3, 4, 5])` + `print array_slice(dynamic([1,2,3,4,5]), -3, -2) == dynamic([3, 4])` + +- [array_split](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraysplitfunction) + `print array_split(dynamic([1,2,3,4,5]), 2) == dynamic([[1,2],[3,4,5]])` + `print array_split(dynamic([1,2,3,4,5]), dynamic([1,3])) == dynamic([[1],[2,3],[4,5]])` + +## DateTimeFunctions + +- [ago](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/agofunction) + `print ago(2h)` + +- [endofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofdayfunction) + `print endofday(datetime(2017-01-01 10:10:17), -1)` + `print 
endofday(datetime(2017-01-01 10:10:17), 1)` + `print endofday(datetime(2017-01-01 10:10:17))` + +- [endofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofmonthfunction) + `print endofmonth(datetime(2017-01-01 10:10:17), -1)` + `print endofmonth(datetime(2017-01-01 10:10:17), 1)` + `print endofmonth(datetime(2017-01-01 10:10:17))` + +- [endofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofweekfunction) + `print endofweek(datetime(2017-01-01 10:10:17), 1)` + `print endofweek(datetime(2017-01-01 10:10:17), -1)` + `print endofweek(datetime(2017-01-01 10:10:17))` + +- [endofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/endofyearfunction) + `print endofyear(datetime(2017-01-01 10:10:17), -1)` + `print endofyear(datetime(2017-01-01 10:10:17), 1)` + `print endofyear(datetime(2017-01-01 10:10:17))` + +- [make_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-datetimefunction) + `print make_datetime(2017,10,01)` + `print make_datetime(2017,10,01,12,10)` + `print make_datetime(2017,10,01,12,11,0.1234567)` + +- [datetime_diff](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-difffunction) + `print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))` + `print datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30))` + `print datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))` + +- [unixtime_microseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-microseconds-todatetimefunction) + `print unixtime_microseconds_todatetime(1546300800000000)` + +- [unixtime_milliseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-milliseconds-todatetimefunction) + `print unixtime_milliseconds_todatetime(1546300800000)` + +- 
[unixtime_nanoseconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-nanoseconds-todatetimefunction) + `print unixtime_nanoseconds_todatetime(1546300800000000000)` + +- [datetime_part](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-partfunction) + `print datetime_part('day', datetime(2017-10-30 01:02:03.7654321))` + +- [datetime_add](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/datetime-addfunction) + `print datetime_add('day',1,datetime(2017-10-30 01:02:03.7654321))` + +- [format_timespan](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-timespanfunction) + `print format_timespan(time(1d), 'd-[hh:mm:ss]')` + `print format_timespan(time('12:30:55.123'), 'ddddd-[hh:mm:ss.ffff]')` + +- [format_datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-datetimefunction) + `print format_datetime(todatetime('2009-06-15T13:45:30.6175425'), 'yy-M-dd [H:mm:ss.fff]')` + `print format_datetime(datetime(2015-12-14 02:03:04.12345), 'y-M-d h:m:s tt')` + +- [todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/todatetimefunction) + `print todatetime('2014-05-25T08:20:03.123456Z')` + `print todatetime('2014-05-25 20:03.123')` + +- [totimespan](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/totimespanfunction) + `print totimespan('0.01:34:23')` + `print totimespan(1d)` + +# August 15, 2022 + **double quote support** + ``print res = strcat("double ","quote")`` +## Aggregate functions + - [bin_at](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binatfunction) + `print res = bin_at(6.5, 2.5, 7)` + `print res = bin_at(1h, 1d, 12h)` + `print res = bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0))` + `print res = bin_at(datetime(2017-05-17 10:20:00.0), 7d, datetime(2017-06-04 00:00:00.0))` + + - 
[array_index_of](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arrayindexoffunction) + *Supports only basic lookup. Does not support start_index, length, or occurrence* + `print output = array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')` + `print output = array_index_of(dynamic([1, 2, 3]), 2)` + - [array_sum](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/array-sum-function) + `print output = array_sum(dynamic([2, 5, 3]))` + `print output = array_sum(dynamic([2.5, 5.5, 3]))` + - [array_length](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/arraylengthfunction) + `print output = array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))` + `print output = array_length(dynamic([1, 2, 3]))` + +## Conversion +- [tobool / toboolean](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/toboolfunction) + `print tobool(true) == true` + `print toboolean(false) == false` + `print tobool(0) == false` + `print toboolean(19819823) == true` + `print tobool(-2) == true` + `print isnull(toboolean('a'))` + `print tobool('true') == true` + `print toboolean('false') == false` + +- [todouble / toreal](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/todoublefunction) + `print todouble(4) == 4` + `print toreal(4.2) == 4.2` + `print isnull(todouble('a'))` + `print toreal('-0.3') == -0.3` + +- [toint](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tointfunction) + `print isnull(toint('a'))` + `print toint(4) == 4` + `print toint('4') == 4` + `print isnull(toint(4.2))` + +- [tostring](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/tostringfunction) + `print tostring(123) == '123'` + `print tostring('asd') == 'asd'` + +## Data Types + - [dynamic](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/dynamic) + *Supports only 1D array* + `print output = dynamic(['a', 'b', 'c'])` + `print output = dynamic([1, 2, 3])` + +- 
[bool,boolean](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/bool) + `print bool(1)` + `print boolean(0)` + +- [datetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/datetime) + `print datetime(2015-12-31 23:59:59.9)` + `print datetime('2015-12-31 23:59:59.9')` + `print datetime("2015-12-31")` + +- [guid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/guid) + `print guid(74be27de-1e4e-49d9-b579-fe0b331d3642)` + `print guid('74be27de-1e4e-49d9-b579-fe0b331d3642')` + `print guid('74be27de1e4e49d9b579fe0b331d3642')` + +- [int](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/int) + `print int(1)` + +- [long](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/long) + `print long(16)` + +- [real](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/real) + `print real(1)` + +- [timespan ,time](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/scalar-data-types/timespan) + **Note** the timespan is used for calculating datetime, so the output is in seconds. e.g. 
time(1h) = 3600 + `print 1d` + `print 30m` + `print time('0.12:34:56.7')` + `print time(2h)` + `print timespan(2h)` + + +## StringFunctions + +- [base64_encode_fromguid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64-encode-fromguid-function) +`print Quine = base64_encode_fromguid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb')` +- [base64_decode_toarray](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64_decode_toarrayfunction) +`print base64_decode_toarray('S3VzdG8=')` +- [base64_decode_toguid](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/base64-decode-toguid-function) +`print base64_decode_toguid('YWUzMTMzZjItNmUyMi00OWFlLWIwNmEtMTZlNmE5YjIxMmVi')` +- [replace_regex](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/replace-regex-function) +`print replace_regex('Hello, World!', '.', '\\0\\0')` +- [has_any_index](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-any-index-function) +`print idx = has_any_index('this is an example', dynamic(['this', 'example']))` +- [translate](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/translatefunction) +`print translate('krasp', 'otsku', 'spark')` +- [trim](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimfunction) +`print trim('--', '--https://bing.com--')` +- [trim_end](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimendfunction) +`print trim_end('.com', 'bing.com')` +- [trim_start](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/trimstartfunction) +`print trim_start('[^\\w]+', strcat('- ','Te st1','// $'))` + +## DateTimeFunctions +- [startofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofyearfunction) + `print startofyear(datetime(2017-01-01 10:10:17), -1)` + `print startofyear(datetime(2017-01-01 10:10:17), 0)` + `print startofyear(datetime(2017-01-01 10:10:17), 1)` +- 
[weekofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/weekofyearfunction) + `print week_of_year(datetime(2020-12-31))` + `print week_of_year(datetime(2020-06-15))` + `print week_of_year(datetime(1970-01-01))` + `print week_of_year(datetime(2000-01-01))` + +- [startofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofweekfunction) + `print startofweek(datetime(2017-01-01 10:10:17), -1)` + `print startofweek(datetime(2017-01-01 10:10:17), 0)` + `print startofweek(datetime(2017-01-01 10:10:17), 1)` + +- [startofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofmonthfunction) + `print startofmonth(datetime(2017-01-01 10:10:17), -1)` + `print startofmonth(datetime(2017-01-01 10:10:17), 0)` + `print startofmonth(datetime(2017-01-01 10:10:17), 1)` + +- [startofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/startofdayfunction) + `print startofday(datetime(2017-01-01 10:10:17), -1)` + `print startofday(datetime(2017-01-01 10:10:17), 0)` + `print startofday(datetime(2017-01-01 10:10:17), 1)` + +- [monthofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/monthofyearfunction) + `print monthofyear(datetime("2015-12-14"))` + +- [hourofday](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/hourofdayfunction) + `print hourofday(datetime(2015-12-14 18:54:00))` + +- [getyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getyearfunction) + `print getyear(datetime(2015-10-12))` + +- [getmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/getmonthfunction) + `print getmonth(datetime(2015-10-12))` + +- [dayofyear](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofyearfunction) + `print dayofyear(datetime(2015-12-14))` + +- [dayofmonth](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofmonthfunction) + `print dayofmonth(datetime(2015-12-14))` + +- 
[unixtime_seconds_todatetime](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/unixtime-seconds-todatetimefunction) + `print unixtime_seconds_todatetime(1546300800)` + +- [dayofweek](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/dayofweekfunction) + `print dayofweek(datetime(2015-12-20))` + +- [now](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/nowfunction) + `print now()` + `print now(2d)` + `print now(-2h)` + `print now(5microseconds)` + `print now(5seconds)` + `print now(6minutes)` + `print now(-2d) ` + `print now(time(1d))` + + +## Binary functions +- [binary_and](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-andfunction) + `print binary_and(15, 3) == 3` + `print binary_and(1, 2) == 0` +- [binary_not](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-notfunction) + `print binary_not(1) == -2` +- [binary_or](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-orfunction) + `print binary_or(3, 8) == 11` + `print binary_or(1, 2) == 3` +- [binary_shift_left](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-leftfunction) + `print binary_shift_left(1, 1) == 2` + `print binary_shift_left(1, 64) == 1` +- [binary_shift_right](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-shift-rightfunction) + `print binary_shift_right(1, 1) == 0` + `print binary_shift_right(1, 64) == 1` +- [binary_xor](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/binary-xorfunction) + `print binary_xor(1, 3) == 2` +- [bitset_count_ones](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/bitset-count-onesfunction) + `print bitset_count_ones(42) == 3` + +## IP functions +- [format_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-function) + `print format_ipv4('192.168.1.255', 24) == '192.168.1.0'` + `print format_ipv4(3232236031, 24) == '192.168.1.0'` +- 
[format_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/format-ipv4-mask-function) + `print format_ipv4_mask('192.168.1.255', 24) == '192.168.1.0/24'` + `print format_ipv4_mask(3232236031, 24) == '192.168.1.0/24'` +- [ipv4_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-comparefunction) + `print ipv4_compare('127.0.0.1', '127.0.0.1') == 0` + `print ipv4_compare('192.168.1.1', '192.168.1.255') < 0` + `print ipv4_compare('192.168.1.1/24', '192.168.1.255/24') == 0` + `print ipv4_compare('192.168.1.1', '192.168.1.255', 24) == 0` +- [ipv4_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-matchfunction) + `print ipv4_is_match('127.0.0.1', '127.0.0.1') == true` + `print ipv4_is_match('192.168.1.1', '192.168.1.255') == false` + `print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24') == true` + `print ipv4_is_match('192.168.1.1', '192.168.1.255', 24) == true` +- [ipv6_compare](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-comparefunction) + `print ipv6_compare('::ffff:7f00:1', '127.0.0.1') == 0` + `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') < 0` + `print ipv6_compare('192.168.1.1/24', '192.168.1.255/24') == 0` + `print ipv6_compare('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == 0` + `print ipv6_compare('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == 0` +- [ipv6_is_match](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv6-is-matchfunction) + `print ipv6_is_match('::ffff:7f00:1', '127.0.0.1') == true` + `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') == false` + `print ipv6_is_match('192.168.1.1/24', '192.168.1.255/24') == true` + `print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == true` + `print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == true` +- 
[parse_ipv4_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4-maskfunction) + `print parse_ipv4_mask('127.0.0.1', 24) == 2130706432` + `print parse_ipv4_mask('192.1.168.2', 31) == 3221334018` + `print parse_ipv4_mask('192.1.168.3', 31) == 3221334018` + `print parse_ipv4_mask('127.2.3.4', 32) == 2130838276` +- [parse_ipv6_mask](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6-maskfunction) + `print parse_ipv6_mask('127.0.0.1', 24) == '0000:0000:0000:0000:0000:ffff:7f00:0000'` + `print parse_ipv6_mask('fe80::85d:e82c:9446:7994', 120) == 'fe80:0000:0000:0000:085d:e82c:9446:7900'` + +# August 1, 2022 + +**The config setting to allow modifying the dialect setting**. + - Set dialect setting in server configuration XML at user level(` users.xml `). This sets the ` dialect ` at server startup and CH will do query parsing for all users with ` default ` profile according to dialect value. + + For example: + ` <profiles> + <!-- Default settings. --> + <default> + <load_balancing>random</load_balancing> + <dialect>kusto_auto</dialect> + </default> ` + + - Query can be executed with HTTP client as below once dialect is set in users.xml + ` echo "KQL query" | curl -sS "http://localhost:8123/?" --data-binary @- ` + + - To execute the query using clickhouse-client , Update clickhouse-client.xml as below and connect clickhouse-client with --config-file option (` clickhouse-client --config-file=<config-file path> `) + + ` <config> + <dialect>kusto_auto</dialect> + </config> ` + + OR + pass dialect setting with '--'. 
For example : + ` clickhouse-client --dialect='kusto_auto' -q "KQL query" ` + +- **strcmp** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcmpfunction) + `print strcmp('abc','ABC')` + +- **parse_url** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parseurlfunction) + `print Result = parse_url('scheme://username:password@www.google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')` + +- **parse_urlquery** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parseurlqueryfunction) + `print Result = parse_urlquery('k1=v1&k2=v2&k3=v3')` + +- **print operator** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/printoperator) + `print x=1, s=strcat('Hello', ', ', 'World!')` + +- **Aggregate Functions:** + - [make_list()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makelist-aggfunction) + `Customers | summarize t = make_list(FirstName) by FirstName` + `Customers | summarize t = make_list(FirstName, 10) by FirstName` + - [make_list_if()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makelistif-aggfunction) + `Customers | summarize t = make_list_if(FirstName, Age > 10) by FirstName` + `Customers | summarize t = make_list_if(FirstName, Age > 10, 10) by FirstName` + - [make_list_with_nulls()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/make-list-with-nulls-aggfunction) + `Customers | summarize t = make_list_with_nulls(Age) by FirstName` + - [make_set()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makeset-aggfunction) + `Customers | summarize t = make_set(FirstName) by FirstName` + `Customers | summarize t = make_set(FirstName, 10) by FirstName` + - [make_set_if()](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/makesetif-aggfunction) + `Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName` + `Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName` + +## IP functions + +- 
**The following functions now support arbitrary expressions as their argument:** + - [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) + - [ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) + - [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) + +# July 17, 2022 + +## Renamed dialect from sql_dialect to dialect + +`set dialect='clickhouse'` +`set dialect='kusto'` +`set dialect='kusto_auto'` + +## IP functions +- [parse_ipv4](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv4function) + `"Customers | project parse_ipv4('127.0.0.1')"` +- [parse_ipv6](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/parse-ipv6function) + `"Customers | project parse_ipv6('127.0.0.1')"` + +Please note that the functions listed below only take constant parameters for now. Further improvement is to be expected to support expressions. 
+ +- [ipv4_is_private](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-privatefunction) + `"Customers | project ipv4_is_private('192.168.1.6/24')"` + `"Customers | project ipv4_is_private('192.168.1.6')"` +- [ipv4_is_in_range](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-is-in-range-function) + `"Customers | project ipv4_is_in_range('127.0.0.1', '127.0.0.1')"` + `"Customers | project ipv4_is_in_range('192.168.1.6', '192.168.1.1/24')"` +- [ipv4_netmask_suffix](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/ipv4-netmask-suffix-function) + `"Customers | project ipv4_netmask_suffix('192.168.1.1/24')"` + `"Customers | project ipv4_netmask_suffix('192.168.1.1')"` + +## string functions +- **support subquery for `in` operator** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) + (the subquery needs to be wrapped in double brackets) + + `Customers | where Age in ((Customers|project Age|where Age < 30))` + Note: case-insensitive not supported yet +- **has_all** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator) + `Customers|where Occupation has_all ('Skilled','abcd')` + note : subquery not supported yet +- **has_any** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator) + `Customers|where Occupation has_any ('Skilled','abcd')` + note : subquery not supported yet +- **countof** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction) + `Customers | project countof('The cat sat on the mat', 'at')` + `Customers | project countof('The cat sat on the mat', 'at', 'normal')` + `Customers | project countof('The cat sat on the mat', 'at', 'regex')` +- **extract** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction) +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project 
extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 3, 'The price of PINEAPPLE ice cream is 20')` +`Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20', typeof(real))` + +- **extract_all** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction) + + `Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 20')` + note: captureGroups not supported yet + +- **split** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction) + `Customers | project split('aa_bb', '_')` + `Customers | project split('aaa_bbb_ccc', '_', 1)` + `Customers | project split('', '_')` + `Customers | project split('a__b', '_')` + `Customers | project split('aabbcc', 'bb')` + +- **strcat_delim** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction) + `Customers | project strcat_delim('-', '1', '2', 'A') , 1s)` + `Customers | project strcat_delim('-', '1', '2', strcat('A','b'))` + note: only support string now. 
+ +- **indexof** (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction) + `Customers | project indexof('abcdefg','cde')` + `Customers | project indexof('abcdefg','cde',2)` + `Customers | project indexof('abcdefg','cde',6)` + note: length and occurrence not supported yet + + + + +# July 4, 2022 + +## sql_dialect + +- default is `clickhouse` + `set sql_dialect='clickhouse'` +- only process kql + `set sql_dialect='kusto'` +- process both kql and CH sql + `set sql_dialect='kusto_auto'` +## KQL() function + + - create table + `CREATE TABLE kql_table4 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName,Age);` + verify the content of `kql_table4` + `select * from kql_table4` + + - insert into table + create a tmp table: + ``` + CREATE TABLE temp + ( + FirstName Nullable(String), + LastName String, + Age Nullable(UInt8) + ) ENGINE = Memory; + ``` + `INSERT INTO temp select * from kql(Customers|project FirstName,LastName,Age);` + verify the content of `temp` + `select * from temp` + + - Select from kql() + `Select * from kql(Customers|project FirstName)` + +## KQL operators: + - Tabular expression statements + `Customers` + - Select Column + `Customers | project FirstName,LastName,Occupation` + - Limit returned results + `Customers | project FirstName,LastName,Occupation | take 1 | take 3` + - sort, order + `Customers | order by Age desc , FirstName asc` + - Filter + `Customers | where Occupation == 'Skilled Manual'` + - summarize + `Customers |summarize max(Age) by Occupation` + +## KQL string operators and functions + - contains + `Customers |where Education contains 'degree'` + - !contains + `Customers |where Education !contains 'degree'` + - contains_cs + `Customers |where Education contains_cs 'Degree'` + - !contains_cs + `Customers |where Education !contains_cs 'Degree'` + - endswith + `Customers | where FirstName endswith 'RE'` + - !endswith + `Customers | where FirstName !endswith 'RE'` + - endswith_cs + 
`Customers | where FirstName endswith_cs 're'` + - !endswith_cs + `Customers | where FirstName !endswith_cs 're'` + - == + `Customers | where Occupation == 'Skilled Manual'` + - != + `Customers | where Occupation != 'Skilled Manual'` + - has + `Customers | where Occupation has 'skilled'` + - !has + `Customers | where Occupation !has 'skilled'` + - has_cs + `Customers | where Occupation has_cs 'Skilled'` + - !has_cs + `Customers | where Occupation !has_cs 'Skilled'` + - hasprefix + `Customers | where Occupation hasprefix 'Ab'` + - !hasprefix + `Customers | where Occupation !hasprefix 'Ab'` + - hasprefix_cs + `Customers | where Occupation hasprefix_cs 'ab'` + - !hasprefix_cs + `Customers | where Occupation !hasprefix_cs 'ab'` + - hassuffix + `Customers | where Occupation hassuffix 'Ent'` + - !hassuffix + `Customers | where Occupation !hassuffix 'Ent'` + - hassuffix_cs + `Customers | where Occupation hassuffix_cs 'ent'` + - !hassuffix_cs + `Customers | where Occupation !hassuffix_cs 'ent'` + - in + `Customers |where Education in ('Bachelors','High School')` + - !in + `Customers | where Education !in ('Bachelors','High School')` + - matches regex + `Customers | where FirstName matches regex 'P.*r'` + - startswith + `Customers | where FirstName startswith 'pet'` + - !startswith + `Customers | where FirstName !startswith 'pet'` + - startswith_cs + `Customers | where FirstName startswith_cs 'pet'` + - !startswith_cs + `Customers | where FirstName !startswith_cs 'pet'` + + - base64_encode_tostring() + `Customers | project base64_encode_tostring('Kusto1') | take 1` + - base64_decode_tostring() + `Customers | project base64_decode_tostring('S3VzdG8x') | take 1` + - isempty() + `Customers | where isempty(LastName)` + - isnotempty() + `Customers | where isnotempty(LastName)` + - isnotnull() + `Customers | where isnotnull(FirstName)` + - isnull() + `Customers | where isnull(FirstName)` + - url_decode() + `Customers | project url_decode('https%3A%2F%2Fwww.test.com%2Fhello%20word') | 
take 1` + - url_encode() + `Customers | project url_encode('https://www.test.com/hello word') | take 1` + - substring() + `Customers | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2))` + - strcat() + `Customers | project name = strcat(FirstName, ' ', LastName)` + - strlen() + `Customers | project FirstName, strlen(FirstName)` + - strrep() + `Customers | project strrep(FirstName,2,'_')` + - toupper() + `Customers | project toupper(FirstName)` + - tolower() + `Customers | project tolower(FirstName)` + + ## Aggregate Functions + - arg_max() + - arg_min() + - avg() + - avgif() + - count() + - countif() + - max() + - maxif() + - min() + - minif() + - sum() + - sumif() + - dcount() + - dcountif() + - bin diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp new file mode 100644 index 000000000000..432e5131456e --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -0,0 +1,387 @@ +#include "KQLFunctionFactory.h" /* NOTE(review): the header names of the remaining #include lines are missing in this copy of the patch */ + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +namespace DB::ErrorCodes +{ +extern const int NOT_IMPLEMENTED; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +extern const int SYNTAX_ERROR; +extern const int UNKNOWN_FUNCTION; +} + +namespace +{ +/* Maps an opening curly/round/square bracket token to its closing counterpart; any other token type throws NOT_IMPLEMENTED. */ constexpr DB::TokenType determineClosingPair(const DB::TokenType token_type) +{ + if (token_type == DB::TokenType::OpeningCurlyBrace) + return DB::TokenType::ClosingCurlyBrace; + else if (token_type == DB::TokenType::OpeningRoundBracket) + return DB::TokenType::ClosingRoundBracket; + else if (token_type == DB::TokenType::OpeningSquareBracket) + return DB::TokenType::ClosingSquareBracket; + + throw DB::Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Unhandled token: {}", magic_enum::enum_name(token_type)); +} + +/* True when token_type is a closing curly brace, round bracket or square bracket. */ constexpr bool isClosingBracket(const DB::TokenType token_type) +{ + return token_type == DB::TokenType::ClosingCurlyBrace || 
token_type == DB::TokenType::ClosingRoundBracket + || token_type == DB::TokenType::ClosingSquareBracket; +} + +/* True when token_type is an opening curly brace, round bracket or square bracket. */ constexpr bool isOpeningBracket(const DB::TokenType token_type) +{ + return token_type == DB::TokenType::OpeningCurlyBrace || token_type == DB::TokenType::OpeningRoundBracket + || token_type == DB::TokenType::OpeningSquareBracket; +} +} + +namespace DB +{ +/* Runs convertImpl() through wrapConvertImpl() with IncreaseDepthTag; when convertImpl() reports failure, out is reset to an empty string before the failure is returned. */ bool IParserKQLFunction::convert(String & out, IParser::Pos & pos) +{ + return wrapConvertImpl( + pos, + IncreaseDepthTag{}, + [&] + { + bool res = convertImpl(out, pos); + if (!res) + out = ""; + return res; + }); +} + +/* Rewrites a KQL call as ch_fn(arg1, arg2, ...). Returns false without output when getKQLFunctionName() yields no name. Arguments come from getOptionalArgument(); on the closing round bracket the argument count is checked against argument_count_interval (NUMBER_OF_ARGUMENTS_DOESNT_MATCH if outside). If the loop stops before a closing round bracket is seen, out is cleared, pos is restored to its value right after getKQLFunctionName() and false is returned. */ bool IParserKQLFunction::directMapping( + String & out, IParser::Pos & pos, const std::string_view ch_fn, const Interval & argument_count_interval) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + out.append(ch_fn.data(), ch_fn.length()); + out.push_back('('); + + int argument_count = 0; + const auto begin = pos; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + /* every iteration after the first one separates arguments */ if (pos != begin) + out.append(", "); + + if (const auto argument = getOptionalArgument(fn_name, pos)) + { + ++argument_count; + out.append(*argument); + } + + if (pos->type == TokenType::ClosingRoundBracket) + { + if (!argument_count_interval.IsWithinBounds(argument_count)) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "{}: between {} and {} arguments are expected, but {} were provided", + fn_name, + argument_count_interval.Min(), + argument_count_interval.Max(), + argument_count); + + out.push_back(')'); + return true; + } + } + + out.clear(); + pos = begin; + return false; +} + +/* Returns the next value of a function-local static pcg16_once_insecure generator, formatted with std::to_string. */ String IParserKQLFunction::generateUniqueIdentifier() +{ + // This particular random generator hits each number exactly once before looping over. + // Because of this, it's sufficient for queries consisting of up to 2^16 (= 65536) distinct function calls. 
+ // Reference: https://www.pcg-random.org/using-pcg-cpp.html#insecure-generators + static pcg16_once_insecure random_generator; + return std::to_string(random_generator()); +} + +/* Returns the next argument via getOptionalArgument(); throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH when none is available. */ String IParserKQLFunction::getArgument(const String & function_name, DB::IParser::Pos & pos, const ArgumentState argument_state) +{ + if (auto optional_argument = getOptionalArgument(function_name, pos, argument_state)) + return std::move(*optional_argument); + + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Required argument was not provided in {}", function_name); +} + +/* Collects arguments with getOptionalArgument() until it yields none, then throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH if the count is outside argument_count_interval. */ std::vector IParserKQLFunction::getArguments( + const String & function_name, DB::IParser::Pos & pos, const ArgumentState argument_state, const Interval & argument_count_interval) +{ + std::vector arguments; + while (auto argument = getOptionalArgument(function_name, pos, argument_state)) + { + arguments.push_back(std::move(*argument)); + } + if (!argument_count_interval.IsWithinBounds(static_cast(arguments.size()))) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "{}: between {} and {} arguments are expected, but {} were provided", + function_name, + argument_count_interval.Min(), + argument_count_interval.Max(), + arguments.size()); + + return arguments; +} + +/* Converts the tokens of one argument into a single space-joined string. Returns an empty string if pos already sits on a closing round/square bracket; throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH if pos is at the end of input, a pipe or a semicolon. The token loop stops at a comma, at a closing round bracket once round_bracket_count is -1, or at a closing square bracket while square_bracket_count is 0. */ String IParserKQLFunction::getConvertedArgument(const String & fn_name, IParser::Pos & pos) +{ + int32_t round_bracket_count = 0, square_bracket_count = 0; + if (pos->type == TokenType::ClosingRoundBracket || pos->type == TokenType::ClosingSquareBracket) + return {}; + + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Need more argument(s) in function: {}", fn_name); + + std::vector tokens; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::OpeningRoundBracket) + ++round_bracket_count; + if (pos->type == TokenType::ClosingRoundBracket) + 
--round_bracket_count; + + if (pos->type == TokenType::OpeningSquareBracket) + ++square_bracket_count; + if (pos->type == TokenType::ClosingSquareBracket) + --square_bracket_count; + + /* tokens that KQLOperators::convert() does not consume are handled below by token type */ if (!KQLOperators::convert(tokens, pos)) + { + if (pos->type == TokenType::BareWord) + { + tokens.push_back(IParserKQLFunction::getExpression(pos)); + } + else if ( + pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket + || pos->type == TokenType::ClosingSquareBracket) + { + if (pos->type == TokenType::Comma) + break; + if (pos->type == TokenType::ClosingRoundBracket && round_bracket_count == -1) + break; + if (pos->type == TokenType::ClosingSquareBracket && square_bracket_count == 0) + break; + tokens.push_back(String(pos->begin, pos->end)); + } + else + { + String token; + if (pos->type == TokenType::QuotedIdentifier) + token = "'" + escapeSingleQuotes(String(pos->begin + 1, pos->end - 1)) + "'"; + else if (pos->type == TokenType::OpeningSquareBracket) + { + ++pos; + String array_index; + while (!pos->isEnd() && pos->type != TokenType::ClosingSquareBracket) + { + array_index += getExpression(pos); + ++pos; + } + /* integer literal index: non-negative values are shifted by one, negative values are kept; anything else gets the same rule as a runtime expression */ if (Int64 index; (boost::conversion::try_lexical_convert(array_index, index))) + { + auto ch_index = index >= 0 ? index + 1 : index; + token = std::format("[{0}]", ch_index); + } + else + token = std::format("[ {0} >=0 ? {0} + 1 : {0}]", array_index); + } + else + token = String(pos->begin, pos->end); + + tokens.push_back(token); + } + } + + ++pos; + if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket || pos->type == TokenType::ClosingSquareBracket) + { + if (pos->type == TokenType::Comma) + break; + if (pos->type == TokenType::ClosingRoundBracket && round_bracket_count == -1) + break; + if (pos->type == TokenType::ClosingSquareBracket && square_bracket_count == 0) + break; + } + } + + String converted_arg; + for (const auto & token : tokens) + converted_arg.append((converted_arg.empty() ? 
"" : " ") + token); + + return converted_arg; +} + +/* Returns the next argument, or nothing. If pos is not on a comma or opening round bracket, nothing is returned and pos is left alone; otherwise pos is advanced, and nothing is returned if it lands on a closing round/square bracket. ArgumentState::Parsed delegates to getConvertedArgument(). ArgumentState::Raw returns the source text up to the next comma or closing round bracket outside nested brackets and throws SYNTAX_ERROR on a closing bracket that does not match the innermost open one. Any other state throws NOT_IMPLEMENTED. */ std::optional +IParserKQLFunction::getOptionalArgument(const String & function_name, DB::IParser::Pos & pos, const ArgumentState argument_state) +{ + if (const auto type = pos->type; type != DB::TokenType::Comma && type != DB::TokenType::OpeningRoundBracket) + return {}; + + ++pos; + if (const auto type = pos->type; type == DB::TokenType::ClosingRoundBracket || type == DB::TokenType::ClosingSquareBracket) + return {}; + + if (argument_state == ArgumentState::Parsed) + return getConvertedArgument(function_name, pos); + + if (argument_state != ArgumentState::Raw) + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Argument extraction is not implemented for {}::{}", + magic_enum::enum_type_name(), + magic_enum::enum_name(argument_state)); + + const auto * begin = pos->begin; + /* stack of currently open bracket token types */ std::stack scopes; + while (!pos->isEnd() && (!scopes.empty() || (pos->type != DB::TokenType::Comma && pos->type != DB::TokenType::ClosingRoundBracket))) + { + const auto token_type = pos->type; + if (isOpeningBracket(token_type)) + scopes.push(token_type); + else if (isClosingBracket(token_type)) + { + if (scopes.empty() || determineClosingPair(scopes.top()) != token_type) + throw Exception( + DB::ErrorCodes::SYNTAX_ERROR, "Unmatched token: {} when parsing {}", magic_enum::enum_name(token_type), function_name); + + scopes.pop(); + } + + ++pos; + } + + return std::string(begin, pos->begin); +} + +/* Returns the text of the current token and leaves pos on the following token when that token is an opening round bracket; otherwise restores pos and returns an empty string. */ String IParserKQLFunction::getKQLFunctionName(IParser::Pos & pos) +{ + String fn_name(pos->begin, pos->end); + ++pos; + if (pos->type != TokenType::OpeningRoundBracket) + { + --pos; + return ""; + } + return fn_name; +} + +/* Convenience overload: forwards params as a std::span to the span overload. */ String IParserKQLFunction::kqlCallToExpression( + const std::string_view function_name, const std::initializer_list params, const uint32_t max_depth) +{ + return kqlCallToExpression(function_name, std::span(params), max_depth); +} + +/* Builds the text "function_name(p1, p2, ...)", tokenizes it with max_depth and returns getExpression() on the resulting tokens. */ String IParserKQLFunction::kqlCallToExpression( + const std::string_view function_name, const 
std::span params, const uint32_t max_depth) +{ + /* join the parameters with ", " */ const auto params_str = std::accumulate( + std::cbegin(params), + std::cend(params), + String(), + [](String acc, const std::string_view param) + { + if (!acc.empty()) + acc.append(", "); + + acc.append(param.data(), param.length()); + return acc; + }); + + const auto kql_call = std::format("{}({})", function_name, params_str); + DB::Tokens call_tokens(kql_call.c_str(), kql_call.c_str() + kql_call.length()); + DB::IParser::Pos tokens_pos(call_tokens, max_depth); + return DB::IParserKQLFunction::getExpression(tokens_pos); +} + +/* Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH unless pos is on a closing round bracket. */ void IParserKQLFunction::validateEndOfFunction(const String & fn_name, IParser::Pos & pos) +{ + if (pos->type != TokenType::ClosingRoundBracket) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many arguments in function: {}", fn_name); +} + +/* Converts the token at pos to its output text. Bare words are looked up in KQLFunctionFactory and converted by the matching function; a name with no factory entry that is followed by an opening round bracket throws UNKNOWN_FUNCTION unless it is "and"/"or"; when no conversion happened the token is tried as a timespan via ParserKQLTimespan::tryParse. Quoted identifiers are re-quoted with single quotes, an opening square bracket is rewritten as an index expression, and any other token is returned verbatim. */ String IParserKQLFunction::getExpression(IParser::Pos & pos) +{ + String arg(pos->begin, pos->end); + if (pos->type == TokenType::BareWord) + { + const auto fun = KQLFunctionFactory::get(arg); + if (String new_arg; fun && fun->convert(new_arg, pos)) + { + validateEndOfFunction(arg, pos); + arg = std::move(new_arg); + } + else + { + if (!fun) + { + /* peek at the next token, then step back */ ++pos; + if (pos->type == TokenType::OpeningRoundBracket) + { + if (Poco::toLower(arg) != "and" && Poco::toLower(arg) != "or") + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "{} is not a supported kusto function", arg); + } + --pos; + } + + if (std::optional ticks; ParserKQLTimespan::tryParse(extractTokenWithoutQuotes(pos), ticks) && ticks) + arg = kqlTicksToInterval(ticks); + } + } + else if (pos->type == TokenType::QuotedIdentifier) + arg = "'" + escapeSingleQuotes(String(pos->begin + 1, pos->end - 1)) + "'"; + else if (pos->type == TokenType::OpeningSquareBracket) + { + ++pos; + String array_index; + while (!pos->isEnd() && pos->type != TokenType::ClosingSquareBracket) + { + array_index += getExpression(pos); + ++pos; + } + if (Int64 index; 
(boost::conversion::try_lexical_convert(array_index, index))) + { + /* non-negative literal indexes are shifted by one, negative ones are kept */ auto ch_index = index >= 0 ? index + 1 : index; + arg = std::format("[{0}]", ch_index); + } + else + { + arg = std::format("[ {0} >=0 ? {0} + 1 : {0}]", array_index); + } + } + return arg; +} + +/* Returns input with every single quote doubled. */ String IParserKQLFunction::escapeSingleQuotes(const String & input) +{ + String output; + for (const auto & ch : input) + { + if (ch == '\'') + output += ch; + output += ch; + } + return output; +} +} diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h new file mode 100644 index 000000000000..147436551f97 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -0,0 +1,91 @@ +#pragma once + +#include + +#include + +namespace DB +{ +/* Closed integer range [min, max]; IsWithinBounds() includes both ends. */ class Interval +{ +public: + using Representation = int; + + Interval(const Representation min_, const Representation max_) : max(max_), min(min_) { } + + Representation Max() const { return max; } + Representation Min() const { return min; } + bool IsWithinBounds(const Representation value) const { return min <= value && value <= max; } + + static constexpr auto max_bound = std::numeric_limits::max(); + static constexpr auto min_bound = std::numeric_limits::min(); + +private: + Representation max = max_bound; + Representation min = min_bound; +}; + +/* Base class for KQL function converters: subclasses implement convertImpl(), the static helpers read arguments from the token stream. */ class IParserKQLFunction +{ +public: + /* Parsed: arguments are converted with getConvertedArgument(); Raw: arguments are returned as source text. */ enum class ArgumentState + { + Parsed, + Raw + }; + + /* Runs func and restores pos when it returns false. */ template + ALWAYS_INLINE static bool wrapConvertImpl(IParser::Pos & pos, const F & func) + { + IParser::Pos begin = pos; + bool res = func(); + if (!res) + pos = begin; + return res; + } + + struct IncreaseDepthTag + { + }; + + /* Same as above, with pos.increaseDepth()/decreaseDepth() around the call. */ template + ALWAYS_INLINE static bool wrapConvertImpl(IParser::Pos & pos, IncreaseDepthTag, const F & func) + { + IParser::Pos begin = pos; + pos.increaseDepth(); + bool res = func(); + pos.decreaseDepth(); + if (!res) + pos = begin; + return res; + } + + bool convert(String & out, IParser::Pos & pos); + virtual const char * 
getName() const = 0; + virtual ~IParserKQLFunction() = default; + + /* Static helpers; argument_state defaults to ArgumentState::Parsed and argument_count_interval to {0, Interval::max_bound}. */ static String generateUniqueIdentifier(); + static String getArgument(const String & function_name, DB::IParser::Pos & pos, ArgumentState argument_state = ArgumentState::Parsed); + static std::vector getArguments( + const String & function_name, + DB::IParser::Pos & pos, + ArgumentState argument_state = ArgumentState::Parsed, + const Interval & argument_count_interval = {0, Interval::max_bound}); + static String getConvertedArgument(const String & fn_name, IParser::Pos & pos); + static String getExpression(IParser::Pos & pos); + static String getKQLFunctionName(IParser::Pos & pos); + static std::optional + getOptionalArgument(const String & function_name, DB::IParser::Pos & pos, ArgumentState argument_state = ArgumentState::Parsed); + static String + kqlCallToExpression(std::string_view function_name, std::initializer_list params, uint32_t max_depth); + static String kqlCallToExpression(std::string_view function_name, std::span params, uint32_t max_depth); + static String escapeSingleQuotes(const String & input); + +protected: + /* Function-specific conversion hook invoked by convert(). */ virtual bool convertImpl(String & out, IParser::Pos & pos) = 0; + + static bool directMapping( + String & out, IParser::Pos & pos, std::string_view ch_fn, const Interval & argument_count_interval = {0, Interval::max_bound}); + static void validateEndOfFunction(const String & fn_name, IParser::Pos & pos); +}; +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp new file mode 100644 index 000000000000..0a21c5da031f --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.cpp @@ -0,0 +1,676 @@ +#include "KQLAggregationFunctions.h" +#include + +#include +#include +#include +#include + +namespace DB::ErrorCodes +{ +extern const int NOT_IMPLEMENTED; +extern const int BAD_ARGUMENTS; +} + +namespace +{ +/* Throws NOT_IMPLEMENTED when an accuracy is given and it is not "4". */ void checkAccuracy(const std::optional & accuracy) +{ + if (accuracy && 
*accuracy != "4") + throw DB::Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "only accuracy of 4 is supported"); +} + +/* Maps the accuracy level "0".."4" to the numeric parameter used for uniqCombined64 (12, 14, 16, 17, 18); a missing accuracy yields 14 and any other value throws BAD_ARGUMENTS. */ uint mapPrecisionAccuracy(const std::optional & accuracy) +{ + if (!accuracy) + return 14; //default accuracy is 1 + + if (*accuracy == "0") + return 12; + else if (*accuracy == "1") + return 14; + else if (*accuracy == "2") + return 16; + else if (*accuracy == "3") + return 17; + else if (*accuracy == "4") + return 18; + else + throw DB::Exception( + DB::ErrorCodes::BAD_ARGUMENTS, + "Accuracy argument must be a constant integer with value 0, 1, 2, 3 or 4 (0 = fast , 1 = default, 2 = accurate, 3 = extra accurate, 4 " + "= super accurate)"); +} +} + +namespace DB +{ +/* Requires at least two arguments. Emits "argMax(x, first) as x," for every later argument x that differs from the first one, followed by argMax(first, first). The filter predicate captures args by reference: the view is consumed inside this loop while args is alive, so copying the vector into the closure is unnecessary. */ bool ArgMax::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + const auto args = getArguments(fn_name, pos, ArgumentState::Parsed, {2, Interval::max_bound}); + + for (const auto & expr_to_return : + args | std::views::drop(1) | std::views::filter([&args](const auto & expr_to_return) { return expr_to_return != args[0]; })) + { + out += std::format("argMax({}, {}) as {},", expr_to_return, args[0], expr_to_return); + } + out += std::format("argMax({}, {})", args[0], args[0]); + + return true; +} + +/* Same shape as ArgMax::convertImpl, emitting argMin; args is likewise captured by reference in the filter predicate. */ bool ArgMin::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + const auto args = getArguments(fn_name, pos, ArgumentState::Parsed, {2, Interval::max_bound}); + for (const auto & expr_to_return : + args | std::views::drop(1) | std::views::filter([&args](const auto & expr_to_return) { return expr_to_return != args[0]; })) + { + out += std::format("argMin({}, {}) as {},", expr_to_return, args[0], expr_to_return); + } + out += std::format("argMin({}, {})", args[0], args[0]); + + return true; +} + +/* Direct mapping to avg. */ bool Avg::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "avg"); +} + +bool AvgIf::convertImpl(String 
& out, IParser::Pos & pos) +{ + return directMapping(out, pos, "avgIf"); +} + +/* The next three are direct mappings to groupBitAnd / groupBitOr / groupBitXor. */ bool BinaryAllAnd::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "groupBitAnd"); +} + +bool BinaryAllOr::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "groupBitOr"); +} + +bool BinaryAllXor::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "groupBitXor"); +} + +/* Not implemented: always throws NOT_IMPLEMENTED. */ bool BuildSchema::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not yet implemented", getName()); +} + +/* Direct mapping to count. */ bool Count::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "count"); +} + +/* Direct mapping to countIf. */ bool CountIf::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "countIf"); +} + +/* Emits uniqCombined64(<precision>)(value); the precision comes from mapPrecisionAccuracy() applied to the optional second argument. */ bool DCount::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + const auto value = getArgument(fn_name, pos); + const auto accuracy = getOptionalArgument(fn_name, pos); + + out = std::format("uniqCombined64({})({})", mapPrecisionAccuracy(accuracy), value); + return true; +} + +/* Emits uniqCombined64If(<precision>)(value,(condition)); the optional third argument selects the precision via mapPrecisionAccuracy(). */ bool DCountIf::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + String value = getConvertedArgument(fn_name, pos); + ++pos; + String condition = getConvertedArgument(fn_name, pos); + + const auto accuracy = getOptionalArgument(fn_name, pos); + out = std::format("uniqCombined64If({})({},({}))", mapPrecisionAccuracy(accuracy), value, condition); + return true; +} + +/* Emits uniqCombined64Merge(18)(expr) for its single required argument. */ bool DCountHll::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto expr = getArgument(fn_name, pos); + out = std::format("uniqCombined64Merge(18)({})", expr); + + return true; +} + +bool 
Hll::convertImpl(String & out, IParser::Pos & pos) +{ + /* Emits uniqCombined64State(18)(expr); an optional accuracy argument is accepted only when checkAccuracy() allows it. */ const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto expr = getArgument(fn_name, pos); + const auto accuracy = getOptionalArgument(fn_name, pos); + + checkAccuracy(accuracy); + out = std::format("uniqCombined64State(18)({})", expr); + + return true; +} + +/* Not implemented: always throws NOT_IMPLEMENTED. */ bool HllIf::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not yet implemented", getName()); +} + +/* Takes between 2 and 64 arguments and emits uniqCombined64MergeState(18)(arrayJoin([a, b, ...])). */ bool HllMerge::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto arguments = getArguments(fn_name, pos, ArgumentState::Parsed, {2, 64}); + const auto arguments_as_string = std::accumulate( + arguments.cbegin(), + arguments.cend(), + std::string(), + [](const auto & acc, const auto & argument) { return acc + (acc.empty() ? "" : ", ") + argument; }); + + out = std::format("uniqCombined64MergeState(18)(arrayJoin([{}]))", arguments_as_string); + + return true; +} + +/* Not implemented: always throws NOT_IMPLEMENTED. */ bool MakeBag::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not yet implemented", getName()); +} + +/* Not implemented: always throws NOT_IMPLEMENTED. */ bool MakeBagIf::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not yet implemented", getName()); +} + +/* Emits groupArrayIf(expr , expr IS NOT NULL); when a second argument follows, it is passed as the aggregate parameter: groupArrayIf(max_size)(...). */ bool MakeList::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name, pos); + out = "groupArrayIf(" + max_size + ")(" + expr + " , " + expr + " IS NOT NULL)"; + } + else + out = "groupArrayIf(" + expr + " , " + expr + " IS NOT NULL)"; + return true; 
+} + +/* Emits groupArrayIf(expr , predicate ); when a third argument follows, it is passed as the aggregate parameter: groupArrayIf(max_size)(...). */ bool MakeListIf::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + ++pos; + const auto predicate = getConvertedArgument(fn_name, pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name, pos); + out = "groupArrayIf(" + max_size + ")(" + expr + " , " + predicate + " )"; + } + else + out = "groupArrayIf(" + expr + " , " + predicate + " )"; + return true; +} + +/* Emits groupArray(column) concatenated with count(*) - length(groupArray(column)) null entries. */ bool MakeListWithNulls::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto column_name = getConvertedArgument(fn_name, pos); + out = "arrayConcat(groupArray(" + column_name + "), arrayMap(x -> null, range(0, toUInt32(count(*)-length( groupArray(" + column_name + + "))),1)))"; + return true; +} + +/* Emits groupUniqArray(expr), or groupUniqArray(max_size)(expr) when a second argument follows. */ bool MakeSet::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name, pos); + out = "groupUniqArray(" + max_size + ")(" + expr + ")"; + } + else + out = "groupUniqArray(" + expr + ")"; + return true; +} + +/* Emits groupUniqArrayIf(expr , predicate ), or groupUniqArrayIf(max_size)(...) when a third argument follows. */ bool MakeSetIf::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + ++pos; + const auto predicate = getConvertedArgument(fn_name, pos); + if (pos->type == TokenType::Comma) + { + ++pos; + const auto max_size = getConvertedArgument(fn_name, pos); + out = "groupUniqArrayIf(" + max_size + ")(" + expr + " , " + predicate + " )"; + } + else + out = "groupUniqArrayIf(" + expr + " , " + predicate + " )"; + return true; +} + 
+/* Max, MaxIf, Min and MinIf are direct mappings to max / maxIf / min / minIf. */ bool Max::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "max"); +} + +bool MaxIf::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "maxIf"); +} + +bool Min::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "min"); +} + +bool MinIf::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "minIf"); +} + +/* Emits quantile(value/100)(column) from the two arguments (column, value). */ bool Percentile::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String column_name = getConvertedArgument(fn_name, pos); + trim(column_name); + + ++pos; + String value = getConvertedArgument(fn_name, pos); + trim(value); + + out = "quantile(" + value + "/100)(" + column_name + ")"; + return true; +} + +/* Emits quantileExactWeighted(value/100)(bucket,frequency) from the three arguments (bucket, frequency, value). */ bool Percentilew::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String bucket_column = getConvertedArgument(fn_name, pos); + trim(bucket_column); + + ++pos; + String frequency_column = getConvertedArgument(fn_name, pos); + trim(frequency_column); + + ++pos; + String value = getConvertedArgument(fn_name, pos); + trim(value); + + out = "quantileExactWeighted(" + value + "/100)(" + bucket_column + "," + frequency_column + ")"; + return true; +} + +/* Emits quantiles(v1/100, v2/100, ...)(column). The token loop also stops at the end of input, like the other token loops in this patch, so an argument list without a closing round bracket cannot keep the loop running; pos is then left off the bracket for the caller to reject. */ bool Percentiles::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String column_name = getConvertedArgument(fn_name, pos); + trim(column_name); + String expr = "quantiles("; + String value; + while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket) + { + if (pos->type != TokenType::Comma) + { + value = String(pos->begin, pos->end); + expr = expr + value + "/100"; + ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + expr += ", "; + } + else + ++pos; + } + out = expr + ")(" + column_name + ")"; + return 
true; +} + +/* Emits quantiles(v1/100, ...)(column); commas, brackets and the word "dynamic" are skipped. The token loop also stops at the end of input so a missing closing round bracket cannot keep it running. NOTE(review): the two pop_back() calls remove the trailing ", " and therefore assume at least one percentile value was appended. */ bool PercentilesArray::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String column_name = getConvertedArgument(fn_name, pos); + trim(column_name); + String expr = "quantiles("; + String value; + while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket) + { + if (pos->type != TokenType::Comma && String(pos->begin, pos->end) != "dynamic" && pos->type != TokenType::OpeningRoundBracket + && pos->type != TokenType::OpeningSquareBracket && pos->type != TokenType::ClosingSquareBracket) + { + value = String(pos->begin, pos->end); + expr = expr + value + "/100"; + + if (pos->type != TokenType::Comma && pos->type != TokenType::OpeningRoundBracket && pos->type != TokenType::OpeningSquareBracket + && pos->type != TokenType::ClosingSquareBracket) + expr += ", "; + ++pos; + } + else + { + ++pos; + } + } + /* step onto a second closing round bracket if one follows, otherwise stay on the first */ ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + --pos; + + expr.pop_back(); + expr.pop_back(); + expr = expr + ")(" + column_name + ")"; + out = expr; + return true; +} + +/* Emits quantilesExactWeighted(v1/100, ...)(bucket,frequency). The token loop also stops at the end of input so a missing closing round bracket cannot keep it running. */ bool Percentilesw::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String bucket_column = getConvertedArgument(fn_name, pos); + trim(bucket_column); + + ++pos; + String frequency_column = getConvertedArgument(fn_name, pos); + trim(frequency_column); + + String expr = "quantilesExactWeighted("; + String value; + + while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket) + { + if (pos->type != TokenType::Comma) + { + value = String(pos->begin, pos->end); + expr = expr + value + "/100"; + ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + expr += ", "; + } + else + ++pos; + } + expr = expr + ")(" + bucket_column + "," + frequency_column + ")"; + out = expr; + return true; +} + +bool PercentileswArray::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + 
+ if (fn_name.empty()) + return false; + + ++pos; + String bucket_column = getConvertedArgument(fn_name, pos); + trim(bucket_column); + + ++pos; + String frequency_column = getConvertedArgument(fn_name, pos); + trim(frequency_column); + + String expr = "quantilesExactWeighted("; + String value; + while (pos->type != TokenType::ClosingRoundBracket) + { + if (pos->type != TokenType::Comma && String(pos->begin, pos->end) != "dynamic" && pos->type != TokenType::OpeningRoundBracket + && pos->type != TokenType::OpeningSquareBracket && pos->type != TokenType::ClosingSquareBracket) + { + value = String(pos->begin, pos->end); + expr = expr + value + "/100"; + + if (pos->type != TokenType::Comma && pos->type != TokenType::OpeningRoundBracket && pos->type != TokenType::OpeningSquareBracket + && pos->type != TokenType::ClosingSquareBracket) + expr += ", "; + ++pos; + } + else + { + ++pos; + } + } + ++pos; + if (pos->type != TokenType::ClosingRoundBracket) + --pos; + + expr.pop_back(); + expr.pop_back(); + expr = expr + ")(" + bucket_column + "," + frequency_column + ")"; + out = expr; + return true; +} + +bool Stdev::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + out = "sqrt(varSamp(" + expr + "))"; + return true; +} + +bool StdevIf::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + if (pos->type != TokenType::Comma) + return false; + + ++pos; + const auto predicate = getConvertedArgument(fn_name, pos); + out = "sqrt(varSampIf(" + expr + ", " + predicate + "))"; + return true; +} + +bool Sum::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "sum"); +} + +bool SumIf::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, 
pos, "sumIf"); +} + +bool TakeAny::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + String expr; + String arg; + const auto begin = pos; + while (pos->type != TokenType::ClosingRoundBracket) + { + if (pos != begin) + expr.append(", "); + ++pos; + arg = getConvertedArgument(fn_name, pos); + expr = expr + "any(" + arg + ")"; + } + out = expr; + return true; +} + +bool TakeAnyIf::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + ++pos; + const auto expr = getConvertedArgument(fn_name, pos); + if (pos->type != TokenType::Comma) + return false; + + ++pos; + const auto predicate = getConvertedArgument(fn_name, pos); + out = "anyIf(" + expr + ", " + predicate + ")"; + return true; +} + +bool Variance::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const String expr = getArgument(fn_name, pos); + out = std::format( + "IF (isNaN(varSamp(if(toTypeName({0}) = 'Nullable(Nothing)', throwIf(toTypeName({0}) = 'Nullable(Nothing)', " + "'summarize operator: Failed to resolve scalar expression named null'), {0})) AS variance_{1}), 0, variance_{1})", + expr, + generateUniqueIdentifier()); + + return true; +} + +bool VarianceIf::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const String expr = getArgument(fn_name, pos); + const String predicate = getArgument(fn_name, pos); + out = std::format( + "IF (isNaN(varSampIf((if(toTypeName({0}) = 'Nullable(Nothing)', throwIf(toTypeName({0}) = 'Nullable(Nothing)', " + "'summarize operator: Failed to resolve scalar expression named null'), {0})), {1}) AS variance_{2}), 0, variance_{2})", + expr, + predicate, + generateUniqueIdentifier()); + + return true; +} + +bool 
VarianceP::convertImpl(String & out, IParser::Pos & pos)
+{
+    // variancep(expr): population variance (varPop) with a NaN result replaced by 0. The generated SQL
+    // raises an error via throwIf when the argument has type Nullable(Nothing).
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    const String expr = getArgument(fn_name, pos);
+    out = std::format(
+        "IF (isNaN(varPop(if(toTypeName({0}) = 'Nullable(Nothing)', throwIf(toTypeName({0}) = 'Nullable(Nothing)', "
+        "'summarize operator: Failed to resolve scalar expression named null'), {0})) AS variance_{1}), 0, variance_{1})",
+        expr,
+        generateUniqueIdentifier());
+
+    return true;
+}
+
+/// count_distinct(expr) -> count(DISTINCT expr)
+bool CountDistinct::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    const String expr = getArgument(fn_name, pos);
+    out = std::format("count(DISTINCT {})", expr);
+
+    return true;
+}
+
+
+/// count_distinctif(expr, predicate) -> countIf(DISTINCT expr, predicate)
+bool CountDistinctIf::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    const String expr = getArgument(fn_name, pos);
+    const String predicate = getArgument(fn_name, pos);
+    out = std::format("countIf(DISTINCT {}, {})", expr, predicate);
+
+    return true;
+}
+
+}
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h
new file mode 100644
index 000000000000..d8fbe7a9cfe3
--- /dev/null
+++ b/src/Parsers/Kusto/KustoFunctions/KQLAggregationFunctions.h
@@ -0,0 +1,316 @@
+#pragma once
+
+#include "IParserKQLFunction.h"
+
+namespace DB
+{
+/// One class per KQL aggregation function. Each class derives from IParserKQLFunction, returns the KQL
+/// spelling of the function from getName() and declares convertImpl(), which is defined in
+/// KQLAggregationFunctions.cpp.
+class ArgMax : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "arg_max()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ArgMin : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "arg_min()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class Avg : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "avg()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class AvgIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "avgif()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class BinaryAllAnd : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "binary_all_and()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class BinaryAllOr : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "binary_all_or()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class BinaryAllXor : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "binary_all_xor()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class BuildSchema : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "buildschema()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class Count : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "count()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class CountIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "countif()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class DCount : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "dcount()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class DCountIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "dcountif()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class DCountHll : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "dcount_hll()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class Hll : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "hll()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class HllIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "hll_if()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+
+class HllMerge : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "hll_merge()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class MakeBag : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "make_bag()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class MakeBagIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "make_bag_if()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class MakeList : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "make_list()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class MakeListIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "make_list_if()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class MakeListWithNulls : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "make_list_with_nulls()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class MakeSet : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "make_set()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class MakeSetIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "make_set_if()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class Max : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "max()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class MaxIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "maxif()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class Min : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "min()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class MinIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "minif()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class Percentile : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "percentile()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class Percentilew : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "percentilew()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class Percentiles : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "percentiles()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class PercentilesArray : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "percentiles_array()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class Percentilesw : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "percentilesw()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class PercentileswArray : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "percentilesw_array()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class Stdev : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "stdev()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class StdevIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "stdevif()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class Sum : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "sum()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class SumIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "sumif()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class TakeAny : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "take_any()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class TakeAnyIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "take_anyif()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class Variance : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "variance()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class VarianceIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "varianceif()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class VarianceP : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "variancep()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class CountDistinct : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "count_distinct()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class CountDistinctIf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return
"count_distinctif()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+}
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp
new file mode 100644
index 000000000000..5779f0ed3708
--- /dev/null
+++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.cpp
@@ -0,0 +1,84 @@
+#include "KQLBinaryFunctions.h"
+
+// NOTE(review): the header name is missing from the directive below; std::format is used in this file.
+#include
+
+namespace DB
+{
+
+/// binary_and(lhs, rhs) -> bitAnd over both operands cast to Int64.
+bool BinaryAnd::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto function_name = getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    const auto lhs = getArgument(function_name, pos);
+    const auto rhs = getArgument(function_name, pos);
+    out = std::format("bitAnd(cast({0}, 'Int64'), cast({1}, 'Int64'))", lhs, rhs);
+    return true;
+}
+
+/// binary_not(value) -> bitNot over the operand cast to Int64.
+bool BinaryNot::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto function_name = getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    const auto value = getArgument(function_name, pos);
+    out = std::format("bitNot(cast({0}, 'Int64'))", value);
+    return true;
+}
+
+/// binary_or(lhs, rhs) -> bitOr over both operands cast to Int64.
+bool BinaryOr::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto function_name = getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    const auto lhs = getArgument(function_name, pos);
+    const auto rhs = getArgument(function_name, pos);
+    out = std::format("bitOr(cast({0}, 'Int64'), cast({1}, 'Int64'))", lhs, rhs);
+    return true;
+}
+
+/// binary_shift_left(value, count) -> bitShiftLeft on the Int64-cast value; the generated SQL yields
+/// null when count is negative.
+bool BinaryShiftLeft::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto function_name = getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    const auto value = getArgument(function_name, pos);
+    const auto count = getArgument(function_name, pos);
+    out = std::format("if({1} < 0, null, bitShiftLeft(cast({0}, 'Int64'), {1}))", value, count);
+    return true;
+}
+
+/// binary_shift_right(value, count) -> bitShiftRight on the Int64-cast value; the generated SQL yields
+/// null when count is negative.
+bool BinaryShiftRight::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto function_name = getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    const auto value = getArgument(function_name, pos);
+    const auto count = getArgument(function_name, pos);
+    out = std::format("if({1} < 0, null, bitShiftRight(cast({0}, 'Int64'), {1}))", value, count);
+    return true;
+}
+
+/// binary_xor(lhs, rhs) -> bitXor over both operands cast to Int64.
+bool BinaryXor::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto function_name = getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    const auto lhs = getArgument(function_name, pos);
+    const auto rhs = getArgument(function_name, pos);
+    out = std::format("bitXor(cast({0}, 'Int64'), cast({1}, 'Int64'))", lhs, rhs);
+    return true;
+}
+
+/// bitset_count_ones(value) is forwarded unchanged to bitCount.
+bool BitsetCountOnes::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "bitCount");
+}
+
+}
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h
new file mode 100644
index 000000000000..591c0fd236e6
--- /dev/null
+++ b/src/Parsers/Kusto/KustoFunctions/KQLBinaryFunctions.h
@@ -0,0 +1,57 @@
+#pragma once
+
+#include "IParserKQLFunction.h"
+
+namespace DB
+{
+/// One class per KQL binary (bitwise) function; getName() returns the KQL spelling and convertImpl()
+/// is defined in KQLBinaryFunctions.cpp.
+class BinaryAnd : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "binary_and()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class BinaryNot : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "binary_not()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class BinaryOr : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "binary_or()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class BinaryShiftLeft : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "binary_shift_left()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class BinaryShiftRight : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override {
return "binary_shift_right()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class BinaryXor : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "binary_xor()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class BitsetCountOnes : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "bitset_count_ones()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+}
+
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp
new file mode 100644
index 000000000000..5891ee1fd9e5
--- /dev/null
+++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp
@@ -0,0 +1,102 @@
+#include "KQLCastingFunctions.h"
+
+// NOTE(review): the header name is missing from the directive below; std::format is used in this file.
+#include
+
+namespace DB
+{
+/// tobool(x): compares the string form of x with 'true' / 'false'; anything else is parsed with
+/// toInt64OrNull and tested against 0.
+bool ToBool::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto function_name = getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    const auto param = getArgument(function_name, pos);
+    out = std::format(
+        "multiIf(toString({0}) = 'true', true, "
+        "toString({0}) = 'false', false, toInt64OrNull(toString({0})) != 0)",
+        param);
+    return true;
+}
+
+bool ToDateTime::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "kql_todatetime");
+}
+
+/// todouble(x): toFloat64OrNull on the string form of x; the result is divided by 100 when the
+/// argument type is IntervalNanosecond.
+bool ToDouble::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto function_name = getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    const auto param = getArgument(function_name, pos);
+    out = std::format("toFloat64OrNull(toString({0})) / if(toTypeName({0}) = 'IntervalNanosecond', 100, 1)", param);
+    return true;
+}
+
+/// toint(x): toInt32OrNull on the string form of x, integer-divided by 100 when the argument type is
+/// IntervalNanosecond.
+bool ToInt::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto function_name = getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    const auto param = getArgument(function_name, pos);
+    out = std::format("intDiv(toInt32OrNull(toString({0})), if(toTypeName({0}) = 'IntervalNanosecond', 100, 1))", param);
+    return true;
+}
+
+bool ToLong::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "kql_tolong");
+}
+
+/// tostring(x) -> kql_tostring(x) with a null result replaced by the empty string.
+bool ToString::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto function_name = getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    const auto argument = getArgument(function_name, pos);
+    out = std::format("ifNull(kql_tostring({}), '')", argument);
+    return true;
+}
+
+bool ToTimeSpan::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "kql_totimespan");
+}
+
+/// todecimal(x): when the first argument token is a quoted identifier, string literal or number, the
+/// scale is derived in SQL from the textual form of the argument (digits after '.', or the exponent
+/// after 'e'). Any other argument is converted with a fixed scale of 17.
+bool ToDecimal::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    // Peek at the first argument token, then step back so getArgument() consumes it normally.
+    ++pos;
+    if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral || pos->type == TokenType::Number)
+    {
+        --pos;
+        const auto arg = getArgument(fn_name, pos);
+        const auto scale = std::format(
+            "if (position({0}::String,'e') = 0,(countSubstrings({0}::String,'.') = 1 ? length(substr({0}::String, "
+            "position({0}::String,'.') + 1)): 0), toUInt64(multiIf ((position({0}::String,'e+') as x) > 0, substr({0}::String, x + "
+            "2),(position({0}::String, 'e-') as y) > 0, substr({0}::String, y + 2), position({0}::String, 'e-') = 0 AND "
+            "position({0}::String, 'e+') =0 AND position({0}::String, 'e') > 0,substr({0}::String, position({0}::String, 'e') + 1), "
+            "0::String)))",
+            arg);
+        // NOTE(review): both branches of the generated ternary are the same expression, so the type test
+        // has no effect on the result — confirm whether a different branch was intended.
+        out = std::format(
+            "toTypeName({0}) = 'String' OR toTypeName({0}) = 'FixedString' ? toDecimal128OrNull({0}::String , ({1}::UInt8)) : "
+            "toDecimal128OrNull({0}::String , ({1}::UInt8))",
+            arg,
+            scale);
+    }
+    else
+    {
+        --pos;
+        const auto arg = getArgument(fn_name, pos);
+        out = std::format("toDecimal128OrNull({0}::Nullable(String), 17) / if(toTypeName({0}) = 'IntervalNanosecond', 100, 1)", arg);
+    }
+
+    return true;
+}
+}
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h
new file mode 100644
index 000000000000..72d5602dfcb3
--- /dev/null
+++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include "IParserKQLFunction.h"
+
+namespace DB
+{
+/// One class per KQL casting function; getName() returns the KQL spelling and convertImpl() is defined
+/// in KQLCastingFunctions.cpp.
+class ToBool : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "tobool()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ToDateTime : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "todatetime()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ToDouble : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "todouble()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ToInt : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "toint()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ToLong : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "tolong()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ToString : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "tostring()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ToTimeSpan : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "totimespan()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ToDecimal : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "todecimal()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+}
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp
new file mode 100644
index 000000000000..d044b3db7497
--- /dev/null
+++ b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.cpp
@@ -0,0 +1,216 @@
+#include "KQLDataTypeFunctions.h"
+
+// NOTE(review): the header names are missing from the directives below.
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+
+namespace DB::ErrorCodes
+{
+extern const int BAD_ARGUMENTS;
+extern const int SYNTAX_ERROR;
+}
+
+namespace
+{
+/// Shared implementation of the scalar literal functions that map onto accurateCastOrNull.
+/// Throws BAD_ARGUMENTS when the first argument token is a quoted identifier or string literal.
+/// The generated SQL raises 'Failed to parse <type> literal' when the argument type is
+/// IntervalNanosecond or when the cast turns a non-null argument into null (or vice versa).
+bool mapToAccurateCast(std::string & out, DB::IParser::Pos & pos, const std::string_view type_name)
+{
+    const auto function_name = DB::IParserKQLFunction::getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    // Peek at the first argument token, then step back so getArgument() consumes it normally.
+    ++pos;
+    if (const auto & type = pos->type; type == DB::TokenType::QuotedIdentifier || type == DB::TokenType::StringLiteral)
+        throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "String cannot be parsed as a literal of type {}", type_name);
+
+    --pos;
+
+    const auto arg = DB::IParserKQLFunction::getArgument(function_name, pos);
+    out = std::format(
+        "if(toTypeName({0}) = 'IntervalNanosecond' or isNull(accurateCastOrNull({0}, '{1}')) != isNull({0}), "
+        "accurateCastOrNull(throwIf(true, 'Failed to parse {1} literal'), '{1}'), accurateCastOrNull({0}, '{1}'))",
+        arg,
+        type_name);
+
+    return true;
+}
+}
+
+namespace DB
+{
+bool DatatypeBool::convertImpl(String & out, IParser::Pos & pos)
+{
+    return mapToAccurateCast(out, pos, "Bool");
+}
+
+/// datetime(literal) -> kql_datetime(...). The literal is passed through unquoted when it is the word
+/// null (any case) or an integer outside 1900..2261; otherwise it is wrapped in single quotes.
+bool DatatypeDatetime::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    auto argument = extractLiteralArgumentWithoutQuotes(fn_name, pos);
+    const auto mutated_argument = std::invoke(
+
[&argument]
+        {
+            if (Int64 value; (boost::conversion::try_lexical_convert(argument, value) && (value < 1900 || value > 2261))
+                || Poco::toLower(argument) == "null")
+                return argument;
+
+            return "'" + argument + "'";
+        });
+
+    out = std::format("kql_datetime({})", mutated_argument);
+    return true;
+}
+
+/// dynamic(...): copies the argument tokens to `out`, converting words, numbers and string literals
+/// through getConvertedArgument(). Throws BAD_ARGUMENTS for property bags ('{' as first token) and
+/// SYNTAX_ERROR for a bare word that is not one of the allowed literal functions and is not directly
+/// followed by ')', ']' or ','.
+bool DatatypeDynamic::convertImpl(String & out, IParser::Pos & pos)
+{
+    // Keyed by string_view so that tokens can be looked up without building a temporary String.
+    static const std::unordered_set<std::string_view> ALLOWED_FUNCTIONS{"date", "datetime", "dynamic", "time", "timespan"};
+
+    const auto function_name = getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    ++pos;
+    if (pos->type == TokenType::OpeningCurlyBrace)
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Property bags are not supported for now in {}", function_name);
+
+    while (!pos->isEnd() && pos->type != TokenType::ClosingRoundBracket)
+    {
+        if (const auto token_type = pos->type; token_type == TokenType::BareWord || token_type == TokenType::Number
+            || token_type == TokenType::QuotedIdentifier || token_type == TokenType::StringLiteral)
+        {
+            if (const std::string_view token(pos->begin, pos->end); token_type == TokenType::BareWord && !ALLOWED_FUNCTIONS.contains(token))
+            {
+                // Look one token ahead to make sure the word is a plain element, then step back.
+                ++pos;
+                if (pos->type != TokenType::ClosingRoundBracket && pos->type != TokenType::ClosingSquareBracket
+                    && pos->type != TokenType::Comma)
+                    throw Exception(ErrorCodes::SYNTAX_ERROR, "Expression {} is not supported inside {}", token, function_name);
+
+                --pos;
+            }
+
+            out.append(getConvertedArgument(function_name, pos));
+        }
+        else
+        {
+            out.append(pos->begin, pos->end);
+            ++pos;
+        }
+    }
+
+    return true;
+}
+
+/// guid(literal) -> toUUIDOrNull('<literal>'). A quoted literal has its quotes removed; an unquoted one
+/// is taken as the raw text from the first argument token up to the token before the closing bracket.
+bool DatatypeGuid::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+    String guid_str;
+
+    ++pos;
+    if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral)
+        guid_str = String(pos->begin + 1, pos->end - 1);
+    else
+    {
+        auto start = pos;
+        while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
+        {
+            ++pos;
+            if (pos->type == TokenType::ClosingRoundBracket)
+                break;
+        }
+        --pos;
+        guid_str = String(start->begin, pos->end);
+    }
+    out = std::format("toUUIDOrNull('{}')", guid_str);
+    ++pos;
+    return true;
+}
+
+bool DatatypeInt::convertImpl(String & out, IParser::Pos & pos)
+{
+    return mapToAccurateCast(out, pos, "Int32");
+}
+
+bool DatatypeLong::convertImpl(String & out, IParser::Pos & pos)
+{
+    return mapToAccurateCast(out, pos, "Int64");
+}
+
+bool DatatypeReal::convertImpl(String & out, IParser::Pos & pos)
+{
+    return mapToAccurateCast(out, pos, "Float64");
+}
+
+/// timespan(literal): parses the literal into ticks and emits the matching interval expression.
+bool DatatypeTimespan::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    const auto argument = extractLiteralArgumentWithoutQuotes(fn_name, pos);
+    const auto ticks = ParserKQLTimespan::parse(argument);
+    out = kqlTicksToInterval(ticks);
+
+    return true;
+}
+
+/// decimal(literal) -> toDecimal128(<literal>::String, <scale>), or NULL for the null literal.
+///
+/// Throws BAD_ARGUMENTS when the argument is a quoted string, or contains letters without being the
+/// word null (any case) or a number in `<digits>e[+-]<digits>` form. For the exponential form the scale
+/// is the exponent's digits (sign ignored); otherwise it is derived from the position of the decimal
+/// point, and is 0 when there is none.
+bool DatatypeDecimal::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    constexpr int precision = 34;
+
+    // Peek at the first argument token, then step back so getArgument() consumes it normally.
+    ++pos;
+    if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral)
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse String as decimal Literal: {}", fn_name);
+
+    --pos;
+    const String arg = getArgument(fn_name, pos);
+
+    //NULL expr returns NULL not exception
+    static const std::regex expr{"^[0-9]+e[+-]?[0-9]+"};
+    const bool is_null = Poco::toUpper(arg) == "NULL";
+    const bool is_exponential = std::regex_match(arg, expr);
+
+    // isalpha() is only defined for values representable as unsigned char, so cast before calling it.
+    const bool has_letters = std::any_of(arg.begin(), arg.end(), [](const char c) { return ::isalpha(static_cast<unsigned char>(c)) != 0; });
+    if (has_letters && !is_null && !is_exponential)
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse String as decimal Literal: {}", fn_name);
+
+    if (is_exponential)
+    {
+        const auto exponential_pos = arg.find('e');
+        const bool has_sign = arg[exponential_pos + 1] == '+' || arg[exponential_pos + 1] == '-';
+        const int scale = std::stoi(arg.substr(exponential_pos + (has_sign ? 2 : 1)));
+
+        out = std::format("toDecimal128({}::String,{})", arg, scale);
+        return true;
+    }
+
+    if (is_null)
+    {
+        out = "NULL";
+        return true;
+    }
+
+    int scale = 0;
+    if (const auto dot_pos = arg.find('.'); dot_pos != String::npos)
+    {
+        // NOTE(review): the length excludes the last digit before the dot, and for a leading dot
+        // (dot_pos == 0) the subtraction wraps so the whole argument is measured — confirm the intent.
+        const auto length = static_cast<int>(std::ssize(arg.substr(0, dot_pos - 1)));
+        scale = std::max(precision - length, 0);
+    }
+
+    out = std::format("toDecimal128({}::String,{})", arg, scale);
+    return true;
+}
+}
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.h
new file mode 100644
index 000000000000..f2a5013ed1f8
--- /dev/null
+++ b/src/Parsers/Kusto/KustoFunctions/KQLDataTypeFunctions.h
@@ -0,0 +1,69 @@
+#pragma once
+
+#include "IParserKQLFunction.h"
+
+namespace DB
+{
+/// One class per KQL scalar literal function; getName() returns the KQL spelling(s), comma separated
+/// when the function has an alias, and convertImpl() is defined in KQLDataTypeFunctions.cpp.
+class DatatypeBool : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "bool(),boolean()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class DatatypeDatetime : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "datetime(),date()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class DatatypeDynamic : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "dynamic()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class DatatypeGuid : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "guid()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class DatatypeInt : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "int()"; }
+    bool convertImpl(String &out,IParser::Pos &pos)
override;
+};
+
+class DatatypeLong : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "long()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class DatatypeReal : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "real(),double()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class DatatypeTimespan : public IParserKQLFunction
+{
+protected:
+    // NOTE(review): unlike the other alias lists this one has a space after the comma — confirm the
+    // consumer of getName() tolerates it.
+    const char * getName() const override { return "timespan(), time()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class DatatypeDecimal : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "decimal()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+}
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp
new file mode 100644
index 000000000000..55ddfdc39e25
--- /dev/null
+++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp
@@ -0,0 +1,568 @@
+#include "KQLDateTimeFunctions.h"
+
+// NOTE(review): the header names are missing from the directives below.
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+
+namespace
+{
+
+/// Shared implementation of the endof<period>() functions: the end of a period is expressed in KQL as
+/// startof<period>(datetime, offset + 1) minus timespan(1tick), and that KQL is converted to SQL.
+/// The offset defaults to 0 when the optional argument is absent.
+bool mapToEndOfPeriod(std::string & out, DB::IParser::Pos & pos, const std::string_view period)
+{
+    const auto function_name = DB::IParserKQLFunction::getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    // Arguments are taken raw (unconverted) because they are re-embedded into a KQL call below.
+    const auto datetime = DB::IParserKQLFunction::getArgument(function_name, pos, DB::IParserKQLFunction::ArgumentState::Raw);
+    const auto offset = DB::IParserKQLFunction::getOptionalArgument(function_name, pos, DB::IParserKQLFunction::ArgumentState::Raw);
+    out = std::format(
+        "minus({}, {})",
+        DB::IParserKQLFunction::kqlCallToExpression(
+            std::format("startof{}", Poco::toLower(std::string(period))),
+            {datetime, std::format("{} + 1", offset.value_or("0"))},
+            pos.max_depth),
+        DB::IParserKQLFunction::kqlCallToExpression("timespan", {"1tick"},
pos.max_depth));
+    return true;
+}
+
+/// Strips one pair of enclosing quotes (' or ") from an already-trimmed argument, in place.
+/// An empty string, or one that does not start with a quote, is left untouched; the trailing
+/// character is removed only when it is the matching quote, so malformed input can neither
+/// index an empty string nor lose a payload character.
+void stripEnclosingQuotes(std::string & text)
+{
+    if (text.empty() || (text.front() != '\"' && text.front() != '\''))
+        return;
+
+    const char quote = text.front();
+    text.erase(0, 1);
+    if (!text.empty() && text.back() == quote)
+        text.pop_back();
+}
+
+/// Shared translation for the startof<period>() family:
+/// kql_todatetime(add<period>s(toStartOf<period>(datetime), offset)), with offset defaulting to 0.
+/// Returns false when getKQLFunctionName() yields an empty name.
+bool mapToStartOfPeriod(std::string & out, DB::IParser::Pos & pos, const std::string_view period)
+{
+    const auto function_name = DB::IParserKQLFunction::getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    const auto datetime = DB::IParserKQLFunction::getArgument(function_name, pos);
+    const auto offset = DB::IParserKQLFunction::getOptionalArgument(function_name, pos);
+    out = std::format("kql_todatetime(add{0}s(toStartOf{0}({1}), {2}))", period, datetime, offset.value_or("0"));
+    return true;
+}
+}
+
+namespace DB::ErrorCodes
+{
+extern const int BAD_ARGUMENTS;
+extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+extern const int SYNTAX_ERROR;
+extern const int LOGICAL_ERROR;
+}
+
+namespace DB
+{
+/// ago([offset]) is re-expressed as the KQL call now(-1 * offset); a missing offset becomes timespan(0).
+bool Ago::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto function_name = getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    const auto offset = getOptionalArgument(function_name, pos, ArgumentState::Raw);
+    out = kqlCallToExpression(
+        "now", {std::format("-1 * {}", offset.value_or(kqlCallToExpression("timespan", {"0"}, pos.max_depth)))}, pos.max_depth);
+    return true;
+}
+
+/// datetime_add(period, offset, datetime) -> kql_datetime(date_add(period, offset, datetime)).
+/// The period is emitted without its enclosing quotes.
+bool DatetimeAdd::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    auto period = getArgument(fn_name, pos);
+    trim(period);
+    stripEnclosingQuotes(period);
+
+    const auto offset = getArgument(fn_name, pos);
+    const auto datetime = getArgument(fn_name, pos);
+
+    out = std::format("kql_datetime(date_add({}, {}, {}))", period, offset, datetime);
+    return true;
+}
+
+/// datetime_part(part, datetime) -> formatDateTime(datetime, '<code>'), optionally wrapped in an
+/// integer division for the sub-second parts. The part name is matched case-insensitively.
+/// Throws SYNTAX_ERROR for a part name that is not listed below.
+bool DatetimePart::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    ++pos;
+    String part = Poco::toUpper(getConvertedArgument(fn_name, pos));
+    trim(part);
+    stripEnclosingQuotes(part);
+
+    String date;
+    if (pos->type == TokenType::Comma)
+    {
+        ++pos;
+        date = getConvertedArgument(fn_name, pos);
+    }
+
+    String format;
+    String head;
+    String trail;
+
+    if (part == "YEAR")
+        format = "%Y"; // calendar year; %G is the ISO week-based year and differs around 1 January
+    else if (part == "QUARTER")
+        format = "%Q";
+    else if (part == "MONTH")
+        format = "%m";
+    else if (part == "WEEK_OF_YEAR")
+        format = "%V";
+    else if (part == "DAY")
+        format = "%e";
+    else if (part == "DAYOFYEAR")
+        format = "%j";
+    else if (part == "HOUR")
+        format = "%H"; // 24-hour clock; %I is the 12-hour clock and would report 15:00 as 03
+    else if (part == "MINUTE")
+        format = "%i";
+    else if (part == "SECOND")
+        format = "%S";
+    else if (part == "MILLISECOND")
+    {
+        format = "%f";
+        head = "intDivOrZero(toInt64(";
+        trail = "), 1000000)";
+    }
+    else if (part == "MICROSECOND")
+    {
+        format = "%f";
+        head = "intDivOrZero(toInt64(";
+        trail = "), 1000)";
+    }
+    else if (part == "NANOSECOND")
+    {
+        format = "%f";
+    }
+    else
+        throw Exception(ErrorCodes::SYNTAX_ERROR, "Unexpected argument {} for {}", part, fn_name);
+
+    out = std::format("{}formatDateTime({}, '{}'){}", head, date, format, trail);
+    return true;
+}
+
+/// datetime_diff(period, lhs, rhs) -> dateDiff(period, rhs, lhs).
+/// MILLISECOND, MICROSECOND and NANOSECOND instead subtract the two values as Decimal128
+/// and scale the difference to an Int64.
+bool DatetimeDiff::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    String period = Poco::toUpper(getArgument(fn_name, pos));
+    trim(period);
+    stripEnclosingQuotes(period);
+
+    const auto datetime_lhs = getArgument(fn_name, pos);
+    const auto datetime_rhs = getArgument(fn_name, pos);
+
+    if (period == "MILLISECOND")
+        out = std::format("toInt64(multiply(minus(toDecimal128({}, 3), toDecimal128({}, 3)), 1000))", datetime_lhs, datetime_rhs);
+    else if (period == "MICROSECOND")
+        out = std::format("toInt64(multiply(minus(toDecimal128({}, 6), toDecimal128({}, 6)), 1000000))", datetime_lhs, datetime_rhs);
+    else if (period == "NANOSECOND")
+        out = std::format("toInt64(multiply(minus(toDecimal128({}, 9), toDecimal128({}, 9)), 1000000000))", datetime_lhs, datetime_rhs);
+    else
+        out = std::format("dateDiff({}, {}, {})", period, datetime_rhs, datetime_lhs);
+
+    return true;
+}
+
+/// dayofmonth(datetime) -> toDayOfMonth(datetime).
+bool DayOfMonth::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "toDayOfMonth");
+}
+
+/// dayofweek(datetime) -> (toDayOfWeek(datetime) % 7) * timespan(1d).
+/// The modulo folds a toDayOfWeek result of 7 down to 0 before scaling to whole days.
+bool DayOfWeek::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    const auto datetime = getArgument(fn_name, pos);
+    out = std::format("(toDayOfWeek({}) % 7) * {}", datetime, kqlCallToExpression("timespan", {"1d"}, pos.max_depth));
+
+    return true;
+}
+
+/// dayofyear(datetime) -> toDayOfYear(datetime).
+bool DayOfYear::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "toDayOfYear");
+}
+
+/// endofmonth(): delegates to mapToEndOfPeriod with the "Month" period.
+bool EndOfMonth::convertImpl(String & out, IParser::Pos & pos)
+{
+    return mapToEndOfPeriod(out, pos, "Month");
+}
+
+/// endofday(): delegates to mapToEndOfPeriod with the "Day" period.
+bool EndOfDay::convertImpl(String & out, IParser::Pos & pos)
+{
+    return mapToEndOfPeriod(out, pos, "Day");
+}
+
+/// endofweek(): delegates to mapToEndOfPeriod with the "Week" period.
+bool EndOfWeek::convertImpl(String & out, IParser::Pos & pos)
+{
+    return mapToEndOfPeriod(out, pos, "Week");
+}
+
+/// endofyear(): delegates to mapToEndOfPeriod with the "Year" period.
+bool EndOfYear::convertImpl(String & out, IParser::Pos & pos)
+{
+    return mapToEndOfPeriod(out, pos, "Year");
+}
+
+/// format_datetime(datetime, format) -> formatDateTime(datetime, '<translated format>').
+/// Letter runs in the KQL format (yy, MM, HH, ...) are translated one token at a time and the
+/// delimiters listed below are copied through. When an f/F run is present and the translated
+/// format contains a '.', the fractional digits are cut out of toString(datetime) and spliced
+/// in after that '.'.
+/// Throws SYNTAX_ERROR for an unsupported specifier or delimiter.
+bool FormatDateTime::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    String formatspecifier;
+    ++pos;
+    const auto datetime = getConvertedArgument(fn_name, pos);
+    ++pos;
+    auto format = getConvertedArgument(fn_name, pos);
+    trim(format);
+
+    // Drop the enclosing quotes; the emptiness checks keep front()/erase() off an empty string.
+    if (!format.empty() && (format.front() == '\"' || format.front() == '\''))
+    {
+        format.erase(0, 1); // erase the first quote
+        if (!format.empty())
+            format.pop_back(); // erase the last quote
+    }
+
+    // getTokens() prepends each token, so res.back() is always the next unread letter run.
+    std::vector<String> res;
+    getTokens(format, res);
+    std::string::size_type i = 0;
+    size_t decimal = 0;
+    while (i < format.size())
+    {
+        const char c = format[i];
+        // The cast avoids undefined behaviour in isalpha() for negative char values.
+        if (!isalpha(static_cast<unsigned char>(c)))
+        {
+            //delimiter
+            if (c == ' ' || c == '-' || c == '_' || c == '[' || c == ']' || c == '/' || c == ',' || c == '.' || c == ':')
+                formatspecifier += c;
+            else
+                throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid format delimiter in function: {}", fn_name);
+            ++i;
+        }
+        else
+        {
+            //format specifier
+            // A letter that getTokens() did not treat as part of a token leaves nothing to read.
+            if (res.empty())
+                throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid format delimiter in function: {}", fn_name);
+
+            const String arg = res.back();
+
+            if (arg == "y" || arg == "yy")
+                formatspecifier += "%y";
+            else if (arg == "yyyy")
+                formatspecifier += "%Y";
+            else if (arg == "M" || arg == "MM")
+                formatspecifier += "%m";
+            else if (arg == "s" || arg == "ss")
+                formatspecifier += "%S";
+            else if (arg == "m" || arg == "mm")
+                formatspecifier += "%i";
+            else if (arg == "h" || arg == "hh")
+                formatspecifier += "%I";
+            else if (arg == "H" || arg == "HH")
+                formatspecifier += "%H";
+            else if (arg == "d")
+                formatspecifier += "%e";
+            else if (arg == "dd")
+                formatspecifier += "%d";
+            else if (arg == "tt")
+                formatspecifier += "%p";
+            else if (arg.starts_with('f') || arg.starts_with('F'))
+                decimal = arg.size();
+            else
+                throw Exception(ErrorCodes::SYNTAX_ERROR, "Format specifier {} in function: {} is not supported", arg, fn_name);
+            res.pop_back();
+            i = i + arg.size();
+        }
+    }
+    if (decimal > 0 && formatspecifier.find('.') != String::npos)
+    {
+        out = std::format(
+            "concat("
+            "substring(toString(formatDateTime({0}, '{1}')), 1, position(toString(formatDateTime({0}, '{1}')), '.')) ,"
+            "substring(substring(toString({0}), position(toString({0}),'.')+1),1,{2}),"
+            "substring(toString(formatDateTime({0}, '{1}')), position(toString(formatDateTime({0}, '{1}')), '.') + 1, "
+            "length(toString(formatDateTime({0}, '{1}')))))",
+            datetime,
+            formatspecifier,
+            decimal);
+    }
+    else
+        out = std::format("formatDateTime({0}, '{1}')", datetime, formatspecifier);
+
+    return true;
+}
+
+/// format_timespan(timespan, 'format') -> concat(<part>, '<delimiter>', ..., '').
+/// The format must be a single-quoted literal with at least one character inside the quotes.
+/// Runs of the same format letter are converted together; a run longer than the letter's
+/// maximum width is split into several parts.
+/// Throws ILLEGAL_TYPE_OF_ARGUMENT for a malformed literal and BAD_ARGUMENTS for a character
+/// that is neither an allowed delimiter nor a known format letter.
+bool FormatTimeSpan::convertImpl(String & out, IParser::Pos & pos)
+{
+    static const std::unordered_set<char> ALLOWED_DELIMITERS{' ', '/', '-', ':', ',', '.', '_', '[', ']'};
+    // format letter -> {timespan unit, optional modulus, truncate to run length?, max width, optional pad function}
+    static const std::unordered_map<char, std::tuple<std::string_view, std::optional<int>, bool, int, std::optional<std::string_view>>>
+        ATTRIBUTES_BY_FORMAT_CHARACTER{
+            {'d', {"1d", std::nullopt, false, 8, "leftPad"}},
+            {'f', {"1tick", 10'000'000, true, 7, "rightPad"}},
+            {'F', {"1tick", 10'000'000, true, 7, std::nullopt}},
+            {'h', {"1h", 24, false, 2, "leftPad"}},
+            {'H', {"1h", 24, false, 2, "leftPad"}},
+            {'m', {"1m", 60, false, 2, "leftPad"}},
+            {'s', {"1s", 60, false, 2, "leftPad"}}};
+
+    const auto fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    const auto timespan = getArgument(fn_name, pos);
+    const auto format = getArgument(fn_name, pos);
+    if (std::ssize(format) < 3 || format.front() != format.back() || format.front() != '\'')
+        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected non-empty string literal as the second argument to {}", fn_name);
+
+    std::string current_streak;
+    std::string delimited_parts;
+
+    // Consumes the pending run of identical format letters and appends one concat() argument
+    // per chunk of at most max_length letters.
+    const auto convert_streak = [&current_streak, &timespan, &delimited_parts, &pos]
+    {
+        while (!current_streak.empty())
+        {
+            if (!delimited_parts.empty())
+                delimited_parts.append(", ");
+
+            const auto attributes_it = ATTRIBUTES_BY_FORMAT_CHARACTER.find(current_streak.front());
+            if (attributes_it == ATTRIBUTES_BY_FORMAT_CHARACTER.cend())
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected format character: {}", current_streak.front());
+
+            const auto & [timespan_unit, modulus, should_truncate, max_length, pad_function] = attributes_it->second;
+            const auto streak_length = std::ssize(current_streak);
+            const auto part_length = std::min(streak_length, static_cast<std::ptrdiff_t>(max_length));
+            current_streak.erase(current_streak.cbegin(), current_streak.cbegin() + part_length);
+
+            auto expression = std::format("intDiv({}, {})", timespan, kqlCallToExpression("timespan", {timespan_unit}, pos.max_depth));
+            expression = std::format("toString({})", modulus ? std::format("modulo({}, {})", expression, *modulus) : expression);
+            if (should_truncate)
+                expression = std::format("substring({}, 1, {})", expression, part_length);
+
+            delimited_parts.append(
+                pad_function ? std::format("if(length({1}) < {2}, {0}({1}, {2}, '0'), {1})", *pad_function, expression, part_length)
+                             : expression);
+        }
+    };
+
+    for (const auto & c : std::string_view(format.cbegin() + 1, format.cend() - 1))
+    {
+        if (ALLOWED_DELIMITERS.contains(c))
+        {
+            convert_streak();
+            // Separate from the previous argument only when there is one; a format that starts
+            // with a delimiter must not produce "concat(, ...".
+            if (!delimited_parts.empty())
+                delimited_parts.append(", ");
+            delimited_parts.append(std::format("'{}'", c));
+        }
+        else if (ATTRIBUTES_BY_FORMAT_CHARACTER.contains(c))
+        {
+            if (!current_streak.empty() && current_streak.back() != c)
+                convert_streak();
+
+            current_streak.push_back(c);
+        }
+        else
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected character '{}' in format string of {}", c, fn_name);
+    }
+
+    convert_streak();
+    out = "concat(" + delimited_parts + ", '')";
+    return true;
+}
+
+/// getmonth(datetime) -> toMonth(datetime).
+bool GetMonth::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "toMonth");
+}
+
+/// getyear(datetime) -> toYear(datetime).
+bool GetYear::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "toYear");
+}
+
+/// hourofday(datetime) -> toHour(datetime).
+bool HoursOfDay::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "toHour");
+}
+
+/// make_timespan(...) -> day * timespan(1d) + hour * timespan(1h) + minute * timespan(1m) + second * timespan(1s).
+/// With four arguments they are read as (day, hour, minute, second); otherwise as
+/// (hour, minute [, second]) with day fixed to 0 and a missing second defaulting to 0.
+bool MakeTimeSpan::convertImpl(String & out, IParser::Pos & pos)
+{
+    const String fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    const auto arg1 = getArgument(fn_name, pos);
+    const auto arg2 = getArgument(fn_name, pos);
+    const auto arg3 = getOptionalArgument(fn_name, pos);
+    const auto arg4 = getOptionalArgument(fn_name, pos);
+
+    const auto & [day, hour, minute, second]
+        = std::invoke([&arg1, &arg2, &arg3, &arg4]
+                      { return arg4 ? std::make_tuple(arg1, arg2, *arg3, *arg4) : std::make_tuple("0", arg1, arg2, arg3.value_or("0")); });
+
+    out = std::format(
+        "{} * {} + {} * {} + {} * {} + {} * {}",
+        day,
+        kqlCallToExpression("timespan", {"1d"}, pos.max_depth),
+        hour,
+        kqlCallToExpression("timespan", {"1h"}, pos.max_depth),
+        minute,
+        kqlCallToExpression("timespan", {"1m"}, pos.max_depth),
+        second,
+        kqlCallToExpression("timespan", {"1s"}, pos.max_depth));
+
+    return true;
+}
+
+/// make_datetime(year, month, day [, hour [, minute [, second]]]).
+/// The generated expression yields null unless year is 1900..2261, month 1..12, hour 0..23,
+/// minute 0..59, second in [0, 60) and year-month-day parses as a valid date; otherwise it
+/// builds a UTC DateTime64 in which the fractional part of `second` is kept.
+/// Omitted hour, minute and second default to 0.
+bool MakeDateTime::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    const auto year = getArgument(fn_name, pos);
+    const auto month = getArgument(fn_name, pos);
+    const auto day = getArgument(fn_name, pos);
+    const auto hour = getOptionalArgument(fn_name, pos);
+    const auto minute = getOptionalArgument(fn_name, pos);
+    const auto second = getOptionalArgument(fn_name, pos);
+    // {3} is the hour, so its upper bound is 23; 59 applies to the minute ({4}) only.
+    out = std::format(
+        "if({0} between 1900 and 2261 and {1} between 1 and 12 and {3} between 0 and 23 and {4} between 0 and 59 and {5} >= 0 and {5} < 60 "
+        " and isNotNull(toModifiedJulianDayOrNull(concat(leftPad(toString({0}), 4, '0'), '-', leftPad(toString({1}), 2, '0'), '-', "
+        "leftPad(toString({2}), 2, '0')))), "
+        "toDateTime64OrNull(toString(makeDateTime64({0}, {1}, {2}, {3}, {4}, truncate({5}), ({5} - truncate({5})) * 1e7, 7, 'UTC')), 9, "
+        "'UTC'), "
+        "null)",
+        year,
+        month,
+        day,
+        hour.value_or("0"),
+        minute.value_or("0"),
+        second.value_or("0"));
+
+    return true;
+}
+
+bool Now::convertImpl(String & out, IParser::Pos & pos)
+{
+    const auto fn_name = getKQLFunctionName(pos);
+    if (fn_name.empty())
+        return false;
+
+    const auto offset = getOptionalArgument(fn_name, pos);
+    out = "now64(9, 'UTC')" + (offset ?
" + " + *offset : ""); + + return true; +} + +bool StartOfDay::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToStartOfPeriod(out, pos, "Day"); +} + +bool StartOfMonth::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToStartOfPeriod(out, pos, "Month"); +} + +bool StartOfWeek::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToStartOfPeriod(out, pos, "Week"); +} + +bool StartOfYear::convertImpl(String & out, IParser::Pos & pos) +{ + return mapToStartOfPeriod(out, pos, "Year"); +} + +bool UnixTimeMicrosecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto value = getArgument(fn_name, pos); + out = std::format("kql_todatetime(fromUnixTimestamp64Micro({}, 'UTC'))", value); + + return true; +} + +bool UnixTimeMillisecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto value = getArgument(fn_name, pos); + out = std::format("kql_todatetime(fromUnixTimestamp64Milli({}, 'UTC'))", value); + + return true; +} + +bool UnixTimeNanosecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto value = getArgument(fn_name, pos); + out = std::format("kql_todatetime(fromUnixTimestamp64Nano({}, 'UTC'))", value); + + return true; +} + +bool UnixTimeSecondsToDateTime::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + ++pos; + if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} accepts only long, int and double type of arguments", fn_name); + + const auto expression = getConvertedArgument(fn_name, pos); + out = std::format( + 
"if(toTypeName(assumeNotNull({0})) in ['Int32', 'Int64', 'Float64', 'UInt32', 'UInt64'], " + "kql_todatetime({0}), kql_todatetime(throwIf(true, '{1} only accepts int, long and double type of arguments')))", + expression, + fn_name); + + return true; +} + +bool WeekOfYear::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + ++pos; + const String time_str = getConvertedArgument(fn_name, pos); + out = std::format("toWeek({},3,'UTC')", time_str); + return true; +} + +bool MonthOfYear::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "toMonth"); +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h new file mode 100644 index 000000000000..35b99795faaa --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.h @@ -0,0 +1,235 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class Ago : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ago()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatetimeAdd : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_add()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatetimePart : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_part()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DatetimeDiff : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "datetime_diff()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DayOfMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class 
DayOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class DayOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "dayofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfDay : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class EndOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "endofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class FormatDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_datetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class FormatTimeSpan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_timespan()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class GetMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "getmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class GetYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "getyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class HoursOfDay : public IParserKQLFunction +{ +protected: + const 
char * getName() const override { return "hourofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeTimeSpan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_timespan()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MakeDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "make_datetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class Now : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "now()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfDay : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofday()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfMonth : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofmonth()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfWeek : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofweek()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class StartOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "startofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeMicrosecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_microseconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeMillisecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_milliseconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeNanosecondsToDateTime : 
public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_nanoseconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class UnixTimeSecondsToDateTime : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "unixtime_seconds_todatetime()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class WeekOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "week_of_year()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +class MonthOfYear : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "monthofyear()"; } + bool convertImpl(String &out,IParser::Pos &pos) override; +}; + +void inline getTokens(String format, std::vector & res) +{ + String str = format; + String token; + auto pos = str.find_first_not_of("abcdefghijklmnopqrstuvwxyzQWERTYUIOPASDFGHJKLZXCVBNM"); + while (pos != String::npos) + { + if (pos != 0) + { + // Found a token + token = str.substr(0, pos); + res.insert(res.begin(), token); + } + str.erase(0, pos+1); // Always remove pos+1 to get rid of delimiter + pos = str.find_first_not_of("abcdefghijklmnopqrstuvwxyzQWERTYUIOPASDFGHJKLZXCVBNM"); + } + // Cover the last (or only) token + if (str.length() > 0) + { + token = str; + res.insert(res.begin(), token); + } +} + +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp new file mode 100644 index 000000000000..ff521a845f21 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp @@ -0,0 +1,346 @@ +#include "KQLDynamicFunctions.h" + +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int SYNTAX_ERROR; +} + +bool ArrayConcat::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "arrayConcat"); +} + 
+bool ArrayIif::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_ArrayIif"); +} + +bool ArrayIndexOf::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + if (fn_name.empty()) + return false; + + const auto array = getArgument(fn_name, pos); + const auto needle = getArgument(fn_name, pos); + out = "minus(indexOf(" + array + ", " + needle + "), 1)"; + + return true; +} + +bool ArrayLength::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto array = getArgument(function_name, pos); + out = std::format("arrayLastIndex(x -> true, {0})", array); + + return true; +} + +bool ArrayReverse::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "arrayReverse"); +} + +bool ArrayRotateLeft::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto array = getArgument(function_name, pos); + const auto count = getArgument(function_name, pos); + out = std::format( + "arrayMap(x -> {0}[moduloOrZero(x + length({0}) + moduloOrZero({1}, toInt64(length({0}))), length({0})) + 1], " + "range(0, length({0})))", + array, + count); + + return true; +} + +bool ArrayRotateRight::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto array = getArgument(function_name, pos, ArgumentState::Raw); + const auto count = getArgument(function_name, pos, ArgumentState::Raw); + out = kqlCallToExpression("array_rotate_left", {array, "-1 * " + count}, pos.max_depth); + + return true; +} + +bool ArrayShiftLeft::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto array 
= getArgument(function_name, pos); + const auto count = getArgument(function_name, pos); + const auto fill = getOptionalArgument(function_name, pos); + out = std::format( + "arrayResize(if({1} > 0, arraySlice({0}, {1} + 1), arrayConcat(arrayWithConstant(abs({1}), fill_value_{3}), {0})), " + "length({0}), if(isNull({2}) and (extract(toTypeName({0}), 'Array\\((.*)\\)') as element_type_{3}) = 'String', " + "defaultValueOfTypeName(if(element_type_{3} = 'Nothing', 'Nullable(Nothing)', element_type_{3})), {2}) as fill_value_{3})", + array, + count, + fill.value_or("null"), + generateUniqueIdentifier()); + + return true; +} + +bool ArrayShiftRight::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto array = getArgument(function_name, pos, ArgumentState::Raw); + const auto count = getArgument(function_name, pos, ArgumentState::Raw); + const auto fill = getOptionalArgument(function_name, pos, ArgumentState::Raw); + + const auto negated_count = "-1 * " + count; + out = kqlCallToExpression( + "array_shift_left", + fill ? 
std::initializer_list{array, negated_count, *fill} + : std::initializer_list{array, negated_count}, + pos.max_depth); + + return true; +} + +bool ArraySlice::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto array = getArgument(function_name, pos); + const auto start = getArgument(function_name, pos); + const auto end = getArgument(function_name, pos); + + out = std::format( + "arraySlice({0}, plus(1, if({1} >= 0, {1}, arrayMax([-length({0}), {1}]) + length({0}))) as offset_{3}, " + " plus(1, if({2} >= 0, {2}, arrayMax([-length({0}), {2}]) + length({0}))) - offset_{3} + 1)", + array, + start, + end, + generateUniqueIdentifier()); + + return true; +} + +bool ArraySortAsc::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_array_sort_asc"); +} + +bool ArraySortDesc::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_array_sort_desc"); +} + +bool ArraySplit::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto array = getArgument(function_name, pos); + const auto indices = getArgument(function_name, pos); + + out = std::format( + "if(empty(arrayMap(x -> if(x >= 0, x, arrayMax([0, x + length({0})::Int64])), flatten([{1}])) as indices_{2}), [{0}], " + "arrayConcat([arraySlice({0}, 1, indices_{2}[1])], arrayMap(i -> arraySlice({0}, indices_{2}[i] + 1, " + "if(i = length(indices_{2}), length({0})::Int64, indices_{2}[i + 1]::Int64) - indices_{2}[i]), " + "range(1, length(indices_{2}) + 1))))", + array, + indices, + generateUniqueIdentifier()); + + return true; +} + +bool ArraySum::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto argument = getArgument(function_name, pos); 
+ out = std::format( + "if(multiSearchAny(extract(toTypeName(arrayMap(x -> assumeNotNull(x), arrayFilter(x -> isNotNull(x), {0}))), " + "'Array\\((.*)\\)'), ['Bool', 'Decimal', 'Float', 'Int', 'Nothing', 'UInt']), " + "arraySum(x -> toFloat64OrDefault(x), {0}), null)", + argument, + generateUniqueIdentifier()); + + return true; +} + +bool BagKeys::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not yet implemented", getName()); +} + +bool BagMerge::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not yet implemented", getName()); +} + +bool BagRemoveKeys::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not yet implemented", getName()); +} + +bool JaccardIndex::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto rhs = getArgument(function_name, pos, ArgumentState::Raw); + out = std::format( + "divide(length({0}), length({1}))", + kqlCallToExpression("set_intersect", {lhs, rhs}, pos.max_depth), + kqlCallToExpression("set_union", {lhs, rhs}, pos.max_depth)); + + return true; +} + +bool Pack::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not yet implemented", getName()); +} + +bool PackAll::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not yet implemented", getName()); +} + +bool PackArray::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "array", {1, Interval::max_bound}); +} + +bool Repeat::convertImpl(String & out, 
IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + String value = getArgument(function_name, pos); + String count = getArgument(function_name, pos); + + value.erase(remove(value.begin(), value.end(), ' '), value.end()); + count.erase(remove(count.begin(), count.end(), ' '), count.end()); + + if (count.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "number of arguments do not match in function: {}", function_name); + else + out = "if(" + count + " < 0, [NULL], " + std::format("arrayWithConstant(abs({1}), {0}))", value, count); + + return true; +} + +bool SetDifference::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = std::invoke( + [&function_name, &pos] + { + std::vector arrays{getArgument(function_name, pos, ArgumentState::Raw)}; + while (auto next_array = getOptionalArgument(function_name, pos, ArgumentState::Raw)) + arrays.push_back(*next_array); + + return kqlCallToExpression("set_union", std::vector(arrays.cbegin(), arrays.cend()), pos.max_depth); + }); + + out = std::format("arrayFilter(x -> not has({1}, x), arrayDistinct({0}))", lhs, rhs); + + return true; +} + +bool SetHasElement::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "has"); +} + +bool SetIntersect::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "arrayIntersect"); +} + +bool SetUnion::convertImpl(String & out, IParser::Pos & pos) +{ + if (!directMapping(out, pos, "arrayConcat")) + return false; + + out = std::format("arrayDistinct({0})", out); + + return true; +} + +bool TreePath::convertImpl([[maybe_unused]] String & out, [[maybe_unused]] IParser::Pos & pos) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not yet implemented", getName()); +} + +bool 
Zip::convertImpl(String & out, IParser::Pos & pos)
+{
+    // Translates zip(a0, a1, ...). Each argument expression is aliased as arg<i>_<id>; it is used
+    // as is when its type name matches Array(Nullable(...)), otherwise it is cast to the Array type
+    // rebuilt from toTypeName(if(length(x) = 0, [NULL], x)). All arrays are then resized with null
+    // padding to the largest length (aliased max_length_<id>), zipped with arrayZip, and every
+    // resulting tuple is turned back into an array.
+    // Returns false when no KQL function name is found at `pos`.
+    const auto function_name = getKQLFunctionName(pos);
+    if (function_name.empty())
+        return false;
+
+    // NOTE(review): {2, 16} is presumably the accepted argument-count range - confirm.
+    const auto arguments = getArguments(function_name, pos, ArgumentState::Parsed, {2, 16});
+    // The identifier is embedded in every alias emitted below (arg<i>_<id>, max_length_<id>).
+    const auto unique_identifier = generateUniqueIdentifier();
+    const auto resized_arguments = std::invoke(
+        [&arguments, &unique_identifier]
+        {
+            // Comma-separated `length(<normalised argument> as arg<i>_<id>)` terms.
+            String lengths;
+            for (int i = 0; i < std::ssize(arguments); ++i)
+            {
+                lengths.append(i > 0 ? ", " : "");
+                lengths.append(std::format(
+                    "length(if(match(toTypeName({0}), 'Array\\(Nullable\\(.*\\)\\)'), {0}, "
+                    "cast({0}, concat('Array(', extract(toTypeName(if(length({0}) = 0, [NULL], {0})), 'Array\\((.*)\\)'), ')'))) as "
+                    "arg{1}_{2})",
+                    arguments[i],
+                    i,
+                    unique_identifier));
+            }
+
+            // The first arrayResize also introduces the max_length_<id> alias reused by the others.
+            auto result = std::format("arrayResize(arg0_{1}, arrayMax([{0}]) as max_length_{1}, null)", lengths, unique_identifier);
+            for (int i = 1; i < std::ssize(arguments); ++i)
+                result.append(std::format(", arrayResize(arg{0}_{1}, max_length_{1}, null)", i, unique_identifier));
+
+            return result;
+        });
+
+    out = std::format("arrayMap(t -> [untuple(t)], arrayZip({0}))", resized_arguments);
+
+    return true;
+}
+
+/// range(...) is delegated to directMapping with the target name `kql_range`.
+bool Range::convertImpl(String & out, IParser::Pos & pos)
+{
+    return directMapping(out, pos, "kql_range");
+}
+}
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h
new file mode 100644
index 000000000000..553ed57332c8
--- /dev/null
+++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.h
@@ -0,0 +1,210 @@
+#pragma once
+
+#include "IParserKQLFunction.h"
+
+namespace DB
+{
+/// Every class below derives from IParserKQLFunction and has the same shape: getName() returns
+/// the KQL spelling of the function, and convertImpl() is only declared here (defined out of line).
+class ArrayConcat : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "array_concat()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ArrayIif : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "array_iif()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ArrayIndexOf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "array_index_of()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ArrayLength : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "array_length()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ArrayReverse : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "array_reverse()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ArrayRotateLeft : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "array_rotate_left()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ArrayRotateRight : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "array_rotate_right()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ArrayShiftLeft : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "array_shift_left()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ArrayShiftRight : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "array_shift_right()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ArraySlice : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "array_slice()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ArraySortAsc : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "array_sort_asc()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ArraySortDesc : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "array_sort_desc()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ArraySplit : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "array_split()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class ArraySum : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "array_sum()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class BagKeys : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "bag_keys()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class BagMerge : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "bag_merge()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class BagRemoveKeys : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "bag_remove_keys()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class JaccardIndex : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "jaccard_index()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class Pack : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "pack()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class PackAll : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "pack_all()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class PackArray : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "pack_array()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class Repeat : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "repeat()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class SetDifference : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "set_difference()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class SetHasElement : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "set_has_element()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class SetIntersect : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "set_intersect()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class SetUnion : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "set_union()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class TreePath : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "treepath()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class Zip : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "zip()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+class Range : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "range()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+}
+
+diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp
new file mode 100644
index 000000000000..6c650b554870
--- /dev/null
+++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.cpp
@@ -0,0 +1,1321 @@
+#include "KQLFunctionFactory.h"
+#include "KQLAggregationFunctions.h"
+#include "KQLBinaryFunctions.h"
+#include "KQLCastingFunctions.h"
+#include "KQLDataTypeFunctions.h"
+#include "KQLDateTimeFunctions.h"
+#include "KQLDynamicFunctions.h"
+#include "KQLGeneralFunctions.h"
+#include "KQLHashFunctions.h"
+#include "KQLIPFunctions.h"
+#include 
"KQLMathematicalFunctions.h" +#include "KQLStringFunctions.h" +#include "KQLTimeSeriesFunctions.h" + +#include + +namespace +{ +enum class KQLFunction : uint16_t +{ + none, + ago, + datetime_add, + datetime_part, + datetime_diff, + dayofmonth, + dayofweek, + dayofyear, + endofday, + endofweek, + endofyear, + endofmonth, + monthofyear, + format_datetime, + format_timespan, + getmonth, + getyear, + hourofday, + make_timespan, + make_datetime, + now, + startofday, + startofmonth, + startofweek, + startofyear, + todatetime, + totimespan, + unixtime_microseconds_todatetime, + unixtime_milliseconds_todatetime, + unixtime_nanoseconds_todatetime, + unixtime_seconds_todatetime, + week_of_year, + + base64_encode_tostring, + base64_encode_fromguid, + base64_decode_tostring, + base64_decode_toarray, + base64_decode_toguid, + countof, + extract, + extract_all, + extract_json, + has_any_index, + indexof, + indexof_regex, + isascii, + isempty, + isnan, + isnotempty, + isnotnull, + isutf8, + isnull, + make_string, + new_guid, + parse_command_line, + parse_csv, + parse_json, + parse_url, + parse_urlquery, + parse_version, + replace_regex, + reverse, + split, + strcat, + strcat_delim, + strcmp, + string_size, + strlen, + strrep, + substring, + tolower, + toupper, + to_utf8, + translate, + trim, + trim_end, + trim_start, + url_decode, + url_encode, + + array_concat, + array_iif, + array_index_of, + array_length, + array_reverse, + array_rotate_left, + array_rotate_right, + array_shift_left, + array_shift_right, + array_slice, + array_sort_asc, + array_sort_desc, + array_split, + array_sum, + bag_keys, + bag_merge, + bag_remove_keys, + jaccard_index, + pack, + pack_all, + pack_array, + repeat, + set_difference, + set_has_element, + set_intersect, + set_union, + treepath, + zip, + + tobool, + todouble, + toint, + tolong, + toscalar, + tostring, + todecimal, + + arg_max, + arg_min, + avg, + avgif, + binary_all_and, + binary_all_or, + binary_all_xor, + buildschema, + count, + countif, + 
dcount, + dcountif, + dcount_hll, + hll, + hll_if, + hll_merge, + make_bag, + make_bag_if, + make_list, + make_list_if, + make_list_with_nulls, + make_set, + make_set_if, + max, + maxif, + min, + minif, + percentile, + percentilew, + percentiles, + percentiles_array, + percentilesw, + percentilesw_array, + stdev, + stdevif, + sum, + sumif, + take_any, + take_anyif, + variance, + varianceif, + variancep, + count_distinct, + count_distinctif, + + series_fir, + series_iir, + series_fit_line, + series_fit_line_dynamic, + series_fit_2lines, + series_fit_2lines_dynamic, + series_outliers, + series_periods_detect, + series_periods_validate, + series_stats_dynamic, + series_stats, + series_fill_backward, + series_fill_const, + series_fill_forward, + series_fill_linear, + + ipv4_compare, + ipv4_is_in_range, + ipv4_is_match, + ipv4_is_private, + ipv4_netmask_suffix, + parse_ipv4, + parse_ipv4_mask, + has_ipv6, + has_any_ipv6, + has_ipv6_prefix, + has_any_ipv6_prefix, + ipv6_compare, + ipv6_is_match, + parse_ipv6, + parse_ipv6_mask, + format_ipv4, + format_ipv4_mask, + has_ipv4, + has_any_ipv4, + has_ipv4_prefix, + has_any_ipv4_prefix, + + binary_and, + binary_not, + binary_or, + binary_shift_left, + binary_shift_right, + binary_xor, + bitset_count_ones, + + bin, + bin_at, + kase, + iff, + iif, + lookup, + gettype, + not_, + + datatype_bool, + datatype_datetime, + datatype_dynamic, + datatype_guid, + datatype_int, + datatype_long, + datatype_real, + datatype_timespan, + datatype_decimal, + range, + + abs, + acos, + asin, + atan, + atan2, + ceiling, + cos, + cot, + degrees, + exp, + exp2, + exp10, + gamma, + isfinite, + isinf, + log, + log2, + log10, + loggamma, + max_of, + min_of, + pi, + pow, + radians, + rand, + round, + sign, + sin, + sqrt, + tan, + + hash, + hash_sha256 +}; + +const std::unordered_map KQL_FUNCTIONS{ + {"ago", KQLFunction::ago}, + {"datetime_add", KQLFunction::datetime_add}, + {"datetime_part", KQLFunction::datetime_part}, + {"datetime_diff", 
KQLFunction::datetime_diff}, + {"dayofmonth", KQLFunction::dayofmonth}, + {"dayofweek", KQLFunction::dayofweek}, + {"dayofyear", KQLFunction::dayofyear}, + {"endofday", KQLFunction::endofday}, + {"endofweek", KQLFunction::endofweek}, + {"endofyear", KQLFunction::endofyear}, + {"endofmonth", KQLFunction::endofmonth}, + + {"format_datetime", KQLFunction::format_datetime}, + {"format_timespan", KQLFunction::format_timespan}, + {"getmonth", KQLFunction::getmonth}, + {"getyear", KQLFunction::getyear}, + {"hourofday", KQLFunction::hourofday}, + {"make_timespan", KQLFunction::make_timespan}, + {"make_datetime", KQLFunction::make_datetime}, + {"now", KQLFunction::now}, + {"startofday", KQLFunction::startofday}, + {"startofmonth", KQLFunction::startofmonth}, + {"startofweek", KQLFunction::startofweek}, + {"startofyear", KQLFunction::startofyear}, + {"todatetime", KQLFunction::todatetime}, + {"totimespan", KQLFunction::totimespan}, + {"unixtime_microseconds_todatetime", KQLFunction::unixtime_microseconds_todatetime}, + {"unixtime_milliseconds_todatetime", KQLFunction::unixtime_milliseconds_todatetime}, + {"unixtime_nanoseconds_todatetime", KQLFunction::unixtime_nanoseconds_todatetime}, + {"unixtime_seconds_todatetime", KQLFunction::unixtime_seconds_todatetime}, + {"week_of_year", KQLFunction::week_of_year}, + {"monthofyear", KQLFunction::monthofyear}, + {"base64_encode_tostring", KQLFunction::base64_encode_tostring}, + {"base64_encode_fromguid", KQLFunction::base64_encode_fromguid}, + {"base64_decode_tostring", KQLFunction::base64_decode_tostring}, + {"base64_decode_toarray", KQLFunction::base64_decode_toarray}, + {"base64_decode_toguid", KQLFunction::base64_decode_toguid}, + {"countof", KQLFunction::countof}, + {"extract", KQLFunction::extract}, + {"extract_all", KQLFunction::extract_all}, + {"extract_json", KQLFunction::extract_json}, + {"extractjson", KQLFunction::extract_json}, + {"has_any_index", KQLFunction::has_any_index}, + {"indexof", KQLFunction::indexof}, + 
{"indexof_regex", KQLFunction::indexof_regex}, + {"isempty", KQLFunction::isempty}, + {"isascii", KQLFunction::isascii}, + {"isnan", KQLFunction::isnan}, + {"isnotempty", KQLFunction::isnotempty}, + {"notempty", KQLFunction::isnotempty}, + {"isnotnull", KQLFunction::isnotnull}, + {"notnull", KQLFunction::isnotnull}, + {"isutf8", KQLFunction::isutf8}, + {"isnull", KQLFunction::isnull}, + {"make_string", KQLFunction::make_string}, + {"new_guid", KQLFunction::new_guid}, + {"parse_command_line", KQLFunction::parse_command_line}, + {"parse_csv", KQLFunction::parse_csv}, + {"parse_json", KQLFunction::parse_json}, + {"parse_url", KQLFunction::parse_url}, + {"parse_urlquery", KQLFunction::parse_urlquery}, + {"parse_version", KQLFunction::parse_version}, + {"replace_regex", KQLFunction::replace_regex}, + {"reverse", KQLFunction::reverse}, + {"split", KQLFunction::split}, + {"strcat", KQLFunction::strcat}, + {"strcat_delim", KQLFunction::strcat_delim}, + {"strcmp", KQLFunction::strcmp}, + {"string_size", KQLFunction::string_size}, + {"strlen", KQLFunction::strlen}, + {"strrep", KQLFunction::strrep}, + {"substring", KQLFunction::substring}, + {"tolower", KQLFunction::tolower}, + {"toupper", KQLFunction::toupper}, + {"to_utf8", KQLFunction::to_utf8}, + {"translate", KQLFunction::translate}, + {"trim", KQLFunction::trim}, + {"trim_end", KQLFunction::trim_end}, + {"trim_start", KQLFunction::trim_start}, + {"url_decode", KQLFunction::url_decode}, + {"url_encode", KQLFunction::url_encode}, + + {"array_concat", KQLFunction::array_concat}, + {"array_iff", KQLFunction::array_iif}, + {"array_iif", KQLFunction::array_iif}, + {"array_index_of", KQLFunction::array_index_of}, + {"array_length", KQLFunction::array_length}, + {"array_reverse", KQLFunction::array_reverse}, + {"array_rotate_left", KQLFunction::array_rotate_left}, + {"array_rotate_right", KQLFunction::array_rotate_right}, + {"array_shift_left", KQLFunction::array_shift_left}, + {"array_shift_right", 
KQLFunction::array_shift_right}, + {"array_slice", KQLFunction::array_slice}, + {"array_sort_asc", KQLFunction::array_sort_asc}, + {"array_sort_desc", KQLFunction::array_sort_desc}, + {"array_split", KQLFunction::array_split}, + {"array_sum", KQLFunction::array_sum}, + {"bag_keys", KQLFunction::bag_keys}, + {"bag_merge", KQLFunction::bag_merge}, + {"bag_remove_keys", KQLFunction::bag_remove_keys}, + {"jaccard_index", KQLFunction::jaccard_index}, + {"pack", KQLFunction::pack}, + {"pack_all", KQLFunction::pack_all}, + {"pack_array", KQLFunction::pack_array}, + {"repeat", KQLFunction::repeat}, + {"set_difference", KQLFunction::set_difference}, + {"set_has_element", KQLFunction::set_has_element}, + {"set_intersect", KQLFunction::set_intersect}, + {"set_union", KQLFunction::set_union}, + {"treepath", KQLFunction::treepath}, + {"zip", KQLFunction::zip}, + + {"tobool", KQLFunction::tobool}, + {"toboolean", KQLFunction::tobool}, + {"todouble", KQLFunction::todouble}, + {"toint", KQLFunction::toint}, + {"tolong", KQLFunction::tolong}, + {"toreal", KQLFunction::todouble}, + {"toscalar", KQLFunction::toscalar}, + {"tostring", KQLFunction::tostring}, + {"totimespan", KQLFunction::totimespan}, + {"todecimal", KQLFunction::todecimal}, + + {"arg_max", KQLFunction::arg_max}, + {"arg_min", KQLFunction::arg_min}, + {"avg", KQLFunction::avg}, + {"avgif", KQLFunction::avgif}, + {"binary_all_and", KQLFunction::binary_all_and}, + {"binary_all_or", KQLFunction::binary_all_or}, + {"binary_all_xor", KQLFunction::binary_all_xor}, + {"buildschema", KQLFunction::buildschema}, + {"count", KQLFunction::count}, + {"countif", KQLFunction::countif}, + {"dcount", KQLFunction::dcount}, + {"dcountif", KQLFunction::dcountif}, + {"dcount_hll", KQLFunction::dcount_hll}, + {"hll", KQLFunction::hll}, + {"hll_if", KQLFunction::hll_if}, + {"hll_merge", KQLFunction::hll_merge}, + {"make_bag", KQLFunction::make_bag}, + {"make_bag_if", KQLFunction::make_bag_if}, + {"make_list", KQLFunction::make_list}, + 
{"make_list_if", KQLFunction::make_list_if}, + {"make_list_with_nulls", KQLFunction::make_list_with_nulls}, + {"make_set", KQLFunction::make_set}, + {"make_set_if", KQLFunction::make_set_if}, + {"max", KQLFunction::max}, + {"maxif", KQLFunction::maxif}, + {"min", KQLFunction::min}, + {"minif", KQLFunction::minif}, + {"percentile", KQLFunction::percentile}, + {"percentilew", KQLFunction::percentilew}, + {"percentiles", KQLFunction::percentiles}, + {"percentiles_array", KQLFunction::percentiles_array}, + {"percentilesw", KQLFunction::percentilesw}, + {"percentilesw_array", KQLFunction::percentilesw_array}, + {"stdev", KQLFunction::stdev}, + {"stdevif", KQLFunction::stdevif}, + {"sum", KQLFunction::sum}, + {"sumif", KQLFunction::sumif}, + {"take_any", KQLFunction::take_any}, + {"take_anyif", KQLFunction::take_anyif}, + {"variance", KQLFunction::variance}, + {"varianceif", KQLFunction::varianceif}, + {"variancep", KQLFunction::variancep}, + {"count_distinct", KQLFunction::count_distinct}, + {"count_distinctif", KQLFunction::count_distinctif}, + + {"series_fir", KQLFunction::series_fir}, + {"series_iir", KQLFunction::series_iir}, + {"series_fit_line", KQLFunction::series_fit_line}, + {"series_fit_line_dynamic", KQLFunction::series_fit_line_dynamic}, + {"series_fit_2lines", KQLFunction::series_fit_2lines}, + {"series_fit_2lines_dynamic", KQLFunction::series_fit_2lines_dynamic}, + {"series_outliers", KQLFunction::series_outliers}, + {"series_periods_detect", KQLFunction::series_periods_detect}, + {"series_periods_validate", KQLFunction::series_periods_validate}, + {"series_stats_dynamic", KQLFunction::series_stats_dynamic}, + {"series_stats", KQLFunction::series_stats}, + {"series_fill_backward", KQLFunction::series_fill_backward}, + {"series_fill_const", KQLFunction::series_fill_const}, + {"series_fill_forward", KQLFunction::series_fill_forward}, + {"series_fill_linear", KQLFunction::series_fill_linear}, + + {"ipv4_compare", KQLFunction::ipv4_compare}, + 
{"ipv4_is_in_range", KQLFunction::ipv4_is_in_range}, + {"ipv4_is_match", KQLFunction::ipv4_is_match}, + {"ipv4_is_private", KQLFunction::ipv4_is_private}, + {"ipv4_netmask_suffix", KQLFunction::ipv4_netmask_suffix}, + {"parse_ipv4", KQLFunction::parse_ipv4}, + {"parse_ipv4_mask", KQLFunction::parse_ipv4_mask}, + {"ipv6_compare", KQLFunction::ipv6_compare}, + {"ipv6_is_match", KQLFunction::ipv6_is_match}, + {"parse_ipv6", KQLFunction::parse_ipv6}, + {"parse_ipv6_mask", KQLFunction::parse_ipv6_mask}, + {"format_ipv4", KQLFunction::format_ipv4}, + {"format_ipv4_mask", KQLFunction::format_ipv4_mask}, + {"has_ipv4", KQLFunction::has_ipv4}, + {"has_any_ipv4", KQLFunction::has_any_ipv4}, + {"has_ipv4_prefix", KQLFunction::has_ipv4_prefix}, + {"has_any_ipv4_prefix", KQLFunction::has_any_ipv4_prefix}, + {"has_ipv6", KQLFunction::has_ipv6}, + {"has_any_ipv6", KQLFunction::has_any_ipv6}, + {"has_ipv6_prefix", KQLFunction::has_ipv6_prefix}, + {"has_any_ipv6_prefix", KQLFunction::has_any_ipv6_prefix}, + + {"binary_and", KQLFunction::binary_and}, + {"binary_not", KQLFunction::binary_not}, + {"binary_or", KQLFunction::binary_or}, + {"binary_shift_left", KQLFunction::binary_shift_left}, + {"binary_shift_right", KQLFunction::binary_shift_right}, + {"binary_xor", KQLFunction::binary_xor}, + {"bitset_count_ones", KQLFunction::bitset_count_ones}, + + {"bin", KQLFunction::bin}, + {"floor", KQLFunction::bin}, + {"bin_at", KQLFunction::bin_at}, + {"case", KQLFunction::kase}, + {"iff", KQLFunction::iff}, + {"iif", KQLFunction::iif}, + {"lookup", KQLFunction::lookup}, + {"gettype", KQLFunction::gettype}, + {"not", KQLFunction::not_}, + + {"bool", KQLFunction::datatype_bool}, + {"boolean", KQLFunction::datatype_bool}, + {"datetime", KQLFunction::datatype_datetime}, + {"date", KQLFunction::datatype_datetime}, + {"dynamic", KQLFunction::datatype_dynamic}, + {"guid", KQLFunction::datatype_guid}, + {"int", KQLFunction::datatype_int}, + {"long", KQLFunction::datatype_long}, + {"real", 
KQLFunction::datatype_real}, + {"double", KQLFunction::datatype_real}, + {"timespan", KQLFunction::datatype_timespan}, + {"time", KQLFunction::datatype_timespan}, + {"decimal", KQLFunction::datatype_decimal}, + {"range", KQLFunction::range}, + + {"abs", KQLFunction::abs}, + {"acos", KQLFunction::acos}, + {"asin", KQLFunction::asin}, + {"atan", KQLFunction::atan}, + {"atan2", KQLFunction::atan2}, + {"ceiling", KQLFunction::ceiling}, + {"cos", KQLFunction::cos}, + {"cot", KQLFunction::cot}, + {"degrees", KQLFunction::degrees}, + {"exp", KQLFunction::exp}, + {"exp2", KQLFunction::exp2}, + {"exp10", KQLFunction::exp10}, + {"gamma", KQLFunction::gamma}, + {"isfinite", KQLFunction::isfinite}, + {"isinf", KQLFunction::isinf}, + {"log", KQLFunction::log}, + {"log2", KQLFunction::log2}, + {"log10", KQLFunction::log10}, + {"loggamma", KQLFunction::loggamma}, + {"max_of", KQLFunction::max_of}, + {"min_of", KQLFunction::min_of}, + {"pi", KQLFunction::pi}, + {"pow", KQLFunction::pow}, + {"radians", KQLFunction::radians}, + {"rand", KQLFunction::rand}, + {"round", KQLFunction::round}, + {"sign", KQLFunction::sign}, + {"sin", KQLFunction::sin}, + {"sqrt", KQLFunction::sqrt}, + {"tan", KQLFunction::tan}, + + {"hash", KQLFunction::hash}, + {"hash_sha256", KQLFunction::hash_sha256}}; +} + +namespace DB +{ +std::unique_ptr KQLFunctionFactory::get(const String & kql_function) +{ + const auto kql_function_it = KQL_FUNCTIONS.find(kql_function); + if (kql_function_it == KQL_FUNCTIONS.end()) + return nullptr; + + const auto & kql_function_id = kql_function_it->second; + switch (kql_function_id) + { + case KQLFunction::none: + return nullptr; + + case KQLFunction::ago: + return std::make_unique(); + + case KQLFunction::datetime_add: + return std::make_unique(); + + case KQLFunction::datetime_part: + return std::make_unique(); + + case KQLFunction::datetime_diff: + return std::make_unique(); + + case KQLFunction::dayofmonth: + return std::make_unique(); + + case KQLFunction::dayofweek: + 
return std::make_unique(); + + case KQLFunction::dayofyear: + return std::make_unique(); + + case KQLFunction::endofday: + return std::make_unique(); + + case KQLFunction::endofweek: + return std::make_unique(); + + case KQLFunction::endofyear: + return std::make_unique(); + + case KQLFunction::endofmonth: + return std::make_unique(); + + case KQLFunction::monthofyear: + return std::make_unique(); + + case KQLFunction::format_datetime: + return std::make_unique(); + + case KQLFunction::format_timespan: + return std::make_unique(); + + case KQLFunction::getmonth: + return std::make_unique(); + + case KQLFunction::getyear: + return std::make_unique(); + + case KQLFunction::hourofday: + return std::make_unique(); + + case KQLFunction::make_timespan: + return std::make_unique(); + + case KQLFunction::make_datetime: + return std::make_unique(); + + case KQLFunction::now: + return std::make_unique(); + + case KQLFunction::startofday: + return std::make_unique(); + + case KQLFunction::startofmonth: + return std::make_unique(); + + case KQLFunction::startofweek: + return std::make_unique(); + + case KQLFunction::startofyear: + return std::make_unique(); + + case KQLFunction::unixtime_microseconds_todatetime: + return std::make_unique(); + + case KQLFunction::unixtime_milliseconds_todatetime: + return std::make_unique(); + + case KQLFunction::unixtime_nanoseconds_todatetime: + return std::make_unique(); + + case KQLFunction::unixtime_seconds_todatetime: + return std::make_unique(); + + case KQLFunction::week_of_year: + return std::make_unique(); + + case KQLFunction::base64_encode_tostring: + return std::make_unique(); + + case KQLFunction::base64_encode_fromguid: + return std::make_unique(); + + case KQLFunction::base64_decode_tostring: + return std::make_unique(); + + case KQLFunction::base64_decode_toarray: + return std::make_unique(); + + case KQLFunction::base64_decode_toguid: + return std::make_unique(); + + case KQLFunction::countof: + return std::make_unique(); + + 
case KQLFunction::extract: + return std::make_unique(); + + case KQLFunction::extract_all: + return std::make_unique(); + + case KQLFunction::extract_json: + return std::make_unique(); + + case KQLFunction::has_any_index: + return std::make_unique(); + + case KQLFunction::indexof: + return std::make_unique(); + + case KQLFunction::indexof_regex: + return std::make_unique(); + + case KQLFunction::isascii: + return std::make_unique(); + + case KQLFunction::isempty: + return std::make_unique(); + + case KQLFunction::isnan: + return std::make_unique(); + + case KQLFunction::isnotempty: + return std::make_unique(); + + case KQLFunction::isnotnull: + return std::make_unique(); + + case KQLFunction::isutf8: + return std::make_unique(); + + case KQLFunction::isnull: + return std::make_unique(); + + case KQLFunction::make_string: + return std::make_unique(); + + case KQLFunction::new_guid: + return std::make_unique(); + + case KQLFunction::parse_command_line: + return std::make_unique(); + + case KQLFunction::parse_csv: + return std::make_unique(); + + case KQLFunction::parse_json: + return std::make_unique(); + + case KQLFunction::parse_url: + return std::make_unique(); + + case KQLFunction::parse_urlquery: + return std::make_unique(); + + case KQLFunction::parse_version: + return std::make_unique(); + + case KQLFunction::replace_regex: + return std::make_unique(); + + case KQLFunction::reverse: + return std::make_unique(); + + case KQLFunction::split: + return std::make_unique(); + + case KQLFunction::strcat: + return std::make_unique(); + + case KQLFunction::strcat_delim: + return std::make_unique(); + + case KQLFunction::strcmp: + return std::make_unique(); + + case KQLFunction::string_size: + return std::make_unique(); + + case KQLFunction::strlen: + return std::make_unique(); + + case KQLFunction::strrep: + return std::make_unique(); + + case KQLFunction::substring: + return std::make_unique(); + + case KQLFunction::tolower: + return std::make_unique(); + + case 
KQLFunction::toupper: + return std::make_unique(); + + case KQLFunction::to_utf8: + return std::make_unique(); + + case KQLFunction::translate: + return std::make_unique(); + + case KQLFunction::trim: + return std::make_unique(); + + case KQLFunction::trim_end: + return std::make_unique(); + + case KQLFunction::trim_start: + return std::make_unique(); + + case KQLFunction::url_decode: + return std::make_unique(); + + case KQLFunction::url_encode: + return std::make_unique(); + + case KQLFunction::array_concat: + return std::make_unique(); + + case KQLFunction::array_iif: + return std::make_unique(); + + case KQLFunction::array_index_of: + return std::make_unique(); + + case KQLFunction::array_length: + return std::make_unique(); + + case KQLFunction::array_reverse: + return std::make_unique(); + + case KQLFunction::array_rotate_left: + return std::make_unique(); + + case KQLFunction::array_rotate_right: + return std::make_unique(); + + case KQLFunction::array_shift_left: + return std::make_unique(); + + case KQLFunction::array_shift_right: + return std::make_unique(); + + case KQLFunction::array_slice: + return std::make_unique(); + + case KQLFunction::array_sort_asc: + return std::make_unique(); + + case KQLFunction::array_sort_desc: + return std::make_unique(); + + case KQLFunction::array_split: + return std::make_unique(); + + case KQLFunction::array_sum: + return std::make_unique(); + + case KQLFunction::bag_keys: + return std::make_unique(); + + case KQLFunction::bag_merge: + return std::make_unique(); + + case KQLFunction::bag_remove_keys: + return std::make_unique(); + + case KQLFunction::jaccard_index: + return std::make_unique(); + + case KQLFunction::pack: + return std::make_unique(); + + case KQLFunction::pack_all: + return std::make_unique(); + + case KQLFunction::pack_array: + return std::make_unique(); + + case KQLFunction::repeat: + return std::make_unique(); + + case KQLFunction::set_difference: + return std::make_unique(); + + case 
KQLFunction::set_has_element: + return std::make_unique(); + + case KQLFunction::set_intersect: + return std::make_unique(); + + case KQLFunction::set_union: + return std::make_unique(); + + case KQLFunction::treepath: + return std::make_unique(); + + case KQLFunction::zip: + return std::make_unique(); + + case KQLFunction::tobool: + return std::make_unique(); + + case KQLFunction::todatetime: + return std::make_unique(); + + case KQLFunction::todouble: + return std::make_unique(); + + case KQLFunction::toint: + return std::make_unique(); + + case KQLFunction::tolong: + return std::make_unique(); + + case KQLFunction::toscalar: + return std::make_unique(); + + case KQLFunction::tostring: + return std::make_unique(); + + case KQLFunction::totimespan: + return std::make_unique(); + + case KQLFunction::todecimal: + return std::make_unique(); + + case KQLFunction::arg_max: + return std::make_unique(); + + case KQLFunction::arg_min: + return std::make_unique(); + + case KQLFunction::avg: + return std::make_unique(); + + case KQLFunction::avgif: + return std::make_unique(); + + case KQLFunction::binary_all_and: + return std::make_unique(); + + case KQLFunction::binary_all_or: + return std::make_unique(); + + case KQLFunction::binary_all_xor: + return std::make_unique(); + + case KQLFunction::buildschema: + return std::make_unique(); + + case KQLFunction::count: + return std::make_unique(); + + case KQLFunction::countif: + return std::make_unique(); + + case KQLFunction::dcount: + return std::make_unique(); + + case KQLFunction::dcountif: + return std::make_unique(); + + case KQLFunction::dcount_hll: + return std::make_unique(); + + case KQLFunction::hll: + return std::make_unique(); + + case KQLFunction::hll_if: + return std::make_unique(); + + case KQLFunction::hll_merge: + return std::make_unique(); + + case KQLFunction::make_bag: + return std::make_unique(); + + case KQLFunction::make_bag_if: + return std::make_unique(); + + case KQLFunction::make_list: + return 
std::make_unique(); + + case KQLFunction::make_list_if: + return std::make_unique(); + + case KQLFunction::make_list_with_nulls: + return std::make_unique(); + + case KQLFunction::make_set: + return std::make_unique(); + + case KQLFunction::make_set_if: + return std::make_unique(); + + case KQLFunction::max: + return std::make_unique(); + + case KQLFunction::maxif: + return std::make_unique(); + + case KQLFunction::min: + return std::make_unique(); + + case KQLFunction::minif: + return std::make_unique(); + + case KQLFunction::percentile: + return std::make_unique(); + + case KQLFunction::percentilew: + return std::make_unique(); + + case KQLFunction::percentiles: + return std::make_unique(); + + case KQLFunction::percentiles_array: + return std::make_unique(); + + case KQLFunction::percentilesw: + return std::make_unique(); + + case KQLFunction::percentilesw_array: + return std::make_unique(); + + case KQLFunction::stdev: + return std::make_unique(); + + case KQLFunction::stdevif: + return std::make_unique(); + + case KQLFunction::sum: + return std::make_unique(); + + case KQLFunction::sumif: + return std::make_unique(); + + case KQLFunction::take_any: + return std::make_unique(); + + case KQLFunction::take_anyif: + return std::make_unique(); + + case KQLFunction::variance: + return std::make_unique(); + + case KQLFunction::varianceif: + return std::make_unique(); + + case KQLFunction::variancep: + return std::make_unique(); + + case KQLFunction::count_distinct: + return std::make_unique(); + + case KQLFunction::count_distinctif: + return std::make_unique(); + + case KQLFunction::series_fir: + return std::make_unique(); + + case KQLFunction::series_iir: + return std::make_unique(); + + case KQLFunction::series_fit_line: + return std::make_unique(); + + case KQLFunction::series_fit_line_dynamic: + return std::make_unique(); + + case KQLFunction::series_fit_2lines: + return std::make_unique(); + + case KQLFunction::series_fit_2lines_dynamic: + return 
std::make_unique(); + + case KQLFunction::series_outliers: + return std::make_unique(); + + case KQLFunction::series_periods_detect: + return std::make_unique(); + + case KQLFunction::series_periods_validate: + return std::make_unique(); + + case KQLFunction::series_stats_dynamic: + return std::make_unique(); + + case KQLFunction::series_stats: + return std::make_unique(); + + case KQLFunction::series_fill_backward: + return std::make_unique(); + + case KQLFunction::series_fill_const: + return std::make_unique(); + + case KQLFunction::series_fill_forward: + return std::make_unique(); + + case KQLFunction::series_fill_linear: + return std::make_unique(); + + case KQLFunction::ipv4_compare: + return std::make_unique(); + + case KQLFunction::ipv4_is_in_range: + return std::make_unique(); + + case KQLFunction::ipv4_is_match: + return std::make_unique(); + + case KQLFunction::ipv4_is_private: + return std::make_unique(); + + case KQLFunction::ipv4_netmask_suffix: + return std::make_unique(); + + case KQLFunction::parse_ipv4: + return std::make_unique(); + + case KQLFunction::parse_ipv4_mask: + return std::make_unique(); + + case KQLFunction::has_ipv6: + return std::make_unique(); + + case KQLFunction::has_any_ipv6: + return std::make_unique(); + + case KQLFunction::has_ipv6_prefix: + return std::make_unique(); + + case KQLFunction::has_any_ipv6_prefix: + return std::make_unique(); + + case KQLFunction::ipv6_compare: + return std::make_unique(); + + case KQLFunction::ipv6_is_match: + return std::make_unique(); + + case KQLFunction::parse_ipv6: + return std::make_unique(); + + case KQLFunction::parse_ipv6_mask: + return std::make_unique(); + + case KQLFunction::format_ipv4: + return std::make_unique(); + + case KQLFunction::format_ipv4_mask: + return std::make_unique(); + + case KQLFunction::has_ipv4: + return std::make_unique(); + + case KQLFunction::has_any_ipv4: + return std::make_unique(); + + case KQLFunction::has_ipv4_prefix: + return std::make_unique(); + + case 
KQLFunction::has_any_ipv4_prefix: + return std::make_unique(); + + case KQLFunction::binary_and: + return std::make_unique(); + + case KQLFunction::binary_not: + return std::make_unique(); + + case KQLFunction::binary_or: + return std::make_unique(); + + case KQLFunction::binary_shift_left: + return std::make_unique(); + + case KQLFunction::binary_shift_right: + return std::make_unique(); + + case KQLFunction::binary_xor: + return std::make_unique(); + + case KQLFunction::bitset_count_ones: + return std::make_unique(); + + case KQLFunction::bin: + return std::make_unique(); + + case KQLFunction::bin_at: + return std::make_unique(); + + case KQLFunction::kase: + return std::make_unique(); + + case KQLFunction::iff: + return std::make_unique(); + + case KQLFunction::iif: + return std::make_unique(); + + case KQLFunction::lookup: + return std::make_unique(); + + case KQLFunction::gettype: + return std::make_unique(); + + case KQLFunction::not_: + return std::make_unique(); + + case KQLFunction::datatype_bool: + return std::make_unique(); + + case KQLFunction::datatype_datetime: + return std::make_unique(); + + case KQLFunction::datatype_dynamic: + return std::make_unique(); + + case KQLFunction::datatype_guid: + return std::make_unique(); + + case KQLFunction::datatype_int: + return std::make_unique(); + + case KQLFunction::datatype_long: + return std::make_unique(); + + case KQLFunction::datatype_real: + return std::make_unique(); + + case KQLFunction::datatype_timespan: + return std::make_unique(); + + case KQLFunction::datatype_decimal: + return std::make_unique(); + + case KQLFunction::range: + return std::make_unique(); + + case KQLFunction::abs: + return std::make_unique(); + + case KQLFunction::acos: + return std::make_unique(); + + case KQLFunction::asin: + return std::make_unique(); + + case KQLFunction::atan: + return std::make_unique(); + + case KQLFunction::atan2: + return std::make_unique(); + + case KQLFunction::cos: + return std::make_unique(); + + case 
KQLFunction::cot: + return std::make_unique(); + + case KQLFunction::ceiling: + return std::make_unique(); + + case KQLFunction::degrees: + return std::make_unique(); + + case KQLFunction::exp: + return std::make_unique(); + + case KQLFunction::exp2: + return std::make_unique(); + + case KQLFunction::exp10: + return std::make_unique(); + + case KQLFunction::gamma: + return std::make_unique(); + + case KQLFunction::isfinite: + return std::make_unique(); + + case KQLFunction::isinf: + return std::make_unique(); + + case KQLFunction::log: + return std::make_unique(); + + case KQLFunction::log2: + return std::make_unique(); + + case KQLFunction::log10: + return std::make_unique(); + + case KQLFunction::loggamma: + return std::make_unique(); + + case KQLFunction::max_of: + return std::make_unique(); + + case KQLFunction::min_of: + return std::make_unique(); + + case KQLFunction::pi: + return std::make_unique(); + + case KQLFunction::pow: + return std::make_unique(); + + case KQLFunction::radians: + return std::make_unique(); + + case KQLFunction::rand: + return std::make_unique(); + + case KQLFunction::round: + return std::make_unique(); + + case KQLFunction::sign: + return std::make_unique(); + + case KQLFunction::sin: + return std::make_unique(); + + case KQLFunction::sqrt: + return std::make_unique(); + + case KQLFunction::tan: + return std::make_unique(); + + case KQLFunction::hash: + return std::make_unique(); + + case KQLFunction::hash_sha256: + return std::make_unique(); + } +} +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h new file mode 100644 index 000000000000..f9aea3b57987 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLFunctionFactory.h @@ -0,0 +1,12 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class KQLFunctionFactory +{ +public: + static std::unique_ptr get(const String & kql_function); +}; +} diff --git 
a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp new file mode 100644 index 000000000000..20118e52418b --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.cpp @@ -0,0 +1,112 @@ +#include "KQLGeneralFunctions.h" + +#include +#include +#include +#include + +#include + +namespace DB::ErrorCodes +{ +extern const int SYNTAX_ERROR; +} + +namespace DB +{ +bool Bin::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_bin"); +} + +bool BinAt::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_bin_at"); +} + +bool Case::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "multiIf"); +} + +bool Iff::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "If"); +} + +bool Iif::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "If"); +} + +bool Lookup::convertImpl(String & out, IParser::Pos & pos) +{ + auto temp_pos = pos; + const String fn_name = getKQLFunctionName(temp_pos); + + if (fn_name.empty()) + return false; + int num_of_args = 0; + temp_pos = pos; + ++temp_pos; + ++temp_pos; + + String arg; + + while (!temp_pos->isEnd() && temp_pos->type != TokenType::PipeMark && temp_pos->type != TokenType::Semicolon) + { + arg = getConvertedArgument(fn_name, temp_pos); + ++num_of_args; + if (temp_pos->type == TokenType::ClosingRoundBracket) + break; + ++temp_pos; + } + if (num_of_args == 3) + return directMapping(out, pos, "dictGet"); + else if (num_of_args == 4) + return directMapping(out, pos, "dictGetOrDefault"); + else + throw Exception(ErrorCodes::SYNTAX_ERROR, "number of arguments do not match in function: {}", fn_name); +} + +bool GetType::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_gettype"); +} + +bool ToScalar::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name 
= getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + Expected expected; + ASTPtr subquery; + try + { + subquery = std::make_shared(); + if (!ParserKQLPrint().parse(pos, subquery, expected)) + subquery.reset(); + } + catch (...) + { + subquery.reset(); + } + + if (KQLContext kql_context; !subquery && !ParserKQLTableFunction(kql_context).parse(pos, subquery, expected)) + return false; + + --pos; + WriteBufferFromOwnString write_buffer; + subquery->format(IAST::FormatSettings(write_buffer, true)); + + out = std::format("(select tuple(*) from ({}) limit 1).1", write_buffer.stringView()); + return true; +} + +bool Not::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_not"); +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h new file mode 100644 index 000000000000..3d171e82ab99 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLGeneralFunctions.h @@ -0,0 +1,68 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class Bin : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bin()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class BinAt : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "bin_at()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Case : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "case()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Iff : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "iff()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; +class Iif : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "iif()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; 
+}; + +class Lookup : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "lookup()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class GetType : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "gettype()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ToScalar : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "toscalar()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Not : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "not()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLHashFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLHashFunctions.cpp new file mode 100644 index 000000000000..ef2878279efa --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLHashFunctions.cpp @@ -0,0 +1,23 @@ +#include "KQLHashFunctions.h" + +#include + +namespace DB +{ +bool Hash::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_hash"); +} + +bool HashSha256::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto arg = getArgument(function_name, pos, ArgumentState::Raw); + out = "lower(hex(SHA256(NULLIF(" + kqlCallToExpression("tostring", {arg}, pos.max_depth) + ",''))))"; + + return true; +} +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLHashFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLHashFunctions.h new file mode 100644 index 000000000000..7135b584c0f3 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLHashFunctions.h @@ -0,0 +1,21 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class Hash : public IParserKQLFunction +{ +protected: + const char * getName() const override { 
return "hash()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HashSha256 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "hash_sha256()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp new file mode 100644 index 000000000000..5846d26ce516 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -0,0 +1,302 @@ +#include "KQLIPFunctions.h" + +#include + +#include + +namespace DB +{ +bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto rhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Parsed); + out = std::format( + "if(isNull({0} as lhs_ip_{5}) or isNull({1} as lhs_mask_{5}) " + "or isNull({2} as rhs_ip_{5}) or isNull({3} as rhs_mask_{5}), null, " + "sign(toInt64(tupleElement(IPv4CIDRToRange(assumeNotNull(lhs_ip_{5}), " + "toUInt8(min2({4}, min2(assumeNotNull(lhs_mask_{5}), assumeNotNull(rhs_mask_{5})))) as mask_{5}), 1)::UInt32)" + " - toInt64(tupleElement(IPv4CIDRToRange(assumeNotNull(rhs_ip_{5}), mask_{5}), 1)::UInt32)))", + kqlCallToExpression("parse_ipv4", {lhs}, pos.max_depth), + kqlCallToExpression("ipv4_netmask_suffix", {lhs}, pos.max_depth), + kqlCallToExpression("parse_ipv4", {rhs}, pos.max_depth), + kqlCallToExpression("ipv4_netmask_suffix", {rhs}, pos.max_depth), + mask.value_or("32"), + generateUniqueIdentifier()); + return true; +} + +bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = 
getArgument(function_name, pos, ArgumentState::Parsed); + const auto ip_range = getArgument(function_name, pos, ArgumentState::Raw); + out = std::format( + "if(isNull(IPv4StringToNumOrNull({0}) as ip_{3}) " + "or isNull({1} as range_start_ip_{3}) or isNull({2} as range_mask_{3}), null, " + "bitXor(range_start_ip_{3}, bitAnd(ip_{3}, bitNot(toUInt32(intExp2(32 - range_mask_{3}) - 1)))) = 0)", + ip_address, + kqlCallToExpression("parse_ipv4", {ip_range}, pos.max_depth), + kqlCallToExpression("ipv4_netmask_suffix", {ip_range}, pos.max_depth), + generateUniqueIdentifier()); + return true; +} + +bool Ipv4IsMatch::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto rhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Raw); + out = std::format("equals({}, 0)", kqlCallToExpression("ipv4_compare", {lhs, rhs, mask.value_or("32")}, pos.max_depth)); + return true; +} + +bool Ipv4IsPrivate::convertImpl(String & out, IParser::Pos & pos) +{ + static const std::array s_private_subnets{"10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16"}; + + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + const auto unique_identifier = generateUniqueIdentifier(); + + out += std::format( + "multiIf(length(splitByChar('/', {0}) as tokens_{1}) > 2 or isNull(toIPv4OrNull(tokens_{1}[1]) as nullable_ip_{1}) " + "or length(tokens_{1}) = 2 and isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, " + "ignore(assumeNotNull(nullable_ip_{1}) as ip_{1}, " + "IPv4CIDRToRange(ip_{1}, assumeNotNull(mask_{1})) as range_{1}, IPv4NumToString(tupleElement(range_{1}, 1)) as begin_{1}, " + "IPv4NumToString(tupleElement(range_{1}, 2)) as end_{1}), 
null, ", + ip_address, + unique_identifier); + for (int i = 0; i < std::ssize(s_private_subnets); ++i) + { + if (i > 0) + out += " or "; + + const auto & subnet = s_private_subnets[i]; + out += std::format( + "length(tokens_{1}) = 1 and isIPAddressInRange(IPv4NumToString(ip_{1}), '{0}') or " + "length(tokens_{1}) = 2 and isIPAddressInRange(begin_{1}, '{0}') and isIPAddressInRange(end_{1}, '{0}')", + subnet, + unique_identifier); + } + + out.push_back(')'); + return true; +} + +bool Ipv4NetmaskSuffix::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_range = getArgument(function_name, pos); + out = std::format( + "multiIf(length(splitByChar('/', {0}) as tokens_{1}) > 2 or not isIPv4String(tokens_{1}[1]), null, " + "length(tokens_{1}) = 1, 32, isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, toUInt8(min2(mask_{1}, 32)))", + ip_range, + generateUniqueIdentifier()); + return true; +} + +bool ParseIpv4::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + out = std::format( + "multiIf(length(splitByChar('/', {0}) as tokens_{1}) = 1, IPv4StringToNumOrNull(tokens_{1}[1]) as ip_{1}, " + "length(tokens_{1}) = 2 and isNotNull(ip_{1}) and isNotNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), " + "tupleElement(IPv4CIDRToRange(assumeNotNull(ip_{1}), assumeNotNull(mask_{1})), 1)::UInt32, null)", + ip_address, + generateUniqueIdentifier()); + return true; +} + +bool ParseIpv4Mask::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + const auto mask = getArgument(function_name, pos); + out = std::format( + "if(isNull(toIPv4OrNull({0}) as ip_{2}) or 
isNull(toUInt8OrNull(toString({1})) as mask_{2}), null, " + "toUInt32(tupleElement(IPv4CIDRToRange(assumeNotNull(ip_{2}), arrayMax([0, arrayMin([32, assumeNotNull(mask_{2})])])), 1)))", + ip_address, + mask, + generateUniqueIdentifier()); + return true; +} + +bool Ipv6Compare::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos); + const auto rhs = getArgument(function_name, pos); + const auto mask = getOptionalArgument(function_name, pos); + const auto calculated_mask = mask.value_or("128"); + out = std::format( + "if(length(splitByChar('/', {1}) as lhs_tokens_{0}) > 2 or length(splitByChar('/', {2}) as rhs_tokens_{0}) > 2 " + "or isNull(IPv6StringToNumOrNull(lhs_tokens_{0}[1]) as lhs_ipv6_{0}) or length(lhs_tokens_{0}) = 2 " + "and isNull((if(isIPv4String(lhs_tokens_{0}[1]), 96, 0) + toUInt8OrNull(lhs_tokens_{0}[-1])) as lhs_suffix_{0}) " + "or isNull(IPv6StringToNumOrNull(rhs_tokens_{0}[1]) as rhs_ipv6_{0}) or length(rhs_tokens_{0}) = 2 " + "and isNull((if(isIPv4String(rhs_tokens_{0}[1]), 96, 0) + toUInt8OrNull(rhs_tokens_{0}[-1])) as rhs_suffix_{0}) " + "or isNull(toUInt8(min2({3}, min2(ifNull(lhs_suffix_{0}, 128), ifNull(rhs_suffix_{0}, 128)))) as suffix_{0}) " + "or isNull(bitShiftLeft(bitShiftRight(bitNot(reinterpretAsFixedString(0::UInt128)), (128 - suffix_{0}) as zeroes_{0}), " + "zeroes_{0}) as mask_{0}) or isNull(bitAnd(lhs_ipv6_{0}, mask_{0}) as lhs_base_{0}) " + "or isNull(bitAnd(rhs_ipv6_{0}, mask_{0}) as rhs_base_{0}), null, " + "multiIf(lhs_base_{0} < rhs_base_{0}, -1, lhs_base_{0} > rhs_base_{0}, 1, 0))", + generateUniqueIdentifier(), + lhs, + rhs, + calculated_mask); + return true; +} + +bool Ipv6IsMatch::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto lhs = getArgument(function_name, pos, 
ArgumentState::Raw); + const auto rhs = getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Raw); + out = std::format("equals({}, 0)", kqlCallToExpression("ipv6_compare", {lhs, rhs, mask.value_or("128")}, pos.max_depth)); + return true; +} + +bool ParseIpv6::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos); + out = std::format( + "if(length(splitByChar('/', {0}) as tokens_{1}) > 2 or isNull(IPv6StringToNumOrNull(tokens_{1}[1]) as ip_{1}) " + "or length(tokens_{1}) = 2 and isNull(toUInt8OrNull(tokens_{1}[-1]) as mask_{1}), null, " + "arrayStringConcat(flatten(extractAllGroups(lower(hex(tupleElement(IPv6CIDRToRange(assumeNotNull(ip_{1}), toUInt8(ifNull(mask_{1} " + "+ if(isIPv4String(tokens_{1}[1]), 96, 0), 128))), 1))), '([\\da-f]{{4}})')), ':'))", + ip_address, + generateUniqueIdentifier()); + return true; +} + +bool ParseIpv6Mask::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getArgument(function_name, pos, ArgumentState::Raw); + const auto unique_identifier = generateUniqueIdentifier(); + out = std::format( + "if(empty({0} as ipv4_{3}), {1}, {2})", + kqlCallToExpression("format_ipv4", {"trim_start('::', " + ip_address + ")", mask + " - 96"}, pos.max_depth), + kqlCallToExpression("parse_ipv6", {"strcat(tostring(parse_ipv6(" + ip_address + ")), '/', tostring(" + mask + "))"}, pos.max_depth), + kqlCallToExpression("parse_ipv6", {"ipv4_" + unique_identifier}, pos.max_depth), + unique_identifier); + return true; +} + +bool FormatIpv4::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + 
if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Parsed); + out = std::format( + "ifNull(if(isNotNull(toUInt32OrNull(toString({0})) as param_as_uint32_{3}) and toTypeName({0}) = 'String' or ({1}) < 0 " + "or isNull(ifNull(param_as_uint32_{3}, {2}) as ip_as_number_{3}), null, " + "IPv4NumToString(bitAnd(ip_as_number_{3}, bitNot(toUInt32(intExp2(32 - ({1})) - 1))))), '')", + ParserKQLBase::getExprFromToken(ip_address, pos.max_depth), + mask.value_or("32"), + kqlCallToExpression("parse_ipv4", {"tostring(" + ip_address + ")"}, pos.max_depth), + generateUniqueIdentifier()); + return true; +} + +bool FormatIpv4Mask::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto ip_address = getArgument(function_name, pos, ArgumentState::Raw); + const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Raw); + const auto calculated_mask = mask.value_or("32"); + out = std::format( + "if(empty({1} as formatted_ip_{2}) or position(toTypeName({0}), 'Int') = 0 or not {0} between 0 and 32, '', " + "concat(formatted_ip_{2}, '/', toString(toInt64(min2({0}, ifNull({3} as suffix_{2}, 32))))))", + ParserKQLBase::getExprFromToken(calculated_mask, pos.max_depth), + kqlCallToExpression("format_ipv4", {ip_address, calculated_mask}, pos.max_depth), + generateUniqueIdentifier(), + kqlCallToExpression("ipv4_netmask_suffix", {"tostring(" + ip_address + ")"}, pos.max_depth)); + return true; +} + +bool HasIpv4::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_has_ipv4"); +} + +bool HasAnyIpv4::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_has_any_ipv4"); +} + +bool HasIpv4Prefix::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, 
pos, "kql_has_ipv4_prefix"); +} + +bool HasAnyIpv4Prefix::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_has_any_ipv4_prefix"); +} + +bool HasIpv6::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_has_ipv6"); +} + +bool HasAnyIpv6::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_has_any_ipv6"); +} + +bool HasIpv6Prefix::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_has_ipv6_prefix"); +} + +bool HasAnyIpv6Prefix::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "kql_has_any_ipv6_prefix"); +} +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h new file mode 100644 index 000000000000..9f1b4a1ec9d9 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.h @@ -0,0 +1,154 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class Ipv4Compare : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_compare()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv4IsInRange : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_is_in_range()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv4IsMatch : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_is_match()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv4IsPrivate : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_is_private()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv4NetmaskSuffix : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv4_netmask_suffix()"; } + bool convertImpl(String & 
out, IParser::Pos & pos) override; +}; + +class ParseIpv4 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv4()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ParseIpv4Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv4_mask()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv6Compare : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv6_compare()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasIpv6 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_ipv6()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasAnyIpv6 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_any_ipv6()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasIpv6Prefix : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_ipv6_prefix()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasAnyIpv6Prefix : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_any_ipv6_prefix()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ipv6IsMatch : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ipv6_is_match()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ParseIpv6 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv6()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class ParseIpv6Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "parse_ipv6_mask()"; } + bool 
convertImpl(String & out, IParser::Pos & pos) override; +}; + +class FormatIpv4 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_ipv4()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class FormatIpv4Mask : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "format_ipv4_mask()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasIpv4 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_ipv4()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasAnyIpv4 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_any_ipv4()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasIpv4Prefix : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_ipv4_prefix()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class HasAnyIpv4Prefix : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "has_any_ipv4_prefix()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; +} + diff --git a/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.cpp new file mode 100644 index 000000000000..fea0645fa63e --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.cpp @@ -0,0 +1,214 @@ +#include "KQLMathematicalFunctions.h" + +#include + +namespace DB +{ + +bool Abs::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "abs"); +} + +bool Acos::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "acos"); +} + +bool Asin::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "asin"); +} + +bool Atan::convertImpl(String & 
out, IParser::Pos & pos) +{ + return directMapping(out, pos, "atan"); +} + +bool Atan2::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "atan2"); +} + +bool Ceiling::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "ceil"); +} + +bool Cos::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "cos"); +} + +bool Cot::convertImpl(String & out, IParser::Pos & pos) /* Translates KQL cot(x) as the reciprocal of tan(x), written to out; returns false when getKQLFunctionName(pos) yields an empty name. */ +{ + const auto fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + out = "(1/tan(" + getArgument(fn_name, pos) + "))"; /* outer parentheses keep the quotient atomic, so that 2 / cot(x) is not re-associated into (2/1)/tan(x) by the surrounding expression */ + + return true; +} + +bool Degrees::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "degrees"); +} + +bool Exp::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "exp"); +} + +bool Exp2::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "exp2"); +} + +bool Exp10::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "exp10"); +} + +bool Gamma::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "tgamma"); +} + +bool IsFinite::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "isFinite"); +} + +bool IsInfinite::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "isInfinite"); +} + +bool IsNan::convertImpl(String & out, IParser::Pos & pos) +{ + const auto function_name = getKQLFunctionName(pos); + if (function_name.empty()) + return false; + + const auto argument = getArgument(function_name, pos); + out = std::format( + "if(toTypeName({0}) in ['Float64', 'Nullable(Float64)'], isNaN({0}), throwIf(true, 'Expected argument of data type real'))", + argument); + + return true; +} + +bool Log::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "log"); +} + +bool Log2::convertImpl(String & out, IParser::Pos & pos) +{ + return
directMapping(out, pos, "log2"); +} + +bool Log10::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "log10"); +} + +bool LogGamma::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "lgamma"); +} + +bool MaxOf::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + out.append("arrayReduce('max', ["); + const auto arguments = getArguments(fn_name, pos, ArgumentState::Parsed, {2, 64}); + + for (size_t i = 0; i < arguments.size(); i++) + { + out.append(arguments[i]); + if (i < arguments.size() - 1) + out.append(", "); + } + out.append("])"); + return true; +} + +bool MinOf::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + out.append("arrayReduce('min', ["); + const auto arguments = getArguments(fn_name, pos, ArgumentState::Parsed, {2, 64}); + + for (size_t i = 0; i < arguments.size(); i++) + { + out.append(arguments[i]); + if (i < arguments.size() - 1) + out.append(", "); + } + out.append("])"); + return true; +} + +bool Pi::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "pi"); +} + +bool Pow::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "pow"); +} + +bool Radians::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "radians"); +} + +bool Rand::convertImpl(String & out, IParser::Pos & pos) +{ + const auto fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + const auto arg = getOptionalArgument(fn_name, pos).value_or("0"); + out.append("if(" + arg + " < 2, randCanonical(), moduloOrZero(rand()," + arg + "))"); + return true; +} + +bool Round::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "round"); +} + +bool Sign::convertImpl(String & out, IParser::Pos & pos) +{ + return 
directMapping(out, pos, "sign"); +} + +bool Sin::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "sin"); +} + +bool Sqrt::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "sqrt"); +} + +bool Tan::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "tan"); +} + +} diff --git a/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.h new file mode 100644 index 000000000000..5934fadac4f8 --- /dev/null +++ b/src/Parsers/Kusto/KustoFunctions/KQLMathematicalFunctions.h @@ -0,0 +1,224 @@ +#pragma once + +#include "IParserKQLFunction.h" + +namespace DB +{ +class Abs : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "abs()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Acos : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "acos()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Asin : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "asin()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Atan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "atan()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Atan2 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "atan2()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Ceiling : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "ceiling()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Cos : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "cos()"; } + bool convertImpl(String & out, IParser::Pos & pos) 
override; +}; + +class Cot : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "cot()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Degrees : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "degrees()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Exp : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "exp()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Exp2 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "exp2()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Exp10 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "exp10()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Gamma : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "gamma()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class IsFinite : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isfinite()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class IsInfinite : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isinf()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class IsNan : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "isnan()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Log : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "log()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Log2 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return 
"log2()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Log10 : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "log10()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class LogGamma : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "loggamma()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class MaxOf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "max_of()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class MinOf : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "min_of()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Pi : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "pi()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Pow : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "pow()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Radians : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "radians()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Rand : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "rand()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Round : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "round()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Sign : public IParserKQLFunction +{ +protected: + const char * getName() const override { return "sign()"; } + bool convertImpl(String & out, IParser::Pos & pos) override; +}; + +class Sin : public IParserKQLFunction +{ 
/// Translates KQL base64_encode_fromguid(x).
/// The generated SQL checks toTypeName(x) at query time: anything other than UUID / Nullable(UUID)
/// hits throwIf with 'Expected guid as argument'; otherwise the UUID is converted with
/// UUIDStringToNum(toString(x), 2) and base64-encoded.
/// Returns false when no KQL function name is found at `pos`.
bool Base64EncodeFromGuid::convertImpl(String & out, IParser::Pos & pos)
{
    const auto function_name = getKQLFunctionName(pos);
    if (function_name.empty())
        return false;

    const auto argument = getArgument(function_name, pos);

    /// The format string only references {0}; the former second argument
    /// (generateUniqueIdentifier()) was never substituted and has been dropped.
    out = std::format(
        "if(toTypeName({0}) not in ['UUID', 'Nullable(UUID)'], toString(throwIf(true, 'Expected guid as argument')), "
        "base64Encode(UUIDStringToNum(toString({0}), 2)))",
        argument);
    return true;
}
/// Translates KQL countof(source, search [, kind]).
/// kind 'normal' (the default) maps to kql_count_overlapping_substrings, kind 'regex' to countMatches.
/// Returns false when no KQL function name is found at `pos`.
/// Throws BAD_ARGUMENTS for any other kind.
bool CountOf::convertImpl(String & out, IParser::Pos & pos)
{
    const String fn_name = getKQLFunctionName(pos);
    if (fn_name.empty())
        return false;

    ++pos;
    const String source = getConvertedArgument(fn_name, pos);

    ++pos;
    const String search = getConvertedArgument(fn_name, pos);

    String kind = "'normal'";
    if (pos->type == TokenType::Comma)
    {
        ++pos;
        kind = getConvertedArgument(fn_name, pos);
    }

    /// `kind` comes from the query text, so it is validated with an exception rather than assert():
    /// assert() disappears under NDEBUG and an unknown kind would silently be treated as 'regex'.
    if (kind == "'normal'")
        out = "kql_count_overlapping_substrings(" + source + ", " + search + ")";
    else if (kind == "'regex'")
        out = "countMatches(" + source + ", " + search + ")";
    else
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Supported kind arguments are 'normal' and 'regex' for {}, got {}", fn_name, kind);

    return true;
}

/// Translates KQL extract(regex, captureGroup, source [, typeof(type)]) into kql_extract(source, regex, captureGroup),
/// optionally wrapped in a cast to the requested type.
/// Returns false when no KQL function name is found at `pos`.
/// Throws SYNTAX_ERROR for a malformed typeof(...) and UNKNOWN_TYPE for a type missing from the map below.
bool Extract::convertImpl(String & out, IParser::Pos & pos)
{
    ParserKeyword s_kql("typeof");
    ParserToken open_bracket(TokenType::OpeningRoundBracket);
    ParserToken close_bracket(TokenType::ClosingRoundBracket);
    Expected expected;

    /// KQL type name -> target type name used in the generated cast.
    static const std::unordered_map<String, String> type_cast
        = {{"bool", "Boolean"},
           {"boolean", "Boolean"},
           {"datetime", "DateTime"},
           {"date", "DateTime"},
           {"guid", "UUID"},
           {"int", "Int32"},
           {"long", "Int64"},
           {"real", "Float64"},
           {"double", "Float64"},
           {"string", "String"},
           {"decimal", "Decimal"}};

    const String fn_name = getKQLFunctionName(pos);
    if (fn_name.empty())
        return false;

    ++pos;
    String regex = getConvertedArgument(fn_name, pos);

    ++pos;
    String capture_group = getConvertedArgument(fn_name, pos);

    ++pos;
    String source = getConvertedArgument(fn_name, pos);

    String type_literal;

    if (pos->type == TokenType::Comma)
    {
        ++pos;

        if (s_kql.ignore(pos, expected))
        {
            if (!open_bracket.ignore(pos, expected))
                throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near typeof");

            /// Single lookup instead of find() followed by operator[].
            const String kql_type(pos->begin, pos->end);
            const auto mapped_type = type_cast.find(kql_type);
            if (mapped_type == type_cast.end())
                throw Exception(ErrorCodes::UNKNOWN_TYPE, "{} is not a supported kusto data type for extract", kql_type);

            type_literal = mapped_type->second;
            ++pos;

            if (!close_bracket.ignore(pos, expected))
                throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near typeof");
        }
    }

    out = std::format("kql_extract({}, {}, {})", source, regex, capture_group);
    if (type_literal == "Decimal")
    {
        /// First argument: NULL when the extracted text contains more than one '.', the text otherwise.
        /// Second argument: length of the text after the first '.'.
        /// The previous two-step formatting emitted one ')' too many, so the generated SQL was unbalanced.
        out = std::format(
            "toDecimal128OrNull(countSubstrings({0}, '.') > 1 ? NULL: {0}, length(substr({0}, position({0},'.') + 1)))", out);
    }
    else
    {
        /// Booleans go through an integer parse first, then through the generic cast below.
        if (type_literal == "Boolean")
            out = std::format("toInt64OrNull({})", out);

        if (!type_literal.empty())
            out = "accurateCastOrNull(" + out + ", '" + type_literal + "')";
    }
    return true;
}

/// Translates KQL extract_json(path, source [, typeof(type)]) / extractjson(...) into JSON_VALUE(source, path)
/// wrapped in a cast. Without typeof(...) the target type stays at its default, "String".
/// Returns false when no KQL function name is found at `pos`.
/// Throws SYNTAX_ERROR for a malformed typeof(...) and UNKNOWN_TYPE for a type missing from the map below.
bool ExtractJson::convertImpl(String & out, IParser::Pos & pos)
{
    String datatype = "String";
    ParserKeyword s_kql("typeof");
    ParserToken open_bracket(TokenType::OpeningRoundBracket);
    ParserToken close_bracket(TokenType::ClosingRoundBracket);
    Expected expected;

    /// KQL type name -> target type name used in the generated cast.
    static const std::unordered_map<String, String> type_cast
        = {{"bool", "Boolean"},
           {"boolean", "Boolean"},
           {"datetime", "DateTime"},
           {"date", "DateTime"},
           {"dynamic", "Array"},
           {"guid", "UUID"},
           {"int", "Int32"},
           {"long", "Int64"},
           {"real", "Float64"},
           {"double", "Float64"},
           {"string", "String"},
           {"decimal", "Decimal"}};

    const String fn_name = getKQLFunctionName(pos);
    if (fn_name.empty())
        return false;

    ++pos;
    const String json_datapath = getConvertedArgument(fn_name, pos);
    ++pos;
    const String json_datasource = getConvertedArgument(fn_name, pos);
    if (pos->type == TokenType::Comma)
    {
        ++pos;
        if (s_kql.ignore(pos, expected))
        {
            if (!open_bracket.ignore(pos, expected))
                throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near typeof");

            /// Single lookup instead of find() followed by operator[].
            const String kql_type(pos->begin, pos->end);
            const auto mapped_type = type_cast.find(kql_type);
            if (mapped_type == type_cast.end())
                throw Exception(ErrorCodes::UNKNOWN_TYPE, "{} is not a supported kusto data type for {}", kql_type, fn_name);

            datatype = mapped_type->second;
            ++pos;

            if (!close_bracket.ignore(pos, expected))
                throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near typeof");
        }
    }

    const auto json_val = std::format("JSON_VALUE({0},{1})", json_datasource, json_datapath);
    if (datatype == "Decimal")
    {
        /// Scale expression: NULL when the value contains more than one '.', otherwise the length of the
        /// text after the first '.'. The previous string carried one ')' too many, which unbalanced the
        /// enclosing toDecimal128OrNull(...) call.
        const auto scale = std::format("countSubstrings({0}, '.') > 1 ? NULL: length(substr({0}, position({0},'.') + 1))", json_val);
        out = std::format("toDecimal128OrNull({0}::String ,{1})", json_val, scale);
    }
    else
    {
        if (datatype == "Boolean")
            out = std::format("if(toInt64OrNull({}) > 0, true, false)", json_val);
        else if (!datatype.empty())
            out = std::format("accurateCastOrNull({},'{}')", json_val, datatype);
    }
    return true;
}
/// Translates KQL isascii(x): the generated SQL splits x into pieces with splitByString(''),
/// maps each piece through ascii() and negates "any code is below 0 or above 127".
/// Returns false when no KQL function name is found at `pos`.
bool IsAscii::convertImpl(String & out, IParser::Pos & pos)
{
    const String function_name = getKQLFunctionName(pos);
    if (function_name.empty())
        return false;

    /// Step onto the argument before converting it.
    ++pos;
    const auto text = getConvertedArgument(function_name, pos);

    out = std::format(
        "not toBool(arrayExists(x -> x < 0 or x > 127, arrayMap(x -> ascii(x), splitByString('', assumeNotNull({})))))", text);
    return true;
}
/// Translates KQL parse_json(x).
/// When the argument starts with the token `dynamic`, the call is forwarded to the KQL `dynamic` conversion.
/// Otherwise the generated SQL returns JSON_QUERY(x, '$') for valid JSON and toJSONString(x) for anything else.
/// Returns false when no KQL function name is found at `pos`.
bool ParseJson::convertImpl(String & out, IParser::Pos & pos)
{
    const String function_name = getKQLFunctionName(pos);
    if (function_name.empty())
        return false;

    ++pos;
    const bool starts_with_dynamic = String(pos->begin, pos->end) == "dynamic";
    if (!starts_with_dynamic)
    {
        const auto json_text = getConvertedArgument(function_name, pos);
        out = std::format("if (isValidJSON({0}) , JSON_QUERY({0}, '$') , toJSONString({0}))", json_text);
        return true;
    }

    /// Step back so that getArgument starts from the same position it would see without the peek above.
    --pos;
    const auto dynamic_argument = getArgument(function_name, pos);
    out = kqlCallToExpression("dynamic", {dynamic_argument}, pos.max_depth);
    return true;
}
/// Translates KQL split(source, delimiter [, requestedIndex]).
/// Without an index the result is the splitByString expression (an empty delimiter falls back to ' ').
/// With an index the generated SQL yields a one-element array holding that element; the multiIf uses
/// index + 1 because the literal is shifted by one before being emitted.
/// Returns false when no KQL function name is found at `pos`.
/// Throws BAD_ARGUMENTS when the index is not a plain integer literal.
bool Split::convertImpl(String & out, IParser::Pos & pos)
{
    const String fn_name = getKQLFunctionName(pos);
    if (fn_name.empty())
        return false;

    ++pos;
    const String source = getConvertedArgument(fn_name, pos);

    ++pos;
    const String delimiter = getConvertedArgument(fn_name, pos);
    auto split_res = std::format("empty({0}) ? splitByString(' ' , {1}) : splitByString({0} , {1})", delimiter, source);

    if (pos->type != TokenType::Comma)
    {
        out = split_res;
        return true;
    }

    ++pos;
    auto arg = getConvertedArgument(fn_name, pos);

    /// Remove space between minus and value. The character is widened to unsigned char first,
    /// because passing a negative char to isspace/isdigit is undefined behaviour.
    arg.erase(std::remove_if(arg.begin(), arg.end(), [](unsigned char c) { return isspace(c) != 0; }), arg.end());

    /// Accept only [+-]digits, at most 9 digits, so that std::stoi can neither throw
    /// std::invalid_argument / std::out_of_range nor silently ignore trailing characters,
    /// and the "+ 1" below cannot overflow.
    const size_t digits_begin = (!arg.empty() && (arg.front() == '-' || arg.front() == '+')) ? 1 : 0;
    const size_t digits_count = arg.size() - digits_begin;
    const bool is_integer_literal = digits_count >= 1 && digits_count <= 9
        && std::all_of(arg.begin() + digits_begin, arg.end(), [](unsigned char c) { return isdigit(c) != 0; });

    if (!is_integer_literal)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Third argument of {} must be an integer literal, got {}", fn_name, arg);

    const int requested_index = std::stoi(arg) + 1;
    out = std::format(
        "multiIf(length({0}) >= {1} AND {1} > 0 , arrayPushBack([],arrayElement({0}, {1})) , {1}=0 ,{0} , arrayPushBack([] "
        ",arrayElement(NULL,1)))",
        split_res,
        requested_index);
    return true;
}
{0} < {1}, -1, 1)", string1, string2); + return true; +} + +bool StringSize::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "length"); +} + +bool StrLen::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "lengthUTF8"); +} + +bool StrRep::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + const auto arguments = getArguments(fn_name, pos, ArgumentState::Raw, {2, 3}); + const String & value = arguments[0]; + const String & multiplier = arguments[1]; + + if (arguments.size() == 2) + out = "repeat(" + value + " , " + multiplier + ")"; + else if (arguments.size() == 3) + { + const String & delimiter = arguments[2]; + const String repeated_str + = "repeat(concat(" + kqlCallToExpression("tostring", {value}, pos.max_depth) + " , " + delimiter + ")," + multiplier + ")"; + out = "substr(" + repeated_str + ", 1, length(" + repeated_str + ") - length(" + delimiter + "))"; + } + return true; +} + +bool SubString::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String source = getConvertedArgument(fn_name, pos); + + ++pos; + String starting_index = getConvertedArgument(fn_name, pos); + + if (pos->type == TokenType::Comma) + { + ++pos; + auto length = getConvertedArgument(fn_name, pos); + + if (starting_index.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "number of arguments do not match in function: {}", fn_name); + else + out = "if(toInt64(length(" + source + ")) <= 0, '', substr(" + source + ", " + "((" + starting_index + "% toInt64(length(" + + source + ")) + toInt64(length(" + source + "))) % toInt64(length(" + source + "))) + 1, " + length + ") )"; + } + else + out = "if(toInt64(length(" + source + ")) <= 0, '', substr(" + source + "," + "((" + starting_index + "% toInt64(length(" + source + + ")) + 
toInt64(length(" + source + "))) % toInt64(length(" + source + "))) + 1))"; + + return true; +} + +bool ToLower::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "lower"); +} + +bool ToUpper::convertImpl(String & out, IParser::Pos & pos) +{ + return directMapping(out, pos, "upper"); +} + +bool ToUtf8::convertImpl(String & out, IParser::Pos & pos) +{ + String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String func_arg = getConvertedArgument(fn_name, pos); + const String base_arg = "reinterpretAsInt64(reverse(UNBIN("; + const String base_arg_end = ")))"; + const String expr0 = base_arg + "substring(bin(x),2,7)" + base_arg_end; + const String expr1 = base_arg + "concat(substring(bin(x),4,5), substring(bin(x),11,6))" + base_arg_end; + const String expr2 = base_arg + "concat(substring(bin(x),5,4), substring(bin(x),11,6), substring(bin(x),19,6))" + base_arg_end; + const String expr3 + = base_arg + "concat(substring(bin(x),6,3), substring(bin(x),11,6), substring(bin(x),19,6), substring(bin(x),27,6))" + base_arg_end; + + out = std::format( + "arrayMap(x -> if(substring(bin(x),1,1)=='0', {0}," + "if (substring(bin(x),1,3)=='110', {1},if(substring(bin(x),1,4)=='1110'" + ", {2},if (substring(bin(x),1,5)=='11110', {3},-1)))), ngrams({4}, 1))", + expr0, + expr1, + expr2, + expr3, + func_arg); + return true; +} + +bool Translate::convertImpl(String & out, IParser::Pos & pos) +{ + const String fn_name = getKQLFunctionName(pos); + + if (fn_name.empty()) + return false; + + ++pos; + String from = getConvertedArgument(fn_name, pos); + ++pos; + String to = getConvertedArgument(fn_name, pos); + ++pos; + String source = getConvertedArgument(fn_name, pos); + + String len_diff = std::format("length({}) - length({})", from, to); + String to_str = std::format( + "multiIf(length({1}) = 0, {0}, {2} > 0, concat({1},repeat(substr({1},length({1}),1),toUInt16({2}))),{2} < 0 , " + "substr({1},1,length({0})),{1})", + 
/// Translates KQL trim(regex, source) by composing the two one-sided KQL functions:
/// trim_start(regex, trim_end(regex, source)). Both arguments are taken raw and the
/// composed call is converted through kqlCallToExpression.
/// Returns false when no KQL function name is found at `pos`.
bool Trim::convertImpl(String & out, IParser::Pos & pos)
{
    const String function_name = getKQLFunctionName(pos);
    if (function_name.empty())
        return false;

    const auto pattern = getArgument(function_name, pos, ArgumentState::Raw);
    const auto text = getArgument(function_name, pos, ArgumentState::Raw);

    /// Inner call is built as KQL text; it becomes the second argument of the outer trim_start.
    const auto trimmed_at_end = std::format("trim_end({0}, {1})", pattern, text);
    out = kqlCallToExpression("trim_start", {pattern, trimmed_at_end}, pos.max_depth);

    return true;
}
"base64_encode_tostring()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "base64_encode_fromguid()"; convertImpl() is only declared here.
+class Base64EncodeFromGuid : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "base64_encode_fromguid()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "base64_decode_tostring()"; convertImpl() is only declared here.
+class Base64DecodeToString : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "base64_decode_tostring()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "base64_decode_toarray()"; convertImpl() is only declared here.
+class Base64DecodeToArray : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "base64_decode_toarray()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "base64_decode_toguid()"; convertImpl() is only declared here.
+class Base64DecodeToGuid : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "base64_decode_toguid()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "countof()"; convertImpl() is only declared here.
+class CountOf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "countof()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "extract()"; convertImpl() is only declared here.
+class Extract : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "extract()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "extract_all()"; convertImpl() is only declared here.
+class ExtractAll : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "extract_all()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass whose getName() lists two spellings: "extract_json(), extractjson()".
+class ExtractJson : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "extract_json(), extractjson()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "has_any_index()"; convertImpl() is only declared here.
+class HasAnyIndex : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "has_any_index()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "indexof()"; convertImpl() is only declared here.
+class IndexOf : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "indexof()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "indexof_regex()"; convertImpl() is only declared here.
+class IndexOfRegex : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "indexof_regex()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "isascii()"; convertImpl() is only declared here.
+class IsAscii : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "isascii()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "isempty()"; convertImpl() is only declared here.
+class IsEmpty : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "isempty()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "isnotempty()"; convertImpl() is only declared here.
+class IsNotEmpty : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "isnotempty()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "isnotnull()"; convertImpl() is only declared here.
+class IsNotNull : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "isnotnull()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "isutf8()"; convertImpl() is only declared here.
+class IsUtf8 : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "isutf8()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "isnull()"; convertImpl() is only declared here.
+class IsNull : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "isnull()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "make_string()"; convertImpl() is only declared here.
+class MakeString : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "make_string()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "new_guid()"; convertImpl() is only declared here.
+class NewGuid : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "new_guid()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "parse_command_line()"; convertImpl() is only declared here.
+class ParseCommandLine : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "parse_command_line()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "parse_csv()"; convertImpl() is only declared here.
+class ParseCSV : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "parse_csv()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "parse_json()"; convertImpl() is only declared here.
+class ParseJson : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "parse_json()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "parse_url()"; convertImpl() is only declared here.
+class ParseURL : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "parse_url()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "parse_urlquery()"; convertImpl() is only declared here.
+class ParseURLQuery : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "parse_urlquery()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "parse_version()"; convertImpl() is only declared here.
+class ParseVersion : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "parse_version()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "replace_regex()"; convertImpl() is only declared here.
+class ReplaceRegex : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "replace_regex()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "reverse()"; convertImpl() is only declared here.
+class Reverse : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "reverse()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "split()"; convertImpl() is only declared here.
+class Split : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "split()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "strcat()"; convertImpl() is only declared here.
+class StrCat : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "strcat()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "strcat_delim()"; convertImpl() is only declared here.
+class StrCatDelim : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "strcat_delim()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "strcmp()"; convertImpl() is only declared here.
+class StrCmp : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "strcmp()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "string_size()"; convertImpl() is only declared here.
+class StringSize : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "string_size()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "strlen()"; convertImpl() is only declared here.
+class StrLen : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "strlen()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "strrep()"; convertImpl() is only declared here.
+class StrRep : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "strrep()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "substring()"; convertImpl() is only declared here.
+class SubString : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "substring()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "tolower()"; convertImpl() is only declared here.
+class ToLower : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "tolower()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "toupper()"; convertImpl() is only declared here.
+class ToUpper : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "toupper()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "to_utf8()"; convertImpl() is only declared here.
+class ToUtf8 : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "to_utf8()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "translate()"; convertImpl() is only declared here.
+class Translate : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "translate()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "trim()"; convertImpl() is only declared here.
+class Trim : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "trim()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "trim_end()"; convertImpl() is only declared here.
+class TrimEnd : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "trim_end()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "trim_start()"; convertImpl() is only declared here.
+class TrimStart : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "trim_start()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "url_decode()"; convertImpl() is only declared here.
+class URLDecode : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "url_decode()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+/// IParserKQLFunction subclass labelled "url_encode()"; convertImpl() is only declared here.
+class URLEncode : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "url_encode()"; }
+    bool convertImpl(String & out, IParser::Pos & pos) override;
+};
+
+}
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp
new file mode 100644
index 000000000000..d5be8e262a84
--- /dev/null
+++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.cpp
@@ -0,0 +1,111 @@
+#include "KQLTimeSeriesFunctions.h"
+// Every convertImpl() in this file has the same stub body: copy the text between pos->begin and pos->end into out, return false.
+namespace DB
+{
+// NOTE(review): returning false presumably tells the caller no translation was produced - confirm against IParserKQLFunction.
+bool SeriesFir::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool SeriesIir::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool SeriesFitLine::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool SeriesFitLineDynamic::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool SeriesFit2lines::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool SeriesFit2linesDynamic::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool SeriesOutliers::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool SeriesPeriodsDetect::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool SeriesPeriodsValidate::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool SeriesStatsDynamic::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool SeriesStats::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool SeriesFillBackward::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool SeriesFillConst::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool SeriesFillForward::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+bool SeriesFillLinear::convertImpl(String &out,IParser::Pos &pos)
+{
+    String res = String(pos->begin,pos->end);
+    out = res;
+    return false;
+}
+
+}
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h
new file mode 100644
index 000000000000..999a27f6b391
--- /dev/null
+++ b/src/Parsers/Kusto/KustoFunctions/KQLTimeSeriesFunctions.h
@@ -0,0 +1,113 @@
+#pragma once
+
+#include "IParserKQLFunction.h"
+// One IParserKQLFunction subclass per series_*() name; each overrides getName() inline and only declares convertImpl().
+namespace DB
+{
+class SeriesFir : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "series_fir()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class SeriesIir : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "series_iir()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class SeriesFitLine : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "series_fit_line()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class SeriesFitLineDynamic : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "series_fit_line_dynamic()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class SeriesFit2lines : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "series_fit_2lines()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class SeriesFit2linesDynamic : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "series_fit_2lines_dynamic()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class SeriesOutliers : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "series_outliers()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class SeriesPeriodsDetect : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "series_periods_detect()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class SeriesPeriodsValidate : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "series_periods_validate()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class SeriesStatsDynamic : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "series_stats_dynamic()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class SeriesStats : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "series_stats()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class SeriesFillBackward : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "series_fill_backward()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class SeriesFillConst : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "series_fill_const()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class SeriesFillForward : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "series_fill_forward()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+class SeriesFillLinear : public IParserKQLFunction
+{
+protected:
+    const char * getName() const override { return "series_fill_linear()"; }
+    bool convertImpl(String &out,IParser::Pos &pos) override;
+};
+
+}
+
diff --git a/src/Parsers/Kusto/ParserKQLCount.cpp b/src/Parsers/Kusto/ParserKQLCount.cpp
new file mode 100644
index 000000000000..1f07acf89db5
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLCount.cpp
@@ -0,0 +1,30 @@
+#include
+#include
+#include
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int SYNTAX_ERROR;
+}
+// Count operator: throws SYNTAX_ERROR unless pos is at end of input, '|' or ';', then sets the SELECT list of node.
+bool ParserKQLCount::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
+        throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near count operator");
+
+    ASTPtr select_expression_list;
+    String converted_columns = getExprFromToken("Count = count()", pos.max_depth);
+    // The text returned by getExprFromToken() is tokenized again and parsed as an expression list.
+    Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size());
+    IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth);
+
+    if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, select_expression_list, expected))
+        return false;
+    // NOTE(review): the template argument of as() is missing in this text; the call installs the list as the SELECT expression.
+    node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list));
+
+    return true;
+}
+
+}
diff --git
a/src/Parsers/Kusto/ParserKQLCount.h b/src/Parsers/Kusto/ParserKQLCount.h
new file mode 100644
index 000000000000..49132e090d7b
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLCount.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+/// ParserKQLBase subclass labelled "KQL count"; parseImpl() is only declared here.
+class ParserKQLCount : public ParserKQLBase
+{
+protected:
+    const char * getName() const override { return "KQL count"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLDistinct.cpp b/src/Parsers/Kusto/ParserKQLDistinct.cpp
new file mode 100644
index 000000000000..de226c2817ba
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLDistinct.cpp
@@ -0,0 +1,26 @@
+#include
+#include
+#include
+namespace DB
+{
+// Distinct operator: parses the operator's expression list into the SELECT expression of node and sets its distinct flag.
+bool ParserKQLDistinct::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    ASTPtr select_expression_list;
+    String expr;
+
+    expr = getExprFromToken(pos);
+    // Re-tokenize the text returned by getExprFromToken() and parse it as a non-empty expression list.
+    Tokens tokens(expr.c_str(), expr.c_str() + expr.size());
+    IParser::Pos new_pos(tokens, pos.max_depth);
+
+    if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected))
+        return false;
+    // NOTE(review): the template argument of as() is missing in this text.
+    node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list));
+    node->as()->distinct = true;
+
+    return true;
+}
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLDistinct.h b/src/Parsers/Kusto/ParserKQLDistinct.h
new file mode 100644
index 000000000000..eb997893d3e6
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLDistinct.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+/// ParserKQLBase subclass labelled "KQL distinct"; parseImpl() is only declared here.
+class ParserKQLDistinct : public ParserKQLBase
+{
+
+protected:
+    const char * getName() const override { return "KQL distinct"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLExtend.cpp b/src/Parsers/Kusto/ParserKQLExtend.cpp
new file mode 100644
index 000000000000..850fe96e6a6f
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLExtend.cpp
@@ -0,0 +1,47 @@
+#include "ParserKQLExtend.h"
+
+#include
+#include
+#include
+#include
+
+#include
+// parseImpl: parses the extend expressions and sets SELECT to an asterisk followed by those expressions.
+namespace DB
+{
+bool ParserKQLExtend::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    const auto extend_expr = getExprFromToken(pos);
+    Tokens ntokens(extend_expr.c_str(), extend_expr.c_str() + extend_expr.size());
+    IParser::Pos npos(ntokens, pos.max_depth);
+    // Fail unless the whole re-tokenized text parses as an expression list.
+    ASTPtr expression_list;
+    if (!ParserNotEmptyExpressionList(false).parse(npos, expression_list, expected) || !npos->isEnd())
+        return false;
+    // Every expression without an alias gets the next default column name from kql_context.
+    std::ranges::for_each(
+        expression_list->children,
+        [this](const ASTPtr & expression)
+        {
+            if (const auto alias = expression->tryGetAlias(); !alias.empty())
+                return;
+
+            expression->setAlias(kql_context.nextDefaultColumnName());
+        });
+    // NOTE(review): make_shared template arguments are missing in this text; judging by the variable name the inner node is a "columns except" transformer - confirm.
+    auto asterisk = std::make_shared();
+    asterisk->transformers = std::make_shared();
+    const auto & columns_except_transformer
+        = asterisk->children.emplace_back(asterisk->transformers)->children.emplace_back(std::make_shared());
+    // One child per extend expression, built from its alias-or-column name.
+    std::ranges::transform(
+        expression_list->children,
+        std::back_inserter(columns_except_transformer->children),
+        [](const ASTPtr & child) { return std::make_shared(child->getAliasOrColumnName()); });
+    // The asterisk goes first, ahead of the extend expressions.
+    expression_list->children.insert(expression_list->children.cbegin(), std::move(asterisk));
+
+    node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(expression_list));
+    return true;
+}
+}
diff --git a/src/Parsers/Kusto/ParserKQLExtend.h b/src/Parsers/Kusto/ParserKQLExtend.h
new file mode 100644
index 000000000000..33bfa80dfe58
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLExtend.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+/// ParserKQLBase subclass labelled "KQL extend"; keeps a reference to the KQLContext passed to its constructor.
+class ParserKQLExtend : public ParserKQLBase
+{
+public:
+    explicit ParserKQLExtend(KQLContext & kql_context_) : kql_context(kql_context_) { }
+
+protected:
+    const char * getName() const override { return "KQL extend"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+
+private:
+    KQLContext & kql_context;
+};
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp
index 3a399bdccdb1..942f4cd90d2a 100644
--- a/src/Parsers/Kusto/ParserKQLFilter.cpp
+++ b/src/Parsers/Kusto/ParserKQLFilter.cpp
@@ -8,7 +8,7 @@
 namespace DB
 {
 
-bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+bool ParserKQLFilter::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
     String expr = getExprFromToken(pos);
     ASTPtr where_expression;
diff --git a/src/Parsers/Kusto/ParserKQLGetSchema.cpp b/src/Parsers/Kusto/ParserKQLGetSchema.cpp
new file mode 100644
index 000000000000..d3520daeb62b
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLGetSchema.cpp
@@ -0,0 +1,27 @@
+#include "ParserKQLGetSchema.h"
+
+#include "Utilities.h"
+
+#include
+// parseImpl: getschema must be followed by end of stream, '|' or ';'; node is replaced by a query over the "getschema" table function.
+namespace DB
+{
+bool ParserKQLGetSchema::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    if (const auto & token = pos->type; token != TokenType::EndOfStream && token != TokenType::PipeMark && token != TokenType::Semicolon)
+    {
+        expected.add(pos, "end of query or next pipe");
+        return false;
+    }
+    // A query that has no SELECT list yet gets one from setSelectAll().
+    if (auto * select_query = node->as(); !select_query->select())
+        setSelectAll(*select_query);
+    // The current node, wrapped by wrapInSelectWithUnion(), becomes the argument of the "getschema" function.
+    auto enclosing_query = std::make_shared();
+    ASTPtr getschema_function = makeASTFunction("getschema", wrapInSelectWithUnion(node));
+    enclosing_query->addTableFunction(getschema_function);
+    // The enclosing query is what the caller receives in node.
+    node = std::move(enclosing_query);
+    return true;
+}
+}
diff --git a/src/Parsers/Kusto/ParserKQLGetSchema.h b/src/Parsers/Kusto/ParserKQLGetSchema.h
new file mode 100644
index 000000000000..bf418db8ba14
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLGetSchema.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "ParserKQLQuery.h"
+
+namespace DB
+{
+/// ParserKQLBase subclass labelled "KQL getschema"; parseImpl() is only declared here.
+class ParserKQLGetSchema : public ParserKQLBase
+{
+protected:
+    const char * getName() const override { return "KQL getschema"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git
a/src/Parsers/Kusto/ParserKQLJoin.cpp b/src/Parsers/Kusto/ParserKQLJoin.cpp
new file mode 100644
index 000000000000..4b9881955ac3
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLJoin.cpp
@@ -0,0 +1,293 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int SYNTAX_ERROR;
+}
+/// Parses the join operator ([kind=<flavor>] <right table or (subquery)> on <attributes>), builds a "SELECT * FROM tbl <JOIN> ..." query from it and stores the result in node; throws SYNTAX_ERROR on malformed input.
+bool ParserKQLJoin ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    ASTPtr sub_query_node;
+    String str_right_table;
+    String str_attributes;
+    std::vector attribute_list;
+    std::vector left_columns;
+    const String default_join = "UNINQUE INNER JOIN"; // sentinel for kind=innerunique (spelling kept as-is); swapped for "INNER JOIN" before any SQL text is built
+    String join_kind = default_join;
+    String kql_join_kind = "innerunique";
+
+    ParserKeyword s_kind("kind");
+    ParserToken equals(TokenType::Equals);
+    ParserToken open_bracket(TokenType::OpeningRoundBracket);
+    ParserIdentifier id_right_table;
+
+    size_t paren_count = 0;
+    ASTPtr ast_right_table;
+    // KQL kind=<flavor> -> SQL join keyword.
+    std::unordered_map join_type
+        = {{"innerunique", default_join},
+           {"inner", "INNER JOIN"},
+           {"leftouter", "LEFT OUTER JOIN"},
+           {"rightouter", "RIGHT OUTER JOIN"},
+           {"fullouter", "FULL OUTER JOIN"},
+
+           {"leftanti", "LEFT ANTI JOIN"},
+           {"anti", "LEFT ANTI JOIN"},
+           {"leftantisemi", "LEFT ANTI JOIN"},
+
+           {"rightanti", "RIGHT ANTI JOIN"},
+           {"rightantisemi", "RIGHT ANTI JOIN"},
+
+           {"leftsemi", "LEFT SEMI JOIN"},
+           {"rightsemi", "RIGHT SEMI JOIN"}};
+
+    if (s_kind.ignore(pos))
+    {
+        if (!equals.ignore(pos))
+            throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid kind for join operator");
+
+        String join_word(pos->begin, pos->end);
+        const auto join_type_it = join_type.find(join_word); // one lookup serves both the validity check and the value
+        if (join_type_it == join_type.end())
+            throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid value of kind for join operator");
+        join_kind = join_type_it->second;
+        kql_join_kind = join_word;
+        ++pos;
+    }
+
+    Pos right_table_start_pos = pos;
+    Pos keyword_on_pos = pos;
+    // The right side is either "(" ... ")" (tracked through paren_count) or a single identifier.
+    bool has_bracket = open_bracket.ignore(pos);
+
+    if (!has_bracket)
+    {
+        if (!id_right_table.parse(pos, ast_right_table, expected))
+            return false;
+    }
+    else
+        paren_count = 1;
+
+    Pos attributes_start_pos = pos;
+    bool attributes_on_column = false;
+    // parse_attribute: one join condition between start_pos and end_pos (outer brackets skipped): a bare column name, or "left_./$left.<col> == right_./$right.<col>".
+    auto parse_attribute = [&](Pos & start_pos, Pos & end_pos)
+    {
+        while (start_pos < end_pos && start_pos->type == TokenType::OpeningRoundBracket)
+            ++start_pos;
+        while (start_pos < end_pos && end_pos->type == TokenType::ClosingRoundBracket)
+            --end_pos;
+
+        if (start_pos == end_pos)
+        {
+            if (start_pos->type != TokenType::BareWord)
+                return false;
+            attribute_list.push_back(String(start_pos->begin, end_pos->end));
+            left_columns.push_back(String(start_pos->begin, end_pos->end));
+        }
+        else
+        {
+            String left_column, right_column;
+            auto get_column = [&]()
+            {
+                String left_alias = "left_.", right_alias = "right_.";
+                String left_alias2 = "$left.", right_alias2 = "$right.";
+
+                auto attribute_str = String(start_pos->begin, end_pos->end);
+
+                if (attribute_str.substr(0, left_alias.length()) != left_alias
+                    && attribute_str.substr(0, left_alias2.length()) != left_alias2)
+                    return false;
+
+                auto r_begin = attribute_str.find("==");
+                if (r_begin == std::string::npos)
+                    return false;
+                if (attribute_str.substr(0, left_alias.length()) == left_alias)
+                    left_column = attribute_str.substr(left_alias.length(), r_begin - left_alias.length());
+                else
+                    left_column = attribute_str.substr(left_alias2.length(), r_begin - left_alias2.length());
+
+                r_begin += 2;
+                while (r_begin < attribute_str.length() && attribute_str[r_begin] <= 0x20)
+                    ++r_begin;
+
+                if (attribute_str.substr(r_begin, right_alias.length()) != right_alias
+                    && attribute_str.substr(r_begin, right_alias2.length()) != right_alias2)
+                    return false;
+
+                right_column = attribute_str.substr(r_begin + right_alias.length()); // "right_." and "$right." have the same length, so one offset serves both
+                return true;
+            };
+
+            if (!get_column())
+                return false;
+            trim(left_column);
+            trim(right_column);
+            left_columns.push_back(left_column);
+
+            if (left_column != right_column)
+            {
+                attributes_on_column = true;
+                attribute_list.push_back(String(start_pos->begin, end_pos->end));
+            }
+            else
+                attribute_list.push_back(left_column);
+        }
+
+        return true;
+    };
+    // update_attributes: parses the condition that ends just before pos, then starts the next one just after pos; throws SYNTAX_ERROR on failure.
+    auto update_attributes = [&]
+    {
+        auto temp_pos = pos;
+        --temp_pos;
+
+        if (temp_pos < attributes_start_pos || !parse_attribute(attributes_start_pos, temp_pos))
+            throw Exception(ErrorCodes::SYNTAX_ERROR, "Attributes error for join or lookup operator");
+        attributes_start_pos = pos;
+        ++attributes_start_pos;
+    };
+    // Scan up to ';', end of input or a top-level '|', noting the first top-level "on" and splitting conditions at top-level commas.
+    while (!pos->isEnd() && pos->type != TokenType::Semicolon)
+    {
+        if (pos->type == TokenType::OpeningRoundBracket)
+            ++paren_count;
+        if (pos->type == TokenType::ClosingRoundBracket && paren_count > 0) // guard: size_t would wrap on a stray ')', after which no '|' would count as top-level
+            --paren_count;
+        if (pos->type == TokenType::PipeMark && paren_count == 0)
+            break;
+
+        if (String(pos->begin, pos->end) == "on" && paren_count == 0)
+        {
+            if (keyword_on_pos == right_table_start_pos)
+            {
+                keyword_on_pos = pos;
+                attributes_start_pos = pos;
+                ++attributes_start_pos;
+            }
+        }
+
+        if (pos->type == TokenType::Comma && right_table_start_pos < keyword_on_pos && paren_count == 0)
+        {
+            update_attributes();
+        }
+        ++pos;
+    }
+    // The last condition is not followed by a comma, so it is parsed here.
+    update_attributes();
+
+    if (keyword_on_pos <= right_table_start_pos)
+        throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing right table or 'on' for join or lookup operator");
+    // Right-table text runs from right_table_start_pos to the token before "on"; multi-token text gets a "kql" prefix.
+    --keyword_on_pos;
+    if (right_table_start_pos == keyword_on_pos)
+        str_right_table = String(right_table_start_pos->begin, keyword_on_pos->end);
+    else
+        str_right_table = std::format("kql{}", String(right_table_start_pos->begin, keyword_on_pos->end));
+
+    ++keyword_on_pos;
+    ++keyword_on_pos;
+    --pos;
+    if (pos < keyword_on_pos)
+        throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing attributes for join or lookup operator");
+    // innerunique: node is first replaced by a "SELECT DISTINCT ON (<left columns>) * FROM dum_tbl" query whose source is set through setSubQuerySource().
+    String query_join;
+    if (join_kind == default_join)
+    {
+        join_kind = "INNER JOIN";
+        String distinct_column;
+        for (auto const & col : left_columns)
+            distinct_column = distinct_column.empty() ? col : distinct_column + "," + col;
+
+        String distinct_query = std::format("(SELECT DISTINCT ON ({}) * FROM dum_tbl)", distinct_column);
+        if (!parseSQLQueryByString(std::make_unique(), distinct_query, sub_query_node, pos.max_depth))
+            return false;
+        if (!setSubQuerySource(sub_query_node, node, true, false))
+            return false;
+        node = std::move(sub_query_node);
+    }
+    // Differently named left/right columns need an explicit ON clause; otherwise a USING list is built.
+    if (attributes_on_column)
+    {
+        auto replace = [&](std::string & str, const std::string & from, const std::string & to)
+        {
+            size_t start_pos = str.find(from);
+            if (start_pos != std::string::npos)
+                str.replace(start_pos, from.length(), to);
+        };
+
+        for (auto str : attribute_list)
+        {
+            if (str.substr(0, 6) != "left_." && str.substr(0, 6) != "$left.")
+                str = std::format("left_.{0} == right_.{0}", str);
+            else if (str.substr(0, 6) == "$left.")
+            {
+                replace(str, "$left.", "left_.");
+                replace(str, "$right.", "right_.");
+            }
+
+            str_attributes = str_attributes.empty() ? str : str_attributes + " and " + str;
+        }
+        query_join = std::format("SELECT * FROM tbl {} {} ON {}", join_kind, str_right_table, str_attributes);
+    }
+    else
+    {
+        for (auto const & str : attribute_list)
+            str_attributes = str_attributes.empty() ? str : str_attributes + "," + str;
+
+        query_join = std::format("SELECT * FROM tbl {} {} USING {}", join_kind, str_right_table, str_attributes);
+    }
+
+    if (!parseSQLQueryByString(std::make_unique(), query_join, sub_query_node, pos.max_depth))
+        return false;
+    // Alias the second table element (subquery or table name) as "right_". NOTE(review): as()/dynamic_cast template arguments are missing in this text.
+    ASTPtr table_expr;
+    if (sub_query_node->as()->tables()
+        && sub_query_node->as()->tables()->as()->children.size() > 1)
+    {
+        table_expr = sub_query_node->as()->tables()->as()->children[1];
+        if (table_expr->as()->table_expression->as()->subquery)
+            table_expr->as()->table_expression->as()->subquery->as()->alias
+                = "right_";
+        else if (table_expr->as()->table_expression->as()->database_and_table_name)
+        {
+            table_expr
+                = table_expr->as()->table_expression->as()->database_and_table_name;
+            if (auto * ast_with_alias = dynamic_cast(table_expr.get()))
+                ast_with_alias->alias = "right_";
+        }
+    }
+    if (kql_join_kind == "innerunique") // both branches pass "left_"; only the fourth argument differs
+    {
+        if (!setSubQuerySource(sub_query_node, node, false, true, "left_"))
+            return false;
+    }
+    else
+    {
+        if (!setSubQuerySource(sub_query_node, node, false, false, "left_"))
+            return false;
+    }
+
+    node = std::move(sub_query_node);
+    return true;
+}
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLJoin.h b/src/Parsers/Kusto/ParserKQLJoin.h
new file mode 100644
index 000000000000..185f91bdeb2f
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLJoin.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+/// ParserKQLBase subclass labelled "KQL Join"; parseImpl() is only declared here.
+class ParserKQLJoin : public ParserKQLBase
+{
+protected:
+    const char * getName() const override { return "KQL Join"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+
+};
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp
index bb8e08fd3786..306884ac8b77 100644
--- a/src/Parsers/Kusto/ParserKQLLimit.cpp
+++ b/src/Parsers/Kusto/ParserKQLLimit.cpp
@@ -2,14 +2,11 @@
 #include
 #include
 #include
-#include
-#include
-#include
 
 namespace DB
 {
 
-bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node,
Expected & expected)
+bool ParserKQLLimit::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
     ASTPtr limit_length;
 
diff --git a/src/Parsers/Kusto/ParserKQLLookup.cpp b/src/Parsers/Kusto/ParserKQLLookup.cpp
new file mode 100644
index 000000000000..2e7bf2b97844
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLLookup.cpp
@@ -0,0 +1,87 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int SYNTAX_ERROR;
+}
+/// Rewrites "... lookup [kind=leftouter|inner] <right>" into the text "<previous query> join kind=<...> <right> " returned through query; throws SYNTAX_ERROR on malformed input.
+bool ParserKQLLookup::updatePipeLine(OperationsPos & operations, String & query)
+{
+    Pos pos = operations.back().second;
+
+    if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon)
+        throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near lookup operator");
+    // prev_query: text from the first operation's position up to two tokens before this operator's position.
+    Pos start_pos = operations.front().second;
+    Pos end_pos = pos;
+    --end_pos;
+    --end_pos;
+
+    String prev_query(start_pos->begin, end_pos->end);
+    // leftouter is used when no kind= is given.
+    String join_kind = "kind=leftouter";
+    ParserKeyword s_kind("kind");
+    ParserToken equals(TokenType::Equals);
+    start_pos = pos;
+    end_pos = pos;
+
+    if (s_kind.ignore(pos))
+    {
+        if (!equals.ignore(pos))
+            throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid kind for lookup operator");
+
+        if (ParserKeyword("leftouter").ignore(pos))
+            join_kind = "kind=leftouter";
+        else if (ParserKeyword("inner").ignore(pos))
+            join_kind = "kind=inner";
+        else
+            throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid value of kind for lookup operator");
+    }
+    Pos right_table_start_pos = pos;
+    // Scan the right-hand side up to ';', end of input or a top-level '|'; end_pos tracks the last token seen.
+    size_t paren_count = 0;
+    while (!pos->isEnd() && pos->type != TokenType::Semicolon)
+    {
+        if (pos->type == TokenType::OpeningRoundBracket)
+            ++paren_count;
+        if (pos->type == TokenType::ClosingRoundBracket && paren_count > 0) // guard: size_t would wrap on a stray ')', after which no '|' would count as top-level
+            --paren_count;
+        if (pos->type == TokenType::PipeMark && paren_count == 0)
+            break;
+        end_pos = pos;
+        ++pos;
+    }
+
+    String right_expr = (right_table_start_pos <= end_pos) ? String(right_table_start_pos->begin, end_pos->end) : "";
+    if (right_expr.empty())
+        throw Exception(ErrorCodes::SYNTAX_ERROR, "lookup operator need right table");
+
+    query = std::format("{} join {} {} ", prev_query, join_kind, right_expr);
+
+    return true;
+}
+/// Always succeeds and ignores all three arguments; the only logic in this file is in updatePipeLine().
+bool ParserKQLLookup::parseImpl(Pos & /*pos*/, ASTPtr & /*node*/, Expected & /*expected*/) //(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    return true;
+}
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLLookup.h b/src/Parsers/Kusto/ParserKQLLookup.h
new file mode 100644
index 000000000000..d8880c7f5e5b
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLLookup.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+/// ParserKQLBase subclass labelled "KQL lookup"; overrides parseImpl() and updatePipeLine().
+class ParserKQLLookup : public ParserKQLBase
+{
+protected:
+    const char * getName() const override { return "KQL lookup"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+    bool updatePipeLine (OperationsPos & operations, String & query) override;
+
+};
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLMVExpand.cpp b/src/Parsers/Kusto/ParserKQLMVExpand.cpp
new file mode 100644
index 000000000000..00335f6f3582
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLMVExpand.cpp
@@ -0,0 +1,304 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+namespace DB::ErrorCodes
+{
+extern const int UNKNOWN_TYPE;
+}
+
+namespace DB
+{
+// KQL type names accepted inside "to typeof(...)", mapped to the type names later passed to accurateCastOrNull().
+std::unordered_map ParserKQLMVExpand::type_cast =
+{   {"bool", "Boolean"},
+    {"boolean", "Boolean"},
+    {"datetime", "DateTime"},
+    {"date", "DateTime"},
+    {"guid", "UUID"},
+    {"int", "Int32"},
+    {"long", "Int64"},
+    {"real", "Float64"},
+    {"double", "Float64"},
+    {"string", "String"}
+};
+/// Splits the mv-expand column list into ColumnArrayExpr entries (alias, expression, optional "to typeof(<type>)"); stops at '|', ';', end of input or "limit"; throws UNKNOWN_TYPE for a type missing from type_cast.
+bool ParserKQLMVExpand::parseColumnArrayExprs(ColumnArrayExprs & column_array_exprs, Pos & pos, Expected & expected)
+{
+    ParserToken equals(TokenType::Equals);
+    ParserToken open_bracket(TokenType::OpeningRoundBracket);
+    ParserToken close_bracket(TokenType::ClosingRoundBracket);
+    ParserToken comma(TokenType::Comma);
+
+    ParserKeyword s_to("to");
+    ParserKeyword s_type("typeof");
+    uint16_t bracket_count = 0;
+    Pos expr_begin_pos = pos;
+    Pos expr_end_pos = pos;
+
+    String alias;
+    String column_array_expr;
+    String to_type;
+    --expr_end_pos;
+    // expr_end_pos starts one token before expr_begin_pos, which the end-of-input branch below reads as "no end recorded yet".
+    while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
+    {
+        if (pos->type == TokenType::OpeningRoundBracket)
+            ++bracket_count;
+
+        if (pos->type == TokenType::ClosingRoundBracket && bracket_count > 0) // guard: uint16_t would wrap on a stray ')', after which no comma would count as top-level
+            --bracket_count;
+        // "<alias> = <expr>": the alias is the token just before '='.
+        if (String(pos->begin,pos->end) == "=")
+        {
+            --pos;
+            alias = String(pos->begin, pos->end);
+            ++pos;
+            ++pos;
+            expr_begin_pos = pos;
+        }
+        // add_columns: converts the text from expr_begin_pos to expr_end_pos and appends it; a missing alias defaults to the expression (single token) or "<first token>_".
+        auto add_columns = [&]
+        {
+            column_array_expr = getExprFromToken(String(expr_begin_pos->begin, expr_end_pos->end), pos.max_depth);
+
+            if (alias.empty())
+            {
+                alias = expr_begin_pos == expr_end_pos ? column_array_expr : String(expr_begin_pos->begin,expr_begin_pos->end) + "_";
+            }
+            column_array_exprs.push_back(ColumnArrayExpr(alias, column_array_expr, to_type));
+        };
+        // "<expr> to typeof(<type>)": the expression ends at the token before "to".
+        if (s_to.ignore(pos, expected))
+        {
+            --pos;
+            --pos;
+            expr_end_pos = pos;
+            ++pos;
+            ++pos;
+
+            column_array_expr = String(expr_begin_pos->begin, expr_end_pos->end);
+
+            if (!s_type.ignore(pos, expected))
+                return false;
+            if (!open_bracket.ignore(pos, expected))
+                return false;
+            to_type = String(pos->begin, pos->end);
+
+            if (!type_cast.contains(to_type))
+                throw Exception(ErrorCodes::UNKNOWN_TYPE, "{} is not a supported kusto data type for mv-expand", to_type);
+
+            ++pos;
+            if (!close_bracket.ignore(pos, expected))
+                return false;
+            --pos;
+        }
+        // A top-level comma, "limit" or ';' closes the current entry.
+        if ((pos->type == TokenType::Comma && bracket_count == 0) || String(pos->begin, pos->end) == "limit" || pos->type == TokenType::Semicolon)
+        {
+            if (column_array_expr.empty())
+            {
+                expr_end_pos = pos;
+                --expr_end_pos;
+            }
+            add_columns();
+            expr_begin_pos = pos;
+            expr_end_pos = pos;
+            ++expr_begin_pos;
+
+            alias.clear();
+            column_array_expr.clear();
+            to_type.clear();
+
+            if (pos->type == TokenType::Semicolon)
+                break;
+        }
+
+        if (String(pos->begin, pos->end) == "limit")
+            break;
+        ++pos;
+        if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon)
+        {
+            if (expr_end_pos < expr_begin_pos)
+            {
+                expr_end_pos = pos;
+                --expr_end_pos;
+            }
+            add_columns();
+            break;
+        }
+    }
+    return true;
+}
+/// Reads the optional bagexpansion=/kind= and with_itemindex= settings, then the column list, then an optional limit, into kql_mv_expand.
+bool ParserKQLMVExpand::parserMVExpand(KQLMVExpand & kql_mv_expand, Pos & pos, Expected & expected)
+{
+    ParserKeyword s_bagexpansion("bagexpansion");
+    ParserKeyword s_kind("kind");
+    ParserKeyword s_with_itemindex("with_itemindex");
+    ParserKeyword s_limit("limit");
+
+    ParserToken equals(TokenType::Equals);
+    ParserToken comma(TokenType::Comma);
+
+    auto & column_array_exprs = kql_mv_expand.column_array_exprs;
+    auto & bagexpansion = kql_mv_expand.bagexpansion;
+    auto & with_itemindex = kql_mv_expand.with_itemindex;
+    auto & limit = kql_mv_expand.limit;
+
+    if (s_bagexpansion.ignore(pos, expected))
+    {
+        if (!equals.ignore(pos, expected))
+            return false;
+        bagexpansion = String(pos->begin, pos->end);
+        ++pos;
+    }
+    else if (s_kind.ignore(pos, expected)) // kind= is stored in the same bagexpansion field
+    {
+        if (!equals.ignore(pos, expected))
+            return false;
+        bagexpansion = String(pos->begin, pos->end);
+        ++pos;
+    }
+
+    if (s_with_itemindex.ignore(pos, expected))
+    {
+        if (!equals.ignore(pos, expected))
+            return false;
+        with_itemindex = String(pos->begin, pos->end);
+        ++pos;
+    }
+
+    if (!parseColumnArrayExprs(column_array_exprs, pos, expected))
+        return false;
+    // The token after "limit" is stored as text; pos is not advanced past it here.
+    if (s_limit.ignore(pos, expected))
+        limit = String(pos->begin, pos->end);
+
+    return true;
+}
+
+bool ParserKQLMVExpand::genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_node, int32_t max_depth)
+{
+    String expand_str;
+    String cast_type_column_remove, cast_type_column_rename ;
+    String cast_type_column_restore, cast_type_column_restore_name ;
+    String row_count_str;
+    String extra_columns;
+    String input = "dummy_input";
+    for (auto column :
kql_mv_expand.column_array_exprs) + { + if (column.alias == column.column_array_expr) + expand_str = expand_str.empty() ? String("ARRAY JOIN ") + column.alias : expand_str + "," + column.alias; + else + { + expand_str = expand_str.empty() ? std::format("ARRAY JOIN {} AS {} ", column.column_array_expr, column.alias): expand_str + std::format(", {} AS {}", column.column_array_expr, column.alias); + extra_columns = extra_columns + ", " + column.alias; + } + + if (!column.to_type.empty()) + { + cast_type_column_remove = cast_type_column_remove.empty() ? " Except " + column.alias : cast_type_column_remove + " Except " + column.alias ; + String rename_str; + + if (type_cast[column.to_type] == "Boolean") + rename_str = std::format("accurateCastOrNull(toInt64OrNull(toString({0})),'{1}') as {0}_ali",column.alias, type_cast[column.to_type]); + else + rename_str = std::format("accurateCastOrNull({0},'{1}') as {0}_ali",column.alias, type_cast[column.to_type]); + + cast_type_column_rename = cast_type_column_rename.empty() ? rename_str : cast_type_column_rename + "," + rename_str; + cast_type_column_restore = cast_type_column_restore.empty() ? std::format(" Except {}_ali ", column.alias) : cast_type_column_restore + std::format(" Except {}_ali ", column.alias); + cast_type_column_restore_name = cast_type_column_restore_name.empty() ? std::format("{0}_ali as {0}", column.alias) : cast_type_column_restore_name + std::format(", {0}_ali as {0}", column.alias); + } + + if (!kql_mv_expand.with_itemindex.empty()) + { + row_count_str = row_count_str.empty() ? 
"length("+column.alias+")" : row_count_str + ", length("+column.alias+")"; + } + } + + String columns = "*"; + if (!row_count_str.empty()) + { + expand_str += std::format(", range(0, arrayMax([{}])) AS {} ", row_count_str, kql_mv_expand.with_itemindex); + columns = kql_mv_expand.with_itemindex + " , " + columns; + } + + if (!kql_mv_expand.limit.empty()) + expand_str += " LIMIT " + kql_mv_expand.limit; + + auto query = std::format("(Select {} {} From {} {})", columns, extra_columns, input, expand_str); + + ASTPtr sub_query_node; + Expected expected; + + if (cast_type_column_remove.empty()) + { + query = std::format("Select {} {} From {} {}", columns, extra_columns, input, expand_str); + if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth)) + return false; + if (!setSubQuerySource(sub_query_node, select_node, false, false)) + return false; + select_node = std::move(sub_query_node); + } + else + { + query = std::format("(Select {} {} From {} {})", columns, extra_columns, input, expand_str); + if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth)) + return false; + if (!setSubQuerySource(sub_query_node, select_node, true, false)) + return false; + select_node = std::move(sub_query_node); + + auto rename_query = std::format("(Select * {}, {} From {})", cast_type_column_remove, cast_type_column_rename, "query"); + if (!parseSQLQueryByString(std::make_unique(), rename_query, sub_query_node, max_depth)) + return false; + if (!setSubQuerySource(sub_query_node, select_node, true, true)) + return false; + + select_node = std::move(sub_query_node); + query = std::format("Select * {}, {} from {}", cast_type_column_restore, cast_type_column_restore_name, "rename_query"); + + if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth)) + return false; + sub_query_node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(select_node)); + select_node = std::move(sub_query_node); + } + return 
true; +} + +bool ParserKQLMVExpand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr setting; + ASTPtr select_expression_list; + auto begin = pos; + + KQLMVExpand kql_mv_expand; + if (!parserMVExpand(kql_mv_expand, pos, expected)) + return false; + if (!genQuery(kql_mv_expand, node, pos.max_depth)) + return false; + + const String setting_str = "enable_unaligned_array_join = 1"; + Tokens token_settings(setting_str.c_str(), setting_str.c_str() + setting_str.size()); + IParser::Pos pos_settings(token_settings, pos.max_depth); + + if (!ParserSetQuery(true).parse(pos_settings, setting, expected)) + return false; + node->as()->setExpression(ASTSelectQuery::Expression::SETTINGS, std::move(setting)); + + pos = begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLMVExpand.h b/src/Parsers/Kusto/ParserKQLMVExpand.h new file mode 100644 index 000000000000..ee5719dd4f8a --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLMVExpand.h @@ -0,0 +1,40 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLMVExpand : public ParserKQLBase +{ + +protected: + static std::unordered_map type_cast; + + struct ColumnArrayExpr + { + String alias; + String column_array_expr; + String to_type; + ColumnArrayExpr(String alias_, String column_array_expr_, String to_type_) + :alias(alias_), column_array_expr(column_array_expr_), to_type(to_type_){} + }; + using ColumnArrayExprs = std::vector; + + struct KQLMVExpand + { + ColumnArrayExprs column_array_exprs; + String bagexpansion; + String with_itemindex; + String limit; + }; + + static bool parseColumnArrayExprs(ColumnArrayExprs & column_array_exprs, Pos & pos, Expected & expected); + static bool parserMVExpand(KQLMVExpand & kql_mv_expand, Pos & pos, Expected & expected); + static bool genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_node, int32_t max_depth); + + const char * getName() const override { return "KQL mv-expand"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) 
override; +}; +} diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp new file mode 100644 index 000000000000..ad1bb1bd515b --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp @@ -0,0 +1,551 @@ +#include "ParserKQLTimespan.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +void ParserKQLMakeSeries ::parseSingleAggregationColumn(AggregationColumns & aggregation_columns, Pos begin, Pos end, size_t & column_index) +{ + std::unordered_set allowed_aggregation( + {"avg", + "avgif", + "count", + "countif", + "dcount", + "dcountif", + "max", + "maxif", + "min", + "minif", + "percentile", + "take_any", + "stdev", + "sum", + "sumif", + "variance"}); + + String alias; + String aggregation_fun; + String column; + String default_value = "0"; + auto pos = begin; + + bool has_default = false; + if (begin == end) + throw Exception(ErrorCodes::SYNTAX_ERROR, "No aggregation in make-series operator"); + + if (String(pos->begin, pos->end) == "=") + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid equal symbol (=) in make-series operator"); + + auto agg_start_pos = pos; + auto agg_end_pos = pos; + BracketCount bracket_count; + while (pos < end) + { + bracket_count.count(pos); + if (String(pos->begin, pos->end) == "=" && bracket_count.isZero()) + { + if (!alias.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid equal symbol (=) in make-series operator"); + --pos; + if (begin != pos) + throw Exception(ErrorCodes::SYNTAX_ERROR, "{} is not a valid alias", std::string_view(begin->begin, pos->end)); + ++pos; + alias = String(begin->begin, begin->end); + begin = pos; + ++begin; + } + + if (String(pos->begin, pos->end) == "default" && bracket_count.isZero()) + { + has_default = true; + if (!aggregation_fun.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Extra keyword (default) in make-series 
operator"); + --pos; + if (pos < begin) + throw Exception(ErrorCodes::SYNTAX_ERROR, "No aggregation in make-series operator"); + + aggregation_fun = String(begin->begin, pos->end); + agg_start_pos = begin; + agg_end_pos = pos; + ++pos; + ++pos; + if (String(pos->begin, pos->end) != "=") + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid default in make-series operator"); + begin = pos; + ++begin; + } + ++pos; + } + --end; + if (aggregation_fun.empty()) + { + if (end < begin) + throw Exception(ErrorCodes::SYNTAX_ERROR, "No aggregation in make-series operator"); + aggregation_fun = String(begin->begin, end->end); + agg_start_pos = begin; + agg_end_pos = end; + } + else if (has_default) + { + if (end < begin) + throw Exception(ErrorCodes::SYNTAX_ERROR, "No aggregation in make-series operator"); + default_value = getExprFromToken(String(begin->begin, end->end), pos.max_depth); + } + + auto agg_fun = String(agg_start_pos->begin, agg_start_pos->end); + String tmp_alias; + if (allowed_aggregation.contains(agg_fun)) + tmp_alias = agg_fun + "_"; + else + { + tmp_alias = std::format("Column{}", column_index); + ++column_index; + agg_fun.clear(); + } + + auto last_bareword_pos = agg_start_pos; + while (agg_start_pos < agg_end_pos) + { + if (agg_start_pos->type == TokenType::BareWord) + last_bareword_pos = agg_start_pos; + + if (agg_start_pos->type == TokenType::ClosingRoundBracket) + { + column = String(last_bareword_pos->begin, last_bareword_pos->end); + --agg_start_pos; + if (agg_start_pos == last_bareword_pos) + { + --agg_start_pos; + if (agg_start_pos->type == TokenType::OpeningRoundBracket && !agg_fun.empty()) + tmp_alias += column; + } + break; + } + ++agg_start_pos; + } + + if (alias.empty()) + alias = tmp_alias; + aggregation_columns.emplace_back(alias, aggregation_fun, column, default_value); +} + +bool ParserKQLMakeSeries ::parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos) +{ + BracketCount bracket_count; + auto begin = pos; + size_t 
column_index = 1; + while (!pos->isEnd()) + { + bracket_count.count(pos); + if ((pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) && bracket_count.isZero()) + break; + + if (pos->type == TokenType::Comma && bracket_count.isZero()) + { + parseSingleAggregationColumn(aggregation_columns, begin, pos, column_index); + begin = pos; ++begin; // the next aggregation starts at the token right after this comma + } + if (String(pos->begin, pos->end) == "on" && bracket_count.isZero()) + { + parseSingleAggregationColumn(aggregation_columns, begin, pos, column_index); + return true; + } + ++pos; + } + + return false; +} + +bool ParserKQLMakeSeries ::parseAxisColumn(String & axis_column, Pos & pos) +{ + auto begin = pos; + BracketCount bracket_count; + while (!pos->isEnd()) + { + bracket_count.count(pos); + if ((pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) && bracket_count.isZero()) + break; + + if (auto keyword = String(pos->begin, pos->end); + (keyword == "from" || keyword == "to" || keyword == "step") && bracket_count.isZero()) + { + --pos; + axis_column = String(begin->begin, pos->end); + ++pos; + return true; + } + + ++pos; + } + return false; +} + +bool ParserKQLMakeSeries ::parseFromToStepClause(FromToStepClause & from_to_step, Pos & pos) +{ + auto begin = pos; + auto from_pos = begin; + auto to_pos = begin; + auto step_pos = begin; + auto end_pos = begin; + + BracketCount bracket_count; + while (!pos->isEnd()) + { + bracket_count.count(pos); + if ((pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) && bracket_count.isZero()) + break; + + if (String(pos->begin, pos->end) == "from" && bracket_count.isZero()) + from_pos = pos; + if (String(pos->begin, pos->end) == "to" && bracket_count.isZero()) + to_pos = pos; + if (String(pos->begin, pos->end) == "step" && bracket_count.isZero()) + step_pos = pos; + if (String(pos->begin, pos->end) == "by" && bracket_count.isZero()) + { + end_pos = pos; + break; + } + ++pos; + } + + if (end_pos == begin) + end_pos = pos; + + if 
(String(step_pos->begin, step_pos->end) != "step") + return false; + + if (String(from_pos->begin, from_pos->end) == "from") + { + ++from_pos; + auto end_from_pos = (to_pos != begin) ? to_pos : step_pos; + --end_from_pos; + from_to_step.from_str = String(from_pos->begin, end_from_pos->end); + } + + if (String(to_pos->begin, to_pos->end) == "to") + { + ++to_pos; + --step_pos; + from_to_step.to_str = String(to_pos->begin, step_pos->end); + ++step_pos; + } + --end_pos; + ++step_pos; + from_to_step.step_str = String(step_pos->begin, end_pos->end); + + if (std::optional ticks; String(step_pos->begin, step_pos->end) == "time" || String(step_pos->begin, step_pos->end) == "timespan" + || ParserKQLTimespan::tryParse(from_to_step.step_str, ticks)) + { + // TODO: this is a hack of the ugliest kind that can only be fixed by supporting arbitrary expressions in make-series + static constexpr std::string_view wrapper = "toIntervalNanosecond("; + const auto timespan = getExprFromToken(from_to_step.step_str, pos.max_depth); + const auto value = timespan.substr(wrapper.length(), timespan.length() - wrapper.length() - 1); + + from_to_step.is_timespan = true; + from_to_step.step = std::stod(value) * 1e-9; + } + else + from_to_step.step = std::stod(from_to_step.step_str); + + return true; +} + +bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & select_node, const uint32_t & max_depth) +{ + const uint64_t era_diff + = 62135596800; // this magic number is the difference in seconds between 0001-01-01 (Azure start time) and 1970-01-01 (CH start time) + + String start_str, end_str; + String sub_query, main_query; + + const auto & aggregation_columns = kql_make_series.aggregation_columns; + const auto & from_to_step = kql_make_series.from_to_step; + auto & subquery_columns = kql_make_series.subquery_columns; + auto & axis_column = kql_make_series.axis_column; + auto & group_expression = kql_make_series.group_expression; + auto step = from_to_step.step; + + if 
(!kql_make_series.from_to_step.from_str.empty()) + start_str = getExprFromToken(kql_make_series.from_to_step.from_str, max_depth); + + if (!kql_make_series.from_to_step.to_str.empty()) + end_str = getExprFromToken(from_to_step.to_str, max_depth); + + auto date_type_cast = [&](const String & src) + { + Tokens tokens(src.c_str(), src.c_str() + src.size()); + IParser::Pos pos(tokens, max_depth); + String res; + while (!pos->isEnd()) + { + auto tmp = String(pos->begin, pos->end); + if (tmp == "kql_datetime" || tmp == "kql_todatetime") + { + ++pos; + auto datetime_start_pos = pos; + auto datetime_end_pos = pos; + BracketCount bracket_count; + while (!pos->isEnd()) + { + bracket_count.count(pos); + if (pos->type == TokenType::ClosingRoundBracket && bracket_count.isZero()) + { + ++datetime_start_pos; + datetime_end_pos = pos; + --datetime_end_pos; + tmp = std::format("toDateTime64({}, 9, 'UTC')", String(datetime_start_pos->begin, datetime_end_pos->end)); + break; + } + ++pos; + } + } + res = res.empty() ? 
tmp : res + " " + tmp; + ++pos; + } + return res; + }; + + start_str = date_type_cast(start_str); + end_str = date_type_cast(end_str); + + String bin_str, start, end; + + uint64_t diff = 0; + String axis_column_format; + String axis_str; + + auto get_group_expression_alias = [&] + { + std::vector group_expression_tokens; + Tokens tokens(group_expression.c_str(), group_expression.c_str() + group_expression.size()); + IParser::Pos pos(tokens, max_depth); + while (!pos->isEnd()) + { + if (String(pos->begin, pos->end) == "AS") + { + if (!group_expression_tokens.empty()) + group_expression_tokens.pop_back(); + ++pos; + group_expression_tokens.emplace_back(pos->begin, pos->end); + } + else + group_expression_tokens.emplace_back(pos->begin, pos->end); + ++pos; + } + String res; + for (auto const & token : group_expression_tokens) + res = res + token + " "; + return res; + }; + + auto group_expression_alias = get_group_expression_alias(); + + if (from_to_step.is_timespan) + { + axis_column_format = std::format("toFloat64(toDateTime64({}, 9, 'UTC'))", axis_column); + } + else + axis_column_format = std::format("toFloat64({})", axis_column); + + if (!start_str.empty()) // has from + { + bin_str = std::format( + " toFloat64({0}) + (toInt64((({1} - toFloat64({0})) / {2})) * {2}) AS {3}_ali", + start_str, + axis_column_format, + step, + axis_column); + start = std::format("toUInt64({})", start_str); + } + else + { + if (from_to_step.is_timespan) + diff = era_diff; + bin_str = std::format(" toFloat64(toInt64(({0} + {1}) / {2}) * {2}) AS {3}_ali ", axis_column_format, diff, step, axis_column); + } + + if (!end_str.empty()) + end = std::format("toUInt64({})", end_str); + + String range; + String condition; + + if (!start_str.empty() && !end_str.empty()) + { + range = std::format("range({}, {}, toUInt64({}))", start, end, step); + condition = std::format("where toInt64({0}) >= {1} and toInt64({0}) < {2}", axis_column_format, start, end); + } + else if (start_str.empty() && 
!end_str.empty()) + { + range = std::format("range(low, {} + {}, toUInt64({}))", end, diff, step); + condition = std::format("where toInt64({0}) - {1} < {2}", axis_column_format, diff, end); + } + else if (!start_str.empty() && end_str.empty()) + { + range = std::format("range({}, high, toUInt64({}))", start, step); + condition = std::format("where toInt64({}) >= {}", axis_column_format, start); + } + else + { + range = std::format("range(low, high, toUInt64({}))", step); + condition = " "; + } + + auto range_len = std::format("length({})", range); + + String sub_sub_query; + if (group_expression.empty()) + sub_sub_query = std::format( + " (Select {0}, {1} FROM {2} {4} GROUP BY {3}_ali ORDER BY {3}_ali) ", + subquery_columns, + bin_str, + "table_name", + axis_column, + condition); + else + sub_sub_query = std::format( + " (Select {0}, {1}, {2} FROM {3} {5} GROUP BY {0}, {4}_ali ORDER BY {4}_ali) ", + group_expression, + subquery_columns, + bin_str, + "table_name", + axis_column, + condition); + + ASTPtr sub_query_node; + + if (!ParserSimpleCHSubquery(select_node).parseByString(sub_sub_query, sub_query_node, max_depth)) + return false; + select_node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(sub_query_node)); + + if (!group_expression.empty()) + main_query = std::format("{} ", group_expression_alias); + + auto axis_and_agg_alias_list = axis_column; + auto final_axis_agg_alias_list = std::format("tupleElement(zipped,1) AS {}", axis_column); + int idx = 2; + for (auto agg_column : aggregation_columns) + { + String agg_group_column = std::format( + "arrayConcat(groupArray({}_ali) as ga, arrayMap(x -> ({}),range(0,toUInt32({} - length(ga) < 0 ? 0 : {} - length(ga)),1))) as " + "{}", + agg_column.alias, + agg_column.default_value, + range_len, + range_len, + agg_column.alias); + main_query = main_query.empty() ? 
agg_group_column : main_query + ", " + agg_group_column; + + axis_and_agg_alias_list += ", " + agg_column.alias; + final_axis_agg_alias_list += std::format(", tupleElement(zipped,{}) AS {}", idx++, agg_column.alias); // zipped element 1 is the axis; each aggregation reads the next element + } + + if (from_to_step.is_timespan) + axis_str = std::format( + "arrayDistinct(arrayConcat(groupArray(toDateTime64({0}_ali - {1}, 9, 'UTC')), arrayMap(x->(toDateTime64(x - {1}, 9, 'UTC')), " + "{2}))) as {0}", + axis_column, + diff, + range); + else + axis_str + = std::format("arrayDistinct(arrayConcat(groupArray({0}_ali), arrayMap(x->(toFloat64(x)), {1}))) as {0}", axis_column, range); + + main_query += ", " + axis_str; + auto sub_group_by = group_expression.empty() ? "" : std::format("GROUP BY {}", group_expression_alias); + + sub_query = std::format( + "( SELECT toUInt64(min({}_ali)) AS low, toUInt64(max({}_ali))+ {} AS high, arraySort(arrayZip({})) as zipped, {} FROM {} {} )", + axis_column, + axis_column, + step, + axis_and_agg_alias_list, + main_query, + sub_sub_query, + sub_group_by); + + if (group_expression.empty()) + main_query = std::format("{}", final_axis_agg_alias_list); + else + main_query = std::format("{},{}", group_expression_alias, final_axis_agg_alias_list); + + if (!ParserSimpleCHSubquery(select_node).parseByString(sub_query, sub_query_node, max_depth)) + return false; + select_node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(sub_query_node)); + + kql_make_series.sub_query = std::move(sub_query); + kql_make_series.main_query = std::move(main_query); + + return true; +} + +bool ParserKQLMakeSeries ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto begin = pos; + ParserKeyword s_on("on"); + ParserKeyword s_by("by"); + + ASTPtr select_expression_list; + + KQLMakeSeries kql_make_series; + auto & aggregation_columns = kql_make_series.aggregation_columns; + auto & from_to_step = kql_make_series.from_to_step; + auto & subquery_columns = kql_make_series.subquery_columns; + auto & axis_column = 
kql_make_series.axis_column; + auto & group_expression = kql_make_series.group_expression; + + if (!parseAggregationColumns(aggregation_columns, pos)) + return false; + + if (!s_on.ignore(pos, expected)) + return false; + + if (!parseAxisColumn(axis_column, pos)) + return false; + + if (!parseFromToStepClause(from_to_step, pos)) + return false; + + if (s_by.ignore(pos, expected)) + { + group_expression = getExprFromToken(pos); + if (group_expression.empty()) + return false; + } + + for (auto & agg_column : aggregation_columns) + { + String column_str = std::format("{} AS {}_ali", agg_column.aggregation_fun, agg_column.alias); + subquery_columns = subquery_columns.empty() ? column_str : subquery_columns + ", " + column_str; + } + + if (!makeSeries(kql_make_series, node, pos.max_depth)) return false; // on failure main_query was never assigned, so do not go on to parse it + + Tokens token_main_query(kql_make_series.main_query.c_str(), kql_make_series.main_query.c_str() + kql_make_series.main_query.size()); + IParser::Pos pos_main_query(token_main_query, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(pos_main_query, select_expression_list, expected)) + return false; + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + + pos = begin; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.h b/src/Parsers/Kusto/ParserKQLMakeSeries.h new file mode 100644 index 000000000000..8fe5625c8afe --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.h @@ -0,0 +1,55 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLMakeSeries : public ParserKQLBase +{ +protected: + struct AggregationColumn + { + String alias; + String aggregation_fun; + String column; + String default_value; + AggregationColumn(const String & alias_, const String & aggregation_fun_, const String & column_, const String & default_value_) + : alias(alias_), aggregation_fun(aggregation_fun_), column(column_), default_value(default_value_) + { + } + }; + + using AggregationColumns = std::vector; 
+ + struct FromToStepClause + { + String from_str; + String to_str; + String step_str; + bool is_timespan = false; + double step; + }; + + struct KQLMakeSeries + { + AggregationColumns aggregation_columns; + FromToStepClause from_to_step; + String axis_column; + String group_expression; + String subquery_columns; + String sub_query; + String main_query; + }; + + static bool makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & select_node, const uint32_t & max_depth); + static bool parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos); + static bool parseFromToStepClause(FromToStepClause & from_to_step, Pos & pos); + static void parseSingleAggregationColumn(AggregationColumns & aggregation_columns, Pos begin, Pos end, size_t & column_index); + static bool parseAxisColumn(String & axis_column, Pos & pos); + const char * getName() const override { return "KQL make-series"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index 1575cffcc394..849ea9fa1fb4 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -1,71 +1,382 @@ -#include -#include -#include -#include +#include "ParserKQLOperators.h" +#include +#include +#include +#include #include +#include +#include +#include "KustoFunctions/IParserKQLFunction.h" +#include "ParserKQLQuery.h" +#include "ParserKQLStatement.h" -namespace DB +#include +#include + +namespace DB::ErrorCodes +{ +extern const int LOGICAL_ERROR; +extern const int SYNTAX_ERROR; +} + +namespace { +enum class WildcardsPos : uint8_t +{ + none, + left, + right, + both +}; -namespace ErrorCodes +enum class KQLOperatorValue : uint16_t +{ + none, + between, + not_between, + contains, + not_contains, + contains_cs, + not_contains_cs, + endswith, + not_endswith, + endswith_cs, + not_endswith_cs, + equal, /// =~ + not_equal, /// !~ + equal_cs, /// = + 
not_equal_cs, /// != + has, + not_has, + has_all, + has_any, + has_cs, + not_has_cs, + hasprefix, + not_hasprefix, + hasprefix_cs, + not_hasprefix_cs, + hassuffix, + not_hassuffix, + hassuffix_cs, + not_hassuffix_cs, + in_cs, /// in + not_in_cs, /// !in + in, /// in~ + not_in, /// !in~ + matches_regex, + startswith, + not_startswith, + startswith_cs, + not_startswith_cs, +}; + +const std::unordered_map KQLOperator = { + {"between", KQLOperatorValue::between}, + {"!between", KQLOperatorValue::not_between}, + {"contains", KQLOperatorValue::contains}, + {"!contains", KQLOperatorValue::not_contains}, + {"contains_cs", KQLOperatorValue::contains_cs}, + {"!contains_cs", KQLOperatorValue::not_contains_cs}, + {"endswith", KQLOperatorValue::endswith}, + {"!endswith", KQLOperatorValue::not_endswith}, + {"endswith_cs", KQLOperatorValue::endswith_cs}, + {"!endswith_cs", KQLOperatorValue::not_endswith_cs}, + {"=~", KQLOperatorValue::equal}, + {"!~", KQLOperatorValue::not_equal}, + {"==", KQLOperatorValue::equal_cs}, + {"!=", KQLOperatorValue::not_equal_cs}, + {"has", KQLOperatorValue::has}, + {"!has", KQLOperatorValue::not_has}, + {"has_all", KQLOperatorValue::has_all}, + {"has_any", KQLOperatorValue::has_any}, + {"has_cs", KQLOperatorValue::has_cs}, + {"!has_cs", KQLOperatorValue::not_has_cs}, + {"hasprefix", KQLOperatorValue::hasprefix}, + {"!hasprefix", KQLOperatorValue::not_hasprefix}, + {"hasprefix_cs", KQLOperatorValue::hasprefix_cs}, + {"!hasprefix_cs", KQLOperatorValue::not_hasprefix_cs}, + {"hassuffix", KQLOperatorValue::hassuffix}, + {"!hassuffix", KQLOperatorValue::not_hassuffix}, + {"hassuffix_cs", KQLOperatorValue::hassuffix_cs}, + {"!hassuffix_cs", KQLOperatorValue::not_hassuffix_cs}, + {"in", KQLOperatorValue::in_cs}, + {"!in", KQLOperatorValue::not_in_cs}, + {"in~", KQLOperatorValue::in}, + {"!in~", KQLOperatorValue::not_in}, + {"matches regex", KQLOperatorValue::matches_regex}, + {"startswith", KQLOperatorValue::startswith}, + {"!startswith", 
KQLOperatorValue::not_startswith}, + {"startswith_cs", KQLOperatorValue::startswith_cs}, + {"!startswith_cs", KQLOperatorValue::not_startswith_cs}, +}; + +void rebuildSubqueryForInOperator(DB::ASTPtr & node, bool useLowerCase) { - extern const int SYNTAX_ERROR; + //A sub-query for in operator in kql can have multiple columns, but only takes the first column. + //A sub-query for in operator in ClickHouse can not have multiple columns + //So only take the first column if there are multiple columns. + //select * not working for subquery. (a tabular statement without project) + + const auto selectColumns = node->children[0]->children[0]->as()->select(); + while (selectColumns->children.size() > 1) + selectColumns->children.pop_back(); + + if (useLowerCase) + { + auto args = std::make_shared(); + args->children.push_back(selectColumns->children[0]); + auto func_lower = std::make_shared(); + func_lower->name = "lower"; + func_lower->children.push_back(selectColumns->children[0]); + func_lower->arguments = args; + if (selectColumns->children[0]->as()) + func_lower->alias = std::move(selectColumns->children[0]->as()->alias); + else if (selectColumns->children[0]->as()) + func_lower->alias = std::move(selectColumns->children[0]->as()->alias); + + auto funcs = std::make_shared(); + funcs->children.push_back(func_lower); + selectColumns->children[0] = std::move(funcs); + } } -String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens, IParser::Pos &token_pos,String kql_op, String ch_op) +std::string applyFormatString(const std::string_view format_string, const std::string & haystack, const std::string & needle) { - String new_expr; - Expected expected; - ParserToken s_lparen(TokenType::OpeningRoundBracket); + return std::vformat(format_string, std::make_format_args(haystack, needle)); +} + +std::string constructHasOperatorTranslation(const KQLOperatorValue kql_op, const std::string & haystack, const std::string & needle) +{ + if (kql_op != KQLOperatorValue::has && kql_op != 
KQLOperatorValue::not_has && kql_op != KQLOperatorValue::has_cs + && kql_op != KQLOperatorValue::not_has_cs && kql_op != KQLOperatorValue::has_all && kql_op != KQLOperatorValue::has_any) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unexpected operator: {}", magic_enum::enum_name(kql_op)); + + const auto tokens = std::invoke([&needle] { + std::vector result; + size_t pos = 0; + size_t start = 0; + size_t length = 0; + DB::SplitTokenExtractor token_extractor; + while (pos < needle.length() && token_extractor.nextInString(needle.c_str(), needle.length(), &pos, &start, &length)) + result.emplace_back(needle.c_str() + start, length); + + return result; + }); + + const auto is_case_sensitive = kql_op == KQLOperatorValue::has_cs || kql_op == KQLOperatorValue::not_has_cs; + const auto has_token_suffix = is_case_sensitive ? "" : "CaseInsensitive"; + const auto has_all_tokens + = std::accumulate(tokens.cbegin(), tokens.cend(), std::string(), [&has_token_suffix, &haystack](auto acc, const auto & token) { + return std::move(acc) + std::format("hasToken{}({}, '{}') and ", has_token_suffix, haystack, token); + }); + + const auto is_negation = kql_op == KQLOperatorValue::not_has || kql_op == KQLOperatorValue::not_has_cs; + return std::format( + "{4}ifNull(hasToken{3}OrNull({0}, {1}), {2} position{3}({0}, {1}) > 0)", + haystack, + needle, + has_all_tokens, + has_token_suffix, + is_negation ? "not " : ""); +} +} + +String genHasAnyAllOpExpr( + std::vector & tokens, + DB::IParser::Pos & token_pos, + const std::string & kql_op, + const std::function & translate) +{ + std::string new_expr; + DB::Expected expected; + DB::ParserToken s_lparen(DB::TokenType::OpeningRoundBracket); ++token_pos; if (!s_lparen.ignore(token_pos, expected)) - throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); auto haystack = tokens.back(); - - String logic_op = (kql_op == "has_all") ? 
" and " : " or "; - - while (!token_pos->isEnd() && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) + const auto * const logic_op = (kql_op == "has_all") ? " and " : " or "; + while (!token_pos->isEnd() && token_pos->type != DB::TokenType::PipeMark && token_pos->type != DB::TokenType::Semicolon) { - auto tmp_arg = String(token_pos->begin, token_pos->end); - if (token_pos->type == TokenType::Comma) - new_expr = new_expr + logic_op; + auto tmp_arg = DB::IParserKQLFunction::getExpression(token_pos); + if (token_pos->type == DB::TokenType::Comma) + new_expr += logic_op; else - new_expr = new_expr + ch_op + "(" + haystack + "," + tmp_arg + ")"; + new_expr += translate(haystack, tmp_arg); ++token_pos; - if (token_pos->type == TokenType::ClosingRoundBracket) + if (token_pos->type == DB::TokenType::ClosingRoundBracket) break; - } tokens.pop_back(); return new_expr; } -String KQLOperators::genInOpExpr(IParser::Pos &token_pos, String kql_op, String ch_op) +String genEqOpExprCis(std::vector & tokens, DB::IParser::Pos & token_pos, const DB::String & ch_op) { - String new_expr; + DB::String tmp_arg(token_pos->begin, token_pos->end); + + if (tokens.empty() || tmp_arg != "~") + return tmp_arg; + + DB::String new_expr; + new_expr += "lower(" + tokens.back() + ")" + " "; + new_expr += ch_op + " "; + ++token_pos; + + if (token_pos->type == DB::TokenType::StringLiteral || token_pos->type == DB::TokenType::QuotedIdentifier) + new_expr += "lower('" + DB::IParserKQLFunction::escapeSingleQuotes(String(token_pos->begin + 1, token_pos->end - 1)) + "')"; + else + new_expr += "lower(" + DB::IParserKQLFunction::getExpression(token_pos) + ")"; + + tokens.pop_back(); + return new_expr; +} + +String genBetweenOpExpr(std::vector & tokens, DB::IParser::Pos & token_pos, const String & ch_op) +{ + DB::String new_expr; + new_expr += ch_op + "("; + new_expr += tokens.back() + ","; + tokens.pop_back(); + ++token_pos; + + DB::BracketCount bracket_count; + 
bracket_count.count(token_pos); + + ++token_pos; + + while (!token_pos->isEnd()) + { + if ((token_pos->type == DB::TokenType::PipeMark || token_pos->type == DB::TokenType::Semicolon)) + break; + if (token_pos->type == DB::TokenType::Dot) + break; + new_expr += DB::IParserKQLFunction::getExpression(token_pos); + ++token_pos; + } + new_expr += ","; + + DB::ParserToken dot_token(DB::TokenType::Dot); + + if (dot_token.ignore(token_pos) && dot_token.ignore(token_pos)) + { + if (dot_token.ignore(token_pos)) + throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error, number of dots do not match."); + + while (!token_pos->isEnd()) + { + bracket_count.count(token_pos); + if ((token_pos->type == DB::TokenType::PipeMark || token_pos->type == DB::TokenType::Semicolon) && bracket_count.isZero()) + break; + new_expr += DB::IParserKQLFunction::getExpression(token_pos); - ParserToken s_lparen(TokenType::OpeningRoundBracket); + if (token_pos->type == DB::TokenType::ClosingRoundBracket && bracket_count.isZero()) + { + break; + } + ++token_pos; + } + } + else + throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error, number of dots do not match."); + + return new_expr; +} + +String genInOpExprCis(std::vector & tokens, DB::IParser::Pos & token_pos, const DB::String & kql_op, const DB::String & ch_op) +{ + DB::KQLContext kql_context; + DB::ParserKQLTableFunction kqlfun_p(kql_context); + DB::ParserToken s_lparen(DB::TokenType::OpeningRoundBracket); - ASTPtr select; - Expected expected; + DB::ASTPtr select; + DB::Expected expected; + DB::String new_expr; ++token_pos; if (!s_lparen.ignore(token_pos, expected)) - throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + + if (tokens.empty()) + throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + + new_expr = "lower(" + tokens.back() + ") "; + tokens.pop_back(); + auto pos = token_pos; + if 
(kqlfun_p.parse(pos, select, expected)) + { + rebuildSubqueryForInOperator(select, true); + new_expr += ch_op + " (" + serializeAST(*select) + ")"; + token_pos = pos; + return new_expr; + } --token_pos; --token_pos; - return ch_op; + new_expr += ch_op; + while (!token_pos->isEnd() && token_pos->type != DB::TokenType::PipeMark && token_pos->type != DB::TokenType::Semicolon) + { + auto tmp_arg = DB::String(token_pos->begin, token_pos->end); + if (token_pos->type != DB::TokenType::Comma && token_pos->type != DB::TokenType::ClosingRoundBracket + && token_pos->type != DB::TokenType::OpeningRoundBracket && token_pos->type != DB::TokenType::OpeningSquareBracket + && token_pos->type != DB::TokenType::ClosingSquareBracket && tmp_arg != "~" && tmp_arg != "dynamic") + { + if (token_pos->type == DB::TokenType::StringLiteral || token_pos->type == DB::TokenType::QuotedIdentifier) + new_expr += "lower('" + DB::IParserKQLFunction::escapeSingleQuotes(String(token_pos->begin + 1, token_pos->end - 1)) + "')"; + else + new_expr += "lower(" + tmp_arg + ")"; + } + else if (tmp_arg != "~" && tmp_arg != "dynamic" && tmp_arg != "[" && tmp_arg != "]") + new_expr += tmp_arg; + + if (token_pos->type == DB::TokenType::ClosingRoundBracket) + break; + ++token_pos; + } + return new_expr; } -String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos) +std::string genInOpExpr(DB::IParser::Pos & token_pos, const std::string & kql_op, const std::string & ch_op) { - String new_expr, left_wildcards, right_wildcards, left_space, right_space; + DB::KQLContext kql_context; + DB::ParserKQLTableFunction kqlfun_p(kql_context); + DB::ParserToken s_lparen(DB::TokenType::OpeningRoundBracket); + + DB::ASTPtr select; + DB::Expected expected; + + ++token_pos; + if (!s_lparen.ignore(token_pos, expected)) + throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + + auto pos = token_pos; 
+ if (kqlfun_p.parse(pos, select, expected)) + { + rebuildSubqueryForInOperator(select, false); + auto new_expr = ch_op + " (" + serializeAST(*select) + ")"; + token_pos = pos; + return new_expr; + } + + --token_pos; + --token_pos; + return ch_op; +} + +std::string genHaystackOpExpr( + std::vector & tokens, + DB::IParser::Pos & token_pos, + const std::string & kql_op, + const std::function & translate, + WildcardsPos wildcards_pos, + WildcardsPos space_pos = WildcardsPos::none) +{ + std::string new_expr, left_wildcards, right_wildcards, left_space, right_space; switch (wildcards_pos) { @@ -73,7 +384,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; case WildcardsPos::left: - left_wildcards ="%"; + left_wildcards = "%"; break; case WildcardsPos::right: @@ -81,7 +392,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; case WildcardsPos::both: - left_wildcards ="%"; + left_wildcards = "%"; right_wildcards = "%"; break; } @@ -92,7 +403,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; case WildcardsPos::left: - left_space =" "; + left_space = " "; break; case WildcardsPos::right: @@ -100,260 +411,281 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos break; case WildcardsPos::both: - left_space =" "; + left_space = " "; right_space = " "; break; } ++token_pos; - if (!tokens.empty() && ((token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) - new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + left_space + String(token_pos->begin + 1,token_pos->end - 1) + right_space + right_wildcards + "')"; - else if (!tokens.empty() && ((token_pos)->type == TokenType::BareWord)) + if (!tokens.empty() && (token_pos->type == DB::TokenType::StringLiteral || token_pos->type == DB::TokenType::QuotedIdentifier)) + new_expr = translate( + tokens.back(), + "'" + left_wildcards + left_space + 
DB::IParserKQLFunction::escapeSingleQuotes(String(token_pos->begin + 1, token_pos->end - 1)) + + right_space + right_wildcards + "'"); + else if (!tokens.empty() && token_pos->type == DB::TokenType::BareWord) { - auto tmp_arg = String(token_pos->begin, token_pos->end); - new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + left_space + "', " + tmp_arg +", '"+ right_space + right_wildcards + "'))"; + auto tmp_arg = DB::IParserKQLFunction::getExpression(token_pos); + new_expr = translate( + tokens.back(), "concat('" + left_wildcards + left_space + "', " + tmp_arg + ", '" + right_space + right_wildcards + "')"); } else - throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + throw DB::Exception(DB::ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); + tokens.pop_back(); return new_expr; } -bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) +namespace DB { +bool KQLOperators::convert(std::vector & tokens, IParser::Pos & pos) +{ + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + return false; + auto begin = pos; + auto token = String(pos->begin, pos->end); - if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + String op = token; + if (token == "!") + { + ++pos; + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid negative operator"); + op = "!" 
+ String(pos->begin, pos->end); + } + else if (token == "matches") { - KQLOperatorValue op_value = KQLOperatorValue::none; - - auto token = String(pos->begin,pos->end); - - String op = token; - if (token == "!") - { - ++pos; - if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) - throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid negative operator"); - op ="!"+String(pos->begin,pos->end); - } - else if (token == "matches") - { - ++pos; - if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) - { - if (String(pos->begin,pos->end) == "regex") - op +=" regex"; - else - --pos; - } - } - else - { - op = token; - } - ++pos; if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - if (String(pos->begin,pos->end) == "~") - op +="~"; + if (String(pos->begin, pos->end) == "regex") + op += " regex"; else --pos; } + } + + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin, pos->end) == "~") + op += "~"; else --pos; + } + else + --pos; - if (KQLOperator.find(op) == KQLOperator.end()) - { - pos = begin; - return false; - } + const auto op_it = KQLOperator.find(op); + if (op_it == KQLOperator.end()) + { + pos = begin; + return false; + } - op_value = KQLOperator[op]; + String new_expr; - String new_expr; + const auto & op_value = op_it->second; + if (op_value == KQLOperatorValue::none) + { + tokens.push_back(op); + return true; + } - if (op_value == KQLOperatorValue::none) - tokens.push_back(op); - else - { - auto last_op = tokens.back(); - auto last_pos = pos; + if (tokens.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", op); - switch (op_value) - { - case KQLOperatorValue::contains: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); - break; + auto last_op = tokens.back(); + auto last_pos = pos; - case KQLOperatorValue::not_contains: 
- new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::both); - break; + switch (op_value) + { + case KQLOperatorValue::contains: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "ilike({0}, {1})"), WildcardsPos::both); + break; - case KQLOperatorValue::contains_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "like", WildcardsPos::both); - break; + case KQLOperatorValue::not_contains: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not ilike({0}, {1})"), WildcardsPos::both); + break; - case KQLOperatorValue::not_contains_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not like", WildcardsPos::both); - break; + case KQLOperatorValue::contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "like({0}, {1})"), WildcardsPos::both); + break; - case KQLOperatorValue::endswith: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); - break; + case KQLOperatorValue::not_contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not like({0}, {1})"), WildcardsPos::both); + break; - case KQLOperatorValue::not_endswith: - new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); - break; + case KQLOperatorValue::endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "ilike({0}, {1})"), WildcardsPos::left); + break; - case KQLOperatorValue::endswith_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); - break; + case KQLOperatorValue::not_endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not ilike({0}, {1})"), WildcardsPos::left); + break; - case KQLOperatorValue::not_endswith_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); - break; + case KQLOperatorValue::endswith_cs: + new_expr = 
genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "endsWith({0}, {1})"), WildcardsPos::none); + break; - case KQLOperatorValue::equal: - break; + case KQLOperatorValue::not_endswith_cs: + new_expr + = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not endsWith({0}, {1})"), WildcardsPos::none); + break; - case KQLOperatorValue::not_equal: - break; + case KQLOperatorValue::equal: + new_expr = genEqOpExprCis(tokens, pos, "=="); + break; - case KQLOperatorValue::equal_cs: - new_expr = "=="; - break; + case KQLOperatorValue::not_equal: + new_expr = genEqOpExprCis(tokens, pos, "!="); + break; - case KQLOperatorValue::not_equal_cs: - new_expr = "!="; - break; - case KQLOperatorValue::has: - new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); - break; + case KQLOperatorValue::equal_cs: + new_expr = "=="; + break; - case KQLOperatorValue::not_has: - new_expr = genHaystackOpExpr(tokens, pos, op, "not hasTokenCaseInsensitive", WildcardsPos::none); - break; + case KQLOperatorValue::not_equal_cs: + new_expr = "!="; + break; + case KQLOperatorValue::has: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&constructHasOperatorTranslation, op_value), WildcardsPos::none); + break; - case KQLOperatorValue::has_all: - new_expr = genHasAnyAllOpExpr(tokens, pos, "has_all", "hasTokenCaseInsensitive"); - break; + case KQLOperatorValue::not_has: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&constructHasOperatorTranslation, op_value), WildcardsPos::none); + break; - case KQLOperatorValue::has_any: - new_expr = genHasAnyAllOpExpr(tokens, pos, "has_any", "hasTokenCaseInsensitive"); - break; + case KQLOperatorValue::has_all: + case KQLOperatorValue::has_any: + new_expr = genHasAnyAllOpExpr(tokens, pos, op, std::bind_front(&constructHasOperatorTranslation, op_value)); + break; - case KQLOperatorValue::has_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "hasToken", 
WildcardsPos::none); - break; + case KQLOperatorValue::has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&constructHasOperatorTranslation, op_value), WildcardsPos::none); + break; - case KQLOperatorValue::not_has_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not hasToken", WildcardsPos::none); - break; + case KQLOperatorValue::not_has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&constructHasOperatorTranslation, op_value), WildcardsPos::none); + break; - case KQLOperatorValue::hasprefix: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::left); - break; + case KQLOperatorValue::hasprefix: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "ilike({0}, {1})"), WildcardsPos::right); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr( + tokens, last_pos, op, std::bind_front(&applyFormatString, "ilike({0}, {1})"), WildcardsPos::both, WildcardsPos::left); + break; - case KQLOperatorValue::not_hasprefix: - new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::left); - break; + case KQLOperatorValue::not_hasprefix: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not ilike({0}, {1})"), WildcardsPos::right); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr( + tokens, last_pos, op, std::bind_front(&applyFormatString, "not ilike({0}, {1})"), WildcardsPos::both, WildcardsPos::left); + break; - case KQLOperatorValue::hasprefix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); - new_expr += " or "; - 
tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::left); - break; + case KQLOperatorValue::hasprefix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "startsWith({0}, {1})"), WildcardsPos::none); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr( + tokens, last_pos, op, std::bind_front(&applyFormatString, "like({0}, {1})"), WildcardsPos::both, WildcardsPos::left); + break; - case KQLOperatorValue::not_hasprefix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::left); - break; + case KQLOperatorValue::not_hasprefix_cs: + new_expr + = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not startsWith({0}, {1})"), WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr( + tokens, last_pos, op, std::bind_front(&applyFormatString, "not like({0}, {1})"), WildcardsPos::both, WildcardsPos::left); + break; - case KQLOperatorValue::hassuffix: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::right); - break; + case KQLOperatorValue::hassuffix: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "ilike({0}, {1})"), WildcardsPos::left); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr( + tokens, last_pos, op, std::bind_front(&applyFormatString, "ilike({0}, {1})"), WildcardsPos::both, WildcardsPos::right); + break; - case KQLOperatorValue::not_hassuffix: - new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); - 
new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::right); - break; + case KQLOperatorValue::not_hassuffix: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not ilike({0}, {1})"), WildcardsPos::left); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr( + tokens, last_pos, op, std::bind_front(&applyFormatString, "not ilike({0}, {1})"), WildcardsPos::both, WildcardsPos::right); + break; - case KQLOperatorValue::hassuffix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); - new_expr += " or "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::right); - break; + case KQLOperatorValue::hassuffix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "endsWith({0}, {1})"), WildcardsPos::none); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr( + tokens, last_pos, op, std::bind_front(&applyFormatString, "like({0}, {1})"), WildcardsPos::both, WildcardsPos::right); + break; - case KQLOperatorValue::not_hassuffix_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); - new_expr += " and "; - tokens.push_back(last_op); - new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::right); - break; + case KQLOperatorValue::not_hassuffix_cs: + new_expr + = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not endsWith({0}, {1})"), WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr( + tokens, last_pos, op, std::bind_front(&applyFormatString, "not like({0}, {1})"), WildcardsPos::both, WildcardsPos::right); + break; - case KQLOperatorValue::in_cs: - new_expr = genInOpExpr(pos,op,"in"); - break; + 
case KQLOperatorValue::in_cs: + new_expr = genInOpExpr(pos, op, "in"); + break; - case KQLOperatorValue::not_in_cs: - new_expr = genInOpExpr(pos,op,"not in"); - break; + case KQLOperatorValue::not_in_cs: + new_expr = genInOpExpr(pos, op, "not in"); + break; - case KQLOperatorValue::in: - break; + case KQLOperatorValue::in: + new_expr = genInOpExprCis(tokens, pos, op, "in"); + break; - case KQLOperatorValue::not_in: - break; + case KQLOperatorValue::not_in: + new_expr = genInOpExprCis(tokens, pos, op, "not in"); + break; - case KQLOperatorValue::matches_regex: - new_expr = genHaystackOpExpr(tokens, pos, op, "match", WildcardsPos::none); - break; + case KQLOperatorValue::matches_regex: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "match({0}, {1})"), WildcardsPos::none); + break; - case KQLOperatorValue::startswith: - new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); - break; + case KQLOperatorValue::startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "ilike({0}, {1})"), WildcardsPos::right); + break; - case KQLOperatorValue::not_startswith: - new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); - break; + case KQLOperatorValue::not_startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "not ilike({0}, {1})"), WildcardsPos::right); + break; - case KQLOperatorValue::startswith_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); - break; + case KQLOperatorValue::startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, std::bind_front(&applyFormatString, "startsWith({0}, {1})"), WildcardsPos::none); + break; - case KQLOperatorValue::not_startswith_cs: - new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); - break; + case KQLOperatorValue::not_startswith_cs: + new_expr + = genHaystackOpExpr(tokens, pos, op, 
std::bind_front(&applyFormatString, "not startsWith({0}, {1})"), WildcardsPos::none); + break; - default: - break; - } + case KQLOperatorValue::between: + new_expr = genBetweenOpExpr(tokens, pos, "kql_between"); + break; - tokens.push_back(new_expr); - } - return true; + case KQLOperatorValue::not_between: + new_expr = genBetweenOpExpr(tokens, pos, "not kql_between"); + break; + + default: + break; } - pos = begin; - return false; -} + tokens.push_back(new_expr); + return true; +} } - diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 72e25cc3cf90..684be75401aa 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -1,106 +1,12 @@ #pragma once -#include -#include -#include +#include + namespace DB { - class KQLOperators { public: - bool convert(std::vector &tokens,IParser::Pos &pos); -protected: - - enum class WildcardsPos:uint8_t - { - none, - left, - right, - both - }; - - enum class KQLOperatorValue : uint16_t - { - none, - contains, - not_contains, - contains_cs, - not_contains_cs, - endswith, - not_endswith, - endswith_cs, - not_endswith_cs, - equal, /// =~ - not_equal, /// !~ - equal_cs, /// = - not_equal_cs, /// != - has, - not_has, - has_all, - has_any, - has_cs, - not_has_cs, - hasprefix, - not_hasprefix, - hasprefix_cs, - not_hasprefix_cs, - hassuffix, - not_hassuffix, - hassuffix_cs, - not_hassuffix_cs, - in_cs, /// in - not_in_cs, /// !in - in, /// in~ - not_in, /// !in~ - matches_regex, - startswith, - not_startswith, - startswith_cs, - not_startswith_cs, - }; - - std::unordered_map KQLOperator = - { - {"contains" , KQLOperatorValue::contains}, - {"!contains" , KQLOperatorValue::not_contains}, - {"contains_cs" , KQLOperatorValue::contains_cs}, - {"!contains_cs" , KQLOperatorValue::not_contains_cs}, - {"endswith" , KQLOperatorValue::endswith}, - {"!endswith" , KQLOperatorValue::not_endswith}, - {"endswith_cs" , KQLOperatorValue::endswith_cs}, - {"!endswith_cs" , 
KQLOperatorValue::not_endswith_cs}, - {"=~" , KQLOperatorValue::equal}, - {"!~" , KQLOperatorValue::not_equal}, - {"==" , KQLOperatorValue::equal_cs}, - {"!=" , KQLOperatorValue::not_equal_cs}, - {"has" , KQLOperatorValue::has}, - {"!has" , KQLOperatorValue::not_has}, - {"has_all" , KQLOperatorValue::has_all}, - {"has_any" , KQLOperatorValue::has_any}, - {"has_cs" , KQLOperatorValue::has_cs}, - {"!has_cs" , KQLOperatorValue::not_has_cs}, - {"hasprefix" , KQLOperatorValue::hasprefix}, - {"!hasprefix" , KQLOperatorValue::not_hasprefix}, - {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, - {"!hasprefix_cs" , KQLOperatorValue::not_hasprefix_cs}, - {"hassuffix" , KQLOperatorValue::hassuffix}, - {"!hassuffix" , KQLOperatorValue::not_hassuffix}, - {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, - {"!hassuffix_cs" , KQLOperatorValue::not_hassuffix_cs}, - {"in" , KQLOperatorValue::in_cs}, - {"!in" , KQLOperatorValue::not_in_cs}, - {"in~" , KQLOperatorValue::in}, - {"!in~" , KQLOperatorValue::not_in}, - {"matches regex" , KQLOperatorValue::matches_regex}, - {"startswith" , KQLOperatorValue::startswith}, - {"!startswith" , KQLOperatorValue::not_startswith}, - {"startswith_cs" , KQLOperatorValue::startswith_cs}, - {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, - }; - static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos = WildcardsPos::none); - static String genInOpExpr(IParser::Pos &token_pos,String kql_op, String ch_op); - static String genHasAnyAllOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op); + static bool convert(std::vector & tokens, IParser::Pos & pos); }; - } diff --git a/src/Parsers/Kusto/ParserKQLPrint.cpp b/src/Parsers/Kusto/ParserKQLPrint.cpp new file mode 100644 index 000000000000..4abe88920c4d --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLPrint.cpp @@ -0,0 +1,37 @@ +#include +#include + +#include + +namespace DB 
+{ + +bool ParserKQLPrint::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto temp_pos = pos; + const auto expr = getExprFromToken(temp_pos); + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos new_pos(tokens, temp_pos.max_depth); + + ASTPtr select_expression_list; + if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected)) + return false; + + int column_index = 0; + std::ranges::for_each( + select_expression_list->children, + [&column_index](const ASTPtr & expression) + { + if (const auto alias = expression->tryGetAlias(); alias.empty()) + expression->setAlias(std::format("print_{}", column_index)); + + ++column_index; + }); + + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + + pos = temp_pos; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLPrint.h b/src/Parsers/Kusto/ParserKQLPrint.h new file mode 100644 index 000000000000..83ebf11186c3 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLPrint.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +namespace DB +{ + +class ParserKQLPrint : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL project"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index e978323d8215..4627b8d85197 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -1,25 +1,36 @@ +#include "ParserKQLProject.h" + +#include +#include #include -#include -#include + namespace DB { - -bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserKQLProject::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { + const auto expr = getExprFromToken(pos); + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + ASTPtr 
select_expression_list; - String expr; + if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected)) + return false; - expr = getExprFromToken(pos); + std::ranges::for_each( + select_expression_list->children, + [this](const ASTPtr & expression) + { + if (expression->as() || expression->as()) + return; - Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + if (const auto alias = expression->tryGetAlias(); !alias.empty()) + return; - if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected)) - return false; + expression->setAlias(kql_context.nextDefaultColumnName()); + }); node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); return true; } - } diff --git a/src/Parsers/Kusto/ParserKQLProject.h b/src/Parsers/Kusto/ParserKQLProject.h index b64675beed09..0dac2f2990c8 100644 --- a/src/Parsers/Kusto/ParserKQLProject.h +++ b/src/Parsers/Kusto/ParserKQLProject.h @@ -8,9 +8,15 @@ namespace DB class ParserKQLProject : public ParserKQLBase { +public: + explicit ParserKQLProject(KQLContext & kql_context_) : kql_context(kql_context_) { } + protected: const char * getName() const override { return "KQL project"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + KQLContext & kql_context; }; } diff --git a/src/Parsers/Kusto/ParserKQLProjectAway.cpp b/src/Parsers/Kusto/ParserKQLProjectAway.cpp new file mode 100644 index 000000000000..851625cf89b9 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProjectAway.cpp @@ -0,0 +1,102 @@ +#include +#include +#include +#include +#include +#include +#include "Utilities.h" +namespace DB +{ +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} +bool ParserKQLProjectAway::parseImpl(Pos & pos, ASTPtr & node, Expected & /*expected*/) +{ + size_t bracket_count = 0; + auto begin_pos = pos; + String regular_column_str; + std::vector wildcard_columns; + 
std::vector regular_columns; + ASTPtr sub_query_node; + + auto append_columns = [®ular_columns, &wildcard_columns](Pos & begin, Pos & end) + { + const auto column = String(begin->begin, end->end); + if (const auto regex_column = wildcardToRegex(column)) + { + wildcard_columns.push_back("'" + *regex_column + "'"); + } + else + regular_columns.push_back(column); + }; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::OpeningRoundBracket) + ++bracket_count; + + if (pos->type == TokenType::ClosingRoundBracket) + --bracket_count; + + if (bracket_count == 0 and pos->type == TokenType::Comma) + { + auto end_pos = pos; + --end_pos; + append_columns(begin_pos, end_pos); + begin_pos = pos; + ++begin_pos; + } + ++pos; + } + + --pos; + append_columns(begin_pos, pos); + + if (wildcard_columns.empty() && regular_columns.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error: Missing projected away expressions"); + + if (!regular_columns.empty()) + { + if (regular_columns.size() == 1) + regular_column_str = regular_columns[0]; + else + { + regular_column_str = "(" + regular_columns[0]; + for (size_t i = 1; i < regular_columns.size(); ++i) + { + regular_column_str += "," + regular_columns[i]; + } + regular_column_str += ")"; + } + } + + size_t wildcard_columns_index = regular_columns.empty() ? 1 : 0; + + for (size_t i = wildcard_columns_index; i < wildcard_columns.size(); ++i) + { + String project_away_query = std::format("(SELECT * EXCEPT {} FROM dummy_input)", wildcard_columns[i]); + if (!parseSQLQueryByString(std::make_unique(), project_away_query, sub_query_node, pos.max_depth)) + return false; + if (!setSubQuerySource(sub_query_node, node, true, i != wildcard_columns_index)) + return false; + node = std::move(sub_query_node); + } + + String last_away = std::format("SELECT * EXCEPT {} from dummy", regular_columns.empty() ? 
wildcard_columns[0] : regular_column_str); + + if (!parseSQLQueryByString(std::make_unique(), last_away, sub_query_node, pos.max_depth)) + return false; + if (wildcard_columns_index < wildcard_columns.size()) + sub_query_node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(node)); + else + { + if (!setSubQuerySource(sub_query_node, node, false, false)) + return false; + } + node = std::move(sub_query_node); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLProjectAway.h b/src/Parsers/Kusto/ParserKQLProjectAway.h new file mode 100644 index 000000000000..f49f51754dd6 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProjectAway.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLProjectAway : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL project-away"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLProjectRename.cpp b/src/Parsers/Kusto/ParserKQLProjectRename.cpp new file mode 100644 index 000000000000..844d05ff5ca6 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProjectRename.cpp @@ -0,0 +1,39 @@ +#include "ParserKQLProjectRename.h" + +#include +#include +#include +#include + +#include + + +namespace DB +{ + +bool ParserKQLProjectRename::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + const auto projectrename_expr = getRenameExprFromToken(pos); + Tokens ntokens(projectrename_expr.c_str(), projectrename_expr.c_str() + projectrename_expr.size()); + IParser::Pos npos(ntokens, pos.max_depth); + + ASTPtr expression_list; + if (!ParserNotEmptyExpressionList(false).parse(npos, expression_list, expected) || !npos->isEnd()) + return false; + + auto asterisk = std::make_shared(); + asterisk->transformers = std::make_shared(); + const auto & columns_except_transformer + = asterisk->children.emplace_back(asterisk->transformers)->children.emplace_back(std::make_shared()); + + 
std::ranges::transform( + expression_list->children, + std::back_inserter(columns_except_transformer->children), + [](const ASTPtr & child) { return std::make_shared(child->getColumnName()); }); + + expression_list->children.insert(expression_list->children.cbegin(), std::move(asterisk)); + + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(expression_list)); + return true; +} +} diff --git a/src/Parsers/Kusto/ParserKQLProjectRename.h b/src/Parsers/Kusto/ParserKQLProjectRename.h new file mode 100644 index 000000000000..2a57184301f4 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProjectRename.h @@ -0,0 +1,74 @@ +#pragma once + +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ +extern const int SYNTAX_ERROR; +} + +class ParserKQLProjectRename : public ParserKQLBase +{ +public: + explicit ParserKQLProjectRename(KQLContext & kql_context_) : kql_context(kql_context_) { } + +protected: + const char * getName() const override { return "KQL project-rename"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + String getRenameExprFromToken(Pos & pos) + { + String rename_expr; + std::vector seen_columns; + auto last_pos = pos; + auto rename_assignment = true; + size_t bracket_count = 0; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::OpeningRoundBracket) + ++bracket_count; + else if (pos->type == TokenType::ClosingRoundBracket) + --bracket_count; + else if (!bracket_count && pos->type == TokenType::Equals && String(pos->begin, pos->end) == "=") + { + ++pos; + if (pos->type == TokenType::BareWord) + { + auto column = String(pos->begin, pos->end); + if (std::ranges::find(seen_columns.begin(), seen_columns.end(), column) == seen_columns.end()) + { + rename_assignment = true; + seen_columns.push_back(column); + } + else + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error: column '{}' renamed multiple times", column); 
+ } + --pos; + } + else if (!bracket_count && pos->type == TokenType::Comma) + { + if (rename_assignment) + { + rename_expr += (last_pos < pos) ? String(last_pos->begin, pos->end - 1) : ""; + rename_assignment = false; + } + last_pos = pos; + } + ++pos; + } + if (rename_assignment) + rename_expr += (last_pos < pos) ? String(last_pos->begin, pos->end) : ""; + + Tokens rename_tokens(rename_expr.c_str(), rename_expr.c_str() + rename_expr.size()); + IParser::Pos rename_pos(rename_tokens, pos.max_depth); + return getExprFromToken(rename_pos); + } + KQLContext & kql_context; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index 04ee36705a9a..cf462da4a879 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -1,24 +1,202 @@ +#include "Utilities.h" + +#include #include #include +#include +#include +#include +#include #include -#include -#include -#include +#include +#include +#include +#include #include -#include -#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include +#include +#include +#include +#include +#include +#include +#include + #include -#include -#include -#include +#include namespace DB { -String ParserKQLBase :: getExprFromToken(const String & text, const uint32_t & max_depth) +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int SYNTAX_ERROR; +} + +const std::unordered_map kql_parser{ + {"filter", {"filter", false, false, false, 3}}, + {"where", {"filter", false, false, false, 3}}, + {"limit", {"limit", false, true, false, 3}}, + {"take", {"limit", false, true, false, 3}}, + {"project", {"project", false, false, false, 3}}, + {"distinct", {"distinct", true, true, false, 3}}, + {"extend", {"extend", true, true, false, 3}}, + {"sort by", {"order by", false, false, false, 4}}, + {"order by", {"order by", false, false, false, 4}}, + 
{"table", {"table", false, false, false, 3}}, + {"print", {"print", false, true, false, 3}}, + {"summarize", {"summarize", true, true, false, 3}}, + {"make-series", {"make-series", true, true, false, 5}}, + {"mv-expand", {"mv-expand", true, true, false, 5}}, + {"count", {"count", true, true, false, 3}}, + {"top", {"top", false, true, true, 3}}, + {"top-hitters", {"top-hitters", true, true, true, 5}}, + {"lookup", {"lookup", true, true, false, 3}}, + {"join", {"join", true, true, false, 3}}, + {"top-nested", {"top-nested", true, true, true, 5}}, + {"range", {"range", false, true, false, 3}}, + {"project-away", {"project-away", true, true, true, 5}}, + {"getschema", {"getschema", true, true, false, 3}}, + {"project-rename", {"project-rename", true, true, false, 5}}}; + +bool ParserKQLBase::parseByString(const String expr, ASTPtr & node, const uint32_t max_depth) +{ + Expected expected; + + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos pos(tokens, max_depth); + return parse(pos, node, expected); +} + +bool ParserKQLBase::parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, int32_t max_depth) +{ + Expected expected; + Tokens token_subquery(query.c_str(), query.c_str() + query.size()); + IParser::Pos pos_subquery(token_subquery, max_depth); + if (!parser->parse(pos_subquery, select_node, expected)) + return false; + return true; +}; + +bool ParserKQLBase::setSubQuerySource( + ASTPtr & select_query, + ASTPtr & source, + const bool dest_is_subquery, + const bool src_is_subquery, + const String alias, + const int32_t table_index) +{ + ASTPtr table_expr; + auto apply_alias = [&]() + { + if (!alias.empty()) + { + if (table_expr->as()->table_expression->as()->subquery) + table_expr->as() + ->table_expression->as() + ->subquery->as() + ->alias + = alias; + else if (table_expr->as()->table_expression->as()->database_and_table_name) + { + table_expr + = table_expr->as()->table_expression->as()->database_and_table_name; + if 
(auto * ast_with_alias = dynamic_cast(table_expr.get())) + ast_with_alias->alias = alias; + } + } + }; + if (!dest_is_subquery) + { + if (!select_query || !select_query->as()->tables() + || select_query->as()->tables()->as()->children.empty()) + return false; + table_expr = select_query->as()->tables()->as()->children[table_index]; + + if (!src_is_subquery) + { + table_expr->as()->table_expression + = source->as()->tables()->children[0]->as()->table_expression; + } + else + { + table_expr->as()->table_expression + = source->children[0]->as()->table_expression; + } + apply_alias(); + return true; + } + + if (!select_query || select_query->as()->children.empty() + || !select_query->as()->children[table_index]->as()->table_expression + || select_query->as() + ->children[table_index] + ->as() + ->table_expression->as() + ->subquery->children.empty() + || select_query->as() + ->children[table_index] + ->as() + ->table_expression->as() + ->subquery->children[0] + ->as() + ->list_of_selects->children.empty() + || select_query->as() + ->children[table_index] + ->as() + ->table_expression->as() + ->subquery->children[0] + ->as() + ->list_of_selects->children[0] + ->as() + ->tables() + ->as() + ->children.empty()) + return false; + + table_expr = select_query->as() + ->children[table_index] + ->as() + ->table_expression->as() + ->subquery->children[0] + ->as() + ->list_of_selects->children[0] + ->as() + ->tables() + ->as() + ->children[0]; + + if (!src_is_subquery) + { + table_expr->as()->table_expression + = source->as()->tables()->children[0]->as()->table_expression; + } + else + { + table_expr->as()->table_expression + = source->children[0]->as()->table_expression; + } + apply_alias(); + + return true; +} + +String ParserKQLBase::getExprFromToken(const String & text, const uint32_t max_depth) { Tokens tokens(text.c_str(), text.c_str() + text.size()); IParser::Pos pos(tokens, max_depth); @@ -26,125 +204,239 @@ String ParserKQLBase :: getExprFromToken(const String & text, 
const uint32_t & m return getExprFromToken(pos); } -String ParserKQLBase :: getExprFromPipe(Pos & pos) +String ParserKQLBase::getExprFromPipe(Pos & pos) { - uint16_t bracket_count = 0; - auto begin = pos; + BracketCount bracket_count; auto end = pos; while (!end->isEnd() && end->type != TokenType::Semicolon) { - if (end->type == TokenType::OpeningRoundBracket) - ++bracket_count; - - if (end->type == TokenType::OpeningRoundBracket) - --bracket_count; - - if (end->type == TokenType::PipeMark && bracket_count == 0) + bracket_count.count(end); + if (end->type == TokenType::PipeMark && bracket_count.isZero()) break; ++end; } - --end; - return String(begin->begin, end->end); + if (end != pos) + --end; + return (pos <= end) ? String(pos->begin, end->end) : ""; } -String ParserKQLBase :: getExprFromToken(Pos & pos) +String ParserKQLBase::getExprFromToken(Pos & pos) { - String res; - std::vector tokens; - String alias; + std::vector comma_pos; + comma_pos.push_back(pos); - while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + size_t paren_count = 0; + while (!pos->isEnd() && pos->type != TokenType::Semicolon) { - String token = String(pos->begin,pos->end); + if (pos->type == TokenType::PipeMark && paren_count == 0) + break; + + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; - if (token == "=") + if (pos->type == TokenType::Comma && paren_count == 0) { ++pos; - if (String(pos->begin,pos->end) != "~") + comma_pos.push_back(pos); + --pos; + } + ++pos; + } + + std::vector columns; + auto set_columns = [&](Pos & start_pos, Pos & end_pos) + { + bool has_alias = false; + auto equal_pos = start_pos; + auto columms_start_pos = start_pos; + auto it_pos = start_pos; + if (String(it_pos->begin, it_pos->end) == "=") + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid equal symbol (=)"); + + BracketCount bracket_count; + while (it_pos < end_pos) + { + 
bracket_count.count(it_pos); + if (String(it_pos->begin, it_pos->end) == "=") { - alias = tokens.back(); - tokens.pop_back(); + ++it_pos; + if (String(it_pos->begin, it_pos->end) != "~" && bracket_count.isZero()) + { + if (has_alias) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid equal symbol (=)"); + has_alias = true; + } + + --it_pos; + if (equal_pos == start_pos) + equal_pos = it_pos; } - --pos; + ++it_pos; } - else if (!KQLOperators().convert(tokens,pos)) + + if (has_alias) { - tokens.push_back(token); + columms_start_pos = equal_pos; + ++columms_start_pos; } + String column_str; + String function_name; + std::vector tokens; - if (pos->type == TokenType::Comma && !alias.empty()) + while (columms_start_pos < end_pos) { - tokens.pop_back(); - tokens.push_back("AS"); - tokens.push_back(alias); - tokens.push_back(","); - alias.clear(); + if (!KQLOperators::convert(tokens, columms_start_pos)) + { + if (columms_start_pos->type == TokenType::BareWord && function_name.empty()) + function_name = String(columms_start_pos->begin, columms_start_pos->end); + + auto expr = IParserKQLFunction::getExpression(columms_start_pos); + tokens.push_back(expr); + } + ++columms_start_pos; } - ++pos; - } - if (!alias.empty()) + for (const auto & token : tokens) + column_str = column_str.empty() ? 
token : column_str + " " + token; + + if (has_alias) + { + --equal_pos; + if (start_pos == equal_pos) + { + String new_column_str; + if (start_pos->type != TokenType::BareWord) + throw Exception( + ErrorCodes::SYNTAX_ERROR, "{} is not a valid alias", std::string_view(start_pos->begin, start_pos->end)); + + if (function_name == "array_sort_asc" || function_name == "array_sort_desc") + new_column_str = std::format("{0}[1] AS {1}", column_str, String(start_pos->begin, start_pos->end)); + else + new_column_str = std::format("{0} AS {1}", column_str, String(start_pos->begin, start_pos->end)); + + columns.push_back(new_column_str); + } + else + { + String whole_alias(start_pos->begin, equal_pos->end); + + if (function_name != "array_sort_asc" && function_name != "array_sort_desc") + throw Exception(ErrorCodes::SYNTAX_ERROR, "{} is not a valid alias", whole_alias); + + if (start_pos->type != TokenType::OpeningRoundBracket && equal_pos->type != TokenType::ClosingRoundBracket) + throw Exception(ErrorCodes::SYNTAX_ERROR, "{} is not a valid alias for {}", whole_alias, function_name); + + String alias_inside; + bool comma_meet = false; + size_t index = 1; + ++start_pos; + while (start_pos < equal_pos) + { + if (start_pos->type == TokenType::Comma) + { + alias_inside.clear(); + if (comma_meet) + throw Exception(ErrorCodes::SYNTAX_ERROR, "{} has invalid alias for {}", whole_alias, function_name); + comma_meet = true; + } + else + { + if (!alias_inside.empty() || start_pos->type != TokenType::BareWord) + throw Exception(ErrorCodes::SYNTAX_ERROR, "{} has invalid alias for {}", whole_alias, function_name); + + alias_inside = String(start_pos->begin, start_pos->end); + auto new_column_str = std::format("{0}[{1}] AS {2}", column_str, index, alias_inside); + columns.push_back(new_column_str); + comma_meet = false; + ++index; + } + ++start_pos; + } + } + } + else + columns.push_back(column_str); + }; + + size_t cloumn_size = comma_pos.size(); + for (size_t i = 0; i < cloumn_size; ++i) 
{ - tokens.push_back("AS"); - tokens.push_back(alias); + if (i == cloumn_size - 1) + set_columns(comma_pos[i], pos); + else + { + auto end_pos = comma_pos[i + 1]; + --end_pos; + set_columns(comma_pos[i], end_pos); + } } - for (auto const &token : tokens) - res = res.empty()? token : res +" " + token; + String res; + for (const auto & token : columns) + res = res.empty() ? token : res + "," + token; return res; } -std::unique_ptr ParserKQLQuery::getOperator(String & op_name) +std::unique_ptr ParserKQLQuery::getOperator(const std::string_view op_name) { if (op_name == "filter" || op_name == "where") return std::make_unique(); else if (op_name == "limit" || op_name == "take") return std::make_unique(); else if (op_name == "project") - return std::make_unique(); + return std::make_unique(kql_context); + else if (op_name == "distinct") + return std::make_unique(); + else if (op_name == "getschema") + return std::make_unique(); + else if (op_name == "extend") + return std::make_unique(kql_context); else if (op_name == "sort by" || op_name == "order by") return std::make_unique(); else if (op_name == "summarize") return std::make_unique(); else if (op_name == "table") return std::make_unique(); - else - return nullptr; + else if (op_name == "make-series") + return std::make_unique(); + else if (op_name == "mv-expand") + return std::make_unique(); + else if (op_name == "print") + return std::make_unique(); + else if (op_name == "count") + return std::make_unique(); + else if (op_name == "top") + return std::make_unique(); + else if (op_name == "top-hitters") + return std::make_unique(); + else if (op_name == "lookup") + return std::make_unique(); + else if (op_name == "join") + return std::make_unique(); + else if (op_name == "top-nested") + return std::make_unique(); + else if (op_name == "range") + return std::make_unique(); + else if (op_name == "project-away") + return std::make_unique(); + else if (op_name == "project-rename") + return std::make_unique(kql_context); + 
+ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "No such KQL operator exists: {}", op_name); } -bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserKQLQuery::getOperations(Pos & pos, Expected & expected, OperationsPos & operation_pos) { - struct KQLOperatorDataFlowState - { - String operator_name; - bool need_input; - bool gen_output; - int8_t backspace_steps; // how many steps to last token of previous pipe - }; - - auto select_query = std::make_shared(); - node = select_query; - ASTPtr tables; - - std::unordered_map kql_parser = - { - { "filter", {"filter", false, false, 3}}, - { "where", {"filter", false, false, 3}}, - { "limit", {"limit", false, true, 3}}, - { "take", {"limit", false, true, 3}}, - { "project", {"project", false, false, 3}}, - { "sort by", {"order by", false, false, 4}}, - { "order by", {"order by", false, false, 4}}, - { "table", {"table", false, false, 3}}, - { "summarize", {"summarize", true, true, 3}} - }; - - std::vector> operation_pos; - - String table_name(pos->begin, pos->end); + if (pos->isEnd()) + return false; + if (String table_name(pos->begin, pos->end); table_name == "print" || table_name == "range") + operation_pos.emplace_back(table_name, pos); + else + operation_pos.emplace_back("table", pos); - operation_pos.push_back(std::make_pair("table", pos)); ++pos; uint16_t bracket_count = 0; @@ -152,54 +444,173 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (pos->type == TokenType::OpeningRoundBracket) ++bracket_count; - if (pos->type == TokenType::OpeningRoundBracket) + if (pos->type == TokenType::ClosingRoundBracket) --bracket_count; if (pos->type == TokenType::PipeMark && bracket_count == 0) { ++pos; + if (pos->isEnd()) + return false; String kql_operator(pos->begin, pos->end); - if (kql_operator == "order" || kql_operator == "sort") + + auto validate_kql_operator = [&] { - ++pos; - ParserKeyword s_by("by"); - if (s_by.ignore(pos,expected)) + if (kql_operator 
== "order" || kql_operator == "sort") { - kql_operator = "order by"; - --pos; + ++pos; + if (ParserKeyword("by").ignore(pos, expected)) + { + kql_operator = "order by"; + --pos; + } } - } - if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) + else + { + auto op_pos_begin = pos; + ++pos; + if (ParserToken(TokenType::Minus).ignore(pos, expected)) + kql_operator = String(op_pos_begin->begin, pos->end); + else + --pos; + } + if (!kql_parser.contains(kql_operator)) + return false; + return true; + }; + + if (!validate_kql_operator()) return false; ++pos; - operation_pos.push_back(std::make_pair(kql_operator, pos)); + + if ((kql_operator == "print" || kql_operator == "range") && !operation_pos.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "{} must be the first operator in the query", kql_operator); + + operation_pos.emplace_back(kql_operator, pos); } else ++pos; } + return true; +} - auto kql_operator_str = operation_pos.back().first; - auto npos = operation_pos.back().second; - if (!npos.isValid()) +bool ParserKQLQuery::pre_process(String & source, Pos & pos) +{ + bool need_preprocess = false; + auto begin = pos; + while (!pos->isEnd() && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::HereDoc) + need_preprocess = true; + + ++pos; + } + + auto end = pos; + if (end != begin) + --end; + source = String(begin->begin, end->end); + + auto replace = [&](std::string & str, const std::string & from, const std::string & to) + { + size_t start_pos = str.find(from); + if (start_pos != std::string::npos) + { + str.replace(start_pos, from.length(), to); + return true; + } return false; + }; + + if (need_preprocess) + { + bool done = true; + while (done) + done = replace(source, "$left", "left_"); + done = true; + while (done) + done = replace(source, "$right", "right_"); + } + + return need_preprocess; +} + +bool ParserKQLQuery::parseImpl(Pos & original_pos, ASTPtr & node, Expected & expected) +{ + auto pos = 
original_pos; + bool pre_processed = false; + String pre_processed_query; + + pre_processed = pre_process(pre_processed_query, original_pos); + if (pre_processed) + { + Tokens tokens(pre_processed_query.data(), pre_processed_query.data() + pre_processed_query.size(), original_pos.max_depth); + IParser::Pos n_pos(tokens, original_pos.max_depth); + return executeImpl(n_pos, node, expected); + } + return executeImpl(pos, node, expected); +} + +bool ParserKQLQuery::executeImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + node = std::make_shared(); + + OperationsPos operation_pos; + if (!getOperations(pos, expected, operation_pos)) + return false; + + auto kql_operator_str = operation_pos.back().first; auto kql_operator_p = getOperator(kql_operator_str); + String updated_query; + kql_operator_p->updatePipeLine(operation_pos, updated_query); + + Tokens token_query(updated_query.c_str(), updated_query.c_str() + updated_query.size()); + IParser::Pos pos_query(token_query, pos.max_depth); + if (!updated_query.empty()) + { + operation_pos.clear(); + if (!ParserKQLQuery::getOperations(pos_query, expected, operation_pos)) + return false; + } + + kql_operator_str = operation_pos.back().first; + kql_operator_p = getOperator(kql_operator_str); if (!kql_operator_p) return false; + auto npos = operation_pos.back().second; + if (operation_pos.size() == 1) { - if (!kql_operator_p->parse(npos, node, expected)) - return false; + if (kql_operator_str == "print") + { + ++npos; + if (!ParserKQLPrint().parse(npos, node, expected)) + return false; + } + else if (kql_operator_str == "range") + { + ++npos; + if (!ParserKQLRange().parse(npos, node, expected)) + return false; + } + else if (kql_operator_str == "table") + { + if (!kql_operator_p->parse(npos, node, expected)) + return false; + } } else if (operation_pos.size() == 2 && operation_pos.front().first == "table") { - if (!kql_operator_p->parse(npos, node, expected)) - return false; npos = operation_pos.front().second; if 
(!ParserKQLTable().parse(npos, node, expected)) return false; + + npos = operation_pos.back().second; + if (!kql_operator_p->parse(npos, node, expected)) + return false; } else { @@ -207,7 +618,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto last_pos = operation_pos.back().second; auto last_op = operation_pos.back().first; - auto set_main_query_clause =[&](String & op, Pos & op_pos) + auto set_main_query_clause = [&](const std::string_view op, Pos & op_pos) { auto op_str = ParserKQLBase::getExprFromPipe(op_pos); if (op == "project") @@ -224,22 +635,16 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) operation_pos.pop_back(); - if (kql_parser[last_op].need_input) - { - if (!kql_operator_p->parse(npos, node, expected)) - return false; - } - else + if (!kql_parser.at(last_op).input_as_subquery) { while (!operation_pos.empty()) { auto prev_op = operation_pos.back().first; auto prev_pos = operation_pos.back().second; - if (kql_parser[prev_op].gen_output) - break; - if (!project_clause.empty() && prev_op == "project") + if (kql_parser.at(prev_op).output_as_subquery || (!project_clause.empty() && prev_op == "project")) break; + set_main_query_clause(prev_op, prev_pos); operation_pos.pop_back(); last_op = prev_op; @@ -249,16 +654,17 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!operation_pos.empty()) { - for (auto i = 0; i< kql_parser[last_op].backspace_steps; ++i) + for (auto i = 0; i < kql_parser.at(last_op).backspace_steps; ++i) --last_pos; String sub_query = std::format("({})", String(operation_pos.front().second->begin, last_pos->end)); Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); IParser::Pos pos_subquery(token_subquery, pos.max_depth); - if (!ParserKQLSubquery().parse(pos_subquery, tables, expected)) + ASTPtr tables; + if (!ParserKQLSubquery(kql_context).parse(pos_subquery, tables, expected)) return false; - 
select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); } else { @@ -266,58 +672,48 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - auto set_query_clasue =[&](String op_str, String op_calsue) + if (!kql_operator_p->parse(npos, node, expected)) + return false; + + auto set_query_clause = [&](const String & op_str, const String & op_clause) { - auto oprator = getOperator(op_str); - if (oprator) - { - Tokens token_clause(op_calsue.c_str(), op_calsue.c_str() + op_calsue.size()); - IParser::Pos pos_clause(token_clause, pos.max_depth); - if (!oprator->parse(pos_clause, node, expected)) - return false; - } - return true; + auto parser = getOperator(op_str); + Tokens token_clause(op_clause.c_str(), op_clause.c_str() + op_clause.size()); + IParser::Pos pos_clause(token_clause, pos.max_depth); + return parser->parse(pos_clause, node, expected); }; - if (!select_query->select()) + if (!node->as()->select()) { if (project_clause.empty()) project_clause = "*"; - if (!set_query_clasue("project", project_clause)) + if (!set_query_clause("project", project_clause)) return false; } - if (!order_clause.empty()) - if (!set_query_clasue("order by", order_clause)) - return false; - - if (!where_clause.empty()) - if (!set_query_clasue("where", where_clause)) - return false; - - if (!limit_clause.empty()) - if (!set_query_clasue("limit", limit_clause)) - return false; - return true; + if ((!order_clause.empty() && !set_query_clause("order by", order_clause)) + || (!where_clause.empty() && !set_query_clause("where", where_clause)) + || (!limit_clause.empty() && !set_query_clause("limit", limit_clause))) + return false; } - if (!select_query->select()) + if (auto * select_query = node->as(); !select_query->select()) + setSelectAll(*select_query); + else { - auto expr = String("*"); - Tokens tokens(expr.c_str(), 
expr.c_str()+expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); - if (!std::make_unique()->parse(new_pos, node, expected)) - return false; + std::ranges::for_each( + select_query->select()->children | std::views::transform([](const auto & expression) { return expression->tryGetAlias(); }), + std::bind_front(&KQLContext::checkForDefaultColumnName, std::ref(kql_context))); } - return true; + return true; } bool ParserKQLSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr select_node; - if (!ParserKQLTaleFunction().parse(pos, select_node, expected)) + if (!ParserKQLTableFunction(kql_context).parse(pos, select_node, expected)) return false; ASTPtr node_subquery = std::make_shared(); @@ -339,4 +735,46 @@ bool ParserKQLSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } +bool ParserSimpleCHSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr sub_select_node; + ParserSelectWithUnionQuery select; + + if (pos->type != TokenType::OpeningRoundBracket) + return false; + ++pos; + + if (!select.parse(pos, sub_select_node, expected)) + return false; + + if (pos->type != TokenType::ClosingRoundBracket) + return false; + ++pos; + + if (parent_select_node && parent_select_node->as()->tables()) + { + auto select_query = sub_select_node->as()->list_of_selects->children[0]; + select_query->as()->setExpression( + ASTSelectQuery::Expression::TABLES, parent_select_node->as()->tables()); + } + + ASTPtr node_subquery = std::make_shared(); + node_subquery->children.push_back(sub_select_node); + + ASTPtr node_table_expr = std::make_shared(); + node_table_expr->as()->subquery = node_subquery; + + node_table_expr->children.emplace_back(node_subquery); + + ASTPtr node_table_in_select_query_emlement = std::make_shared(); + node_table_in_select_query_emlement->as()->table_expression = node_table_expr; + + ASTPtr res = std::make_shared(); + + res->children.emplace_back(node_table_in_select_query_emlement); + + 
node = res; + return true; +} + } diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index 42f5f84f0317..4a9767642f83 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -1,32 +1,103 @@ #pragma once -#include +#include "KQLContext.h" + #include +#include namespace DB { +using OperationsPos = std::vector>; + class ParserKQLBase : public IParserBase { public: + ~ParserKQLBase() override = default; + static String getExprFromToken(Pos & pos); + static String getExprFromToken(const String & text, const uint32_t max_depth); static String getExprFromPipe(Pos & pos); - static String getExprFromToken(const String & text, const uint32_t & max_depth); + static bool setSubQuerySource( + ASTPtr & select_query, + ASTPtr & source, + const bool dest_is_subquery, + const bool src_is_subquery, + const String alias = "", + const int32_t table_index = 0); + static bool parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, int32_t max_depth); + bool parseByString(const String expr, ASTPtr & node, const uint32_t max_depth); + virtual bool updatePipeLine(OperationsPos & /*operations*/, String & /*query*/) { return false; } }; class ParserKQLQuery : public IParserBase { +public: + struct KQLOperatorDataFlowState + { + String operator_name; + bool input_as_subquery; + bool output_as_subquery; + bool need_reinterpret; + int8_t backspace_steps; // how many steps to last token of previous pipe + }; + + explicit ParserKQLQuery(KQLContext & kql_context_) : kql_context(kql_context_) { } + + static bool getOperations(Pos & pos, Expected & expected, OperationsPos & operation_pos); protected: - static std::unique_ptr getOperator(String &op_name); + std::unique_ptr getOperator(std::string_view op_name); + static bool pre_process(String & source, Pos & pos); const char * getName() const override { return "KQL query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool 
executeImpl(Pos & pos, ASTPtr & node, Expected & expected); + +private: + KQLContext & kql_context; }; -class ParserKQLSubquery : public IParserBase +class ParserKQLSubquery : public ParserKQLBase { +public: + explicit ParserKQLSubquery(KQLContext & kql_context_) : kql_context(kql_context_) { } + protected: const char * getName() const override { return "KQL subquery"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + KQLContext & kql_context; }; +class ParserSimpleCHSubquery : public ParserKQLBase +{ +public: + ParserSimpleCHSubquery(ASTPtr parent_select_node_ = nullptr) { parent_select_node = parent_select_node_; } + +protected: + const char * getName() const override { return "Simple ClickHouse subquery"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + ASTPtr parent_select_node; +}; + +class BracketCount +{ +public: + void count(IParser::Pos & pos) + { + if (pos->type == TokenType::OpeningRoundBracket) + ++round_bracket_count; + if (pos->type == TokenType::ClosingRoundBracket) + --round_bracket_count; + if (pos->type == TokenType::OpeningSquareBracket) + ++square_bracket_count; + if (pos->type == TokenType::ClosingSquareBracket) + --square_bracket_count; + } + bool isZero() const { return round_bracket_count == 0 && square_bracket_count == 0; } + +private: + int16_t round_bracket_count = 0; + int16_t square_bracket_count = 0; +}; } diff --git a/src/Parsers/Kusto/ParserKQLRange.cpp b/src/Parsers/Kusto/ParserKQLRange.cpp new file mode 100644 index 000000000000..0a1fd4657b95 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLRange.cpp @@ -0,0 +1,92 @@ +#include +#include +#include +#include +#include +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +bool ParserKQLRange::parseImpl(Pos & pos, ASTPtr & node, Expected & /*expected*/) +{ + ASTPtr select_node; + String column_name, start, stop, step; + auto start_pos = pos; + auto end_pos = pos; + BracketCount bracket_count; 
+ while (!pos->isEnd()) + { + bracket_count.count(pos); + if ((pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) && bracket_count.isZero()) + break; + + if (String(pos->begin, pos->end) == "from" && bracket_count.isZero()) + { + end_pos = pos; + --end_pos; + if (end_pos < start_pos) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing column name for range operator"); + + column_name = String(start_pos->begin, end_pos->end); + start_pos = pos; + ++start_pos; + } + if (String(pos->begin, pos->end) == "to" && bracket_count.isZero()) + { + if (column_name.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing `from` for range operator"); + end_pos = pos; + --end_pos; + if (end_pos < start_pos) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing start expression for range operator"); + start = String(start_pos->begin, end_pos->end); + start_pos = pos; + ++start_pos; + } + if (String(pos->begin, pos->end) == "step" && bracket_count.isZero()) + { + if (column_name.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing `from` for range operator"); + if (start.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing 'to' for range operator"); + + end_pos = pos; + --end_pos; + if (end_pos < start_pos) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing stop expression for range operator"); + + stop = String(start_pos->begin, end_pos->end); + start_pos = pos; + ++start_pos; + } + ++pos; + } + + if (column_name.empty() || start.empty() || stop.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing required expression for range operator"); + + end_pos = pos; + --end_pos; + if (end_pos < start_pos) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing step expression for range operator"); + + step = String(start_pos->begin, end_pos->end); + + column_name = getExprFromToken(column_name, pos.max_depth); + start = getExprFromToken(start, pos.max_depth); + stop = getExprFromToken(stop, pos.max_depth); + step = getExprFromToken(step, 
/// Parses the argument list of the KQL `sort by` operator and stores it as the ORDER BY
/// expression of the select query held in `node`.
///
/// The list is split on top-level commas. Every item is rewritten to
/// "<column expression> <asc|desc> nulls <first|last>" and the joined text is handed to
/// ParserOrderByExpressionList. Defaults applied here: direction `desc`; nulls position
/// `nulls last` for `desc` and `nulls first` for `asc`.
///
/// Returns false when the rewritten list is rejected by the ORDER BY parser.
/// Throws SYNTAX_ERROR for a missing column expression, a repeated or misplaced
/// direction / nulls clause, or an invalid nulls position.
bool ParserKQLSort::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    String order_list_str;
    ParserOrderByExpressionList order_list;
    ASTPtr order_expression_list;

    // Returns the source text covered by tokens [pos1, pos2]. A multi-token expression
    // that ends in a bare word is rejected.
    auto validate_column = [&](Pos & pos1, Pos & pos2)
    {
        if (pos2->type == TokenType::BareWord && pos1 != pos2)
            throw Exception(
                ErrorCodes::SYNTAX_ERROR,
                "{} does not refer to any known column, table, variable or function",
                String(pos2->begin, pos2->end));

        return String(pos1->begin, pos2->end);
    };

    // Returns the column expression made of the tokens in [first, boundary).
    // The emptiness check runs before stepping back, so a missing expression is reported
    // as a syntax error instead of building a string from a reversed character range.
    auto column_before = [&](Pos & first, Pos boundary)
    {
        if (!(first < boundary))
            throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing column expression for sort operator");

        --boundary;
        return validate_column(first, boundary);
    };

    // Rewrites one comma-separated item, given as the token range [pos1, pos2).
    auto format_sort_expr = [&](const Pos & pos1, const Pos & pos2)
    {
        auto start_pos = pos1;
        auto end_pos = pos2;
        String column_expr, sort_dir, nulls_position;
        auto tmp_pos = start_pos;
        while (tmp_pos < end_pos)
        {
            String tmp(tmp_pos->begin, tmp_pos->end);
            if (tmp == "desc" || tmp == "asc")
            {
                // The direction may appear once, and only before a nulls clause.
                if (!sort_dir.empty() || !nulls_position.empty())
                    throw Exception(ErrorCodes::SYNTAX_ERROR, "The incomplete fragment is unexpected");

                column_expr = column_before(start_pos, tmp_pos);
                sort_dir = tmp;
            }
            if (tmp == "nulls")
            {
                if (!nulls_position.empty())
                    throw Exception(ErrorCodes::SYNTAX_ERROR, "The incomplete fragment is unexpected");

                auto nulls_pos = tmp_pos;
                ++tmp_pos;
                tmp = String(tmp_pos->begin, tmp_pos->end);
                if (tmp_pos->isEnd() || (tmp != "first" && tmp != "last"))
                    throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid nulls position of sort operator");

                nulls_position = "nulls " + tmp;
                // Without a preceding direction the column expression ends right before `nulls`.
                if (column_expr.empty())
                    column_expr = column_before(start_pos, nulls_pos);
            }

            ++tmp_pos;
        }

        // Neither a direction nor a nulls clause was found: the whole item is the column expression.
        if (column_expr.empty())
            column_expr = column_before(start_pos, end_pos);

        if (sort_dir.empty())
            sort_dir = "desc";
        if (nulls_position.empty())
            nulls_position = sort_dir == "desc" ? "nulls last" : "nulls first";
        return std::format("{} {} {}", getExprFromToken(column_expr, pos.max_depth), sort_dir, nulls_position);
    };

    auto append_item = [&](const String & item)
    {
        order_list_str = order_list_str.empty() ? item : order_list_str + "," + item;
    };

    // Only commas outside round brackets separate sort items.
    auto paren_count = 0;
    auto begin = pos;
    while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
    {
        if (pos->type == TokenType::ClosingRoundBracket)
            --paren_count;
        if (pos->type == TokenType::OpeningRoundBracket)
            ++paren_count;
        if (pos->type == TokenType::Comma && paren_count == 0)
        {
            append_item(format_sort_expr(begin, pos));
            begin = pos;
            ++begin;
        }
        ++pos;
    }

    // The last (or only) item is terminated by the end of the operator, not by a comma.
    append_item(format_sort_expr(begin, pos));

    Tokens tokens(order_list_str.c_str(), order_list_str.c_str() + order_list_str.size());
    IParser::Pos new_pos(tokens, pos.max_depth);

    if (!order_list.parse(new_pos, order_expression_list, expected))
        return false;

    node->as<ASTSelectQuery>()->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list));
    return true;
}
select_with_union_query = std::make_shared(); - node = select_with_union_query; - select_with_union_query->list_of_selects = list_node; - select_with_union_query->children.push_back(select_with_union_query->list_of_selects); - + node = wrapInSelectWithUnion(kql_query); return true; } -bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserKQLTableFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserKQLWithUnionQuery kql_p; + ParserKQLWithUnionQuery kql_p(kql_context); ASTPtr select; ParserToken s_lparen(TokenType::OpeningRoundBracket); auto begin = pos; - auto paren_count = 0 ; + auto paren_count = 0; String kql_statement; if (s_lparen.ignore(pos, expected)) { - ++paren_count; - while (!pos->isEnd()) + if (pos->type == TokenType::HereDoc) { - if (pos->type == TokenType::ClosingRoundBracket) - --paren_count; - if (pos->type == TokenType::OpeningRoundBracket) - ++paren_count; - - if (paren_count == 0) - break; - - kql_statement = kql_statement + " " + String(pos->begin,pos->end); - ++pos; + auto kal_table_str = String(pos->begin, pos->end); + auto heredoc_name_end_position = kal_table_str.find('$', 1); + if (heredoc_name_end_position != std::string::npos) + { + size_t heredoc_size = heredoc_name_end_position + 1; + std::string_view heredoc = {kal_table_str.data(), heredoc_size}; + + size_t heredoc_end_position = kal_table_str.find(heredoc, heredoc_size); + if (heredoc_end_position != std::string::npos) + { + kql_statement = kal_table_str.substr(heredoc_name_end_position + 1, heredoc_end_position - heredoc_name_end_position - 1); + } + } + } + else + { + ++paren_count; + auto pos_start = pos; + while (!pos->isEnd()) + { + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + + if (paren_count == 0) + break; + ++pos; + } + if (pos->isEnd() && paren_count != 0) + return false; + + kql_statement = String(pos_start->begin, 
(--pos)->end); } + ++pos; Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size()); IParser::Pos pos_kql(token_kql, pos.max_depth); @@ -95,8 +115,7 @@ bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expec return true; } } - pos = begin; + pos = begin; return false; }; - } diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h index ef44b2d6c8ac..75286c8cd701 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.h +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -1,5 +1,7 @@ #pragma once +#include "KQLContext.h" + #include namespace DB @@ -7,46 +9,48 @@ namespace DB class ParserKQLStatement : public IParserBase { -private: - const char * end; - bool allow_settings_after_format_in_insert; +protected: const char * getName() const override { return "KQL Statement"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -public: - explicit ParserKQLStatement(const char * end_, bool allow_settings_after_format_in_insert_ = false) - : end(end_) - , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) - {} }; - class ParserKQLWithOutput : public IParserBase { +public: + explicit ParserKQLWithOutput(KQLContext & kql_context_) : kql_context(kql_context_) { } + protected: - const char * end; - bool allow_settings_after_format_in_insert; const char * getName() const override { return "KQL with output"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -public: - explicit ParserKQLWithOutput(const char * end_, bool allow_settings_after_format_in_insert_ = false) - : end(end_) - , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) - {} + +private: + KQLContext & kql_context; }; class ParserKQLWithUnionQuery : public IParserBase { +public: + explicit ParserKQLWithUnionQuery(KQLContext & kql_context_) : kql_context(kql_context_) { } + protected: const char * getName() const override { return "KQL 
query, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + KQLContext & kql_context; }; -class ParserKQLTaleFunction : public IParserBase +class ParserKQLTableFunction : public IParserBase { +public: + explicit ParserKQLTableFunction(KQLContext & kql_context_) : kql_context(kql_context_) { } + protected: const char * getName() const override { return "KQL() function"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + KQLContext & kql_context; }; } - diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index 75eacb1adbd2..c2a23af55453 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -1,3 +1,5 @@ +#include +#include #include #include #include @@ -15,10 +17,20 @@ #include #include +#include +#include +#include +#include + namespace DB { -bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +bool ParserKQLSummarize::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr select_expression_list; ASTPtr group_expression_list; @@ -27,33 +39,169 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte String expr_groupby; String expr_columns; bool groupby = false; + auto column_begin_pos = pos; + + uint16_t bracket_count = 0; + int32_t new_column_index = 1; + + std::vector expr_aggregations; + std::vector expr_groupbys; + + std::unordered_set aggregate_functions( + {"arg_max", + "arg_min", + "avg", + "avgif", + "binary_all_and", + "binary_all_or", + "binary_all_xor", + "buildschema", + "count", + "countif", + "dcount", + "dcountif", + "make_bag", + "make_bag_if", + "make_list", + "make_list_if", + "make_list_with_nulls", + "make_set", + "make_set_if", + "max", + "maxif", + "min", + "minif", + "percentile", + "percentilew", + "percentiles", + 
"percentiles_array", + "percentilesw", + "percentilesw_array", + "stdev", + "stdevif", + "sum", + "sumif", + "take_any", + "take_anyif", + "variance", + "varianceif", + "variancep"}); + + auto apply_aliais = [&](Pos & begin_pos, Pos & end_pos, bool is_groupby) + { + if (String(begin_pos->begin, begin_pos->end) == "by") + return; + if (end_pos->end <= begin_pos->begin) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near keyword \"{}\"", std::string_view(begin_pos->begin, begin_pos->end)); + auto expr = String(begin_pos->begin, end_pos->end); + auto equal_pos = begin_pos; + ++equal_pos; + if (!is_groupby) + { + if (String(equal_pos->begin, equal_pos->end) != "=") + { + String alias; + String aggregate_fun = String(begin_pos->begin, begin_pos->end); + if (aggregate_functions.find(aggregate_fun) == aggregate_functions.end()) + { + alias = std::format("Columns{}", new_column_index); + ++new_column_index; + } + else + { + alias = std::format("{}_", aggregate_fun); + auto agg_colum_pos = begin_pos; + ++agg_colum_pos; + ++agg_colum_pos; + ++agg_colum_pos; + if (agg_colum_pos->type == TokenType::Comma || agg_colum_pos->type == TokenType::ClosingRoundBracket) + { + --agg_colum_pos; + if (agg_colum_pos->type != TokenType::ClosingRoundBracket) + alias = alias + String(agg_colum_pos->begin, agg_colum_pos->end); + } + } + expr = std::format("{} = {}", alias, expr); + } + expr_aggregations.push_back(expr); + } + else + { + if (String(equal_pos->begin, equal_pos->end) != "=") + { + String groupby_fun = String(begin_pos->begin, begin_pos->end); + if (equal_pos->isEnd() || equal_pos->type == TokenType::Comma || equal_pos->type == TokenType::Semicolon + || equal_pos->type == TokenType::PipeMark) + { + expr = groupby_fun; + } + else + { + String alias; + if (groupby_fun == "bin" || groupby_fun == "bin_at") + { + auto bin_colum_pos = begin_pos; + ++bin_colum_pos; + ++bin_colum_pos; + alias = String(bin_colum_pos->begin, bin_colum_pos->end); + ++bin_colum_pos; + if 
(bin_colum_pos->type != TokenType::Comma) + alias.clear(); + } + if (alias.empty()) + { + alias = std::format("Columns{}", new_column_index); + ++new_column_index; + } - auto begin = pos; - auto pos_groupby = pos; + expr = std::format("{} = {}", alias, expr); + } + } + expr_groupbys.push_back(expr); + } + }; while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) { - if (String(pos->begin, pos->end) == "by") + if (pos->type == TokenType::OpeningRoundBracket) + ++bracket_count; + + if (pos->type == TokenType::ClosingRoundBracket) + --bracket_count; + + if ((bracket_count == 0 and pos->type == TokenType::Comma) || String(pos->begin, pos->end) == "by") { - groupby = true; - auto end = pos; - --end; - expr_aggregation = begin <= end ? String(begin->begin, end->end) : ""; - pos_groupby = pos; - ++pos_groupby; + auto end_pos = pos; + --end_pos; + apply_aliais(column_begin_pos, end_pos, groupby); + if (String(pos->begin, pos->end) == "by") + groupby = true; + column_begin_pos = pos; + ++column_begin_pos; } ++pos; } --pos; - if (groupby) - expr_groupby = String(pos_groupby->begin, pos->end); - else - expr_aggregation = begin <= pos ? String(begin->begin, pos->end) : ""; + apply_aliais(column_begin_pos, pos, groupby); + + for (auto const & expr : expr_aggregations) + expr_aggregation = expr_aggregation.empty() ? expr : expr_aggregation + "," + expr; - auto expr_aggregation_str = expr_aggregation.empty() ? "" : expr_aggregation +","; - expr_columns = groupby ? expr_aggregation_str + expr_groupby : expr_aggregation_str; + for (auto const & expr : expr_groupbys) + expr_groupby = expr_groupby.empty() ? 
expr : expr_groupby + "," + expr; - String converted_columns = getExprFromToken(expr_columns, pos.max_depth); + if (!expr_groupby.empty()) + expr_columns = expr_groupby; + + if (!expr_aggregation.empty()) + { + if (expr_columns.empty()) + expr_columns = expr_aggregation; + else + expr_columns = expr_columns + "," + expr_aggregation; + } + + String converted_columns = getExprFromToken(expr_columns, pos.max_depth); Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); @@ -65,7 +213,7 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (groupby) { - String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth); + String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth); Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth); @@ -77,5 +225,4 @@ bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte return true; } - } diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h index 1aad02705dfd..e95043c15232 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.h +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -8,7 +8,6 @@ namespace DB class ParserKQLSummarize : public ParserKQLBase { - protected: const char * getName() const override { return "KQL summarize"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp index 6356ad688b67..b4a627efa155 100644 --- a/src/Parsers/Kusto/ParserKQLTable.cpp +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -3,45 +3,27 @@ #include #include #include + #include + +namespace +{ +const std::unordered_set sql_keywords{"SELECT", "INSERT", "CREATE", 
"ALTER", "SYSTEM", "SHOW", "GRANT", "REVOKE", + "ATTACH", "CHECK", "DESCRIBE", "DESC", "DETACH", "DROP", "EXISTS", "KILL", + "OPTIMIZE", "RENAME", "SET", "TRUNCATE", "USE", "EXPLAIN"}; +} + namespace DB { bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - std::unordered_set sql_keywords - ({ - "SELECT", - "INSERT", - "CREATE", - "ALTER", - "SYSTEM", - "SHOW", - "GRANT", - "REVOKE", - "ATTACH", - "CHECK", - "DESCRIBE", - "DESC", - "DETACH", - "DROP", - "EXISTS", - "KILL", - "OPTIMIZE", - "RENAME", - "SET", - "TRUNCATE", - "USE", - "EXPLAIN" - }); - ASTPtr tables; String table_name(pos->begin,pos->end); String table_name_upcase(table_name); - std::transform(table_name_upcase.begin(), table_name_upcase.end(),table_name_upcase.begin(), toupper); - - if (sql_keywords.find(table_name_upcase) != sql_keywords.end()) + std::ranges::transform(table_name_upcase, table_name_upcase.begin(), toupper); + if (sql_keywords.contains(table_name_upcase)) return false; if (!ParserTablesInSelectQuery().parse(pos, tables, expected)) diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h index c67dcb151562..8e03b50cb8bb 100644 --- a/src/Parsers/Kusto/ParserKQLTable.h +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -8,7 +8,6 @@ namespace DB class ParserKQLTable : public ParserKQLBase { - protected: const char * getName() const override { return "KQL Table"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/Kusto/ParserKQLTimespan.cpp b/src/Parsers/Kusto/ParserKQLTimespan.cpp new file mode 100644 index 000000000000..745e5dd70c58 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTimespan.cpp @@ -0,0 +1,257 @@ +#include "ParserKQLTimespan.h" +#include "Utilities.h" + +#include + +#include +#include + +namespace x3 = boost::spirit::x3; + +namespace +{ +enum class KQLTimespanUnit +{ + Day, + Hour, + Minute, + Second, + Millisecond, + Microsecond, + Nanosecond, + Tick +}; + +template +concept 
arithmetic = std::is_arithmetic_v; + +Int64 kqlTimespanToTicks(const arithmetic auto value, const KQLTimespanUnit unit) +{ + static constexpr Int64 TICKS_PER_MICROSECOND = 10; + static constexpr auto TICKS_PER_MILLISECOND = TICKS_PER_MICROSECOND * 1000; + static constexpr auto TICKS_PER_SECOND = TICKS_PER_MILLISECOND * 1000; + static constexpr auto TICKS_PER_MINUTE = TICKS_PER_SECOND * 60; + static constexpr auto TICKS_PER_HOUR = TICKS_PER_MINUTE * 60; + static constexpr auto TICKS_PER_DAY = TICKS_PER_HOUR * 24; + + switch (unit) + { + case KQLTimespanUnit::Day: + return static_cast(value * TICKS_PER_DAY); + case KQLTimespanUnit::Hour: + return static_cast(value * TICKS_PER_HOUR); + case KQLTimespanUnit::Minute: + return static_cast(value * TICKS_PER_MINUTE); + case KQLTimespanUnit::Second: + return static_cast(value * TICKS_PER_SECOND); + case KQLTimespanUnit::Millisecond: + return static_cast(value * TICKS_PER_MILLISECOND); + case KQLTimespanUnit::Microsecond: + return static_cast(value * TICKS_PER_MICROSECOND); + case KQLTimespanUnit::Tick: + return static_cast(value); + case KQLTimespanUnit::Nanosecond: + return static_cast(value / 100); + } +} + +struct TimespanUnits : public x3::symbols +{ + TimespanUnits() + { + // clang-format off + add + ("d", KQLTimespanUnit::Day) + ("day", KQLTimespanUnit::Day) + ("days", KQLTimespanUnit::Day) + ("h", KQLTimespanUnit::Hour) + ("hr", KQLTimespanUnit::Hour) + ("hrs", KQLTimespanUnit::Hour) + ("hour", KQLTimespanUnit::Hour) + ("hours", KQLTimespanUnit::Hour) + ("m", KQLTimespanUnit::Minute) + ("min", KQLTimespanUnit::Minute) + ("minute", KQLTimespanUnit::Minute) + ("minutes", KQLTimespanUnit::Minute) + ("s", KQLTimespanUnit::Second) + ("sec", KQLTimespanUnit::Second) + ("second", KQLTimespanUnit::Second) + ("seconds", KQLTimespanUnit::Second) + ("ms", KQLTimespanUnit::Millisecond) + ("milli", KQLTimespanUnit::Millisecond) + ("millis", KQLTimespanUnit::Millisecond) + ("millisec", KQLTimespanUnit::Millisecond) + 
("millisecond", KQLTimespanUnit::Millisecond) + ("milliseconds", KQLTimespanUnit::Millisecond) + ("micro", KQLTimespanUnit::Microsecond) + ("micros", KQLTimespanUnit::Microsecond) + ("microsec", KQLTimespanUnit::Microsecond) + ("microsecond", KQLTimespanUnit::Microsecond) + ("microseconds", KQLTimespanUnit::Microsecond) + ("nano", KQLTimespanUnit::Nanosecond) + ("nanos", KQLTimespanUnit::Nanosecond) + ("nanosec", KQLTimespanUnit::Nanosecond) + ("nanosecond", KQLTimespanUnit::Nanosecond) + ("nanoseconds", KQLTimespanUnit::Nanosecond) + ("tick", KQLTimespanUnit::Tick) + ("ticks", KQLTimespanUnit::Tick) + ; + // clang-format on + } +}; + +const TimespanUnits timespan_units; + +struct KQLTimespanComponents +{ + static constexpr auto MAX_SECONDS_FRACTIONAL = 10'000'000U; + + bool isValid() const { return hours < 24 && minutes < 60 && seconds < 60 && seconds_fractional < MAX_SECONDS_FRACTIONAL; } + std::optional toTicks() const + { + if (!isValid()) + return {}; + + const auto sign = is_negative ? 
-1 : 1; + auto seconds_fractional_in_ticks = seconds_fractional; + while (seconds_fractional_in_ticks > 0 && seconds_fractional_in_ticks < (MAX_SECONDS_FRACTIONAL / 10)) + seconds_fractional_in_ticks *= 10; + + const auto ticks = kqlTimespanToTicks(days, KQLTimespanUnit::Day) + kqlTimespanToTicks(hours, KQLTimespanUnit::Hour) + + kqlTimespanToTicks(minutes, KQLTimespanUnit::Minute) + kqlTimespanToTicks(seconds, KQLTimespanUnit::Second) + + kqlTimespanToTicks(seconds_fractional_in_ticks, KQLTimespanUnit::Tick); + return sign * ticks; + } + + bool is_negative = false; + unsigned days = 0; + unsigned hours = 0; + unsigned minutes = 0; + unsigned seconds = 0; + unsigned seconds_fractional = 0; +}; + +struct KQLTimespanNull +{ +}; + +using KQLTimespanValueWithUnit = std::pair; + +using x3::_attr; +using x3::_val; +using x3::double_; +using x3::int_; +using x3::lexeme; +using x3::lit; +using x3::omit; +using x3::uint_; + +const auto SET_DAYS = [](auto & ctx) { _val(ctx).days = _attr(ctx); }; +const auto SET_HOURS_AND_MINUTES = [](auto & ctx) +{ + auto & kql_timespan_components = _val(ctx); + const auto & attributes = _attr(ctx); + kql_timespan_components.hours = at_c<0>(attributes); + kql_timespan_components.minutes = at_c<1>(attributes); +}; + +const auto SET_NEGATIVE = [](auto & ctx) { _val(ctx).is_negative = true; }; +const auto SET_SECONDS = [](auto & ctx) { _val(ctx).seconds = _attr(ctx); }; +const auto SET_SECONDS_FRACTIONAL = [](auto & ctx) { _val(ctx).seconds_fractional = _attr(ctx); }; + +const x3::rule KQL_TIMESPAN_SEPARATED_COMPONENTS = "KQL timespan separated components"; +// clang-format off +const auto KQL_TIMESPAN_SEPARATED_COMPONENTS_def = + lexeme + [ + -(lit('-')[SET_NEGATIVE] | lit('+')) + >> -(uint_ >> lit('.'))[SET_DAYS] + >> (uint_ >> lit(':') >> uint_)[SET_HOURS_AND_MINUTES] + >> -(lit(':') >> uint_[SET_SECONDS] >> -(lit('.') >> uint_[SET_SECONDS_FRACTIONAL])) + ]; +// clang-format on + +const auto SET_VALUE_AND_UNIT = [](auto & ctx) +{ + const 
/// Renders a tick count as the expression text `toIntervalNanosecond(<ticks * 100>)`,
/// or `toIntervalNanosecond(null)` when `ticks` is empty.
///
/// Throws BAD_ARGUMENTS when the nanosecond value does not fit into Int64. The plain
/// multiplication used before overflowed (undefined behaviour) for such inputs.
std::string kqlTicksToInterval(const std::optional<Int64> ticks)
{
    if (!ticks)
        return "toIntervalNanosecond(null)";

    // The tick count is scaled by 100 to obtain nanoseconds; do it with an overflow check.
    Int64 nanoseconds = 0;
    if (__builtin_mul_overflow(*ticks, Int64{100}, &nanoseconds))
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Timespan of {} ticks is out of range for a nanosecond interval", *ticks);

    return std::format("toIntervalNanosecond({})", nanoseconds);
}
/// Rewrites `... | top <N> by <expr>` into the equivalent pipeline text
/// `... | sort by <expr> | take <N>` and stores it in `query`.
///
/// `operations.back().second` is taken as the first token after the `top` keyword and
/// `operations.front().second` as the first token of the whole pipeline. The text kept as the
/// previous query ends two tokens before the operator arguments.
///
/// Always returns true. Throws SYNTAX_ERROR when the operator has no arguments, when `by` is not
/// preceded by a row-count expression, or when either side of `by` is empty.
bool ParserKQLTop::updatePipeLine(OperationsPos & operations, String & query)
{
    Pos pos = operations.back().second;

    if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon)
        throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near top operator");

    Pos start_pos = operations.front().second;
    Pos end_pos = pos;
    // Step back over the two tokens that precede the operator arguments.
    --end_pos;
    --end_pos;

    String prev_query(start_pos->begin, end_pos->end);

    String limit_expr, sort_expr;
    start_pos = pos;
    end_pos = pos;
    while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
    {
        if (String(pos->begin, pos->end) == "by")
        {
            // `by` with nothing in front of it (e.g. `top by x`) would make the range below run
            // backwards, so report it as a syntax error instead of building a string from it.
            if (!(start_pos < pos))
                throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near top operator");

            auto limit_end_pos = pos;
            --limit_end_pos;
            limit_expr = String(start_pos->begin, limit_end_pos->end);
            start_pos = pos;
            ++start_pos;
        }
        end_pos = pos;
        ++pos;
    }

    // Everything after the last `by` is the sort expression; it is empty when `by` was the last token.
    sort_expr = (start_pos <= end_pos) ? String(start_pos->begin, end_pos->end) : "";
    if (limit_expr.empty() || sort_expr.empty())
        throw Exception(ErrorCodes::SYNTAX_ERROR, "top operator need a by clause");

    query = std::format("{} sort by {} | take {}", prev_query, sort_expr, limit_expr);

    return true;
}
(String(pos->begin, pos->end) == "of") + { + auto number_end_pos = pos; + --number_end_pos; + number_of_values = String(start_pos->begin, number_end_pos->end); + start_pos = pos; + ++start_pos; + } + + if (String(pos->begin, pos->end) == "by") + { + auto expr_end_pos = pos; + --expr_end_pos; + value_expression = String(start_pos->begin, expr_end_pos->end); + start_pos = pos; + ++start_pos; + } + end_pos = pos; + ++pos; + } + + if (value_expression.empty()) + value_expression = (start_pos <= end_pos) ? String(start_pos->begin, end_pos->end) : ""; + else + summing_expression = (start_pos <= end_pos) ? String(start_pos->begin, end_pos->end) : ""; + + if (number_of_values.empty() || value_expression.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "top-hitter operator need a ValueExpression"); + + if (summing_expression.empty()) + query = std::format("{0} summarize approximate_count_{1} = count() by {1} | sort by approximate_count_{1} desc | take {2} ", prev_query, value_expression, number_of_values); + else + query = std::format("{0} summarize approximate_sum_{1} = sum({1}) by {2} | sort by approximate_sum_{1} desc | take {3}", prev_query, summing_expression, value_expression, number_of_values); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLTopHitter.h b/src/Parsers/Kusto/ParserKQLTopHitter.h new file mode 100644 index 000000000000..2fa6a9b6203d --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTopHitter.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLTopHitters : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL top-hitters"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + bool updatePipeLine (OperationsPos & operations, String & query) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLTopNested.cpp b/src/Parsers/Kusto/ParserKQLTopNested.cpp new file mode 100644 index 000000000000..5854910b1edf --- /dev/null +++ 
b/src/Parsers/Kusto/ParserKQLTopNested.cpp @@ -0,0 +1,431 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_DIRECTION_OF_SORTING; + extern const int SYNTAX_ERROR; +} + +String ParserKQLTopNested ::calculateSingleTopNestedWithOthers( + const TopNestedClauses & top_nested_clauses, size_t layer, bool has_others, const uint32_t max_depth) +{ + const String source_table = "source_table AS (SELECT * FROM StormEvents) "; + const String & topn = getExprFromToken(top_nested_clauses[layer].topn, max_depth); + const String & expr_alias = top_nested_clauses[layer].expr_alias; + const String & expr = getExprFromToken(top_nested_clauses[layer].expr, max_depth); + const String & agg_alias = top_nested_clauses[layer].agg_alias; + const String & agg_expr = getExprFromToken(top_nested_clauses[layer].agg_expr, max_depth); + const String & order_expr = top_nested_clauses[layer].order; + + String topn_expr = topn.empty() ? 
"" : std::format("LIMIT {} ", topn); + String column_expr_with_aliais = expr + " AS " + expr_alias; + String agg_expr_with_aliais = std::format("{} AS {} ", agg_expr, agg_alias); + String agg_expr_value_with_aliais = std::format("{} AS {}_value ", agg_expr, agg_alias); + String query; + if (layer == 0) + { + query = std::format( + "WITH {0},tb0_normal AS (SELECT {1}, {2} FROM source_table GROUP BY {3} ORDER BY {4} {5} {6})", + source_table, + column_expr_with_aliais, + agg_expr_with_aliais, + expr_alias, + agg_alias, + order_expr, + topn_expr); + if (has_others) + query = query + + std::format( + ",tb0_others AS (SELECT {0} FROM source_table WHERE {1} NOT IN (SELECT {1} FROM tb0_normal))", + agg_expr_value_with_aliais, + expr_alias); + } + else + { + const String tb0_normal_name = std::format("tb{}_normal", layer - 1); + const String row_alias0_name = std::format("row{}", layer - 1); + + const String tb1_prev_name = std::format("tb{}_prev", layer); + const String tb1_partition_name = std::format("tb{}_partition", layer); + const String tb1_normal_name = std::format("tb{}_normal", layer); + const String tb1_others_prev_name = std::format("tb{}_others_prev", layer); + const String tb1_others_name = std::format("tb{}_others", layer); + const String row_alias1_name = std::format("row{}", layer); + + String column_list, select_list, join_list, group_list, prev_group_list; + for (size_t i = 0; i < layer; ++i) + { + const String select_tmp = std::format("{0}, {1}", top_nested_clauses[i].expr_alias, top_nested_clauses[i].agg_alias); + select_list = select_list.empty() ? select_tmp : select_list + ", " + select_tmp; + join_list = join_list.empty() ? top_nested_clauses[i].expr : join_list + ", " + top_nested_clauses[i].expr; + column_list = column_list.empty() ? 
top_nested_clauses[i].expr_alias : column_list + ", " + top_nested_clauses[i].expr_alias; + + prev_group_list = select_list; + } + group_list = select_list + ", " + expr_alias; + auto tb1_prev_select_list = select_list + ", " + column_expr_with_aliais + ", " + agg_expr_with_aliais; + auto tb1_partition_select_list = select_list + ", " + expr_alias + ", " + agg_alias; + auto tb1_others_select_list = select_list + ", " + expr_alias + ", " + agg_alias; + + + const String tb1_prev_query = std::format( + "{0} AS (SELECT {1} FROM {2} INNER JOIN source_table AS join1 USING ({3}) GROUP BY {4})", + tb1_prev_name, + tb1_prev_select_list, + tb0_normal_name, + join_list, + group_list); + + const String tb1_partition_query = std::format( + "{0} AS (SELECT {1}, ROW_NUMBER () over (PARTITION by {2} order by {3} {4}) AS {5} FROM {6})", + tb1_partition_name, + tb1_partition_select_list, + column_list, + agg_alias, + order_expr, + row_alias1_name, + tb1_prev_name); + + const String where_clause = (topn.empty() || layer < 1) ? 
"" : std::format("WHERE {} <= {}", row_alias1_name, topn); + const String tb1_normal_query + = std::format("{0} AS (SELECT {1} FROM {2} {3})", tb1_normal_name, tb1_partition_select_list, tb1_partition_name, where_clause); + + query = tb1_prev_query + "," + tb1_partition_query + "," + tb1_normal_query; + if (has_others) + { + auto tb1_others_prev_select_list = column_list + ", " + agg_expr_value_with_aliais; + auto tb1_others_prev_join_clause + = std::format("LEFT JOIN {0} USING ({1})", tb1_normal_name, column_list + ", " + expr_alias); + auto tb1_others_prev_join_where_clasue = std::format(" empty({}.{}) ", tb1_normal_name, expr); + for (size_t i = 0; i < layer; ++i) + tb1_others_prev_join_where_clasue + += std::format("AND source_table.{0} IN (SELECT {0} FROM {1}) ", top_nested_clauses[i].expr, tb1_normal_name); + + const String tb1_others_prev_query = std::format( + "{0} AS (SELECT {1} FROM source_table {2} WHERE {3} GROUP BY {4})", + tb1_others_prev_name, + tb1_others_prev_select_list, + tb1_others_prev_join_clause, + tb1_others_prev_join_where_clasue, + column_list); + + const String tb1_others_query = std::format( + "{0} AS (SELECT DISTINCT {1}, {2}_value FROM {3} RIGHT JOIN {4} USING ({5}))", + tb1_others_name, + select_list, + agg_alias, + tb1_others_prev_name, + tb1_normal_name, + column_list); + + query = query + "," + tb1_others_prev_query + "," + tb1_others_query; + } + } + return query; +} + +String ParserKQLTopNested ::calculateTopNestedWithOthers(const TopNestedClauses & top_nested_clauses, const uint32_t max_depth) +{ + String query, last_select_list, last_others_list; + auto size = top_nested_clauses.size(); + bool has_others = false; + for (size_t i = 0; i < size; ++i) + { + if (!top_nested_clauses[i].others.empty()) + { + has_others = true; + break; + } + } + + for (size_t i = 0; i < size; ++i) + { + const String single_query = calculateSingleTopNestedWithOthers(top_nested_clauses, i, has_others, max_depth); + const String others_expr + = 
top_nested_clauses[i].others.empty() ? "NULL" : getExprFromToken(top_nested_clauses[i].others, max_depth); + const String others_agg = top_nested_clauses[i].others.empty() ? "NULL" : std::format("{}_value", top_nested_clauses[i].agg_alias); + if (i == 0) + { + query = single_query; + last_select_list = std::format("{}, {}", top_nested_clauses[i].expr_alias, top_nested_clauses[i].agg_alias); + last_others_list = std::format( + "{} AS {}, {} AS {}", others_expr, top_nested_clauses[i].expr_alias, others_agg, top_nested_clauses[i].agg_alias); + } + else + { + query = query + "," + single_query; + last_others_list + = last_select_list + ", " + + std::format( + "{} AS {}, {} AS {}", others_expr, top_nested_clauses[i].expr_alias, others_agg, top_nested_clauses[i].agg_alias); + last_select_list + = last_select_list + ", " + std::format("{}, {}", top_nested_clauses[i].expr_alias, top_nested_clauses[i].agg_alias); + } + } + if (has_others) + for (size_t i = 0; i < size - 1; ++i) + { + auto other_values = top_nested_clauses[i].agg_alias; + String all_others_table = std::format("tb{}_all_others AS (SELECT ", i); + String separator; + String first_list; + for (size_t j = 0; j < i; ++j) + { + if (first_list.empty()) + first_list = std::format("{}, {}", top_nested_clauses[j].expr_alias, top_nested_clauses[j].agg_alias); + else + first_list += std::format(", {}, {}", top_nested_clauses[j].expr_alias, top_nested_clauses[j].agg_alias); + } + all_others_table += first_list; + for (size_t j = i; j < size; ++j) + { + separator = (i == 0) ? "" : ","; + if (i == 0) + { + separator = (j == 0) ? 
"" : ","; + } + else + separator = ","; + if (top_nested_clauses[j].others.empty()) + all_others_table + = all_others_table + + std::format( + "{} NULL AS {} , NULL AS {}", separator, top_nested_clauses[j].expr_alias, top_nested_clauses[j].agg_alias); + else + all_others_table = all_others_table + + std::format("{} {} AS {} , {}_value AS {}", + separator, + getExprFromToken(top_nested_clauses[j].others, max_depth), + top_nested_clauses[j].expr_alias, + other_values, + top_nested_clauses[j].agg_alias); + } + all_others_table += std::format(" FROM tb{}_others )", i); + query = query + "," + all_others_table; + } + + String last_normal_table = std::format("tb{}_normal", size - 1); + if (has_others) + { + String last_others_table = std::format("tb{}_others", size - 1); + query = query + + std::format( + ", last_query AS (SELECT {0} FROM {1} UNION ALL SELECT {2} FROM {3}", + last_select_list, + last_normal_table, + last_others_list, + last_others_table); + if (size > 1) + { + for (size_t i = 0; i < size - 1; ++i) + { + String tb_all_others = std::format("tb{}_all_others", i); + query = query + std::format(" UNION ALL SELECT {} FROM {}", last_select_list, tb_all_others); + } + } + query += ") Select * from last_query"; + } + else + query = query + std::format(" SELECT {0} FROM {1} ", last_select_list, last_normal_table); + + return query; +} + +bool ParserKQLTopNested ::parseSingleTopNestedClause(Pos & begin_pos, Pos & last_pos, TopNestedClause & top_nested_clause, const int layer) /* Parses the tokens between begin_pos and last_pos into top_nested_clause (topn, expression, optional others, aggregation, sort order); the first three tokens are skipped (presumably `top - nested`, confirm against the caller). Throws SYNTAX_ERROR or UNKNOWN_DIRECTION_OF_SORTING on malformed input. NOTE: last_pos is taken by reference and is left one token earlier on return. */ +{ + TopNestedClause arg; + auto pos = begin_pos; + for (auto i = 0; i < 3; ++i) + ++pos; + auto start_pos = pos; + auto end_pos = pos; + + auto get_name_value = [&](Pos & begin, Pos & end, String & name, String & value) + { + Pos tmp = begin; + bool has_alias = false; + Pos value_pos = begin; + while (tmp < end) + { + if (String(tmp->begin, tmp->end) == "=") + { + --tmp; + name = String(begin->begin, tmp->end); + ++tmp; + ++tmp; + --end; + value = String(tmp->begin, end->end); + value_pos =
tmp; + ++end; + has_alias = true; + break; + } + ++tmp; + } + if (!has_alias) + { + --end; + value = String(begin->begin, end->end); + ++end; + } + return value_pos; + }; + + bool has_by = false, has_of = false; + Pos expr_start_pos = begin_pos; + Pos expr_end_pos = begin_pos; + while (pos < last_pos) + { + if (String(pos->begin, pos->end) == "of") + { + has_of = true; + end_pos = pos; + --end_pos; + if (start_pos <= end_pos) + arg.topn = String(start_pos->begin, end_pos->end); + start_pos = pos; + ++start_pos; + } + + if (String(pos->begin, pos->end) == "with") + { + end_pos = pos; + expr_start_pos = get_name_value(start_pos, end_pos, arg.expr_alias, arg.expr); + expr_end_pos = end_pos; + start_pos = pos; + ++start_pos; + } + + if (String(pos->begin, pos->end) == "by") + { + has_by = true; + end_pos = pos; + if (arg.expr.empty()) + { + expr_start_pos = get_name_value(start_pos, end_pos, arg.expr_alias, arg.expr); + expr_end_pos = end_pos; + } + else + get_name_value(start_pos, end_pos, arg.others_name, arg.others); + start_pos = pos; + ++start_pos; + } + ++pos; + } + + if (!has_of) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing 'of' keyword for top-nested operator"); + + if (!has_by) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing 'by' keyword for top-nested operator"); + + get_name_value(start_pos, pos, arg.agg_alias, arg.agg_expr); + + if (arg.agg_expr.empty()) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Missing aggregation expression for top-nested operator"); + + if (arg.expr_alias.empty()) + { --expr_end_pos; + if (expr_start_pos == expr_end_pos) + arg.expr_alias = arg.expr; + else + arg.expr_alias = std::format("Column{}", layer + 1); + } + + if (arg.agg_alias.empty()) + arg.agg_alias = std::format("aggregated_{}", arg.expr_alias); + + --last_pos; + + if (last_pos->type != TokenType::BareWord) + { + if (last_pos->type != TokenType::Number && last_pos->type != TokenType::ClosingRoundBracket) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Incorrect
aggregation expression: {}", arg.agg_expr); /* report the aggregation text that was rejected, not the grouping expression */ + arg.order = "DESC"; + } + else + { + const auto sort_direct = String(last_pos->begin, last_pos->end); + if (sort_direct != "desc" && sort_direct != "asc") + throw Exception(ErrorCodes::UNKNOWN_DIRECTION_OF_SORTING, "Unknown direction of sorting: {}", sort_direct); + + std::size_t found = arg.agg_expr.rfind(sort_direct); /* the direction keyword is the last token of agg_expr; searching from the front could cut at an earlier match, e.g. inside `description` */ + arg.agg_expr = arg.agg_expr.substr(0, found); + arg.order = sort_direct; + } + + top_nested_clause = std::move(arg); + return true; +} + +bool ParserKQLTopNested ::parseTopNestedClause(Pos & pos, TopNestedClauses & top_nested_clauses) +{ + TopNestedClause top_nested_clause; + auto start_pos = pos; + for (auto i = 0; i < 3; ++i) + --start_pos; + + auto end_pos = start_pos; + auto paren_count = 0; + int layer = 0; + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + + if (String(pos->begin, pos->end) == "," and paren_count == 0) + { + end_pos = pos; + parseSingleTopNestedClause(start_pos, end_pos, top_nested_clause, layer); + ++layer; + top_nested_clauses.emplace_back(top_nested_clause); + start_pos = pos; + ++start_pos; + } + ++pos; + } + + parseSingleTopNestedClause(start_pos, pos, top_nested_clause, layer); + top_nested_clauses.emplace_back(top_nested_clause); + return true; +} + +bool ParserKQLTopNested ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + TopNestedClauses top_nested_clauses; + + parseTopNestedClause(pos, top_nested_clauses); + String query = calculateTopNestedWithOthers(top_nested_clauses, pos.max_depth); + + ASTPtr select_node; + Tokens tokens(query.c_str(), query.c_str() + query.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + if (!ParserSelectQuery().parse(new_pos, select_node, expected)) + return false; + + auto with_node = select_node->as()->with(); + + auto * with_elem =
with_node->children[0]->as(); + + auto sub_select = with_elem->children[0]->children[0]->children[0]->children[0]; + if (!setSubQuerySource(sub_select, node, false, false, "")) + return false; + + node = std::move(select_node); + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLTopNested.h b/src/Parsers/Kusto/ParserKQLTopNested.h new file mode 100644 index 000000000000..89fd283573aa --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTopNested.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLTopNested : public ParserKQLBase +{ + +protected: + struct TopNestedClause + { + String topn; + String expr_alias; + String expr; + String others_name; + String others; + String agg_alias; + String agg_function; + String agg_expr; + String agg_column; + String order; + }; + using TopNestedClauses = std::vector; + const char * getName() const override { return "KQL top-nested"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + + static bool parseSingleTopNestedClause(Pos & begin_pos, Pos & last_pos, TopNestedClause & top_nested_clause, const int layer); + static bool parseTopNestedClause(Pos & pos, TopNestedClauses & top_nested_clauses); + static String calculateTopNestedWithOthers(const TopNestedClauses & top_nested_clauses, const uint32_t max_depth); + static String calculateSingleTopNestedWithOthers(const TopNestedClauses & top_nested_clauses, size_t layer, bool has_others, const uint32_t max_depth); +}; + +} diff --git a/src/Parsers/Kusto/Utilities.cpp b/src/Parsers/Kusto/Utilities.cpp new file mode 100644 index 000000000000..3cdd996dc043 --- /dev/null +++ b/src/Parsers/Kusto/Utilities.cpp @@ -0,0 +1,84 @@ +#include "Utilities.h" + +#include "KustoFunctions/IParserKQLFunction.h" + +#include +#include +#include + +namespace DB +{ +String extractLiteralArgumentWithoutQuotes(const std::string & function_name, IParser::Pos & pos) +{ + ++pos; + if (pos->type == TokenType::QuotedIdentifier || pos->type 
== TokenType::StringLiteral) + { + auto result = extractTokenWithoutQuotes(pos); + ++pos; + return result; + } + + --pos; + return IParserKQLFunction::getArgument(function_name, pos, IParserKQLFunction::ArgumentState::Raw); +} + +String extractTokenWithoutQuotes(IParser::Pos & pos) +{ + const auto offset = static_cast(pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral); + return {pos->begin + offset, pos->end - offset}; +} + +void setSelectAll(ASTSelectQuery & select_query) +{ + auto expression_list = std::make_shared(); + expression_list->children.push_back(std::make_shared()); + select_query.setExpression(ASTSelectQuery::Expression::SELECT, std::move(expression_list)); +} + +std::optional wildcardToRegex(const String & wildcard) /* Translates a wildcard pattern ('*' -> ".*", '?' -> ".") into a regex anchored with ^...$, backslash-escaping regex metacharacters; returns an empty optional when no character needed translating or escaping. */ +{ + String regex; + regex += '^'; + bool has_wildcard = false; + for (char c : wildcard) + { + if (c == '*') + { + regex += ".*"; + has_wildcard = true; + } + else if (c == '?') + { + regex += "."; + has_wildcard = true; + } + else if (c == '.'
|| c == '+' || c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}' || c == '|' || c == '\\' || c == '^' || c == '$') /* '{', '}' and '|' are regex operators as well and must be matched literally */ + { + regex += "\\"; + regex += c; + has_wildcard = true; + } + else + { + regex += c; + } + } + regex += '$'; + + if (has_wildcard) + return regex; + + return {}; +} + +ASTPtr wrapInSelectWithUnion(const ASTPtr & select_query) +{ + auto select_with_union_query = std::make_shared(); + auto & list_of_selects = select_with_union_query->list_of_selects; + list_of_selects = std::make_shared(); + list_of_selects->children.push_back(select_query); + select_with_union_query->children.push_back(list_of_selects); + + return select_with_union_query; +} +} diff --git a/src/Parsers/Kusto/Utilities.h b/src/Parsers/Kusto/Utilities.h new file mode 100644 index 000000000000..6e94199718e8 --- /dev/null +++ b/src/Parsers/Kusto/Utilities.h @@ -0,0 +1,13 @@ +#pragma once + +#include +#include + +namespace DB +{ +String extractLiteralArgumentWithoutQuotes(const std::string & function_name, IParser::Pos & pos); +String extractTokenWithoutQuotes(IParser::Pos & pos); +void setSelectAll(ASTSelectQuery & select_query); +std::optional wildcardToRegex(const String & wildcard); +ASTPtr wrapInSelectWithUnion(const ASTPtr & select_query); +} diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index be67807ad8f1..449b6972cd12 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -213,7 +213,7 @@ Token Lexer::nextTokenImpl() for (const char * iterator = token_begin; iterator < pos; ++iterator) { - if (!isWordCharASCII(*iterator) && *iterator != '$') + if (!isWordCharASCII(*iterator) && *iterator != '$' && *iterator != '.') return Token(TokenType::ErrorWrongNumber, token_begin, pos); } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 415d3321eb5a..e7d2f57bfeca 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -21,7 +21,6 @@ #include #include - namespace DB { diff --git a/src/Parsers/ParserInsertQuery.cpp
b/src/Parsers/ParserInsertQuery.cpp index 8601e12ebcba..3eecf9ca32ea 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -13,7 +13,6 @@ #include #include "Parsers/IAST_fwd.h" - namespace DB { diff --git a/src/Parsers/ParserSetQuery.cpp b/src/Parsers/ParserSetQuery.cpp index 727d037112fc..4df74c2dd822 100644 --- a/src/Parsers/ParserSetQuery.cpp +++ b/src/Parsers/ParserSetQuery.cpp @@ -215,7 +215,7 @@ bool ParserSetQuery::parseNameValuePair(SettingChange & change, IParser::Pos & p else if (ParserKeyword("FALSE").ignore(pos, expected)) value = std::make_shared(Field(static_cast(0))); /// for SETTINGS disk=disk(type='s3', path='', ...) - else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name.starts_with("disk")) + else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") { tryGetIdentifierNameInto(name, change.name); change.value = createFieldFromAST(function_ast); @@ -280,7 +280,7 @@ bool ParserSetQuery::parseNameValuePairWithParameterOrDefault( node = std::make_shared(Field(static_cast(1))); else if (ParserKeyword("FALSE").ignore(pos, expected)) node = std::make_shared(Field(static_cast(0))); - else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name.starts_with("disk")) + else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") { change.name = name; change.value = createFieldFromAST(function_ast); diff --git a/src/Parsers/TokenIterator.cpp b/src/Parsers/TokenIterator.cpp index fa792e7c8b5b..1e88b68b5f0a 100644 --- a/src/Parsers/TokenIterator.cpp +++ b/src/Parsers/TokenIterator.cpp @@ -1,5 +1,6 @@ +#include #include - +#include namespace DB { @@ -20,13 +21,54 @@ Tokens::Tokens(const char * begin, const char * end, size_t max_query_size, bool UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin) { + std::unordered_set valid_kql_negative_suffix( + { + "between", + "contains", + "contains_cs", 
+ "endswith", + "endswith_cs", + "~", + "=", + "has", + "has_cs", + "hasprefix", + "hasprefix_cs", + "hassuffix", + "hassuffix_cs", + "in", + "startswith", + "startswith_cs"}); /// We have just two kind of parentheses: () and []. UnmatchedParentheses stack; /// We have to iterate through all tokens until the end to avoid false positive "Unmatched parentheses" error /// when parser failed in the middle of the query. - for (TokenIterator it = begin; it.isValid(); ++it) + for (TokenIterator it = begin; !it->isEnd(); ++it) { + if (!it.isValid()) // allow kql negative operators + { + if (it->type == TokenType::ErrorSingleExclamationMark) + { + ++it; + if (!valid_kql_negative_suffix.contains(String(it.get().begin, it.get().end))) + break; + --it; + } + else + { + if (String(it.get().begin, it.get().end) == "~") + { + --it; + if (const auto prev = String(it.get().begin, it.get().end); prev != "!" && prev != "=" && prev != "in") + break; + ++it; + } + else + break; + } + } + if (it->type == TokenType::OpeningRoundBracket || it->type == TokenType::OpeningSquareBracket) { stack.push_back(*it); @@ -39,7 +81,8 @@ UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin) stack.push_back(*it); return stack; } - else if ((stack.back().type == TokenType::OpeningRoundBracket && it->type == TokenType::ClosingRoundBracket) + else if ( + (stack.back().type == TokenType::OpeningRoundBracket && it->type == TokenType::ClosingRoundBracket) || (stack.back().type == TokenType::OpeningSquareBracket && it->type == TokenType::ClosingSquareBracket)) { /// Valid match. 
diff --git a/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp new file mode 100644 index 000000000000..173b8ea789aa --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_AggregateFunctions.cpp @@ -0,0 +1,143 @@ +#include +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Aggregate, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | summarize t = stdev(Age) by FirstName", + "SELECT\n FirstName,\n sqrt(varSamp(Age)) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = stdevif(Age, Age < 10) by FirstName", + "SELECT\n FirstName,\n sqrt(varSampIf(Age, Age < 10)) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = binary_all_and(Age) by FirstName", + "SELECT\n FirstName,\n groupBitAnd(Age) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = binary_all_or(Age) by FirstName", + "SELECT\n FirstName,\n groupBitOr(Age) AS t\nFROM Customers\nGROUP BY FirstName" + + }, + { + "Customers | summarize t = binary_all_xor(Age) by FirstName", + "SELECT\n FirstName,\n groupBitXor(Age) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize percentiles(Age, 30, 40, 50, 60, 70) by FirstName", + "SELECT\n FirstName,\n quantiles(30 / 100, 40 / 100, 50 / 100, 60 / 100, 70 / 100)(Age) AS percentiles_Age\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = percentiles_array(Age, 10, 20, 30, 50) by FirstName", + "SELECT\n FirstName,\n quantiles(10 / 100, 20 / 100, 30 / 100, 50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = percentiles_array(Age, dynamic([10, 20, 30, 50])) by FirstName", + "SELECT\n FirstName,\n quantiles(10 / 100, 20 / 100, 30 / 100, 50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "DataTable | summarize t = percentilesw(Bucket, Frequency, 
50, 75, 99.9)", + "SELECT quantilesExactWeighted(50 / 100, 75 / 100, 99.9 / 100)(Bucket, Frequency) AS t\nFROM DataTable" + }, + { + "DataTable| summarize t = percentilesw_array(Bucket, Frequency, dynamic([10, 50, 30]))", + "SELECT quantilesExactWeighted(10 / 100, 50 / 100, 30 / 100)(Bucket, Frequency) AS t\nFROM DataTable" + }, + { + "Customers | summarize t = percentile(Age, 50) by FirstName", + "SELECT\n FirstName,\n quantile(50 / 100)(Age) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "DataTable | summarize t = percentilew(Bucket, Frequency, 50)", + "SELECT quantileExactWeighted(50 / 100)(Bucket, Frequency) AS t\nFROM DataTable" + }, + { + "Customers | summarize t = make_list_with_nulls(Age) by FirstName", + "SELECT\n FirstName,\n arrayConcat(groupArray(Age), arrayMap(x -> NULL, range(0, toUInt32(count(*) - length(groupArray(Age))), 1))) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize count() by bin(Age, 10)", + "SELECT\n kql_bin(Age, 10) AS Age,\n count() AS count_\nFROM Customers\nGROUP BY Age" + }, + { + "Customers | summarize count(Age+1) by bin(Age+1, 10)", + "SELECT\n kql_bin(Age + 1, 10) AS Columns1,\n count(Age + 1) AS count_\nFROM Customers\nGROUP BY Columns1" + }, + { + "Customers | summarize count(Age) by bin(Age, 10)", + "SELECT\n kql_bin(Age, 10) AS Age,\n count(Age) AS count_Age\nFROM Customers\nGROUP BY Age" + }, + { + "Customers | summarize count_distinct(Education)", + "SELECT countDistinct(Education) AS Columns1\nFROM Customers" + }, + { + "Customers | summarize count_distinctif(Education,Age >30)", + "SELECT countIfDistinct(Education, Age > 30) AS Columns1\nFROM Customers" + }, + { /* the comma after the input text matters: without it the two literals concatenate into a single-field test case and the expected output is never compared */ + "Customers | summarize take_any(FirstName)", + "SELECT any(FirstName) AS take_any_FirstName\nFROM Customers" + }, + { + "Customers | summarize take_any(FirstName), take_any(LastName)", + "SELECT\n any(FirstName) AS take_any_FirstName,\n any(LastName) AS take_any_LastName\nFROM Customers" + }, + { + "Customers | summarize
take_any(FirstName, LastName) by FirstName, LastName", + "SELECT\n FirstName,\n LastName,\n any(FirstName),\n any(LastName) AS take_any_FirstName\nFROM Customers\nGROUP BY\n FirstName,\n LastName" + }, + { + "Customers | summarize take_anyif(FirstName, LastName has 'Diaz')", + "SELECT anyIf(FirstName, hasTokenCaseInsensitive(LastName, 'Diaz')) AS take_anyif_FirstName\nFROM Customers" + }, + { + "Customers | summarize take_anyif(FirstName, LastName has 'Diaz'), dcount(FirstName)", + "SELECT\n anyIf(FirstName, hasTokenCaseInsensitive(LastName, 'Diaz')) AS take_anyif_FirstName,\n uniqCombined64(18)(FirstName) AS dcount_FirstName\nFROM Customers" + }, + { + "Customers | summarize dcount(Education, 2)", + "SELECT uniqCombined64(16)(Education) AS dcount_Education\nFROM Customers" + }, + { + "Customers | summarize dcountif(Education, Occupation=='Professional', 2)", + "SELECT uniqCombined64If(16)(Education, Occupation = 'Professional') AS dcountif_Education\nFROM Customers" + }, + { + "Customers | summarize by FirstName, LastName, Age", + "SELECT\n FirstName,\n LastName,\n Age\nFROM Customers\nGROUP BY\n FirstName,\n LastName,\n Age" + }, + { + "Customers | summarize z=arg_max(Age, FirstName, LastName) by Occupation", + "SELECT\n Occupation,\n argMax(FirstName, Age) AS FirstName,\n argMax(LastName, Age) AS LastName,\n argMax(Age, Age) AS z\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers | summarize z=arg_min(Age, FirstName, LastName) by Occupation", + "SELECT\n Occupation,\n argMin(FirstName, Age) AS FirstName,\n argMin(LastName, Age) AS LastName,\n argMin(Age, Age) AS z\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers | summarize x = hll(Education), y = hll(Occupation) | project xy = hll_merge(x, y) | project dcount_hll(xy);", + "SELECT uniqCombined64Merge(18)(xy) AS Column1\nFROM\n(\n SELECT uniqCombined64MergeState(18)(arrayJoin([x, y])) AS xy\n FROM\n (\n SELECT\n uniqCombined64State(18)(Education) AS x,\n uniqCombined64State(18)(Occupation) AS y\n
FROM Customers\n )\n)" + } +}))); + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Aggregate, ParserRegexTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | summarize variance(Age)", + R"(SELECT IF\(isNaN\(varSamp\(if\(toTypeName\(Age\) = \'Nullable\(Nothing\)\', throwIf\(toTypeName\(Age\) = \'Nullable\(Nothing\)\', \'summarize operator: Failed to resolve scalar expression named null\'\), Age\)\) AS variance_\d+\), 0, variance_\d+\) AS variance_Age\nFROM Customers)" + }, + { + "Customers | summarize variancep(Age)", + R"(SELECT IF\(isNaN\(varPop\(if\(toTypeName\(Age\) = \'Nullable\(Nothing\)\', throwIf\(toTypeName\(Age\) = \'Nullable\(Nothing\)\', \'summarize operator: Failed to resolve scalar expression named null\'\), Age\)\) AS variance_\d+\), 0, variance_\d+\) AS variancep_Age\nFROM Customers)" + }, + { + "Customers | summarize varianceif(Age, Age < 30)", + R"(SELECT IF\(isNaN\(varSampIf\(if\(toTypeName\(Age\) = \'Nullable\(Nothing\)\', throwIf\(toTypeName\(Age\) = \'Nullable\(Nothing\)\', \'summarize operator: Failed to resolve scalar expression named null\'\), Age\), Age < 30\) AS variance_\d+\), 0, variance_\d+\) AS varianceif_Age\nFROM Customers)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp b/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp new file mode 100644 index 000000000000..9f84951d5039 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Binary.cpp @@ -0,0 +1,37 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Binary, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print binary_and(A, B)", + "SELECT bitAnd(CAST(A, 'Int64'), CAST(B, 'Int64')) AS print_0" + }, + { + "print binary_not(A)", + "SELECT bitNot(CAST(A, 'Int64')) AS print_0" + }, + { + "print binary_or(A, B)", + "SELECT bitOr(CAST(A, 'Int64'), CAST(B, 'Int64')) AS print_0" + }, + { + "print 
binary_shift_left(A, B)", + "SELECT if(B < 0, NULL, bitShiftLeft(CAST(A, 'Int64'), B)) AS print_0" + }, + { + "print binary_shift_right(A, B)", + "SELECT if(B < 0, NULL, bitShiftRight(CAST(A, 'Int64'), B)) AS print_0" + }, + { + "print binary_xor(A, B)", + "SELECT bitXor(CAST(A, 'Int64'), CAST(B, 'Int64')) AS print_0" + }, + { + "print bitset_count_ones(A)", + "SELECT bitCount(A) AS print_0" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_Conversion.cpp b/src/Parsers/tests/KQL/gtest_KQL_Conversion.cpp new file mode 100644 index 000000000000..ca20b676962d --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Conversion.cpp @@ -0,0 +1,80 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P( + ParserKQLQuery_Conversion, + ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print tobool(A)", + "SELECT multiIf(toString(A) = 'true', true, toString(A) = 'false', false, toInt64OrNull(toString(A)) != 0) AS print_0" + }, + { + "print toboolean(A)", + "SELECT multiIf(toString(A) = 'true', true, toString(A) = 'false', false, toInt64OrNull(toString(A)) != 0) AS print_0" + }, + { + "print todouble(A)", + "SELECT toFloat64OrNull(toString(A)) / if(toTypeName(A) = 'IntervalNanosecond', 100, 1) AS print_0" + }, + { + "print toint(A)", + "SELECT intDiv(toInt32OrNull(toString(A)), if(toTypeName(A) = 'IntervalNanosecond', 100, 1)) AS print_0" + }, + { + "print tolong(A)", + "SELECT kql_tolong(A) AS print_0" + }, + { + "print toreal(A)", + "SELECT toFloat64OrNull(toString(A)) / if(toTypeName(A) = 'IntervalNanosecond', 100, 1) AS print_0" + }, + { + "print tostring(A)", + "SELECT ifNull(kql_tostring(A), '') AS print_0" + }, + { + "print decimal(123.345)", + "SELECT toDecimal128(CAST('123.345', 'String'), 32) AS print_0" + }, + { + "print decimal(NULL)", + "SELECT NULL AS print_0" + }, + { + "print todecimal('123.45')", + "SELECT if((toTypeName('123.45') = 'String') OR (toTypeName('123.45') = 'FixedString'), 
toDecimal128OrNull(CAST('123.45', 'String'), CAST(if(position(CAST('123.45', 'String'), 'e') = 0, if(countSubstrings(CAST('123.45', 'String'), '.') = 1, length(substr(CAST('123.45', 'String'), position(CAST('123.45', 'String'), '.') + 1)), 0), toUInt64(multiIf((position(CAST('123.45', 'String'), 'e+') AS x) > 0, substr(CAST('123.45', 'String'), x + 2), (position(CAST('123.45', 'String'), 'e-') AS y) > 0, substr(CAST('123.45', 'String'), y + 2), (position(CAST('123.45', 'String'), 'e-') = 0) AND (position(CAST('123.45', 'String'), 'e+') = 0) AND (position(CAST('123.45', 'String'), 'e') > 0), substr(CAST('123.45', 'String'), position(CAST('123.45', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8')), toDecimal128OrNull(CAST('123.45', 'String'), CAST(if(position(CAST('123.45', 'String'), 'e') = 0, if(countSubstrings(CAST('123.45', 'String'), '.') = 1, length(substr(CAST('123.45', 'String'), position(CAST('123.45', 'String'), '.') + 1)), 0), toUInt64(multiIf(x > 0, substr(CAST('123.45', 'String'), x + 2), y > 0, substr(CAST('123.45', 'String'), y + 2), (position(CAST('123.45', 'String'), 'e-') = 0) AND (position(CAST('123.45', 'String'), 'e+') = 0) AND (position(CAST('123.45', 'String'), 'e') > 0), substr(CAST('123.45', 'String'), position(CAST('123.45', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8'))) AS print_0" + }, + { + "print todecimal(NULL)", + "SELECT toDecimal128OrNull(CAST(NULL, 'Nullable(String)'), 17) / if(toTypeName(NULL) = 'IntervalNanosecond', 100, 1) AS print_0" + }, + { + "print todecimal(123456.3456)", + "SELECT if((toTypeName(123456.3456) = 'String') OR (toTypeName(123456.3456) = 'FixedString'), toDecimal128OrNull(CAST('123456.3456', 'String'), CAST(if(position(CAST('123456.3456', 'String'), 'e') = 0, if(countSubstrings(CAST('123456.3456', 'String'), '.') = 1, length(substr(CAST('123456.3456', 'String'), position(CAST('123456.3456', 'String'), '.') + 1)), 0), toUInt64(multiIf((position(CAST('123456.3456', 'String'), 'e+') AS x) > 0, 
substr(CAST('123456.3456', 'String'), x + 2), (position(CAST('123456.3456', 'String'), 'e-') AS y) > 0, substr(CAST('123456.3456', 'String'), y + 2), (position(CAST('123456.3456', 'String'), 'e-') = 0) AND (position(CAST('123456.3456', 'String'), 'e+') = 0) AND (position(CAST('123456.3456', 'String'), 'e') > 0), substr(CAST('123456.3456', 'String'), position(CAST('123456.3456', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8')), toDecimal128OrNull(CAST('123456.3456', 'String'), CAST(if(position(CAST('123456.3456', 'String'), 'e') = 0, if(countSubstrings(CAST('123456.3456', 'String'), '.') = 1, length(substr(CAST('123456.3456', 'String'), position(CAST('123456.3456', 'String'), '.') + 1)), 0), toUInt64(multiIf(x > 0, substr(CAST('123456.3456', 'String'), x + 2), y > 0, substr(CAST('123456.3456', 'String'), y + 2), (position(CAST('123456.3456', 'String'), 'e-') = 0) AND (position(CAST('123456.3456', 'String'), 'e+') = 0) AND (position(CAST('123456.3456', 'String'), 'e') > 0), substr(CAST('123456.3456', 'String'), position(CAST('123456.3456', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8'))) AS print_0" + }, + { + "print todecimal('abc')", + "SELECT if((toTypeName('abc') = 'String') OR (toTypeName('abc') = 'FixedString'), toDecimal128OrNull(CAST('abc', 'String'), CAST(if(position(CAST('abc', 'String'), 'e') = 0, if(countSubstrings(CAST('abc', 'String'), '.') = 1, length(substr(CAST('abc', 'String'), position(CAST('abc', 'String'), '.') + 1)), 0), toUInt64(multiIf((position(CAST('abc', 'String'), 'e+') AS x) > 0, substr(CAST('abc', 'String'), x + 2), (position(CAST('abc', 'String'), 'e-') AS y) > 0, substr(CAST('abc', 'String'), y + 2), (position(CAST('abc', 'String'), 'e-') = 0) AND (position(CAST('abc', 'String'), 'e+') = 0) AND (position(CAST('abc', 'String'), 'e') > 0), substr(CAST('abc', 'String'), position(CAST('abc', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8')), toDecimal128OrNull(CAST('abc', 'String'), CAST(if(position(CAST('abc', 
'String'), 'e') = 0, if(countSubstrings(CAST('abc', 'String'), '.') = 1, length(substr(CAST('abc', 'String'), position(CAST('abc', 'String'), '.') + 1)), 0), toUInt64(multiIf(x > 0, substr(CAST('abc', 'String'), x + 2), y > 0, substr(CAST('abc', 'String'), y + 2), (position(CAST('abc', 'String'), 'e-') = 0) AND (position(CAST('abc', 'String'), 'e+') = 0) AND (position(CAST('abc', 'String'), 'e') > 0), substr(CAST('abc', 'String'), position(CAST('abc', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8'))) AS print_0" + }, + { + "print todecimal('1e5')", + "SELECT if((toTypeName('1e5') = 'String') OR (toTypeName('1e5') = 'FixedString'), toDecimal128OrNull(CAST('1e5', 'String'), CAST(if(position(CAST('1e5', 'String'), 'e') = 0, if(countSubstrings(CAST('1e5', 'String'), '.') = 1, length(substr(CAST('1e5', 'String'), position(CAST('1e5', 'String'), '.') + 1)), 0), toUInt64(multiIf((position(CAST('1e5', 'String'), 'e+') AS x) > 0, substr(CAST('1e5', 'String'), x + 2), (position(CAST('1e5', 'String'), 'e-') AS y) > 0, substr(CAST('1e5', 'String'), y + 2), (position(CAST('1e5', 'String'), 'e-') = 0) AND (position(CAST('1e5', 'String'), 'e+') = 0) AND (position(CAST('1e5', 'String'), 'e') > 0), substr(CAST('1e5', 'String'), position(CAST('1e5', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8')), toDecimal128OrNull(CAST('1e5', 'String'), CAST(if(position(CAST('1e5', 'String'), 'e') = 0, if(countSubstrings(CAST('1e5', 'String'), '.') = 1, length(substr(CAST('1e5', 'String'), position(CAST('1e5', 'String'), '.') + 1)), 0), toUInt64(multiIf(x > 0, substr(CAST('1e5', 'String'), x + 2), y > 0, substr(CAST('1e5', 'String'), y + 2), (position(CAST('1e5', 'String'), 'e-') = 0) AND (position(CAST('1e5', 'String'), 'e+') = 0) AND (position(CAST('1e5', 'String'), 'e') > 0), substr(CAST('1e5', 'String'), position(CAST('1e5', 'String'), 'e') + 1), CAST('0', 'String')))), 'UInt8'))) AS print_0" + }, + { + "print decimal(1e-5)", + "SELECT toDecimal128(CAST('1e-5', 'String'), 5) AS 
print_0" + }, + { + "print time(9nanoseconds)", + "SELECT toIntervalNanosecond(0) AS print_0" + }, + { + "print time(1tick)", + "SELECT toIntervalNanosecond(100) AS print_0" + } + +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_Count.cpp b/src/Parsers/tests/KQL/gtest_KQL_Count.cpp new file mode 100644 index 000000000000..1649b325b0f4 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Count.cpp @@ -0,0 +1,29 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Count, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | count", + "SELECT count() AS Count\nFROM Customers" + }, + { + "Customers | where Age< 30 | count", + "SELECT count() AS Count\nFROM\n(\n SELECT *\n FROM Customers\n WHERE Age < 30\n)" + }, + { + "Customers | where Age< 30 | limit 2| count", + "SELECT count() AS Count\nFROM\n(\n SELECT *\n FROM Customers\n WHERE Age < 30\n LIMIT 2\n)" + }, + { + "Customers | where Age< 30 | limit 2 | count | project Count", + "SELECT Count\nFROM\n(\n SELECT count() AS Count\n FROM\n (\n SELECT *\n FROM Customers\n WHERE Age < 30\n LIMIT 2\n )\n)" + }, + { + "Customers|project FirstName|where FirstName != 'Peter'|sort by FirstName asc nulls first|count", + "SELECT count() AS Count\nFROM\n(\n SELECT FirstName\n FROM Customers\n WHERE FirstName != 'Peter'\n ORDER BY FirstName ASC NULLS FIRST\n)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_DataType.cpp b/src/Parsers/tests/KQL/gtest_KQL_DataType.cpp new file mode 100644 index 000000000000..e64bcccbc4fd --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_DataType.cpp @@ -0,0 +1,65 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DataType, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print dynamic(null)", + "SELECT NULL AS print_0" + }, + { + "print dynamic(1)", + "SELECT 1 AS print_0" + }, + { + "print 
dynamic(datetime(1))", + "SELECT kql_datetime(1) AS print_0" + }, + { + "print dynamic(timespan(1d))", + "SELECT toIntervalNanosecond(86400000000000) AS print_0" + }, + { + "print dynamic(parse_ipv4('127.0.0.1'))", + "throws AS print_0" + }, + { + "print dynamic({ \"a\": 9 })", + "throws AS print_0" + }, + { + "print dynamic([1, 2, 3])", + "SELECT [1, 2, 3] AS print_0" + }, + { + "print dynamic([1, dynamic([2]), 3])", + "SELECT [1, [2], 3] AS print_0" + }, + { + "print dynamic([[1], [2], [3]])", + "SELECT [[1], [2], [3]] AS print_0" + }, + { + "print dynamic(['a', \"b\", 'c'])", + "SELECT ['a', 'b', 'c'] AS print_0" + }, + { + "print dynamic([1, 'a', true, false])", + "SELECT [1, 'a', true, false] AS print_0" + }, + { + "print dynamic([date(1), time(1d), 1, 2])", + "SELECT [kql_datetime(1), toIntervalNanosecond(86400000000000), 1, 2] AS print_0" + }, + { + "print time('13:00:40.00000')", + "SELECT toIntervalNanosecond(46840000000000) AS print_0" + }, + { + "print timespan('12.23:12:23');", + "SELECT toIntervalNanosecond(1120343000000000) AS print_0" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_Distinct.cpp b/src/Parsers/tests/KQL/gtest_KQL_Distinct.cpp new file mode 100644 index 000000000000..444c67a6f9c8 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Distinct.cpp @@ -0,0 +1,33 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Distinct, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | distinct *", + "SELECT DISTINCT *\nFROM Customers" + }, + { + "Customers | distinct Occupation", + "SELECT DISTINCT Occupation\nFROM Customers" + }, + { + "Customers | distinct Occupation, Education", + "SELECT DISTINCT\n Occupation,\n Education\nFROM Customers" + }, + { + "Customers |where Age <30| distinct Occupation, Education", + "SELECT DISTINCT\n Occupation,\n Education\nFROM\n(\n SELECT *\n FROM Customers\n WHERE Age < 30\n)" + }, + { + "Customers |where 
Age <30 | order by Age| distinct Occupation, Education", + "SELECT DISTINCT\n Occupation,\n Education\nFROM\n(\n SELECT *\n FROM Customers\n WHERE Age < 30\n ORDER BY Age DESC NULLS LAST\n)" + }, + { + "Customers | project a = (Age % 10) | distinct a;", + "SELECT DISTINCT a\nFROM\n(\n SELECT Age % 10 AS a\n FROM Customers\n)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp b/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp new file mode 100644 index 000000000000..1d5a55615c9f --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Dynamic.cpp @@ -0,0 +1,136 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DynamicExactMatch, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print array_concat(A, B)", + "SELECT arrayConcat(A, B) AS print_0" + }, + { + "print array_concat(A, B, C, D)", + "SELECT arrayConcat(A, B, C, D) AS print_0" + }, + { + "print array_iff(A, B, C)", + "SELECT kql_ArrayIif(A, B, C) AS print_0" + }, + { + "print array_index_of(dynamic([1, 2, 3]), 2)", + "SELECT indexOf([1, 2, 3], 2) - 1 AS print_0" + }, + { + "print array_index_of(dynamic(['a', 'b', 'c']), 'b')", + "SELECT indexOf(['a', 'b', 'c'], 'b') - 1 AS print_0" + }, + { + "print array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley')", + "SELECT indexOf(['John', 'Denver', 'Bob', 'Marley'], 'Marley') - 1 AS print_0" + }, + + { + "print array_length(dynamic([1, 2, 3]))", + "SELECT arrayLastIndex(x -> true, [1, 2, 3]) AS print_0" + }, + { + "print array_length(dynamic(['John', 'Denver', 'Bob', 'Marley']))", + "SELECT arrayLastIndex(x -> true, ['John', 'Denver', 'Bob', 'Marley']) AS print_0" + }, + { + "print array_reverse(A)", + "SELECT arrayReverse(A) AS print_0" + }, + { + "print array_rotate_left(A, B)", + "SELECT arrayMap(x -> (A[moduloOrZero((x + length(A)) + moduloOrZero(B, toInt64(length(A))), length(A)) + 1]), range(0, length(A))) AS print_0" + }, + { + "print 
array_rotate_right(A, B)", + "SELECT arrayMap(x -> (A[moduloOrZero((x + length(A)) + moduloOrZero(-1 * B, toInt64(length(A))), length(A)) + 1]), range(0, length(A))) AS print_0" + }, + { + "print array_sum(dynamic([2, 5, 3]))", + "SELECT if(multiSearchAny(extract(toTypeName(arrayMap(x -> assumeNotNull(x), arrayFilter(x -> (x IS NOT NULL), [2, 5, 3]))), 'Array\\\\((.*)\\\\)'), ['Bool', 'Decimal', 'Float', 'Int', 'Nothing', 'UInt']), arraySum(x -> toFloat64OrDefault(x), [2, 5, 3]), NULL) AS print_0" + }, + { + "print array_sum(dynamic([2.5, 5.5, 3]))", + "SELECT if(multiSearchAny(extract(toTypeName(arrayMap(x -> assumeNotNull(x), arrayFilter(x -> (x IS NOT NULL), [2.5, 5.5, 3]))), 'Array\\\\((.*)\\\\)'), ['Bool', 'Decimal', 'Float', 'Int', 'Nothing', 'UInt']), arraySum(x -> toFloat64OrDefault(x), [2.5, 5.5, 3]), NULL) AS print_0" + }, + { + "print jaccard_index(A, B)", + "SELECT length(arrayIntersect(A, B)) / length(arrayDistinct(arrayConcat(A, B))) AS print_0" + }, + { + "print pack_array(A, B, C, D)", + "SELECT [A, B, C, D] AS print_0" + }, + { + "print set_difference(A, B)", + "SELECT arrayFilter(x -> (NOT has(arrayDistinct(arrayConcat(B)), x)), arrayDistinct(A)) AS print_0" + }, + { + "print set_difference(A, B, C)", + "SELECT arrayFilter(x -> (NOT has(arrayDistinct(arrayConcat(B, C)), x)), arrayDistinct(A)) AS print_0" + }, + { + "print set_has_element(A, B)", + "SELECT has(A, B) AS print_0" + }, + { + "print set_intersect(A, B)", + "SELECT arrayIntersect(A, B) AS print_0" + }, + { + "print set_intersect(A, B, C)", + "SELECT arrayIntersect(A, B, C) AS print_0" + }, + { + "print set_union(A, B)", + "SELECT arrayDistinct(arrayConcat(A, B)) AS print_0" + }, + { + "print set_union(A, B, C)", + "SELECT arrayDistinct(arrayConcat(A, B, C)) AS print_0" + } +}))); + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_DynamicRegex, ParserRegexTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print 
array_shift_left(A, B)", + R"(SELECT arrayResize\(if\(B > 0, arraySlice\(A, B \+ 1\), arrayConcat\(arrayWithConstant\(abs\(B\), fill_value_\d+\), A\)\), length\(A\), if\(\(NULL IS NULL\) AND \(\(extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)*'\) AS element_type_\d+\) = 'String'\), defaultValueOfTypeName\(if\(element_type_\d+ = 'Nothing', 'Nullable\(Nothing\)', element_type_\d+\)\), NULL\) AS fill_value_\d+\) AS print_0)" + }, + { + "print array_shift_left(A, B, C)", + R"(SELECT arrayResize\(if\(B > 0, arraySlice\(A, B \+ 1\), arrayConcat\(arrayWithConstant\(abs\(B\), fill_value_\d+\), A\)\), length\(A\), if\(\(C IS NULL\) AND \(\(extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)'\) AS element_type_\d+\) = 'String'\), defaultValueOfTypeName\(if\(element_type_\d+ = 'Nothing', 'Nullable\(Nothing\)', element_type_\d+\)\), C\) AS fill_value_\d+\) AS print_0)" + }, + { + "print array_shift_right(A, B)", + R"(SELECT arrayResize\(if\(\(-1 \* B\) > 0, arraySlice\(A, \(-1 \* B\) \+ 1\), arrayConcat\(arrayWithConstant\(abs\(-1 \* B\), fill_value_\d+\), A\)\), length\(A\), if\(\(NULL IS NULL\) AND \(\(extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)'\) AS element_type_\d+\) = 'String'\), defaultValueOfTypeName\(if\(element_type_\d+ = 'Nothing', 'Nullable\(Nothing\)', element_type_\d+\)\), NULL\) AS fill_value_\d+\) AS print_0)" + }, + { + "print array_shift_right(A, B, C)", + R"(SELECT arrayResize\(if\(\(-1 \* B\) > 0, arraySlice\(A, \(-1 \* B\) \+ 1\), arrayConcat\(arrayWithConstant\(abs\(-1 \* B\), fill_value_\d+\), A\)\), length\(A\), if\(\(C IS NULL\) AND \(\(extract\(toTypeName\(A\), 'Array\\\\\(\(\.\*\)\\\\\)'\) AS element_type_\d+\) = 'String'\), defaultValueOfTypeName\(if\(element_type_\d+ = 'Nothing', 'Nullable\(Nothing\)', element_type_\d+\)\), C\) AS fill_value_\d+\) AS print_0)" + }, + { + "print array_slice(A, B, C)", + R"(SELECT arraySlice\(A, 1 \+ if\(B >= 0, B, arrayMax\(\[-length\(A\), B\]\) \+ length\(A\)\) AS offset_\d+, \(\(1 \+ if\(C >= 0, C, 
arrayMax\(\[-length\(A\), C\]\) \+ length\(A\)\)\) - offset_\d+\) \+ 1\) AS print_0)" + }, + { + "print array_split(A, B)", + R"(SELECT if\(empty\(arrayMap\(x -> if\(x >= 0, x, arrayMax\(\[0, x \+ CAST\(length\(A\), 'Int\d+'\)\]\)\), flatten\(\[B\]\)\) AS indices_\d+\), \[A\], arrayConcat\(\[arraySlice\(A, 1, indices_\d+\[1\]\)\], arrayMap\(i -> arraySlice\(A, \(indices_\d+\[i\]\) \+ 1, if\(i = length\(indices_\d+\), CAST\(length\(A\), 'Int\d+'\), CAST\(indices_\d+\[i \+ 1\], 'Int\d+'\)\) - \(indices_\d+\[i\]\)\), range\(1, length\(indices_\d+\) \+ 1\)\)\)\) AS print_0)" + }, + { + "print zip(A, B)", + R"(SELECT arrayMap\(t -> \[untuple\(t\)\], arrayZip\(arrayResize\(arg0_\d+, arrayMax\(\[length\(if\(match\(toTypeName\(A\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), A, CAST\(A, concat\('Array\(', extract\(toTypeName\(if\(length\(A\) = 0, \[NULL\], A\)\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)'\)\)\) AS arg0_\d+\), length\(if\(match\(toTypeName\(B\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), B, CAST\(B, concat\('Array\(', extract\(toTypeName\(if\(length\(B\) = 0, \[NULL\], B\)\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)'\)\)\) AS arg1_\d+\)\]\) AS max_length_\d+, NULL\), arrayResize\(arg1_\d+, max_length_\d+, NULL\)\)\) AS print_0)" + }, + { + "print zip(A, B, C)", + R"(SELECT arrayMap\(t -> \[untuple\(t\)\], arrayZip\(arrayResize\(arg0_\d+, arrayMax\(\[length\(if\(match\(toTypeName\(A\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), A, CAST\(A, concat\('Array\(', extract\(toTypeName\(if\(length\(A\) = 0, \[NULL\], A\)\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)'\)\)\) AS arg0_\d+\), length\(if\(match\(toTypeName\(B\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), B, CAST\(B, concat\('Array\(', extract\(toTypeName\(if\(length\(B\) = 0, \[NULL\], B\)\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)'\)\)\) AS arg1_\d+\), length\(if\(match\(toTypeName\(C\), 'Array\\\\\(Nullable\\\\\(\.\*\\\\\)\\\\\)'\), C, CAST\(C, concat\('Array\(', extract\(toTypeName\(if\(length\(C\) = 0, \[NULL\], 
C\)\), 'Array\\\\\(\(\.\*\)\\\\\)'\), '\)'\)\)\) AS arg2_\d+\)\]\) AS max_length_\d+, NULL\), arrayResize\(arg1_\d+, max_length_\d+, NULL\), arrayResize\(arg2_\d+, max_length_\d+, NULL\)\)\) AS print_0)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_General.cpp b/src/Parsers/tests/KQL/gtest_KQL_General.cpp new file mode 100644 index 000000000000..d82c518aa037 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_General.cpp @@ -0,0 +1,105 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_General, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print case(5 <= 10, 'A', 12 <= 20, 'B', 22 <= 30, 'C', 'D')", + "SELECT multiIf(5 <= 10, 'A', 12 <= 20, 'B', 22 <= 30, 'C', 'D') AS print_0" + }, + { + "Customers | extend t = case(Age <= 10, 'A', Age <= 20, 'B', Age <= 30, 'C', 'D')", + "SELECT\n * EXCEPT t,\n multiIf(Age <= 10, 'A', Age <= 20, 'B', Age <= 30, 'C', 'D') AS t\nFROM Customers" + }, + { + "Customers | extend t = iff(Age < 20, 'little', 'big')", + "SELECT\n * EXCEPT t,\n If(Age < 20, 'little', 'big') AS t\nFROM Customers" + }, + { + "Customers | extend t = iif(Age < 20, 'little', 'big')", + "SELECT\n * EXCEPT t,\n If(Age < 20, 'little', 'big') AS t\nFROM Customers" + }, + { + "print bin_at(6.5, 2.5, 7)", + "SELECT kql_bin_at(6.5, 2.5, 7) AS print_0" + }, + { + "print bin_at(1h, 1d, 12h)", + "SELECT kql_bin_at(toIntervalNanosecond(3600000000000), toIntervalNanosecond(86400000000000), toIntervalNanosecond(43200000000000)) AS print_0" + }, + { + "print bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0))", + "SELECT kql_bin_at(kql_datetime('2017-05-15 10:20:00.0'), toIntervalNanosecond(86400000000000), kql_datetime('1970-01-01 12:00:00.0')) AS print_0" + }, + { + "print bin(4.5, 1)", + "SELECT kql_bin(4.5, 1) AS print_0" + }, + { + "print bin(4.5, -1)", + "SELECT kql_bin(4.5, -1) AS print_0" + }, + { + "print bin(time(16d), 7d)", + 
"SELECT kql_bin(toIntervalNanosecond(1382400000000000), toIntervalNanosecond(604800000000000)) AS print_0" + }, + { + "print bin(datetime(1970-05-11 13:45:07), 1d)", + "SELECT kql_bin(kql_datetime('1970-05-11 13:45:07'), toIntervalNanosecond(86400000000000)) AS print_0" + }, + { + "print bin(datetime(1970-05-11 13:45:07.456345672), 1ms)", + "SELECT kql_bin(kql_datetime('1970-05-11 13:45:07.456345672'), toIntervalNanosecond(1000000)) AS print_0" + }, + { + "print bin(datetime(1970-05-11 13:45:07.456345672), 1microseconds)", + "SELECT kql_bin(kql_datetime('1970-05-11 13:45:07.456345672'), toIntervalNanosecond(1000)) AS print_0" + }, + { + "print lookup('dictionary_table', 'value', '1')", + "SELECT dictGet('dictionary_table', 'value', '1') AS print_0" + }, + { + "print lookup('dictionary_table', 'value', '100', 'default')", + "SELECT dictGetOrDefault('dictionary_table', 'value', '100', 'default') AS print_0" + }, + { + "T | print 1", + "throws AS print_0" + }, + { + "T | range from 1 to 5 step 1", + "throws" + }, + { + "T |", + "throws" + }, + { + "print t = gettype(1)", + "SELECT kql_gettype(1) AS t" + }, + { + "Customers | project t = gettype(FirstName)", + "SELECT kql_gettype(FirstName) AS t\nFROM Customers" + }, + { + "print x = 5 | extend a = toscalar(print 5, 'asd' | project y = strcat(print_0, print_1));", + "SELECT\n * EXCEPT a,\n (\n SELECT tuple(*)\n FROM\n (\n SELECT concat(ifNull(kql_tostring(print_0), ''), ifNull(kql_tostring(print_1), ''), '') AS y\n FROM\n (\n SELECT\n 5 AS print_0,\n 'asd' AS print_1\n )\n )\n LIMIT 1\n ).1 AS a\nFROM\n(\n SELECT 5 AS x\n)" + }, + { + "print t = not(strlen('abc'))", + "SELECT kql_not(lengthUTF8('abc')) AS t" + }, + { + "print t = not(1)", + "SELECT kql_not(1) AS t" + }, + { + "Customers | project not(Age)", + "SELECT kql_not(Age) AS Column1\nFROM Customers" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_GetSchema.cpp b/src/Parsers/tests/KQL/gtest_KQL_GetSchema.cpp new file mode 100644 index 
000000000000..a196d8b8e24b --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_GetSchema.cpp @@ -0,0 +1,21 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_GetSchema, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | getschema", + "SELECT *\nFROM getschema(\n SELECT *\n FROM Customers\n)" + }, + { + "Customers | getschema | getschema", + "SELECT *\nFROM getschema(\n SELECT *\n FROM\n (\n SELECT *\n FROM getschema(\n SELECT *\n FROM Customers\n )\n )\n)" + }, + { + "print x = 'asd' | extend y = strlen(x) | getschema", + "SELECT *\nFROM getschema(\n SELECT *\n FROM\n (\n SELECT\n * EXCEPT y,\n lengthUTF8(x) AS y\n FROM\n (\n SELECT 'asd' AS x\n )\n )\n)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_Hash.cpp b/src/Parsers/tests/KQL/gtest_KQL_Hash.cpp new file mode 100644 index 000000000000..0e62fa46d657 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Hash.cpp @@ -0,0 +1,21 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Hash, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print hash('World')", + "SELECT kql_hash('World') AS print_0" + }, + { + "print hash('World', 100)", + "SELECT kql_hash('World', 100) AS print_0" + }, + { + "print hash_sha256('World')", + "SELECT lower(hex(SHA256(NULLIF(ifNull(kql_tostring('World'), ''), '')))) AS print_0" + }, +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_IP.cpp b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp new file mode 100644 index 000000000000..c4abd52e804a --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_IP.cpp @@ -0,0 +1,141 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_IP, ParserRegexTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print format_ipv4(A)", + R"(SELECT 
ifNull\(if\(\(\(\(toUInt32OrNull\(toString\(A\)\) AS param_as_uint32_\d+\) IS NOT NULL\) AND \(toTypeName\(A\) = 'String'\)\) OR \(32 < 0\) OR \(\(ifNull\(param_as_uint32_\d+, multiIf\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\)\) AS ip_as_number_\d+\) IS NULL\), NULL, IPv4NumToString\(bitAnd\(ip_as_number_\d+, bitNot\(toUInt32\(intExp2\(32 - 32\) - 1\)\)\)\)\), ''\) AS print_0)" + }, + { + "print format_ipv4(A, B)", + R"(SELECT ifNull\(if\(\(\(\(toUInt32OrNull\(toString\(A\)\) AS param_as_uint32_\d+\) IS NOT NULL\) AND \(toTypeName\(A\) = 'String'\)\) OR \(B < 0\) OR \(\(ifNull\(param_as_uint32_\d+, multiIf\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\)\) AS ip_as_number_\d+\) IS NULL\), NULL, IPv4NumToString\(bitAnd\(ip_as_number_\d+, bitNot\(toUInt32\(intExp2\(32 - B\) - 1\)\)\)\)\), ''\) AS print_0)" + }, + { + "print format_ipv4_mask(A)", + R"(SELECT if\(empty\(ifNull\(if\(\(\(\(toUInt32OrNull\(toString\(A\)\) AS param_as_uint32_\d+\) IS NOT NULL\) AND \(toTypeName\(A\) = 'String'\)\) OR \(32 < 0\) OR \(\(ifNull\(param_as_uint32_\d+, multiIf\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), 
assumeNotNull\(mask_\d+\)\)\.1, 'UInt32'\), NULL\)\) AS ip_as_number_\d+\) IS NULL\), NULL, IPv4NumToString\(bitAnd\(ip_as_number_\d+, bitNot\(toUInt32\(intExp2\(32 - 32\) - 1\)\)\)\)\), ''\) AS formatted_ip_\d+\) OR \(position\(toTypeName\(32\), 'Int'\) = 0\) OR \(NOT \(\(32 >= 0\) AND \(32 <= 32\)\)\), '', concat\(formatted_ip_\d+, '/', toString\(toInt64\(min2\(32, ifNull\(multiIf\(\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS suffix_\d+, 32\)\)\)\)\)\) AS print_0)" + }, + { + "print format_ipv4_mask(A, B)", + R"(SELECT if\(empty\(ifNull\(if\(\(\(\(toUInt32OrNull\(toString\(A\)\) AS param_as_uint32_\d+\) IS NOT NULL\) AND \(toTypeName\(A\) = 'String'\)\) OR \(B < 0\) OR \(\(ifNull\(param_as_uint32_\d+, multiIf\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\)\.1, 'UInt32'\), NULL\)\) AS ip_as_number_\d+\) IS NULL\), NULL, IPv4NumToString\(bitAnd\(ip_as_number_\d+, bitNot\(toUInt32\(intExp2\(32 - B\) - 1\)\)\)\)\), ''\) AS formatted_ip_\d+\) OR \(position\(toTypeName\(B\), 'Int'\) = 0\) OR \(NOT \(\(B >= 0\) AND \(B <= 32\)\)\), '', concat\(formatted_ip_\d+, '/', toString\(toInt64\(min2\(B, ifNull\(multiIf\(\(length\(splitByChar\('/', ifNull\(kql_tostring\(A\), ''\)\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS suffix_\d+, 32\)\)\)\)\)\) AS print_0)" + }, + { + "print ipv4_compare(A, B)", + R"(SELECT 
if\(\(\(multiIf\(length\(splitByChar\('/', A\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS lhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS lhs_mask_\d+\) IS NULL\) OR \(\(multiIf\(length\(splitByChar\('/', B\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS rhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', B\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS rhs_mask_\d+\) IS NULL\), NULL, sign\(toInt64\(CAST\(IPv4CIDRToRange\(assumeNotNull\(lhs_ip_\d+\), toUInt8\(min2\(32, min2\(assumeNotNull\(lhs_mask_\d+\), assumeNotNull\(rhs_mask_\d+\)\)\)\) AS mask_\d+\).1, 'UInt32'\)\) - toInt64\(CAST\(IPv4CIDRToRange\(assumeNotNull\(rhs_ip_\d+\), mask_\d+\).1, 'UInt32'\)\)\)\) AS print_0)" + }, + { + "print ipv4_compare(A, B, C)", + R"(SELECT if\(\(\(multiIf\(length\(splitByChar\('/', A\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS 
lhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS lhs_mask_\d+\) IS NULL\) OR \(\(multiIf\(length\(splitByChar\('/', B\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS rhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', B\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS rhs_mask_\d+\) IS NULL\), NULL, sign\(toInt64\(CAST\(IPv4CIDRToRange\(assumeNotNull\(lhs_ip_\d+\), toUInt8\(min2\(C, min2\(assumeNotNull\(lhs_mask_\d+\), assumeNotNull\(rhs_mask_\d+\)\)\)\) AS mask_\d+\).1, 'UInt32'\)\) - toInt64\(CAST\(IPv4CIDRToRange\(assumeNotNull\(rhs_ip_\d+\), mask_\d+\).1, 'UInt32'\)\)\)\) AS print_0)" + }, + { + "print ipv6_compare(A, B)", + R"(SELECT if\(\(length\(splitByChar\('/', A\) AS lhs_tokens_\d+\) > 2\) OR \(length\(splitByChar\('/', B\) AS rhs_tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(lhs_tokens_\d+\[1\]\) AS lhs_ipv6_\d+\) IS NULL\) OR \(\(length\(lhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(lhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(lhs_tokens_\d+\[-1\]\)\) AS lhs_suffix_\d+\) IS NULL\)\) OR \(\(IPv6StringToNumOrNull\(rhs_tokens_\d+\[1\]\) AS rhs_ipv6_\d+\) IS NULL\) OR \(\(length\(rhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(rhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(rhs_tokens_\d+\[-1\]\)\) AS rhs_suffix_\d+\) IS NULL\)\) OR \(\(toUInt8\(min2\(128, min2\(ifNull\(lhs_suffix_\d+, 128\), 
ifNull\(rhs_suffix_\d+, 128\)\)\)\) AS suffix_\d+\) IS NULL\) OR \(\(bitShiftLeft\(bitShiftRight\(bitNot\(reinterpretAsFixedString\(CAST\('0', 'UInt128'\)\)\), 128 - suffix_\d+ AS zeroes_\d+\), zeroes_\d+\) AS mask_\d+\) IS NULL\) OR \(\(bitAnd\(lhs_ipv6_\d+, mask_\d+\) AS lhs_base_\d+\) IS NULL\) OR \(\(bitAnd\(rhs_ipv6_\d+, mask_\d+\) AS rhs_base_\d+\) IS NULL\), NULL, multiIf\(lhs_base_\d+ < rhs_base_\d+, -1, lhs_base_\d+ > rhs_base_\d+, 1, 0\)\) AS print_0)" + }, + { + "print ipv6_compare(A, B, C)", + R"(SELECT if\(\(length\(splitByChar\('/', A\) AS lhs_tokens_\d+\) > 2\) OR \(length\(splitByChar\('/', B\) AS rhs_tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(lhs_tokens_\d+\[1\]\) AS lhs_ipv6_\d+\) IS NULL\) OR \(\(length\(lhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(lhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(lhs_tokens_\d+\[-1\]\)\) AS lhs_suffix_\d+\) IS NULL\)\) OR \(\(IPv6StringToNumOrNull\(rhs_tokens_\d+\[1\]\) AS rhs_ipv6_\d+\) IS NULL\) OR \(\(length\(rhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(rhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(rhs_tokens_\d+\[-1\]\)\) AS rhs_suffix_\d+\) IS NULL\)\) OR \(\(toUInt8\(min2\(C, min2\(ifNull\(lhs_suffix_\d+, 128\), ifNull\(rhs_suffix_\d+, 128\)\)\)\) AS suffix_\d+\) IS NULL\) OR \(\(bitShiftLeft\(bitShiftRight\(bitNot\(reinterpretAsFixedString\(CAST\('0', 'UInt128'\)\)\), 128 - suffix_\d+ AS zeroes_\d+\), zeroes_\d+\) AS mask_\d+\) IS NULL\) OR \(\(bitAnd\(lhs_ipv6_\d+, mask_\d+\) AS lhs_base_\d+\) IS NULL\) OR \(\(bitAnd\(rhs_ipv6_\d+, mask_\d+\) AS rhs_base_\d+\) IS NULL\), NULL, multiIf\(lhs_base_\d+ < rhs_base_\d+, -1, lhs_base_\d+ > rhs_base_\d+, 1, 0\)\) AS print_0)" + }, + { + "print ipv4_is_in_range(A, B)", + R"(SELECT if\(\(\(IPv4StringToNumOrNull\(A\) AS ip_\d+\) IS NULL\) OR \(\(multiIf\(length\(splitByChar\('/', B\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND 
\(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS range_start_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', B\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS range_mask_\d+\) IS NULL\), NULL, bitXor\(range_start_ip_\d+, bitAnd\(ip_\d+, bitNot\(toUInt32\(intExp2\(32 - range_mask_\d+\) - 1\)\)\)\) = 0\) AS print_0)" + }, + { + "print ipv4_is_match(A, B)", + R"(SELECT if\(\(\(multiIf\(length\(splitByChar\('/', A\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS lhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS lhs_mask_\d+\) IS NULL\) OR \(\(multiIf\(length\(splitByChar\('/', B\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS rhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', B\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS rhs_mask_\d+\) IS NULL\), NULL, 
sign\(toInt64\(CAST\(IPv4CIDRToRange\(assumeNotNull\(lhs_ip_\d+\), toUInt8\(min2\(32, min2\(assumeNotNull\(lhs_mask_\d+\), assumeNotNull\(rhs_mask_\d+\)\)\)\) AS mask_\d+\).1, 'UInt32'\)\) - toInt64\(CAST\(IPv4CIDRToRange\(assumeNotNull\(rhs_ip_\d+\), mask_\d+\).1, 'UInt32'\)\)\)\) = 0 AS print_0)" + }, + { + "print ipv4_is_match(A, B, C)", + R"(SELECT if\(\(\(multiIf\(length\(splitByChar\('/', A\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS lhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS lhs_mask_\d+\) IS NULL\) OR \(\(multiIf\(length\(splitByChar\('/', B\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS rhs_ip_\d+\) IS NULL\) OR \(\(multiIf\(\(length\(splitByChar\('/', B\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS rhs_mask_\d+\) IS NULL\), NULL, sign\(toInt64\(CAST\(IPv4CIDRToRange\(assumeNotNull\(lhs_ip_\d+\), toUInt8\(min2\(C, min2\(assumeNotNull\(lhs_mask_\d+\), assumeNotNull\(rhs_mask_\d+\)\)\)\) AS mask_\d+\).1, 'UInt32'\)\) - toInt64\(CAST\(IPv4CIDRToRange\(assumeNotNull\(rhs_ip_\d+\), mask_\d+\).1, 'UInt32'\)\)\)\) = 0 AS print_0)" + }, + { + "print ipv6_is_match(A, B)", + 
R"(SELECT if\(\(length\(splitByChar\('/', A\) AS lhs_tokens_\d+\) > 2\) OR \(length\(splitByChar\('/', B\) AS rhs_tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(lhs_tokens_\d+\[1\]\) AS lhs_ipv6_\d+\) IS NULL\) OR \(\(length\(lhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(lhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(lhs_tokens_\d+\[-1\]\)\) AS lhs_suffix_\d+\) IS NULL\)\) OR \(\(IPv6StringToNumOrNull\(rhs_tokens_\d+\[1\]\) AS rhs_ipv6_\d+\) IS NULL\) OR \(\(length\(rhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(rhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(rhs_tokens_\d+\[-1\]\)\) AS rhs_suffix_\d+\) IS NULL\)\) OR \(\(toUInt8\(min2\(128, min2\(ifNull\(lhs_suffix_\d+, 128\), ifNull\(rhs_suffix_\d+, 128\)\)\)\) AS suffix_\d+\) IS NULL\) OR \(\(bitShiftLeft\(bitShiftRight\(bitNot\(reinterpretAsFixedString\(CAST\('0', 'UInt128'\)\)\), 128 - suffix_\d+ AS zeroes_\d+\), zeroes_\d+\) AS mask_\d+\) IS NULL\) OR \(\(bitAnd\(lhs_ipv6_\d+, mask_\d+\) AS lhs_base_\d+\) IS NULL\) OR \(\(bitAnd\(rhs_ipv6_\d+, mask_\d+\) AS rhs_base_\d+\) IS NULL\), NULL, multiIf\(lhs_base_\d+ < rhs_base_\d+, -1, lhs_base_\d+ > rhs_base_\d+, 1, 0\)\) = 0 AS print_0)" + }, + { + "print ipv6_is_match(A, B, C)", + R"(SELECT if\(\(length\(splitByChar\('/', A\) AS lhs_tokens_\d+\) > 2\) OR \(length\(splitByChar\('/', B\) AS rhs_tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(lhs_tokens_\d+\[1\]\) AS lhs_ipv6_\d+\) IS NULL\) OR \(\(length\(lhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(lhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(lhs_tokens_\d+\[-1\]\)\) AS lhs_suffix_\d+\) IS NULL\)\) OR \(\(IPv6StringToNumOrNull\(rhs_tokens_\d+\[1\]\) AS rhs_ipv6_\d+\) IS NULL\) OR \(\(length\(rhs_tokens_\d+\) = 2\) AND \(\(\(if\(isIPv4String\(rhs_tokens_\d+\[1\]\), 96, 0\) \+ toUInt8OrNull\(rhs_tokens_\d+\[-1\]\)\) AS rhs_suffix_\d+\) IS NULL\)\) OR \(\(toUInt8\(min2\(C, min2\(ifNull\(lhs_suffix_\d+, 128\), ifNull\(rhs_suffix_\d+, 128\)\)\)\) AS suffix_\d+\) IS NULL\) OR 
\(\(bitShiftLeft\(bitShiftRight\(bitNot\(reinterpretAsFixedString\(CAST\('0', 'UInt128'\)\)\), 128 - suffix_\d+ AS zeroes_\d+\), zeroes_\d+\) AS mask_\d+\) IS NULL\) OR \(\(bitAnd\(lhs_ipv6_\d+, mask_\d+\) AS lhs_base_\d+\) IS NULL\) OR \(\(bitAnd\(rhs_ipv6_\d+, mask_\d+\) AS rhs_base_\d+\) IS NULL\), NULL, multiIf\(lhs_base_\d+ < rhs_base_\d+, -1, lhs_base_\d+ > rhs_base_\d+, 1, 0\)\) = 0 AS print_0)" + }, + { + "print ipv4_is_private(A)", + R"(SELECT multiIf\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(\(toIPv4OrNull\(tokens_\d+\[1\]\) AS nullable_ip_\d+\) IS NULL\) OR \(\(length\(tokens_\d+\) = 2\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL\)\), NULL, ignore\(assumeNotNull\(nullable_ip_\d+\) AS ip_\d+, IPv4CIDRToRange\(ip_\d+, assumeNotNull\(mask_\d+\)\) AS range_\d+, IPv4NumToString\(range_\d+.1\) AS begin_\d+, IPv4NumToString\(range_\d+.2\) AS end_\d+\), NULL, \(\(length\(tokens_\d+\) = 1\) AND isIPAddressInRange\(IPv4NumToString\(ip_\d+\), '10.0.0.0/8'\)\) OR \(\(length\(tokens_\d+\) = 2\) AND isIPAddressInRange\(begin_\d+, '10.0.0.0/8'\) AND isIPAddressInRange\(end_\d+, '10.0.0.0/8'\)\) OR \(\(length\(tokens_\d+\) = 1\) AND isIPAddressInRange\(IPv4NumToString\(ip_\d+\), '172.16.0.0/12'\)\) OR \(\(length\(tokens_\d+\) = 2\) AND isIPAddressInRange\(begin_\d+, '172.16.0.0/12'\) AND isIPAddressInRange\(end_\d+, '172.16.0.0/12'\)\) OR \(\(length\(tokens_\d+\) = 1\) AND isIPAddressInRange\(IPv4NumToString\(ip_\d+\), '192.168.0.0/16'\)\) OR \(\(length\(tokens_\d+\) = 2\) AND isIPAddressInRange\(begin_\d+, '192.168.0.0/16'\) AND isIPAddressInRange\(end_\d+, '192.168.0.0/16'\)\)\) AS print_0)" + }, + { + "print ipv4_netmask_suffix(A)", + R"(SELECT multiIf\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(NOT isIPv4String\(tokens_\d+\[1\]\)\), NULL, length\(tokens_\d+\) = 1, 32, \(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL, NULL, toUInt8\(min2\(mask_\d+, 32\)\)\) AS print_0)" + }, + { + "print parse_ipv4(A)", 
+ R"(SELECT multiIf\(length\(splitByChar\('/', A\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\).1, 'UInt32'\), NULL\) AS print_0)" + }, + { + "print parse_ipv4_mask(A, B)", + R"(SELECT if\(\(\(toIPv4OrNull\(A\) AS ip_\d+\) IS NULL\) OR \(\(toUInt8OrNull\(toString\(B\)\) AS mask_\d+\) IS NULL\), NULL, toUInt32\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), arrayMax\(\[0, arrayMin\(\[32, assumeNotNull\(mask_\d+\)\]\)\]\)\).1\)\) AS print_0)" + }, + { + "print parse_ipv6(A)", + R"(SELECT if\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+\) IS NULL\) OR \(\(length\(tokens_\d+\) = 2\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL\)\), NULL, arrayStringConcat\(flatten\(extractAllGroups\(lower\(hex\(IPv6CIDRToRange\(assumeNotNull\(ip_\d+\), toUInt8\(ifNull\(mask_\d+ \+ if\(isIPv4String\(tokens_\d+\[1\]\), 96, 0\), 128\)\)\).1\)\), '\(\[\\\\da-f\]\{4\}\)'\)\), ':'\)\) AS print_0)" + }, + { + "print parse_ipv6_mask(A, B)", + R"(SELECT if\(empty\(ifNull\(if\(\(\(\(toUInt32OrNull\(toString\(replaceRegexpOne\(A, concat\('\^', '::'\), ''\)\)\) AS param_as_uint32_\d+\) IS NOT NULL\) AND \(toTypeName\(replaceRegexpOne\(A, concat\('\^', '::'\), ''\)\) = 'String'\)\) OR \(\(B - 96\) < 0\) OR \(\(ifNull\(param_as_uint32_\d+, multiIf\(length\(splitByChar\('/', ifNull\(kql_tostring\(replaceRegexpOne\(A, concat\('\^', '::'\), ''\)\), ''\)\) AS tokens_\d+\) = 1, IPv4StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+, \(length\(tokens_\d+\) = 2\) AND \(ip_\d+ IS NOT NULL\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NOT NULL\), CAST\(IPv4CIDRToRange\(assumeNotNull\(ip_\d+\), assumeNotNull\(mask_\d+\)\)\.1, 'UInt32'\), NULL\)\) AS ip_as_number_\d+\) IS NULL\), NULL, 
IPv4NumToString\(bitAnd\(ip_as_number_\d+, bitNot\(toUInt32\(intExp2\(32 - \(B - 96\)\) - 1\)\)\)\)\), ''\) AS ipv4_\d+\), if\(\(length\(splitByChar\('/', concat\(ifNull\(kql_tostring\(ifNull\(kql_tostring\(if\(\(length\(splitByChar\('/', A\) AS tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+\) IS NULL\) OR \(\(length\(tokens_\d+\) = 2\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL\)\), NULL, arrayStringConcat\(flatten\(extractAllGroups\(lower\(hex\(IPv6CIDRToRange\(assumeNotNull\(ip_\d+\), toUInt8\(ifNull\(mask_\d+ \+ if\(isIPv4String\(tokens_\d+\[1\]\), 96, 0\), 128\)\)\)\.1\)\), '\(\[\\\\da-f\]\{4\}\)'\)\), ':'\)\)\), ''\)\), ''\), ifNull\(kql_tostring\('/'\), ''\), ifNull\(kql_tostring\(ifNull\(kql_tostring\(B\), ''\)\), ''\), ''\)\) AS tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+\) IS NULL\) OR \(\(length\(tokens_\d+\) = 2\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL\)\), NULL, arrayStringConcat\(flatten\(extractAllGroups\(lower\(hex\(IPv6CIDRToRange\(assumeNotNull\(ip_\d+\), toUInt8\(ifNull\(mask_\d+ \+ if\(isIPv4String\(tokens_\d+\[1\]\), 96, 0\), 128\)\)\)\.1\)\), '\(\[\\\\da-f\]\{4\}\)'\)\), ':'\)\), if\(\(length\(splitByChar\('/', ipv4_\d+\) AS tokens_\d+\) > 2\) OR \(\(IPv6StringToNumOrNull\(tokens_\d+\[1\]\) AS ip_\d+\) IS NULL\) OR \(\(length\(tokens_\d+\) = 2\) AND \(\(toUInt8OrNull\(tokens_\d+\[-1\]\) AS mask_\d+\) IS NULL\)\), NULL, arrayStringConcat\(flatten\(extractAllGroups\(lower\(hex\(IPv6CIDRToRange\(assumeNotNull\(ip_\d+\), toUInt8\(ifNull\(mask_\d+ \+ if\(isIPv4String\(tokens_\d+\[1\]\), 96, 0\), 128\)\)\)\.1\)\), '\(\[\\\\da-f\]\{4\}\)'\)\), ':'\)\)\) AS print_0)" + } +}))); + +INSTANTIATE_TEST_SUITE_P( + ParserKQLQuery_IP, + ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print has_ipv4('127.0.0.1', '127.0.0.1')", + "SELECT kql_has_ipv4('127.0.0.1', '127.0.0.1') 
AS print_0" + }, + { + "print has_ipv4_prefix('127.0.0.1', '127.0.0.')", + "SELECT kql_has_ipv4_prefix('127.0.0.1', '127.0.0.') AS print_0" + }, + { + "print has_any_ipv4('127.0.0.1', '1.2.3.4', '127.0.0.1')", + "SELECT kql_has_any_ipv4('127.0.0.1', '1.2.3.4', '127.0.0.1') AS print_0" + }, + { + "print has_any_ipv4_prefix('127.0.0.1', '1.2.3.4', '127.0.0.')", + "SELECT kql_has_any_ipv4_prefix('127.0.0.1', '1.2.3.4', '127.0.0.') AS print_0" + }, + { + "print has_any_ipv4('1.2.3.4', dynamic(['1.2.3.4']))", + "SELECT kql_has_any_ipv4('1.2.3.4', ['1.2.3.4']) AS print_0" + }, + { + "print has_any_ipv4_prefix('1.2.3.4', dynamic(['1.2.3.4']))", + "SELECT kql_has_any_ipv4_prefix('1.2.3.4', ['1.2.3.4']) AS print_0" + }, + { + "print has_ipv6('2600:1404:6400:1695:0:0:0:1e89', '2600:1404:6400:1695:0:0:0:1e89')", + "SELECT kql_has_ipv6('2600:1404:6400:1695:0:0:0:1e89', '2600:1404:6400:1695:0:0:0:1e89') AS print_0" + }, + { + "print has_ipv6_prefix('2600:1404:6400:1695:0:0:0:1e89', '2600:1404:6400:1695:0:0:0:')", + "SELECT kql_has_ipv6_prefix('2600:1404:6400:1695:0:0:0:1e89', '2600:1404:6400:1695:0:0:0:') AS print_0" + }, + { + "print has_any_ipv6('2600:1404:6400:1695:0:0:0:1e89', '0:0:0:0:0:ffff:1.2.3.4', '2600:1404:6400:1695:0:0:0:1e89')", + "SELECT kql_has_any_ipv6('2600:1404:6400:1695:0:0:0:1e89', '0:0:0:0:0:ffff:1.2.3.4', '2600:1404:6400:1695:0:0:0:1e89') AS print_0" + }, + { + "print has_any_ipv6_prefix('2600:1404:6400:1695:0:0:0:1e89', '0:0:0:0:0:ffff:1.2.3.4', '2600:1404:6400:1695:0:0:0:')", + "SELECT kql_has_any_ipv6_prefix('2600:1404:6400:1695:0:0:0:1e89', '0:0:0:0:0:ffff:1.2.3.4', '2600:1404:6400:1695:0:0:0:') AS print_0" + }, + { + "print has_any_ipv6('2600:1404:6400:1695:0:0:0:1e89', dynamic(['2600:1404:6400:168a:0:0:0:1e89']))", + "SELECT kql_has_any_ipv6('2600:1404:6400:1695:0:0:0:1e89', ['2600:1404:6400:168a:0:0:0:1e89']) AS print_0" + }, + { + "print has_any_ipv6_prefix('2600:1404:6400:1695:0:0:0:1e89', dynamic(['2600:1404:6400:168a:0:0:0:1e89']))", + "SELECT 
kql_has_any_ipv6_prefix('2600:1404:6400:1695:0:0:0:1e89', ['2600:1404:6400:168a:0:0:0:1e89']) AS print_0" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_MVExpand.cpp b/src/Parsers/tests/KQL/gtest_KQL_MVExpand.cpp new file mode 100644 index 000000000000..4fc9f176ba8b --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_MVExpand.cpp @@ -0,0 +1,45 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_MVExpand, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "T | mv-expand c", + "SELECT *\nFROM T\nARRAY JOIN c\nSETTINGS enable_unaligned_array_join = 1" + }, + { + "T | mv-expand c, d", + "SELECT *\nFROM T\nARRAY JOIN\n c,\n d\nSETTINGS enable_unaligned_array_join = 1" + }, + { + "T | mv-expand c to typeof(bool)", + "SELECT\n * EXCEPT c_ali,\n c_ali AS c\nFROM\n(\n SELECT\n * EXCEPT c,\n accurateCastOrNull(toInt64OrNull(toString(c)), 'Boolean') AS c_ali\n FROM\n (\n SELECT *\n FROM T\n ARRAY JOIN c\n )\n)\nSETTINGS enable_unaligned_array_join = 1" + }, + { + "T | mv-expand b | mv-expand c", + "SELECT *\nFROM\n(\n SELECT *\n FROM T\n ARRAY JOIN b\n SETTINGS enable_unaligned_array_join = 1\n)\nARRAY JOIN c\nSETTINGS enable_unaligned_array_join = 1" + }, + { + "T | mv-expand with_itemindex=index b, c, d", + "SELECT\n index,\n *\nFROM T\nARRAY JOIN\n b,\n c,\n d,\n range(0, arrayMax([length(b), length(c), length(d)])) AS index\nSETTINGS enable_unaligned_array_join = 1" + }, + { + "T | mv-expand array_concat(c,d)", + "SELECT\n *,\n array_concat_\nFROM T\nARRAY JOIN arrayConcat(c, d) AS array_concat_\nSETTINGS enable_unaligned_array_join = 1" + }, + { + "T | mv-expand x = c, y = d", + "SELECT\n *,\n x,\n y\nFROM T\nARRAY JOIN\n c AS x,\n d AS y\nSETTINGS enable_unaligned_array_join = 1" + }, + { + "T | mv-expand xy = array_concat(c, d)", + "SELECT\n *,\n xy\nFROM T\nARRAY JOIN arrayConcat(c, d) AS xy\nSETTINGS enable_unaligned_array_join = 1" + }, + { + "T | mv-expand 
with_itemindex=index c,d to typeof(bool)", + "SELECT\n * EXCEPT d_ali,\n d_ali AS d\nFROM\n(\n SELECT\n * EXCEPT d,\n accurateCastOrNull(toInt64OrNull(toString(d)), 'Boolean') AS d_ali\n FROM\n (\n SELECT\n index,\n *\n FROM T\n ARRAY JOIN\n c,\n d,\n range(0, arrayMax([length(c), length(d)])) AS index\n )\n)\nSETTINGS enable_unaligned_array_join = 1" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp b/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp new file mode 100644 index 000000000000..45ed12ca37a5 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_MakeSeries.cpp @@ -0,0 +1,45 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_MakeSeries, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "T | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 86400 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga)) < 0, 0, length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(toDateTime64(Purchase_ali - 0, 9, 'UTC')), arrayMap(x -> toDateTime64(x - 0, 9, 'UTC'), range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(toDateTime64('2016-09-10', 9, 'UTC')) + 
(toInt64((toFloat64(toDateTime64(Purchase, 9, 'UTC')) - toFloat64(toDateTime64('2016-09-10', 9, 'UTC'))) / 86400) * 86400) AS Purchase_ali\n FROM T\n WHERE (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) >= toUInt64(toDateTime64('2016-09-10', 9, 'UTC'))) AND (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) < toUInt64(toDateTime64('2016-09-13', 9, 'UTC')))\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 to 15 step 1.0 by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 1 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(toUInt64(10), toUInt64(15), toUInt64(1))) - length(ga)) < 0, 0, length(range(toUInt64(10), toUInt64(15), toUInt64(1))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(Purchase_ali), arrayMap(x -> toFloat64(x), range(toUInt64(10), toUInt64(15), toUInt64(1))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(10) + (toInt64((toFloat64(Purchase) - toFloat64(10)) / 1) * 1) AS Purchase_ali\n FROM T2\n WHERE (toInt64(toFloat64(Purchase)) >= toUInt64(10)) AND (toInt64(toFloat64(Purchase)) < toUInt64(15))\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 86400 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n 
arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(low, high, toUInt64(86400))) - length(ga)) < 0, 0, length(range(low, high, toUInt64(86400))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(toDateTime64(Purchase_ali - 62135596800, 9, 'UTC')), arrayMap(x -> toDateTime64(x - 62135596800, 9, 'UTC'), range(low, high, toUInt64(86400))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(toInt64((toFloat64(toDateTime64(Purchase, 9, 'UTC')) + 62135596800) / 86400) * 86400) AS Purchase_ali\n FROM T\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "T2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 1.0 by Supplier, Fruit", + "SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS PriceAvg\nFROM\n(\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 1 AS high,\n arraySort(arrayZip(Purchase, PriceAvg)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(PriceAvg_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(low, high, toUInt64(1))) - length(ga)) < 0, 0, length(range(low, high, toUInt64(1))) - length(ga))), 1))) AS PriceAvg,\n arrayDistinct(arrayConcat(groupArray(Purchase_ali), arrayMap(x -> toFloat64(x), range(low, high, toUInt64(1))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS PriceAvg_ali,\n toFloat64(toInt64((toFloat64(Purchase) + 0) / 1) * 1) AS Purchase_ali\n FROM T2\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n)" + }, + { + "make_series_test_table | make-series avg(Price+1) on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit;", + "SELECT *\nFROM\n(\n SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS avg_\n FROM\n (\n SELECT\n 
toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 86400 AS high,\n arraySort(arrayZip(Purchase, avg_)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(avg__ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga)) < 0, 0, length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga))), 1))) AS avg_,\n arrayDistinct(arrayConcat(groupArray(toDateTime64(Purchase_ali - 0, 9, 'UTC')), arrayMap(x -> toDateTime64(x - 0, 9, 'UTC'), range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price + 1) AS avg__ali,\n toFloat64(toDateTime64('2016-09-10', 9, 'UTC')) + (toInt64((toFloat64(toDateTime64(Purchase, 9, 'UTC')) - toFloat64(toDateTime64('2016-09-10', 9, 'UTC'))) / 86400) * 86400) AS Purchase_ali\n FROM make_series_test_table\n WHERE (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) >= toUInt64(toDateTime64('2016-09-10', 9, 'UTC'))) AND (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) < toUInt64(toDateTime64('2016-09-13', 9, 'UTC')))\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n )\n)\nORDER BY\n Supplier DESC NULLS LAST,\n Fruit DESC NULLS LAST" + }, + { + "make_series_test_table | make-series avg(Price+1)+1 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit", + "SELECT *\nFROM\n(\n SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS avg_\n FROM\n (\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 86400 AS high,\n arraySort(arrayZip(Purchase, avg_)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(avg__ali) AS ga, arrayMap(x 
-> 0, range(0, toUInt32(if((length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga)) < 0, 0, length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga))), 1))) AS avg_,\n arrayDistinct(arrayConcat(groupArray(toDateTime64(Purchase_ali - 0, 9, 'UTC')), arrayMap(x -> toDateTime64(x - 0, 9, 'UTC'), range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price + 1) + 1 AS avg__ali,\n toFloat64(toDateTime64('2016-09-10', 9, 'UTC')) + (toInt64((toFloat64(toDateTime64(Purchase, 9, 'UTC')) - toFloat64(toDateTime64('2016-09-10', 9, 'UTC'))) / 86400) * 86400) AS Purchase_ali\n FROM make_series_test_table\n WHERE (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) >= toUInt64(toDateTime64('2016-09-10', 9, 'UTC'))) AND (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) < toUInt64(toDateTime64('2016-09-13', 9, 'UTC')))\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n )\n)\nORDER BY\n Supplier DESC NULLS LAST,\n Fruit DESC NULLS LAST" + }, + { + "make_series_test_table | make-series ceiling(avg(Price+1)+1) on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit", + "SELECT *\nFROM\n(\n SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS Column1\n FROM\n (\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 86400 AS high,\n arraySort(arrayZip(Purchase, Column1)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(Column1_ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga)) < 0, 0, 
length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga))), 1))) AS Column1,\n arrayDistinct(arrayConcat(groupArray(toDateTime64(Purchase_ali - 0, 9, 'UTC')), arrayMap(x -> toDateTime64(x - 0, 9, 'UTC'), range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n ceiling(avg(Price + 1) + 1) AS Column1_ali,\n toFloat64(toDateTime64('2016-09-10', 9, 'UTC')) + (toInt64((toFloat64(toDateTime64(Purchase, 9, 'UTC')) - toFloat64(toDateTime64('2016-09-10', 9, 'UTC'))) / 86400) * 86400) AS Purchase_ali\n FROM make_series_test_table\n WHERE (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) >= toUInt64(toDateTime64('2016-09-10', 9, 'UTC'))) AND (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) < toUInt64(toDateTime64('2016-09-13', 9, 'UTC')))\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n )\n)\nORDER BY\n Supplier DESC NULLS LAST,\n Fruit DESC NULLS LAST" + }, + { + "make_series_test_table | make-series ceiling(avg(Price+1)+1) default = strlen('123')+1.5 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit;", + "SELECT *\nFROM\n(\n SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS Column1\n FROM\n (\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 86400 AS high,\n arraySort(arrayZip(Purchase, Column1)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(Column1_ali) AS ga, arrayMap(x -> (lengthUTF8('123') + 1.5), range(0, toUInt32(if((length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))) - length(ga)) < 0, 0, length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), 
toUInt64(86400))) - length(ga))), 1))) AS Column1,\n arrayDistinct(arrayConcat(groupArray(toDateTime64(Purchase_ali - 0, 9, 'UTC')), arrayMap(x -> toDateTime64(x - 0, 9, 'UTC'), range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC')), toUInt64(toDateTime64('2016-09-13', 9, 'UTC')), toUInt64(86400))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n ceiling(avg(Price + 1) + 1) AS Column1_ali,\n toFloat64(toDateTime64('2016-09-10', 9, 'UTC')) + (toInt64((toFloat64(toDateTime64(Purchase, 9, 'UTC')) - toFloat64(toDateTime64('2016-09-10', 9, 'UTC'))) / 86400) * 86400) AS Purchase_ali\n FROM make_series_test_table\n WHERE (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) >= toUInt64(toDateTime64('2016-09-10', 9, 'UTC'))) AND (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) < toUInt64(toDateTime64('2016-09-13', 9, 'UTC')))\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n )\n)\nORDER BY\n Supplier DESC NULLS LAST,\n Fruit DESC NULLS LAST" + }, + { + "make_series_test_table | make-series avg(Price) on Purchase from datetime(2016-09-10)+1d to datetime(2016-09-13)+1d step 1d by Supplier, Fruit | order by Supplier, Fruit", + "SELECT *\nFROM\n(\n SELECT\n Supplier,\n Fruit,\n zipped.1 AS Purchase,\n zipped.2 AS avg_\n FROM\n (\n SELECT\n toUInt64(min(Purchase_ali)) AS low,\n toUInt64(max(Purchase_ali)) + 86400 AS high,\n arraySort(arrayZip(Purchase, avg_)) AS zipped,\n Supplier,\n Fruit,\n arrayConcat(groupArray(avg__ali) AS ga, arrayMap(x -> 0, range(0, toUInt32(if((length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC') + toIntervalNanosecond(86400000000000)), toUInt64(toDateTime64('2016-09-13', 9, 'UTC') + toIntervalNanosecond(86400000000000)), toUInt64(86400))) - length(ga)) < 0, 0, length(range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC') + toIntervalNanosecond(86400000000000)), toUInt64(toDateTime64('2016-09-13', 9, 'UTC') + toIntervalNanosecond(86400000000000)), toUInt64(86400))) - 
length(ga))), 1))) AS avg_,\n arrayDistinct(arrayConcat(groupArray(toDateTime64(Purchase_ali - 0, 9, 'UTC')), arrayMap(x -> toDateTime64(x - 0, 9, 'UTC'), range(toUInt64(toDateTime64('2016-09-10', 9, 'UTC') + toIntervalNanosecond(86400000000000)), toUInt64(toDateTime64('2016-09-13', 9, 'UTC') + toIntervalNanosecond(86400000000000)), toUInt64(86400))))) AS Purchase\n FROM\n (\n SELECT\n Supplier,\n Fruit,\n avg(Price) AS avg__ali,\n toFloat64(toDateTime64('2016-09-10', 9, 'UTC') + toIntervalNanosecond(86400000000000)) + (toInt64((toFloat64(toDateTime64(Purchase, 9, 'UTC')) - toFloat64(toDateTime64('2016-09-10', 9, 'UTC') + toIntervalNanosecond(86400000000000))) / 86400) * 86400) AS Purchase_ali\n FROM make_series_test_table\n WHERE (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) >= toUInt64(toDateTime64('2016-09-10', 9, 'UTC') + toIntervalNanosecond(86400000000000))) AND (toInt64(toFloat64(toDateTime64(Purchase, 9, 'UTC'))) < toUInt64(toDateTime64('2016-09-13', 9, 'UTC') + toIntervalNanosecond(86400000000000)))\n GROUP BY\n Supplier,\n Fruit,\n Purchase_ali\n ORDER BY Purchase_ali ASC\n )\n GROUP BY\n Supplier,\n Fruit\n )\n)\nORDER BY\n Supplier DESC NULLS LAST,\n Fruit DESC NULLS LAST" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_Math.cpp b/src/Parsers/tests/KQL/gtest_KQL_Math.cpp new file mode 100644 index 000000000000..8fe53d307157 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Math.cpp @@ -0,0 +1,137 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Math, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print abs(-5)", + "SELECT abs(-5) AS print_0" + }, + { + "print ceiling(-1.1), ceiling(0), ceiling(0.9)", + "SELECT\n ceil(-1.1) AS print_0,\n ceil(0) AS print_1,\n ceil(0.9) AS print_2" + }, + { + "print exp(2);", + "SELECT exp(2) AS print_0" + }, + { + "print exp2(2)", + "SELECT exp2(2) AS print_0" + }, + { + "print exp10(3)", + "SELECT 
exp10(3) AS print_0" + }, + { + "print log(5)", + "SELECT log(5) AS print_0" + }, + { + "print log2(5)", + "SELECT log2(5) AS print_0" + }, + { + "print log10(5)", + "SELECT log10(5) AS print_0" + }, + { + "print pow(2, 3)", + "SELECT pow(2, 3) AS print_0" + }, + { + "print sqrt(256)", + "SELECT sqrt(256) AS print_0" + }, + { + "print acos(-0.45)", + "SELECT acos(-0.45) AS print_0" + }, + { + "print asin(0.5)", + "SELECT asin(0.5) AS print_0" + }, + { + "print atan(0.5);", + "SELECT atan(0.5) AS print_0" + }, + { + "print atan2(1, -1);", + "SELECT atan2(1, -1) AS print_0" + }, + { + "print cos(-0.45)", + "SELECT cos(-0.45) AS print_0" + }, + { + "print cot(-0.45)", + "SELECT 1 / tan(-0.45) AS print_0" + }, + { + "print degrees(pi()/4)", + "SELECT degrees(pi() / 4) AS print_0" + }, + { + "print gamma(-0.45)", + "SELECT tgamma(-0.45) AS print_0" + }, + { + "print isfinite(1.0/0.0)", + "SELECT isFinite(1. / 0.) AS print_0" + }, + { + "print isinf(1.0/0.0)", + "SELECT isInfinite(1. / 0.) AS print_0" + }, + { + "print loggamma(-0.45)", + "SELECT lgamma(-0.45) AS print_0" + }, + { + "print max_of(10, 1, -3, 17)", + "SELECT arrayReduce('max', [10, 1, -3, 17]) AS print_0" + }, + { + "print min_of(10, 1, -3, 17)", + "SELECT arrayReduce('min', [10, 1, -3, 17]) AS print_0" + }, + { + "print pi()", + "SELECT pi() AS print_0" + }, + { + "print radians(180)", + "SELECT radians(180) AS print_0" + }, + { + "print rand()", + "SELECT if(0 < 2, randCanonical(), moduloOrZero(rand(), 0)) AS print_0" + }, + { + "print rand(1000)", + "SELECT if(1000 < 2, randCanonical(), moduloOrZero(rand(), 1000)) AS print_0" + }, + { + "print rand(0)", + "SELECT if(0 < 2, randCanonical(), moduloOrZero(rand(), 0)) AS print_0" + }, + { + "print round(2.15, 1)", + "SELECT round(2.15, 1) AS print_0" + }, + { + "print sign(-42)", + "SELECT sign(-42) AS print_0" + }, + { + "print sin(-0.45)", + "SELECT sin(-0.45) AS print_0" + }, + { + "print tan(-0.45)", + "SELECT tan(-0.45) AS print_0" + } +}))); diff 
--git a/src/Parsers/tests/KQL/gtest_KQL_Operators.cpp b/src/Parsers/tests/KQL/gtest_KQL_Operators.cpp new file mode 100644 index 000000000000..e966315909bd --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_Operators.cpp @@ -0,0 +1,309 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Operators, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers", + "SELECT *\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | limit 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | take 1 | take 3", + "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 1\n)\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | take 1", + "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)\nLIMIT 1" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", + "SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" + }, + { + "Customers | sort by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST" + }, + { + "Customers | take 3 | order by FirstName desc", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)\nORDER BY FirstName DESC NULLS LAST" + }, + { + "Customers | sort by FirstName asc", + "SELECT *\nFROM Customers\nORDER BY FirstName ASC NULLS FIRST" + }, + { + "Customers | sort by FirstName", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS 
LAST" + }, + { + "Customers | order by Age desc, FirstName asc", + "SELECT *\nFROM Customers\nORDER BY\n Age DESC NULLS LAST,\n FirstName ASC NULLS FIRST" + }, + { + "Customers | order by Age asc , FirstName desc", + "SELECT *\nFROM Customers\nORDER BY\n Age ASC NULLS FIRST,\n FirstName DESC NULLS LAST" + }, + { + "Customers | sort by FirstName | order by Age ", + "SELECT *\nFROM Customers\nORDER BY\n Age DESC NULLS LAST,\n FirstName DESC NULLS LAST" + }, + { + "Customers | sort by FirstName nulls first", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST" + }, + { + "Customers | sort by FirstName nulls last", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST" + }, + { + "Customers | where Occupation == 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation = 'Skilled Manual'" + }, + { + "Customers | where Occupation != 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation != 'Skilled Manual'" + }, + { + "Customers |where Education in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education IN ('Bachelors', 'High School')" + }, + { + "Customers | where Education !in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education NOT IN ('Bachelors', 'High School')" + }, + { + "Customers |where Education contains_cs 'Degree'", + "SELECT *\nFROM Customers\nWHERE Education LIKE '%Degree%'" + }, + { + "Customers | where Occupation startswith_cs 'Skil'", + "SELECT *\nFROM Customers\nWHERE startsWith(Occupation, 'Skil')" + }, + { + "Customers | where FirstName endswith_cs 'le'", + "SELECT *\nFROM Customers\nWHERE endsWith(FirstName, 'le')" + }, + { + "Customers | where Age == 26", + "SELECT *\nFROM Customers\nWHERE Age = 26" + }, + { + "Customers | where Age > 20 and Age < 30", + "SELECT *\nFROM Customers\nWHERE (Age > 20) AND (Age < 30)" + }, + { + "Customers | where Age > 30 | where Education == 'Bachelors'", + "SELECT *\nFROM Customers\nWHERE (Education = 'Bachelors') AND (Age > 30)" + }, + 
{ + "Customers |summarize count() by Occupation", + "SELECT\n Occupation,\n count() AS count_\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize sum(Age) by Occupation", + "SELECT\n Occupation,\n sum(Age) AS sum_Age\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize avg(Age) by Occupation", + "SELECT\n Occupation,\n avg(Age) AS avg_Age\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize min(Age) by Occupation", + "SELECT\n Occupation,\n min(Age) AS min_Age\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers |summarize max(Age) by Occupation", + "SELECT\n Occupation,\n max(Age) AS max_Age\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers | where FirstName contains 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" + }, + { + "Customers | where FirstName !contains 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%pet%')" + }, + { + "Customers | where FirstName endswith 'er'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%er'" + }, + { + "Customers | where FirstName !endswith 'er'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%er')" + }, + { + "Customers | where FirstName matches regex 'P.*r'", + "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" + }, + { + "Customers | where FirstName startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" + }, + { + "Customers | where FirstName !startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" + }, + { + "Customers | where Age in ((Customers|project Age|where Age < 30))", + "SELECT *\nFROM Customers\nWHERE Age IN (\n SELECT Age\n FROM Customers\n WHERE Age < 30\n)" + }, + { + "Customers | where Education has 'School'", + "SELECT *\nFROM Customers\nWHERE ifNull(hasTokenCaseInsensitiveOrNull(Education, 'School'), hasTokenCaseInsensitive(Education, 'School') AND (positionCaseInsensitive(Education, 'School') > 0))" + }, + { + "Customers | where 
Education !has 'School'", + "SELECT *\nFROM Customers\nWHERE NOT ifNull(hasTokenCaseInsensitiveOrNull(Education, 'School'), hasTokenCaseInsensitive(Education, 'School') AND (positionCaseInsensitive(Education, 'School') > 0))" + }, + { + "Customers | where Education has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE ifNull(hasTokenOrNull(Education, 'School'), hasToken(Education, 'School') AND (position(Education, 'School') > 0))" + }, + { + "Customers | where Education !has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE NOT ifNull(hasTokenOrNull(Education, 'School'), hasToken(Education, 'School') AND (position(Education, 'School') > 0))" + }, + { + "Customers|where Occupation has_any ('Skilled','abcd')", + "SELECT *\nFROM Customers\nWHERE ifNull(hasTokenCaseInsensitiveOrNull(Occupation, 'Skilled'), hasTokenCaseInsensitive(Occupation, 'Skilled') AND (positionCaseInsensitive(Occupation, 'Skilled') > 0)) OR ifNull(hasTokenCaseInsensitiveOrNull(Occupation, 'abcd'), hasTokenCaseInsensitive(Occupation, 'abcd') AND (positionCaseInsensitive(Occupation, 'abcd') > 0))" + }, + { + "Customers|where Occupation has_all ('Skilled','abcd')", + "SELECT *\nFROM Customers\nWHERE ifNull(hasTokenCaseInsensitiveOrNull(Occupation, 'Skilled'), hasTokenCaseInsensitive(Occupation, 'Skilled') AND (positionCaseInsensitive(Occupation, 'Skilled') > 0)) AND ifNull(hasTokenCaseInsensitiveOrNull(Occupation, 'abcd'), hasTokenCaseInsensitive(Occupation, 'abcd') AND (positionCaseInsensitive(Occupation, 'abcd') > 0))" + }, + { + "Customers|where Occupation has_all (strcat('Skill','ed'),'Manual')", + "SELECT *\nFROM Customers\nWHERE ifNull(hasTokenCaseInsensitiveOrNull(Occupation, concat(ifNull(kql_tostring('Skill'), ''), ifNull(kql_tostring('ed'), ''), '')), hasTokenCaseInsensitive(Occupation, 'concat') AND hasTokenCaseInsensitive(Occupation, 'ifNull') AND hasTokenCaseInsensitive(Occupation, 'kql') AND hasTokenCaseInsensitive(Occupation, 'tostring') AND hasTokenCaseInsensitive(Occupation, 'Skill') 
AND hasTokenCaseInsensitive(Occupation, 'ifNull') AND hasTokenCaseInsensitive(Occupation, 'kql') AND hasTokenCaseInsensitive(Occupation, 'tostring') AND hasTokenCaseInsensitive(Occupation, 'ed') AND (positionCaseInsensitive(Occupation, concat(ifNull(kql_tostring('Skill'), ''), ifNull(kql_tostring('ed'), ''), '')) > 0)) AND ifNull(hasTokenCaseInsensitiveOrNull(Occupation, 'Manual'), hasTokenCaseInsensitive(Occupation, 'Manual') AND (positionCaseInsensitive(Occupation, 'Manual') > 0))" + }, + { + "Customers | where Occupation == strcat('Pro','fessional') | take 1", + "SELECT *\nFROM Customers\nWHERE Occupation = concat(ifNull(kql_tostring('Pro'), ''), ifNull(kql_tostring('fessional'), ''), '')\nLIMIT 1" + }, + { + "Customers | project countof('The cat sat on the mat', 'at')", + "SELECT kql_count_overlapping_substrings('The cat sat on the mat', 'at') AS Column1\nFROM Customers" + }, + { + "Customers | project countof('The cat sat on the mat', 'at', 'normal')", + "SELECT kql_count_overlapping_substrings('The cat sat on the mat', 'at') AS Column1\nFROM Customers" + }, + { + "Customers | project countof('The cat sat on the mat', 'at', 'regex')", + "SELECT countMatches('The cat sat on the mat', 'at') AS Column1\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 'The price of PINEAPPLE ice cream is 10')", + "SELECT kql_extract('The price of PINEAPPLE ice cream is 10', '(\\b[A-Z]+\\b).+(\\b\\\\d+)', 0) AS Column1\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of PINEAPPLE ice cream is 20')", + "SELECT kql_extract('The price of PINEAPPLE ice cream is 20', '(\\b[A-Z]+\\b).+(\\b\\\\d+)', 1) AS Column1\nFROM Customers" + }, + { + "Customers | project extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 30')", + "SELECT kql_extract('The price of PINEAPPLE ice cream is 30', '(\\b[A-Z]+\\b).+(\\b\\\\d+)', 2) AS Column1\nFROM Customers" + }, + { + "Customers | project 
extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 40', typeof(int))", + "SELECT accurateCastOrNull(kql_extract('The price of PINEAPPLE ice cream is 40', '(\\b[A-Z]+\\b).+(\\b\\\\d+)', 2), 'Int32') AS Column1\nFROM Customers" + }, + { + "Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 50')", + "SELECT extractAllGroups('The price of PINEAPPLE ice cream is 50', '(\\\\w)(\\\\w+)(\\\\w)') AS Column1\nFROM Customers" + }, + { + "Customers | project split('aa_bb', '_')", + "SELECT if(empty('_'), splitByString(' ', 'aa_bb'), splitByString('_', 'aa_bb')) AS Column1\nFROM Customers" + }, + { + "Customers | project split('aaa_bbb_ccc', '_', 1)", + "SELECT multiIf((length(if(empty('_'), splitByString(' ', 'aaa_bbb_ccc'), splitByString('_', 'aaa_bbb_ccc'))) >= 2) AND (2 > 0), arrayPushBack([], if(empty('_'), splitByString(' ', 'aaa_bbb_ccc'), splitByString('_', 'aaa_bbb_ccc'))[2]), 2 = 0, if(empty('_'), splitByString(' ', 'aaa_bbb_ccc'), splitByString('_', 'aaa_bbb_ccc')), arrayPushBack([], NULL[1])) AS Column1\nFROM Customers" + }, + { + "Customers | project strcat_delim('-', '1', '2', 'A')", + "SELECT concat(ifNull(kql_tostring('1'), ''), '-', ifNull(kql_tostring('2'), ''), '-', ifNull(kql_tostring('A'), '')) AS Column1\nFROM Customers" + }, + { + "print x=1, s=strcat('Hello', ', ', 'World!')", + "SELECT\n 1 AS x,\n concat(ifNull(kql_tostring('Hello'), ''), ifNull(kql_tostring(', '), ''), ifNull(kql_tostring('World!'), ''), '') AS s" + }, + { + "print parse_urlquery('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')", + "SELECT concat('{', concat('\"Query Parameters\":', concat('{\"', replace(replace(if(position('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment', '?') > 0, queryString('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), 'https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment'), '=', '\":\"'), '&', '\",\"'), '\"}')), 
'}') AS print_0" + }, + { + "print strcmp('a','b')", + "SELECT multiIf('a' = 'b', 0, 'a' < 'b', -1, 1) AS print_0" + }, + { + "Customers | summarize t = make_list(FirstName) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(FirstName, FirstName IS NOT NULL) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_list(FirstName, 10) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(10)(FirstName, FirstName IS NOT NULL) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_list_if(FirstName, Age > 10) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_list_if(FirstName, Age > 10, 10) by FirstName", + "SELECT\n FirstName,\n groupArrayIf(10)(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set(FirstName) by FirstName", + "SELECT\n FirstName,\n groupUniqArray(FirstName) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set(FirstName, 10) by FirstName", + "SELECT\n FirstName,\n groupUniqArray(10)(FirstName) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set_if(FirstName, Age > 10) by FirstName", + "SELECT\n FirstName,\n groupUniqArrayIf(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "Customers | summarize t = make_set_if(FirstName, Age > 10, 10) by FirstName", + "SELECT\n FirstName,\n groupUniqArrayIf(10)(FirstName, Age > 10) AS t\nFROM Customers\nGROUP BY FirstName" + }, + { + "print output = dynamic([1, 2, 3])", + "SELECT [1, 2, 3] AS output" + }, + { + "print output = dynamic(['a', 'b', 'c'])", + "SELECT ['a', 'b', 'c'] AS output" + }, + { + "T | extend duration = endTime - startTime", + "SELECT\n * EXCEPT duration,\n endTime - startTime AS duration\nFROM T" + }, + { + "T |project endTime, startTime | extend duration = endTime - startTime", + 
"SELECT\n * EXCEPT duration,\n endTime - startTime AS duration\nFROM\n(\n SELECT\n endTime,\n startTime\n FROM T\n)" + }, + { + "T | extend c =c*2, b-a, d = a +b , a*b", + "SELECT\n * EXCEPT (c, Column1, d, Column2),\n c * 2 AS c,\n b - a AS Column1,\n a + b AS d,\n a * b AS Column2\nFROM T" + }, + { + "print 5, 4 | extend Column3 = 'a', 'b', 'c' | extend 'd'", + "SELECT\n * EXCEPT Column4,\n 'd' AS Column4\nFROM\n(\n SELECT\n * EXCEPT (Column3, Column1, Column2),\n 'a' AS Column3,\n 'b' AS Column1,\n 'c' AS Column2\n FROM\n (\n SELECT\n 5 AS print_0,\n 4 AS print_1\n )\n)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_ProjectAway.cpp b/src/Parsers/tests/KQL/gtest_KQL_ProjectAway.cpp new file mode 100644 index 000000000000..41c68b365aa0 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_ProjectAway.cpp @@ -0,0 +1,49 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_ProjectAway, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | project-away FirstName", + "SELECT * EXCEPT FirstName\nFROM Customers" + }, + { + "Customers | project-away FirstName, LastName", + "SELECT * EXCEPT (FirstName, LastName)\nFROM Customers" + }, + { + "Customers | project-away *Name", + "SELECT * EXCEPT '^.*Name$'\nFROM Customers" + }, + { + "Customers | project-away *Name, *tion", + "SELECT * EXCEPT '^.*Name$'\nFROM\n(\n SELECT * EXCEPT '^.*tion$'\n FROM Customers\n)" + }, + { + "Customers | project-away *Name, Age", + "SELECT * EXCEPT Age\nFROM\n(\n SELECT * EXCEPT '^.*Name$'\n FROM Customers\n)" + }, + { + "Customers | project-away *Name, Age, Education", + "SELECT * EXCEPT (Age, Education)\nFROM\n(\n SELECT * EXCEPT '^.*Name$'\n FROM Customers\n)" + }, + { + "Customers | project-away *irstName, Age, *astName, Education", + "SELECT * EXCEPT (Age, Education)\nFROM\n(\n SELECT * EXCEPT '^.*astName$'\n FROM\n (\n SELECT * EXCEPT '^.*irstName$'\n FROM Customers\n )\n)" + }, + { + 
"Customers | where Age< 30 | limit 2 | project-away FirstName", + "SELECT * EXCEPT FirstName\nFROM\n(\n SELECT *\n FROM Customers\n WHERE Age < 30\n LIMIT 2\n)" + }, + { + "Customers|summarize sum(Age), avg(Age) by FirstName | project-away sum_Age", + "SELECT * EXCEPT sum_Age\nFROM\n(\n SELECT\n FirstName,\n sum(Age) AS sum_Age,\n avg(Age) AS avg_Age\n FROM Customers\n GROUP BY FirstName\n)" + }, + { + "Customers|extend FullName = strcat(FirstName,' ',LastName) | project-away FirstName, LastName", + "SELECT * EXCEPT (FirstName, LastName)\nFROM\n(\n SELECT\n * EXCEPT FullName,\n concat(ifNull(kql_tostring(FirstName), ''), ifNull(kql_tostring(' '), ''), ifNull(kql_tostring(LastName), ''), '') AS FullName\n FROM Customers\n)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_ProjectRename.cpp b/src/Parsers/tests/KQL/gtest_KQL_ProjectRename.cpp new file mode 100644 index 000000000000..e67727136f0f --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_ProjectRename.cpp @@ -0,0 +1,21 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_ProjectRename, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | project-rename FN=FirstName", + "SELECT\n * EXCEPT FirstName,\n FirstName AS FN\nFROM Customers" + }, + { + "print FirstName='FN', LastName='LN' | project-rename FN=FirstName, LN=LastName", + "SELECT\n * EXCEPT (FirstName, LastName),\n FirstName AS FN,\n LastName AS LN\nFROM\n(\n SELECT\n 'FN' AS FirstName,\n 'LN' AS LastName\n)" + }, + { + "print FirstName='FN', LastName='LN' | project-rename FN=FirstName, LN=LastName, LastName", + "SELECT\n * EXCEPT (FirstName, LastName),\n FirstName AS FN,\n LastName AS LN\nFROM\n(\n SELECT\n 'FN' AS FirstName,\n 'LN' AS LastName\n)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_Sort.cpp b/src/Parsers/tests/KQL/gtest_KQL_Sort.cpp new file mode 100644 index 000000000000..e91bfbdd63b9 --- /dev/null +++ 
b/src/Parsers/tests/KQL/gtest_KQL_Sort.cpp @@ -0,0 +1,109 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Sort, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | order by FirstName", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST" + }, + { + "Customers | order by FirstName asc", + "SELECT *\nFROM Customers\nORDER BY FirstName ASC NULLS FIRST" + }, + { + "Customers | order by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST" + }, + { + "Customers | order by FirstName asc nulls first", + "SELECT *\nFROM Customers\nORDER BY FirstName ASC NULLS FIRST" + }, + { + "Customers | order by FirstName asc nulls last", + "SELECT *\nFROM Customers\nORDER BY FirstName ASC NULLS LAST" + }, + { + "Customers | order by FirstName desc nulls first", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST" + }, + { + "Customers | order by FirstName desc nulls last", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST" + }, + { + "Customers | order by FirstName nulls first", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST" + }, + { + "Customers | order by FirstName nulls last", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST" + }, + { + "Customers | order by FirstName, Age", + "SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS LAST,\n Age DESC NULLS LAST" + }, + { + "Customers | order by FirstName asc, Age desc", + "SELECT *\nFROM Customers\nORDER BY\n FirstName ASC NULLS FIRST,\n Age DESC NULLS LAST" + }, + { + "Customers | order by FirstName desc, Age asc", + "SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS LAST,\n Age ASC NULLS FIRST" + }, + { + "Customers | order by FirstName asc nulls first, Age asc nulls first", + "SELECT *\nFROM Customers\nORDER BY\n FirstName ASC NULLS FIRST,\n Age ASC NULLS FIRST" + }, + { + "Customers | order by FirstName asc nulls 
last, Age asc nulls last", + "SELECT *\nFROM Customers\nORDER BY\n FirstName ASC NULLS LAST,\n Age ASC NULLS LAST" + }, + { + "Customers | order by FirstName desc nulls first, Age desc nulls first", + "SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS FIRST,\n Age DESC NULLS FIRST" + }, + { + "Customers | order by FirstName desc nulls last, Age desc nulls last", + "SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS LAST,\n Age DESC NULLS LAST" + }, + { + "Customers | order by FirstName nulls first, Age nulls first", + "SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS FIRST,\n Age DESC NULLS FIRST" + }, + { + "Customers | order by FirstName nulls last, Age nulls last", + "SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS LAST,\n Age DESC NULLS LAST" + }, + { + "Customers | order by FirstName, Age asc nulls last, LastName nulls first", + "SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS LAST,\n Age ASC NULLS LAST,\n LastName DESC NULLS FIRST" + }, + { + "Customers | order by FirstName ASC", + "throws" + }, + { + "Customers | order by FirstName DESC", + "throws" + }, + { + "Customers | order by FirstName nulls", + "throws" + }, + { + "Customers | order by FirstName nulls middle", + "throws" + }, + { + "Customers | order by FirstName asc desc", + "throws" + }, + { + "Customers | order by FirstName nulls first desc", + "throws" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp new file mode 100644 index 000000000000..214818985590 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp @@ -0,0 +1,287 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_IP, ParserRegexTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print res = base64_decode_tostring('S3VzdG8====')", + R"(SELECT IF\(\(length\(\'S3VzdG8====\'\) % 4\) != 0, NULL, 
IF\(countMatches\(substring\(\'S3VzdG8====\', 1, length\(\'S3VzdG8====\'\) - 2\), \'=\'\) > 0, NULL, IF\(isValidUTF8\(tryBase64Decode\(\'S3VzdG8====\'\) AS decoded_str_\d+\), decoded_str_\d+, NULL\)\)\) AS res)" + }, +}))); + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_String, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print base64_encode_fromguid(A)", + "SELECT if(toTypeName(A) NOT IN ['UUID', 'Nullable(UUID)'], toString(throwIf(true, 'Expected guid as argument')), base64Encode(UUIDStringToNum(toString(A), 2))) AS print_0" + }, + { + "print base64_decode_toguid(A)", + "SELECT toUUIDOrNull(UUIDNumToString(toFixedString(base64Decode(A), 16), 2)) AS print_0" + }, + { + "print base64_decode_toarray('S3VzdG8=')", + "SELECT IF((length('S3VzdG8=') % 4) != 0, [NULL], IF(length(tryBase64Decode('S3VzdG8=')) = 0, [NULL], IF(countMatches(substring('S3VzdG8=', 1, length('S3VzdG8=') - 2), '=') > 0, [NULL], arrayMap(x -> reinterpretAsUInt8(x), splitByRegexp('', base64Decode(assumeNotNull(IF(length(tryBase64Decode('S3VzdG8=')) = 0, '', 'S3VzdG8=')))))))) AS print_0" + }, + { + "print replace_regex('Hello, World!', '.', '\\0\\0')", + "SELECT replaceRegexpAll('Hello, World!', '.', '\\0\\0') AS print_0" + }, + { + "print has_any_index('this is an example', dynamic(['this', 'example'])) ", + "SELECT if(empty(['this', 'example']), -1, indexOf(arrayMap(x -> (x IN splitByChar(' ', 'this is an example')), if(empty(['this', 'example']), [''], arrayMap(x -> toString(x), ['this', 'example']))), 1) - 1) AS print_0" + }, + { + "print has_any_index('this is an example', dynamic([]))", + "SELECT if(empty([]), -1, indexOf(arrayMap(x -> (x IN splitByChar(' ', 'this is an example')), if(empty([]), [''], arrayMap(x -> toString(x), []))), 1) - 1) AS print_0" + }, + { + "print translate('krasp', 'otsku', 'spark')", + "SELECT if(length('otsku') = 0, '', translate('spark', 'krasp', multiIf(length('otsku') = 0, 'krasp', 
(length('krasp') - length('otsku')) > 0, concat('otsku', repeat(substr('otsku', length('otsku'), 1), toUInt16(length('krasp') - length('otsku')))), (length('krasp') - length('otsku')) < 0, substr('otsku', 1, length('krasp')), 'otsku'))) AS print_0" + }, + { + "print trim_start('[^\\w]+', strcat('- ','Te st1','// $'))", + "SELECT replaceRegexpOne(concat(ifNull(kql_tostring('- '), ''), ifNull(kql_tostring('Te st1'), ''), ifNull(kql_tostring('// $'), ''), ''), concat('^', '[^\\\\w]+'), '') AS print_0" + }, + { + "print trim_end('.com', 'bing.com')", + "SELECT replaceRegexpOne('bing.com', concat('.com', '$'), '') AS print_0" + }, + { + "print trim('--', '--https://bing.com--')", + "SELECT replaceRegexpOne(replaceRegexpOne('--https://bing.com--', concat('--', '$'), ''), concat('^', '--'), '') AS print_0" + }, + { + "print bool(1)", + "SELECT if((toTypeName(1) = 'IntervalNanosecond') OR ((accurateCastOrNull(1, 'Bool') IS NULL) != (1 IS NULL)), accurateCastOrNull(throwIf(true, 'Failed to parse Bool literal'), 'Bool'), accurateCastOrNull(1, 'Bool')) AS print_0" + }, + { + "print guid(74be27de-1e4e-49d9-b579-fe0b331d3642)", + "SELECT toUUIDOrNull('74be27de-1e4e-49d9-b579-fe0b331d3642') AS print_0" + }, + { + "print guid('74be27de-1e4e-49d9-b579-fe0b331d3642')", + "SELECT toUUIDOrNull('74be27de-1e4e-49d9-b579-fe0b331d3642') AS print_0" + }, + { + "print guid('74be27de1e4e49d9b579fe0b331d3642')", + "SELECT toUUIDOrNull('74be27de1e4e49d9b579fe0b331d3642') AS print_0" + }, + { + "print int(32.5)", + "SELECT if((toTypeName(32.5) = 'IntervalNanosecond') OR ((accurateCastOrNull(32.5, 'Int32') IS NULL) != (32.5 IS NULL)), accurateCastOrNull(throwIf(true, 'Failed to parse Int32 literal'), 'Int32'), accurateCastOrNull(32.5, 'Int32')) AS print_0" + }, + { + "print long(32.5)", + "SELECT if((toTypeName(32.5) = 'IntervalNanosecond') OR ((accurateCastOrNull(32.5, 'Int64') IS NULL) != (32.5 IS NULL)), accurateCastOrNull(throwIf(true, 'Failed to parse Int64 literal'), 'Int64'), 
accurateCastOrNull(32.5, 'Int64')) AS print_0" + }, + { + "print real(32.5)", + "SELECT if((toTypeName(32.5) = 'IntervalNanosecond') OR ((accurateCastOrNull(32.5, 'Float64') IS NULL) != (32.5 IS NULL)), accurateCastOrNull(throwIf(true, 'Failed to parse Float64 literal'), 'Float64'), accurateCastOrNull(32.5, 'Float64')) AS print_0" + }, + { + "print time('1.22:34:8.128')", + "SELECT toIntervalNanosecond(167648128000000) AS print_0" + }, + { + "print time('1d')", + "SELECT toIntervalNanosecond(86400000000000) AS print_0" + }, + { + "print time('1.5d')", + "SELECT toIntervalNanosecond(129600000000000) AS print_0" + }, + { + "print timespan('1.5d')", + "SELECT toIntervalNanosecond(129600000000000) AS print_0" + }, + { + "print time('1 d')", + "SELECT toIntervalNanosecond(86400000000000) AS print_0" + }, + { + "print time('1.5 d')", + "SELECT toIntervalNanosecond(129600000000000) AS print_0" + }, + { + "print time(1 h)", + "SELECT toIntervalNanosecond(3600000000000) AS print_0" + }, + { + "print time(1 sec)", + "SELECT toIntervalNanosecond(1000000000) AS print_0" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(bool));", + "SELECT accurateCastOrNull(toInt64OrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1)), 'Boolean') AS print_0" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(date));", + "SELECT accurateCastOrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), 'DateTime') AS print_0" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(guid));", + "SELECT accurateCastOrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), 'UUID') AS print_0" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(int));", + "SELECT accurateCastOrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), 'Int32') AS print_0" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(long));", + "SELECT accurateCastOrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), 'Int64') AS print_0" + }, + { + 
"print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(real));", + "SELECT accurateCastOrNull(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), 'Float64') AS print_0" + }, + { + "print extract('x=([0-9.]+)', 1, 'hello x=456|wo' , typeof(decimal));", + "SELECT toDecimal128OrNull(if(countSubstrings(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), '.') > 1, NULL, kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1)), length(substr(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), position(kql_extract('hello x=456|wo', 'x=([0-9.]+)', 1), '.') + 1))) AS print_0" + }, + { + "print parse_version('1.2.3.40')", + "SELECT if((length(splitByChar('.', '1.2.3.40')) > 4) OR (length(splitByChar('.', '1.2.3.40')) < 1) OR (match('1.2.3.40', '.*[a-zA-Z]+.*') = 1) OR empty('1.2.3.40') OR hasAll(splitByChar('.', '1.2.3.40'), ['']), toDecimal128OrNull('NULL', 0), toDecimal128OrNull(substring(arrayStringConcat(arrayMap(x -> leftPad(x, 8, '0'), arrayMap(x -> if(empty(x), '0', x), arrayResize(splitByChar('.', '1.2.3.40'), 4)))), 8), 0)) AS print_0" + }, + { + "print parse_version('1')", + "SELECT if((length(splitByChar('.', '1')) > 4) OR (length(splitByChar('.', '1')) < 1) OR (match('1', '.*[a-zA-Z]+.*') = 1) OR empty('1') OR hasAll(splitByChar('.', '1'), ['']), toDecimal128OrNull('NULL', 0), toDecimal128OrNull(substring(arrayStringConcat(arrayMap(x -> leftPad(x, 8, '0'), arrayMap(x -> if(empty(x), '0', x), arrayResize(splitByChar('.', '1'), 4)))), 8), 0)) AS print_0" + }, + { + "print parse_version('')", + "SELECT if((length(splitByChar('.', '')) > 4) OR (length(splitByChar('.', '')) < 1) OR (match('', '.*[a-zA-Z]+.*') = 1) OR empty('') OR hasAll(splitByChar('.', ''), ['']), toDecimal128OrNull('NULL', 0), toDecimal128OrNull(substring(arrayStringConcat(arrayMap(x -> leftPad(x, 8, '0'), arrayMap(x -> if(empty(x), '0', x), arrayResize(splitByChar('.', ''), 4)))), 8), 0)) AS print_0" + }, + { + "print parse_version('...')", + "SELECT if((length(splitByChar('.', '...')) > 4) OR 
(length(splitByChar('.', '...')) < 1) OR (match('...', '.*[a-zA-Z]+.*') = 1) OR empty('...') OR hasAll(splitByChar('.', '...'), ['']), toDecimal128OrNull('NULL', 0), toDecimal128OrNull(substring(arrayStringConcat(arrayMap(x -> leftPad(x, 8, '0'), arrayMap(x -> if(empty(x), '0', x), arrayResize(splitByChar('.', '...'), 4)))), 8), 0)) AS print_0" + }, + { + "print parse_json( dynamic([1, 2, 3]))", + "SELECT [1, 2, 3] AS print_0" + }, + { + "print parse_json('{\"a\":123.5, \"b\":\"{\\\"c\\\":456}\"}')", + "SELECT if(isValidJSON('{\"a\":123.5, \"b\":\"{\"c\":456}\"}'), JSON_QUERY('{\"a\":123.5, \"b\":\"{\"c\":456}\"}', '$'), toJSONString('{\"a\":123.5, \"b\":\"{\"c\":456}\"}')) AS print_0" + }, + { + "print extract_json( '$.a' , '{\"a\":123, \"b\":\"{\"c\":456}\"}' , typeof(long))", + "SELECT accurateCastOrNull(JSON_VALUE('{\"a\":123, \"b\":\"{\"c\":456}\"}', '$.a'), 'Int64') AS print_0" + }, + { + "print extract_json( '$.a' , '{\"a\":123, \"b\":\"{\"c\":456}\"}' , typeof(bool))", + "SELECT if(toInt64OrNull(JSON_VALUE('{\"a\":123, \"b\":\"{\"c\":456}\"}', '$.a')) > 0, true, false) AS print_0" + }, + { + "print parse_command_line('echo \"hello world!\" print$?', 'windows')", + "SELECT if(empty('echo \"hello world!\" print$?') OR hasAll(splitByChar(' ', 'echo \"hello world!\" print$?'), ['']), arrayMap(x -> NULL, splitByChar(' ', '')), splitByChar(' ', 'echo \"hello world!\" print$?')) AS print_0" + }, + { + "print reverse(123)", + "SELECT reverse(ifNull(kql_tostring(123), '')) AS print_0" + }, + { + "print reverse(123.34)", + "SELECT reverse(ifNull(kql_tostring(123.34), '')) AS print_0" + }, + { + "print reverse('clickhouse')", + "SELECT reverse(ifNull(kql_tostring('clickhouse'), '')) AS print_0" + }, + { + "print parse_csv('aa,b,cc')", + "SELECT if(CAST(position('aa,b,cc', '\\n'), 'UInt8'), splitByChar(',', substring('aa,b,cc', 1, position('aa,b,cc', '\\n') - 1)), splitByChar(',', substring('aa,b,cc', 1, length('aa,b,cc')))) AS print_0" + }, + { + "print 
parse_csv('record1,a,b,c\nrecord2,x,y,z')", + "SELECT if(CAST(position('record1,a,b,c\\nrecord2,x,y,z', '\\n'), 'UInt8'), splitByChar(',', substring('record1,a,b,c\\nrecord2,x,y,z', 1, position('record1,a,b,c\\nrecord2,x,y,z', '\\n') - 1)), splitByChar(',', substring('record1,a,b,c\\nrecord2,x,y,z', 1, length('record1,a,b,c\\nrecord2,x,y,z')))) AS print_0" + }, + { + "Customers | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2))| order by LastName", + "SELECT concat(ifNull(kql_tostring(if(toInt64(length(FirstName)) <= 0, '', substr(FirstName, (((0 % toInt64(length(FirstName))) + toInt64(length(FirstName))) % toInt64(length(FirstName))) + 1, 3))), ''), ifNull(kql_tostring(' '), ''), ifNull(kql_tostring(if(toInt64(length(LastName)) <= 0, '', substr(LastName, (((2 % toInt64(length(LastName))) + toInt64(length(LastName))) % toInt64(length(LastName))) + 1))), ''), '') AS name_abbr\nFROM Customers\nORDER BY LastName DESC NULLS LAST" + }, + { + "print indexof('abcdefg','cde')", + "SELECT kql_indexof(kql_tostring('abcdefg'), kql_tostring('cde'), 0, -1, 1) AS print_0" + }, + { + "print indexof('abcdefg','cde',0,3)", + "SELECT kql_indexof(kql_tostring('abcdefg'), kql_tostring('cde'), 0, 3, 1) AS print_0" + }, + { + "print indexof('abcdefg','cde',1,2)", + "SELECT kql_indexof(kql_tostring('abcdefg'), kql_tostring('cde'), 1, 2, 1) AS print_0" + }, + { + "print indexof('abcdefg','cde',-5)", + "SELECT kql_indexof(kql_tostring('abcdefg'), kql_tostring('cde'), -5, -1, 1) AS print_0" + }, + { + "print indexof(1234567,5,1,4)", + "SELECT kql_indexof(kql_tostring(1234567), kql_tostring(5), 1, 4, 1) AS print_0" + }, + { + "print indexof('abcdefg','cde',2,-1)", + "SELECT kql_indexof(kql_tostring('abcdefg'), kql_tostring('cde'), 2, -1, 1) AS print_0" + }, + { + "print indexof('abcdefgabcdefg', 'cde', 3)", + "SELECT kql_indexof(kql_tostring('abcdefgabcdefg'), kql_tostring('cde'), 3, -1, 1) AS print_0" + }, + { + "print indexof('abcdefgabcdefg', 'cde', 1, 13, 
3) ", + "SELECT kql_indexof(kql_tostring('abcdefgabcdefg'), kql_tostring('cde'), 1, 13, 3) AS print_0" + }, + { + "print indexof(1d, '.')", + "SELECT kql_indexof(kql_tostring(toIntervalNanosecond(86400000000000)), kql_tostring('.'), 0, -1, 1) AS print_0" + }, + { + "print strrep(3s,2,' ')", + "SELECT substr(repeat(concat(ifNull(kql_tostring(toIntervalNanosecond(3000000000)), ''), ' '), 2), 1, length(repeat(concat(ifNull(kql_tostring(toIntervalNanosecond(3000000000)), ''), ' '), 2)) - length(' ')) AS print_0" + }, + { + "print isempty(1.12345)", + "SELECT empty(ifNull(kql_tostring(1.12345), '')) AS print_0" + }, + { + "print isnotempty('1.12345')", + "SELECT notEmpty(ifNull(kql_tostring('1.12345'), '')) AS print_0" + }, + { + "print string_size('⒦⒰⒮⒯⒪')", + "SELECT length('⒦⒰⒮⒯⒪') AS print_0" + }, + { + "print to_utf8('⒦⒰⒮⒯⒪')", + "SELECT arrayMap(x -> if(substring(bin(x), 1, 1) = '0', reinterpretAsInt64(reverse(UNBIN(substring(bin(x), 2, 7)))), if(substring(bin(x), 1, 3) = '110', reinterpretAsInt64(reverse(UNBIN(concat(substring(bin(x), 4, 5), substring(bin(x), 11, 6))))), if(substring(bin(x), 1, 4) = '1110', reinterpretAsInt64(reverse(UNBIN(concat(substring(bin(x), 5, 4), substring(bin(x), 11, 6), substring(bin(x), 19, 6))))), if(substring(bin(x), 1, 5) = '11110', reinterpretAsInt64(reverse(UNBIN(concat(substring(bin(x), 6, 3), substring(bin(x), 11, 6), substring(bin(x), 19, 6), substring(bin(x), 27, 6))))), -1)))), ngrams('⒦⒰⒮⒯⒪', 1)) AS print_0" + }, + { + "print new_guid()", + "SELECT generateUUIDv4() AS print_0" + }, + { + "print str = make_string(dynamic([75, 117, 115, 116, 111]))", + "SELECT kql_make_string([75, 117, 115, 116, 111]) AS str" + }, + { + "MyTable | project t = make_string(col_arr, col1, col2)", + "SELECT kql_make_string(col_arr, col1, col2) AS t\nFROM MyTable" + }, + { + "print parse_url('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment')", + "SELECT 
kql_parseurl('https://john:123@google.com:1234/this/is/a/path?k1=v1&k2=v2#fragment') AS print_0", + }, + { + "table | project indexof_regex(A, B, C, D, E)", + "SELECT kql_indexof_regex(A, B, C, D, E) AS Column1\nFROM table" + }, + { + "Customers | project t = isascii(FirstName)", + "SELECT NOT toBool(arrayExists(x -> ((x < 0) OR (x > 127)), arrayMap(x -> ascii(x), splitByString('', assumeNotNull(FirstName))))) AS t\nFROM Customers" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_TopHitter.cpp b/src/Parsers/tests/KQL/gtest_KQL_TopHitter.cpp new file mode 100644 index 000000000000..312da51cc5d5 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_TopHitter.cpp @@ -0,0 +1,57 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_TopHitters, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | top 5 by Age", + "SELECT *\nFROM Customers\nORDER BY Age DESC NULLS LAST\nLIMIT 5" + }, + { + "Customers | top 5 by Age desc", + "SELECT *\nFROM Customers\nORDER BY Age DESC NULLS LAST\nLIMIT 5" + }, + { + "Customers | top 5 by Age asc", + "SELECT *\nFROM Customers\nORDER BY Age ASC NULLS FIRST\nLIMIT 5" + }, + { + "Customers | top 5 by FirstName desc nulls first", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST\nLIMIT 5" + }, + { + "Customers | top 5 by FirstName desc nulls last", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST\nLIMIT 5" + }, + { + "Customers | top 5 by Age | top 2 by FirstName", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n ORDER BY Age DESC NULLS LAST\n LIMIT 5\n)\nORDER BY FirstName DESC NULLS LAST\nLIMIT 2" + }, + { + "Customers| top-hitters a = 3 of Age by extra", + "SELECT *\nFROM\n(\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n)\nORDER BY approximate_sum_extra DESC NULLS LAST\nLIMIT 3 AS a" + }, + { + "Customers| top-hitters 3 of Age", + "SELECT *\nFROM\n(\n SELECT\n 
Age,\n count() AS approximate_count_Age\n FROM Customers\n GROUP BY Age\n)\nORDER BY approximate_count_Age DESC NULLS LAST\nLIMIT 3" + }, + { + "Customers| top-hitters 3 of Age by extra | top-hitters 2 of Age", + "SELECT *\nFROM\n(\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM\n (\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC NULLS LAST\n LIMIT 3\n )\n GROUP BY Age\n)\nORDER BY approximate_count_Age DESC NULLS LAST\nLIMIT 2" + }, + { + "Customers| top-hitters 3 of Age by extra | where Age > 30", + "SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC NULLS LAST\n LIMIT 3\n)\nWHERE Age > 30" + }, + { + "Customers| top-hitters 3 of Age by extra | where approximate_sum_extra < 200", + "SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC NULLS LAST\n LIMIT 3\n)\nWHERE approximate_sum_extra < 200" + }, + { + "Customers| top-hitters 3 of Age | where approximate_count_Age > 2", + "SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_count_Age DESC NULLS LAST\n LIMIT 3\n)\nWHERE approximate_count_Age > 2" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_TopNested.cpp b/src/Parsers/tests/KQL/gtest_KQL_TopNested.cpp new file mode 100644 index 000000000000..6da8f7ff42bf --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_TopNested.cpp @@ -0,0 +1,61 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_TopNested, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "sales | top-nested 3 of region by sum(amount)", + "WITH\n source_table AS\n (\n 
SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n )\nSELECT\n region,\n aggregated_region\nFROM tb0_normal" + }, + { + "sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_partition\n WHERE row1 <= 2\n )\nSELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\nFROM tb1_normal" + }, + { + "sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson by sum(amount), top-nested 2 of salesdate by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n 
salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_partition\n WHERE row1 <= 2\n ),\n tb2_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate AS salesdate,\n sum(amount) AS aggregated_salesdate\n FROM tb1_normal\n INNER JOIN source_table AS join1 USING (region, salesperson)\n GROUP BY\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate\n ),\n tb2_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate,\n ROW_NUMBER() OVER (PARTITION BY region, salesperson ORDER BY aggregated_salesdate DESC) AS row2\n FROM tb2_prev\n ),\n tb2_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate\n FROM tb2_partition\n WHERE row2 <= 2\n )\nSELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate\nFROM tb2_normal" + }, + { + "sales | top-nested 3 of region with others = 'all other region' by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT region\n FROM tb0_normal\n )\n ),\n last_query AS\n (\n SELECT\n region,\n aggregated_region\n FROM tb0_normal\n UNION ALL\n SELECT\n 'all other region' AS region,\n aggregated_region_value AS aggregated_region\n FROM tb0_others\n )\nSELECT *\nFROM last_query" + }, + { + "sales | top-nested 3 of region with others = 'all other region' by 
sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT region\n FROM tb0_normal\n )\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_partition\n WHERE row1 <= 2\n ),\n tb1_others_prev AS\n (\n SELECT\n region,\n sum(amount) AS aggregated_salesperson_value\n FROM source_table\n LEFT JOIN tb1_normal USING (region, salesperson)\n WHERE empty(tb1_normal.salesperson) AND (source_table.region IN (\n SELECT region\n FROM tb1_normal\n ))\n GROUP BY region\n ),\n tb1_others AS\n (\n SELECT DISTINCT\n region,\n aggregated_region,\n aggregated_salesperson_value\n FROM tb1_others_prev\n RIGHT JOIN tb1_normal USING (region)\n ),\n tb0_all_others AS\n (\n SELECT\n 'all other region' AS region,\n aggregated_region_value AS aggregated_region,\n 'all other person' AS salesperson,\n aggregated_region_value AS aggregated_salesperson\n FROM tb0_others\n ),\n last_query AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_normal\n UNION ALL\n SELECT\n region,\n aggregated_region,\n 'all other person' AS salesperson,\n 
aggregated_salesperson_value AS aggregated_salesperson\n FROM tb1_others\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb0_all_others\n )\nSELECT *\nFROM last_query" + }, + { + "sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT region\n FROM tb0_normal\n )\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_partition\n WHERE row1 <= 2\n ),\n tb1_others_prev AS\n (\n SELECT\n region,\n sum(amount) AS aggregated_salesperson_value\n FROM source_table\n LEFT JOIN tb1_normal USING (region, salesperson)\n WHERE empty(tb1_normal.salesperson) AND (source_table.region IN (\n SELECT region\n FROM tb1_normal\n ))\n GROUP BY region\n ),\n tb1_others AS\n (\n SELECT DISTINCT\n region,\n aggregated_region,\n aggregated_salesperson_value\n FROM tb1_others_prev\n RIGHT JOIN tb1_normal USING (region)\n ),\n tb0_all_others AS\n (\n SELECT\n 'all other region' AS region,\n aggregated_region_value AS aggregated_region,\n NULL AS salesperson,\n NULL AS aggregated_salesperson\n FROM 
tb0_others\n ),\n last_query AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_normal\n UNION ALL\n SELECT\n region,\n aggregated_region,\n NULL AS salesperson,\n NULL AS aggregated_salesperson\n FROM tb1_others\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb0_all_others\n )\nSELECT *\nFROM last_query" + }, + { + "sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT region\n FROM tb0_normal\n )\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_partition\n WHERE row1 <= 2\n ),\n tb1_others_prev AS\n (\n SELECT\n region,\n sum(amount) AS aggregated_salesperson_value\n FROM source_table\n LEFT JOIN tb1_normal USING (region, salesperson)\n WHERE empty(tb1_normal.salesperson) AND (source_table.region IN (\n SELECT region\n FROM tb1_normal\n ))\n GROUP BY region\n ),\n tb1_others AS\n (\n SELECT DISTINCT\n region,\n aggregated_region,\n aggregated_salesperson_value\n FROM tb1_others_prev\n RIGHT JOIN tb1_normal USING (region)\n 
),\n tb0_all_others AS\n (\n SELECT\n NULL AS region,\n NULL AS aggregated_region,\n 'all other person' AS salesperson,\n aggregated_region_value AS aggregated_salesperson\n FROM tb0_others\n ),\n last_query AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_normal\n UNION ALL\n SELECT\n region,\n aggregated_region,\n 'all other person' AS salesperson,\n aggregated_salesperson_value AS aggregated_salesperson\n FROM tb1_others\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb0_all_others\n )\nSELECT *\nFROM last_query" + }, + { + "sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount), top-nested 2 of salesdate with others = 'all other date' by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT region\n FROM tb0_normal\n )\n ),\n tb1_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson AS salesperson,\n sum(amount) AS aggregated_salesperson\n FROM tb0_normal\n INNER JOIN source_table AS join1 USING (region)\n GROUP BY\n region,\n aggregated_region,\n salesperson\n ),\n tb1_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n ROW_NUMBER() OVER (PARTITION BY region ORDER BY aggregated_salesperson DESC) AS row1\n FROM tb1_prev\n ),\n tb1_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson\n FROM tb1_partition\n WHERE row1 <= 2\n ),\n tb1_others_prev AS\n (\n SELECT\n region,\n sum(amount) AS aggregated_salesperson_value\n FROM source_table\n LEFT JOIN tb1_normal 
USING (region, salesperson)\n WHERE empty(tb1_normal.salesperson) AND (source_table.region IN (\n SELECT region\n FROM tb1_normal\n ))\n GROUP BY region\n ),\n tb1_others AS\n (\n SELECT DISTINCT\n region,\n aggregated_region,\n aggregated_salesperson_value\n FROM tb1_others_prev\n RIGHT JOIN tb1_normal USING (region)\n ),\n tb2_prev AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate AS salesdate,\n sum(amount) AS aggregated_salesdate\n FROM tb1_normal\n INNER JOIN source_table AS join1 USING (region, salesperson)\n GROUP BY\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate\n ),\n tb2_partition AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate,\n ROW_NUMBER() OVER (PARTITION BY region, salesperson ORDER BY aggregated_salesdate DESC) AS row2\n FROM tb2_prev\n ),\n tb2_normal AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate\n FROM tb2_partition\n WHERE row2 <= 2\n ),\n tb2_others_prev AS\n (\n SELECT\n region,\n salesperson,\n sum(amount) AS aggregated_salesdate_value\n FROM source_table\n LEFT JOIN tb2_normal USING (region, salesperson, salesdate)\n WHERE empty(tb2_normal.salesdate) AND (source_table.region IN (\n SELECT region\n FROM tb2_normal\n )) AND (source_table.salesperson IN (\n SELECT salesperson\n FROM tb2_normal\n ))\n GROUP BY\n region,\n salesperson\n ),\n tb2_others AS\n (\n SELECT DISTINCT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n aggregated_salesdate_value\n FROM tb2_others_prev\n RIGHT JOIN tb2_normal USING (region, salesperson)\n ),\n tb0_all_others AS\n (\n SELECT\n 'all other region' AS region,\n aggregated_region_value AS aggregated_region,\n 'all other person' AS salesperson,\n aggregated_region_value AS aggregated_salesperson,\n 'all other date' AS salesdate,\n aggregated_region_value 
AS aggregated_salesdate\n FROM tb0_others\n ),\n tb1_all_others AS\n (\n SELECT\n region,\n aggregated_region,\n 'all other person' AS salesperson,\n aggregated_salesperson_value AS aggregated_salesperson,\n 'all other date' AS salesdate,\n aggregated_salesperson_value AS aggregated_salesdate\n FROM tb1_others\n ),\n last_query AS\n (\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate\n FROM tb2_normal\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n 'all other date' AS salesdate,\n aggregated_salesdate_value AS aggregated_salesdate\n FROM tb2_others\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate\n FROM tb0_all_others\n UNION ALL\n SELECT\n region,\n aggregated_region,\n salesperson,\n aggregated_salesperson,\n salesdate,\n aggregated_salesdate\n FROM tb1_all_others\n )\nSELECT *\nFROM last_query" + }, + { + "sales | top-nested 3 of region by sum(amount)*2 + 5", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n (sum(amount) * 2) + 5 AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n )\nSELECT\n region,\n aggregated_region\nFROM tb0_normal" + }, + { + "sales | top-nested strlen('abc') of region by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT lengthUTF8('abc')\n )\nSELECT\n region,\n aggregated_region\nFROM tb0_normal" + }, + { + "sales | top-nested 3 of region with others = strcat('all other',' region') by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM 
source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n LIMIT 3\n ),\n tb0_others AS\n (\n SELECT sum(amount) AS aggregated_region_value\n FROM source_table\n WHERE region NOT IN (\n SELECT region\n FROM tb0_normal\n )\n ),\n last_query AS\n (\n SELECT\n region,\n aggregated_region\n FROM tb0_normal\n UNION ALL\n SELECT\n concat(ifNull(kql_tostring('all other'), ''), ifNull(kql_tostring(' region'), ''), '') AS region,\n aggregated_region_value AS aggregated_region\n FROM tb0_others\n )\nSELECT *\nFROM last_query" + }, + { + "sales | top-nested of region by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n region AS region,\n sum(amount) AS aggregated_region\n FROM source_table\n GROUP BY region\n ORDER BY aggregated_region DESC\n )\nSELECT\n region,\n aggregated_region\nFROM tb0_normal" + }, + { + "sales | top-nested of substring(region,0,1) by sum(amount)", + "WITH\n source_table AS\n (\n SELECT *\n FROM sales\n ),\n tb0_normal AS\n (\n SELECT\n if(toInt64(length(region)) <= 0, '', substr(region, (((0 % toInt64(length(region))) + toInt64(length(region))) % toInt64(length(region))) + 1, 1)) AS Column1,\n sum(amount) AS aggregated_Column1\n FROM source_table\n GROUP BY Column1\n ORDER BY aggregated_Column1 DESC\n )\nSELECT\n Column1,\n aggregated_Column1\nFROM tb0_normal" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp new file mode 100644 index 000000000000..b577e0a8607c --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_dateTimeFunctions.cpp @@ -0,0 +1,230 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Datetime, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print week_of_year(datetime(2020-12-31))", + "SELECT toWeek(kql_datetime('2020-12-31'), 3, 'UTC') AS print_0" + }, + { + "print startofweek(datetime(2017-01-01 
10:10:17), -1)", + "SELECT kql_todatetime(addWeeks(toStartOfWeek(kql_datetime('2017-01-01 10:10:17')), -1)) AS print_0" + }, + { + "print startofmonth(datetime(2017-01-01 10:10:17), -1)", + "SELECT kql_todatetime(addMonths(toStartOfMonth(kql_datetime('2017-01-01 10:10:17')), -1)) AS print_0" + }, + { + "print startofday(datetime(2017-01-01 10:10:17), -1)", + "SELECT kql_todatetime(addDays(toStartOfDay(kql_datetime('2017-01-01 10:10:17')), -1)) AS print_0" + }, + { + "print startofyear(datetime(2017-01-01 10:10:17), -1)", + "SELECT kql_todatetime(addYears(toStartOfYear(kql_datetime('2017-01-01 10:10:17')), -1)) AS print_0" + }, + { + "print monthofyear(datetime(2015-12-14))", + "SELECT toMonth(kql_datetime('2015-12-14')) AS print_0" + }, + { + "print hourofday(datetime(2015-12-14 10:54:00))", + "SELECT toHour(kql_datetime('2015-12-14 10:54:00')) AS print_0" + }, + { + "print getyear(datetime(2015-10-12))", + "SELECT toYear(kql_datetime('2015-10-12')) AS print_0" + }, + { + "print getmonth(datetime(2015-10-12))", + "SELECT toMonth(kql_datetime('2015-10-12')) AS print_0" + }, + { + "print dayofyear(datetime(2015-10-12))", + "SELECT toDayOfYear(kql_datetime('2015-10-12')) AS print_0" + }, + { + "print dayofmonth(datetime(2015-10-12))", + "SELECT toDayOfMonth(kql_datetime('2015-10-12')) AS print_0" + }, + { + "print unixtime_seconds_todatetime(1546300899)", + "SELECT if(toTypeName(assumeNotNull(1546300899)) IN ['Int32', 'Int64', 'Float64', 'UInt32', 'UInt64'], kql_todatetime(1546300899), kql_todatetime(throwIf(true, 'unixtime_seconds_todatetime only accepts int, long and double type of arguments'))) AS print_0" + }, + { + "print dayofweek(datetime(2015-12-20))", + "SELECT (toDayOfWeek(kql_datetime('2015-12-20')) % 7) * toIntervalNanosecond(86400000000000) AS print_0" + }, + { + "print now()", + "SELECT now64(9, 'UTC') AS print_0" + }, + { + "print now(1d)", + "SELECT now64(9, 'UTC') + toIntervalNanosecond(86400000000000) AS print_0" + }, + { + "print ago(2d)", + "SELECT 
now64(9, 'UTC') + (-1 * toIntervalNanosecond(172800000000000)) AS print_0" + }, + { + "print endofday(datetime(2017-01-01 10:10:17), -1)", + "SELECT kql_todatetime(addDays(toStartOfDay(kql_datetime('2017-01-01 10:10:17')), -1 + 1)) - toIntervalNanosecond(100) AS print_0" + }, + { + "print endofday(datetime(2017-01-01 10:10:17), 1)", + "SELECT kql_todatetime(addDays(toStartOfDay(kql_datetime('2017-01-01 10:10:17')), 1 + 1)) - toIntervalNanosecond(100) AS print_0" + }, + { + "print endofmonth(datetime(2017-01-01 10:10:17), -1)", + "SELECT kql_todatetime(addMonths(toStartOfMonth(kql_datetime('2017-01-01 10:10:17')), -1 + 1)) - toIntervalNanosecond(100) AS print_0" + }, + { + "print endofmonth(datetime(2017-01-01 10:10:17), 1)", + "SELECT kql_todatetime(addMonths(toStartOfMonth(kql_datetime('2017-01-01 10:10:17')), 1 + 1)) - toIntervalNanosecond(100) AS print_0" + }, + { + "print endofweek(datetime(2017-01-01 10:10:17), -1)", + "SELECT kql_todatetime(addWeeks(toStartOfWeek(kql_datetime('2017-01-01 10:10:17')), -1 + 1)) - toIntervalNanosecond(100) AS print_0" + }, + { + "print endofweek(datetime(2017-01-01 10:10:17), 1)", + "SELECT kql_todatetime(addWeeks(toStartOfWeek(kql_datetime('2017-01-01 10:10:17')), 1 + 1)) - toIntervalNanosecond(100) AS print_0" + }, + { + "print endofyear(datetime(2017-01-01 10:10:17), -1) ", + "SELECT kql_todatetime(addYears(toStartOfYear(kql_datetime('2017-01-01 10:10:17')), -1 + 1)) - toIntervalNanosecond(100) AS print_0" + }, + { + "print endofyear(datetime(2017-01-01 10:10:17), 1)" , + "SELECT kql_todatetime(addYears(toStartOfYear(kql_datetime('2017-01-01 10:10:17')), 1 + 1)) - toIntervalNanosecond(100) AS print_0" + }, + { + "print make_datetime(2017,10,01)", + "SELECT if(((2017 >= 1900) AND (2017 <= 2261)) AND ((10 >= 1) AND (10 <= 12)) AND ((0 >= 0) AND (0 <= 59)) AND ((0 >= 0) AND (0 <= 59)) AND (0 >= 0) AND (0 < 60) AND (toModifiedJulianDayOrNull(concat(leftPad(toString(2017), 4, '0'), '-', leftPad(toString(10), 2, '0'), '-', 
leftPad(toString(1), 2, '0'))) IS NOT NULL), toDateTime64OrNull(toString(makeDateTime64(2017, 10, 1, 0, 0, truncate(0), (0 - truncate(0)) * 10000000., 7, 'UTC')), 9, 'UTC'), NULL) AS print_0" + }, + { + "print make_datetime(2017,10,01,12,10)", + "SELECT if(((2017 >= 1900) AND (2017 <= 2261)) AND ((10 >= 1) AND (10 <= 12)) AND ((12 >= 0) AND (12 <= 59)) AND ((10 >= 0) AND (10 <= 59)) AND (0 >= 0) AND (0 < 60) AND (toModifiedJulianDayOrNull(concat(leftPad(toString(2017), 4, '0'), '-', leftPad(toString(10), 2, '0'), '-', leftPad(toString(1), 2, '0'))) IS NOT NULL), toDateTime64OrNull(toString(makeDateTime64(2017, 10, 1, 12, 10, truncate(0), (0 - truncate(0)) * 10000000., 7, 'UTC')), 9, 'UTC'), NULL) AS print_0" + }, + { + "print make_datetime(2017,10,01,12,11,0.1234567)", + "SELECT if(((2017 >= 1900) AND (2017 <= 2261)) AND ((10 >= 1) AND (10 <= 12)) AND ((12 >= 0) AND (12 <= 59)) AND ((11 >= 0) AND (11 <= 59)) AND (0.1234567 >= 0) AND (0.1234567 < 60) AND (toModifiedJulianDayOrNull(concat(leftPad(toString(2017), 4, '0'), '-', leftPad(toString(10), 2, '0'), '-', leftPad(toString(1), 2, '0'))) IS NOT NULL), toDateTime64OrNull(toString(makeDateTime64(2017, 10, 1, 12, 11, truncate(0.1234567), (0.1234567 - truncate(0.1234567)) * 10000000., 7, 'UTC')), 9, 'UTC'), NULL) AS print_0" + }, + { + "print unixtime_microseconds_todatetime(1546300800000000)", + "SELECT kql_todatetime(fromUnixTimestamp64Micro(1546300800000000, 'UTC')) AS print_0" + }, + { + "print unixtime_milliseconds_todatetime(1546300800000)", + "SELECT kql_todatetime(fromUnixTimestamp64Milli(1546300800000, 'UTC')) AS print_0" + }, + { + "print unixtime_nanoseconds_todatetime(1546300800000000000)", + "SELECT kql_todatetime(fromUnixTimestamp64Nano(1546300800000000000, 'UTC')) AS print_0" + }, + { + "print datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31))", + "SELECT dateDiff('year', kql_datetime('2000-12-31'), kql_datetime('2017-01-01')) AS print_0" + }, + { + "print 
datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59))", + "SELECT dateDiff('minute', kql_datetime('2017-10-30 23:00:59'), kql_datetime('2017-10-30 23:05:01')) AS print_0" + }, + { + "print datetime(null)", + "SELECT kql_datetime(NULL) AS print_0" + }, + { + "print datetime('2014-05-25T08:20:03.123456Z')", + "SELECT kql_datetime('2014-05-25T08:20:03.123456Z') AS print_0" + }, + { + "print datetime(2015-12-14 18:54)", + "SELECT kql_datetime('2015-12-14 18:54') AS print_0" + }, + { + "print datetime(2015-12-31 23:59:59.9)", + "SELECT kql_datetime('2015-12-31 23:59:59.9') AS print_0" + }, + { + "print datetime(\"2015-12-31 23:59:59.9\")", + "SELECT kql_datetime('2015-12-31 23:59:59.9') AS print_0" + }, + { + "print datetime('2015-12-31 23:59:59.9')", + "SELECT kql_datetime('2015-12-31 23:59:59.9') AS print_0" + }, + { + "print make_timespan(67,12,30,59.9799)", + "SELECT (((67 * toIntervalNanosecond(86400000000000)) + (12 * toIntervalNanosecond(3600000000000))) + (30 * toIntervalNanosecond(60000000000))) + (59.9799 * toIntervalNanosecond(1000000000)) AS print_0" + }, + { + "print todatetime('2014-05-25T08:20:03.123456Z')", + "SELECT kql_todatetime('2014-05-25T08:20:03.123456Z') AS print_0" + }, + { + "print format_datetime(todatetime('2009-06-15T13:45:30.6175425'), 'yy-M-dd [H:mm:ss.fff]')", + "SELECT concat(substring(toString(formatDateTime(kql_todatetime('2009-06-15T13:45:30.6175425'), '%y-%m-%d [%H:%i:%S.]')), 1, position(toString(formatDateTime(kql_todatetime('2009-06-15T13:45:30.6175425'), '%y-%m-%d [%H:%i:%S.]')), '.')), substring(substring(toString(kql_todatetime('2009-06-15T13:45:30.6175425')), position(toString(kql_todatetime('2009-06-15T13:45:30.6175425')), '.') + 1), 1, 3), substring(toString(formatDateTime(kql_todatetime('2009-06-15T13:45:30.6175425'), '%y-%m-%d [%H:%i:%S.]')), position(toString(formatDateTime(kql_todatetime('2009-06-15T13:45:30.6175425'), '%y-%m-%d [%H:%i:%S.]')), '.') + 1, 
length(toString(formatDateTime(kql_todatetime('2009-06-15T13:45:30.6175425'), '%y-%m-%d [%H:%i:%S.]'))))) AS print_0" + }, + { + "print format_datetime(datetime(2015-12-14 02:03:04.12345), 'y-M-d h:m:s tt')", + "SELECT formatDateTime(kql_datetime('2015-12-14 02:03:04.12345'), '%y-%m-%e %I:%i:%S %p') AS print_0" + }, + { + "print format_timespan(time(1d), 'd-[hh:mm:ss]')", + "SELECT concat(if(length(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(86400000000000)))) < 1, leftPad(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(86400000000000))), 1, '0'), toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(86400000000000)))), '-', '[', if(length(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(3600000000000)) % 24)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(3600000000000)) % 24), 2, '0'), toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(3600000000000)) % 24)), ':', if(length(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(60000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(60000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(60000000000)) % 60)), ':', if(length(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(1000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(1000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(86400000000000), toIntervalNanosecond(1000000000)) % 60)), ']', '') AS print_0" + }, + { + "print format_timespan(time('12:30:55.123'), 'ddddd-[hh:mm:ss.ffff]')", + "SELECT concat(if(length(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(86400000000000)))) < 5, leftPad(toString(intDiv(toIntervalNanosecond(45055123000000), 
toIntervalNanosecond(86400000000000))), 5, '0'), toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(86400000000000)))), '-', '[', if(length(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(3600000000000)) % 24)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(3600000000000)) % 24), 2, '0'), toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(3600000000000)) % 24)), ':', if(length(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(60000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(60000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(60000000000)) % 60)), ':', if(length(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(1000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(1000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(1000000000)) % 60)), '.', if(length(substring(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(100)) % 10000000), 1, 4)) < 4, rightPad(substring(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(100)) % 10000000), 1, 4), 4, '0'), substring(toString(intDiv(toIntervalNanosecond(45055123000000), toIntervalNanosecond(100)) % 10000000), 1, 4)), ']', '') AS print_0" + }, + { + "print format_timespan(time('29.09:00:05.12345'), 'dd.hh:mm:ss:FF')", + "SELECT concat(if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000)))) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000))), 2, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000)))), '.', 
if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24), 2, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24)), ':', if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) % 60)), ':', if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 60)), ':', substring(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(100)) % 10000000), 1, 2), '') AS print_0" + }, + { + "print format_timespan(time('29.09:00:05.12345'), 'ddd.h:mm:ss [fffffff]');", + "SELECT concat(if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000)))) < 3, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000))), 3, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(86400000000000)))), '.', if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24)) < 1, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24), 1, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(3600000000000)) % 24)), ':', if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) 
% 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(60000000000)) % 60)), ':', if(length(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 60)) < 2, leftPad(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 60), 2, '0'), toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(1000000000)) % 60)), ' ', '[', if(length(substring(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(100)) % 10000000), 1, 7)) < 7, rightPad(substring(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(100)) % 10000000), 1, 7), 7, '0'), substring(toString(intDiv(toIntervalNanosecond(2538005123450000), toIntervalNanosecond(100)) % 10000000), 1, 7)), ']', '') AS print_0" + }, + { + "print datetime_part('day', datetime(2017-10-30 01:02:03.7654321))", + "SELECT formatDateTime(kql_datetime('2017-10-30 01:02:03.7654321'), '%e') AS print_0" + }, + { + "print datetime_add('day',1,datetime(2017-10-30 01:02:03.7654321))", + "SELECT kql_datetime(kql_datetime('2017-10-30 01:02:03.7654321') + toIntervalDay(1)) AS print_0" + }, + { + "print totimespan(time(1d))", + "SELECT kql_totimespan(toIntervalNanosecond(86400000000000)) AS print_0" + }, + { + "print totimespan('0.01:34:23')", + "SELECT kql_totimespan('0.01:34:23') AS print_0" + }, + { + "print totimespan(time('-1:12:34'))", + "SELECT kql_totimespan(toIntervalNanosecond(-4354000000000)) AS print_0" + }, + { + "print totimespan(-1d)", + "SELECT kql_totimespan(-toIntervalNanosecond(86400000000000)) AS print_0" + }, + { + "print totimespan('abc')", + "SELECT kql_totimespan('abc') AS print_0" + }, + { + "print time(2)", + "SELECT toIntervalNanosecond(172800000000000) AS print_0" + }, + { + "hits | project bin(todatetime(EventTime), 1m)", + "SELECT 
kql_bin(kql_todatetime(EventTime), toIntervalNanosecond(60000000000)) AS Column1\nFROM hits" + } + +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_dynamicFunctions.cpp b/src/Parsers/tests/KQL/gtest_KQL_dynamicFunctions.cpp new file mode 100644 index 000000000000..d47a48328da2 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_dynamicFunctions.cpp @@ -0,0 +1,152 @@ +#include +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Dynamic, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print t = array_sort_asc(dynamic([null, 'd', 'a', 'c', 'c']))", + "SELECT kql_array_sort_asc([NULL, 'd', 'a', 'c', 'c']).1 AS t" + }, + { + "print t = array_sort_asc(dynamic([4, 1, 3, 2]))", + "SELECT kql_array_sort_asc([4, 1, 3, 2]).1 AS t" + }, + { + "print t = array_sort_asc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))", + "SELECT kql_array_sort_asc(['b', 'a', 'c'], ['q', 'p', 'r']).1 AS t" + }, + { + "print t = array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , false)", + "SELECT kql_array_sort_asc(['d', NULL, 'a', 'c', 'c'], false).1 AS t" + }, + { + "print t = array_sort_asc( dynamic([null, 'd', null, null, 'a', 'c', 'c', null, null, null]) , false)", + "SELECT kql_array_sort_asc([NULL, 'd', NULL, NULL, 'a', 'c', 'c', NULL, NULL, NULL], false).1 AS t" + }, + { + "print t = array_sort_asc( dynamic([null, 'd', null, null, 'a', 'c', 'c', null, null, null]) , true)", + "SELECT kql_array_sort_asc([NULL, 'd', NULL, NULL, 'a', 'c', 'c', NULL, NULL, NULL], true).1 AS t" + }, + { + "print t = array_sort_asc( dynamic([null, 'd', null, null, 'a', 'c', 'c', null, null, null]))", + "SELECT kql_array_sort_asc([NULL, 'd', NULL, NULL, 'a', 'c', 'c', NULL, NULL, NULL]).1 AS t" + }, + { + "print t = array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']), 1 < 2)", + "SELECT kql_array_sort_asc(['d', NULL, 'a', 'c', 'c'], 1 < 2).1 AS t" + }, + { + "print t = array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , 1 
> 2)", + "SELECT kql_array_sort_asc(['d', NULL, 'a', 'c', 'c'], 1 > 2).1 AS t" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]), false)", + "SELECT kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30], false).1 AS t" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]), 1 > 2)", + "SELECT kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30], 1 > 2).1 AS t" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]), true)", + "SELECT kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30], true).1 AS t" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]), 1 < 2)", + "SELECT kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30], 1 < 2).1 AS t" + }, + { + "print t = array_sort_desc(dynamic([null, 'd', 'a', 'c', 'c']))", + "SELECT kql_array_sort_desc([NULL, 'd', 'a', 'c', 'c']).1 AS t" + }, + { + "print t = array_sort_desc(dynamic([4, 1, 3, 2]))", + "SELECT kql_array_sort_desc([4, 1, 3, 2]).1 AS t" + }, + { + "print t = array_sort_desc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))", + "SELECT kql_array_sort_desc(['b', 'a', 'c'], ['q', 'p', 'r']).1 AS t" + }, + { + "print array_sort_desc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))", + "SELECT kql_array_sort_desc(['b', 'a', 'c'], ['q', 'p', 'r']) AS print_0" + }, + { + "print t = array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , false)", + "SELECT kql_array_sort_desc(['d', NULL, 'a', 'c', 'c'], false).1 AS t" + }, + { + "print array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]))[0]", + "SELECT kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).1 AS print_0" + }, + { + "print (t) = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]))", + "SELECT kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).1 AS t" + }, + { + "print (t,w) = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30]))", + "SELECT\n kql_array_sort_asc([2, 1, 
NULL, 3], [20, 10, 40, 30]).1 AS t,\n kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).2 AS w" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30])),y=5", + "SELECT\n kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).1 AS t,\n 5 AS y" + }, + { + "print 5, (t) = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30])),y=5", + "SELECT\n 5 AS print_0,\n kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).1 AS t,\n 5 AS y" + }, + { + "print t = array_sort_asc(dynamic([2, 1, null,3]), dynamic([20, 10, 40, 30])),w = array_sort_asc(dynamic([2, 1, 3]))", + "SELECT\n kql_array_sort_asc([2, 1, NULL, 3], [20, 10, 40, 30]).1 AS t,\n kql_array_sort_asc([2, 1, 3]).1 AS w" + }, + { + "print A[0]", + "SELECT A[1] AS print_0" + }, + { + "print A[0][1]", + "SELECT (A[1])[2] AS print_0" + }, + { + "print A[-5]", + "SELECT A[-5] AS print_0" + }, + { + "print A[-1][1]", + "SELECT (A[-1])[2] AS print_0" + }, + { + "print dynamic([[1,2,3,4,5],[20,30]])[0]", + "SELECT [[1, 2, 3, 4, 5], [20, 30]][1] AS print_0" + }, + { + "print dynamic([[1,2,3,4,5],[20,30]])[1][1]", + "SELECT ([[1, 2, 3, 4, 5], [20, 30]][2])[2] AS print_0" + }, + { + "print dynamic([[1,2,3,4,5],[20,30]])[1][-1]", + "SELECT ([[1, 2, 3, 4, 5], [20, 30]][2])[-1] AS print_0" + }, + { + "print A[B[1]]", + "SELECT A[if((B[2]) >= 0, (B[2]) + 1, B[2])] AS print_0" + }, + { + "print A[strlen('a')-1]", + "SELECT A[if((lengthUTF8('a') - 1) >= 0, (lengthUTF8('a') - 1) + 1, lengthUTF8('a') - 1)] AS print_0" + }, + { + "print strlen(A[0])", + "SELECT lengthUTF8(A[1]) AS print_0" + }, + { + "print repeat(1, 3)", + "SELECT if(3 < 0, [NULL], arrayWithConstant(abs(3), 1)) AS print_0" + }, + { + "print repeat(1, -3)", + "SELECT if(-3 < 0, [NULL], arrayWithConstant(abs(-3), 1)) AS print_0" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_lookup_join.cpp b/src/Parsers/tests/KQL/gtest_KQL_lookup_join.cpp new file mode 100644 index 000000000000..b8a67a4ef95e --- /dev/null +++ 
b/src/Parsers/tests/KQL/gtest_KQL_lookup_join.cpp @@ -0,0 +1,106 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_lookup_join, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "FactTable | lookup kind=leftouter DimTable on Personal, Family", + "SELECT *\nFROM FactTable AS left_\nLEFT JOIN DimTable AS right_ USING (Personal, Family)" + }, + { + "FactTable | lookup kind=inner DimTable on Personal, Family", + "SELECT *\nFROM FactTable AS left_\nINNER JOIN DimTable AS right_ USING (Personal, Family)" + }, + { + "FactTable | lookup kind=leftouter (DimTable | where Personal == 'Bill') on Personal, Family", + "SELECT *\nFROM FactTable AS left_\nLEFT JOIN\n(\n SELECT *\n FROM DimTable\n WHERE Personal = 'Bill'\n) AS right_ USING (Personal, Family)" + }, + { + "FactTable | project Personal , Family| lookup kind=leftouter DimTable on Personal, Family", + "SELECT *\nFROM\n(\n SELECT\n Personal,\n Family\n FROM FactTable\n) AS left_\nLEFT JOIN DimTable AS right_ USING (Personal, Family)" + }, + { + "FactTable | lookup kind=leftouter DimTable on $left.Personal == $right.Personal, $left.Family == $right.Family", + "SELECT *\nFROM FactTable AS left_\nLEFT JOIN DimTable AS right_ USING (Personal, Family)" + }, + { + "FactTable | lookup kind=leftouter DimTable on Personal , $left.Family == $right.Family", + "SELECT *\nFROM FactTable AS left_\nLEFT JOIN DimTable AS right_ USING (Personal, Family)" + }, + { + "FactTable|lookup kind=leftouter DimTable on Personal , ($left.Family == $right.Family)", + "SELECT *\nFROM FactTable AS left_\nLEFT JOIN DimTable AS right_ USING (Personal, Family)" + }, + { + "FactTable | project Row, Personal , Family | lookup kind=leftouter (FactTable | lookup kind=leftouter DimTable on Personal) on Personal, Family", + "SELECT *\nFROM\n(\n SELECT\n Row,\n Personal,\n Family\n FROM FactTable\n) AS left_\nLEFT JOIN\n(\n SELECT *\n FROM FactTable AS left_\n LEFT 
JOIN DimTable AS right_ USING (Personal)\n) AS right_ USING (Personal, Family)" + }, + { + "FactTable|project Row, Personal , Family| lookup kind=leftouter (DimTable | where Personal == 'Bill') on Personal, Family| lookup kind=inner DimTable on Personal, Family", + "SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Row,\n Personal,\n Family\n FROM FactTable\n ) AS left_\n LEFT JOIN\n (\n SELECT *\n FROM DimTable\n WHERE Personal = 'Bill'\n ) AS right_ USING (Personal, Family)\n) AS left_\nINNER JOIN DimTable AS right_ USING (Personal, Family)" + }, + { + "FactTable| lookup kind=leftouter DimTable on $left.Personal == $right.FirstName, Family", + "SELECT *\nFROM FactTable AS left_\nLEFT JOIN DimTable AS right_ ON (left_.Personal = right_.FirstName) AND (left_.Family = right_.Family)" + }, + { + "FactTable| lookup kind=leftouter DimTable on $left.Personal == $right.FirstName, Family| lookup kind=inner DimTable on $left.Personal == $right.FirstName", + "SELECT *\nFROM\n(\n SELECT *\n FROM FactTable AS left_\n LEFT JOIN DimTable AS right_ ON (left_.Personal = right_.FirstName) AND (left_.Family = right_.Family)\n) AS left_\nINNER JOIN DimTable AS right_ ON left_.Personal = right_.FirstName" + }, + { + "X | join Y on Key", + "SELECT *\nFROM\n(\n SELECT *\n FROM X\n LIMIT 1 BY Key\n) AS left_\nINNER JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=innerunique Y on Key", + "SELECT *\nFROM\n(\n SELECT *\n FROM X\n LIMIT 1 BY Key\n) AS left_\nINNER JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=inner Y on Key", + "SELECT *\nFROM X AS left_\nINNER JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=leftouter Y on Key", + "SELECT *\nFROM X AS left_\nLEFT JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=rightouter Y on Key", + "SELECT *\nFROM X AS left_\nRIGHT JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=fullouter Y on Key", + "SELECT *\nFROM X AS left_\nFULL OUTER JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=leftanti Y on 
Key", + "SELECT *\nFROM X AS left_\nANTI LEFT JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=rightanti Y on Key", + "SELECT *\nFROM X AS left_\nANTI RIGHT JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=leftsemi Y on Key", + "SELECT *\nFROM X AS left_\nSEMI LEFT JOIN Y AS right_ USING (Key)" + }, + { + "X | join kind=rightsemi Y on Key", + "SELECT *\nFROM X AS left_\nSEMI RIGHT JOIN Y AS right_ USING (Key)" + }, + { + "X | join Y on $left.Key == $right.Key", + "SELECT *\nFROM\n(\n SELECT *\n FROM X\n LIMIT 1 BY Key\n) AS left_\nINNER JOIN Y AS right_ USING (Key)" + }, + { + "X | join Y on $left.Key == $right.Key2", + "SELECT *\nFROM\n(\n SELECT *\n FROM X\n LIMIT 1 BY Key\n) AS left_\nINNER JOIN Y AS right_ ON left_.Key = right_.Key2" + }, + { + "X | join (Y | project Key, value2) on $left.Key == $right.Key", + "SELECT *\nFROM\n(\n SELECT *\n FROM X\n LIMIT 1 BY Key\n) AS left_\nINNER JOIN\n(\n SELECT\n Key,\n value2\n FROM Y\n) AS right_ USING (Key)" + } + +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_operator.cpp b/src/Parsers/tests/KQL/gtest_KQL_operator.cpp new file mode 100644 index 000000000000..8748ded123da --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_operator.cpp @@ -0,0 +1,21 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_operator, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "TableWithVariousDataTypes | project JoinDate | where JoinDate between (datetime('2020-06-30') .. datetime('2025-06-30'))", + "SELECT JoinDate\nFROM TableWithVariousDataTypes\nWHERE kql_between(JoinDate, kql_datetime('2020-06-30'), kql_datetime('2025-06-30'))" + }, + { + "TableWithVariousDataTypes | project JoinDate | where JoinDate !between (datetime('2020-01-01') .. 
2d)", + "SELECT JoinDate\nFROM TableWithVariousDataTypes\nWHERE NOT kql_between(JoinDate, kql_datetime('2020-01-01'), toIntervalNanosecond(172800000000000))" + }, + { + "TableWithVariousDataTypes | project Age | where Age between (10 .. 12) or Age between (30 .. 50)", + "SELECT Age\nFROM TableWithVariousDataTypes\nWHERE kql_between(Age, 10, 12) OR kql_between(Age, 30, 50)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_operator_in_sql.cpp b/src/Parsers/tests/KQL/gtest_KQL_operator_in_sql.cpp new file mode 100644 index 000000000000..9b39368aaef0 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_operator_in_sql.cpp @@ -0,0 +1,134 @@ +#include + +#include +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_operator_in_sql, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "select * from kql(Customers | where FirstName !in ('Peter', 'Latoya'))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE FirstName NOT IN ('Peter', 'Latoya')\n)" + }, + { + "select * from kql(Customers | where FirstName !contains 'Pet');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT (FirstName ILIKE '%Pet%')\n)" + }, + { + "select * from kql(Customers | where FirstName !contains_cs 'Pet');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT (FirstName LIKE '%Pet%')\n)" + }, + { + "select * from kql(Customers | where FirstName !endswith 'ter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT (FirstName ILIKE '%ter')\n)" + }, + { + "select * from kql(Customers | where FirstName !endswith_cs 'ter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT endsWith(FirstName, 'ter')\n)" + }, + { + "select * from kql(Customers | where FirstName != 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE FirstName != 'Peter'\n)" + }, + { + "select * from kql(Customers | where FirstName !has 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM
Customers\n WHERE NOT ifNull(hasTokenCaseInsensitiveOrNull(FirstName, 'Peter'), hasTokenCaseInsensitive(FirstName, 'Peter') AND (positionCaseInsensitive(FirstName, 'Peter') > 0))\n)" + }, + { + "select * from kql(Customers | where FirstName !has_cs 'peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT ifNull(hasTokenOrNull(FirstName, 'peter'), hasToken(FirstName, 'peter') AND (position(FirstName, 'peter') > 0))\n)" + }, + { + "select * from kql(Customers | where FirstName !hasprefix 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (NOT (FirstName ILIKE 'Peter%')) AND (NOT (FirstName ILIKE '% Peter%'))\n)" + }, + { + "select * from kql(Customers | where FirstName !hasprefix_cs 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (NOT startsWith(FirstName, 'Peter')) AND (NOT (FirstName LIKE '% Peter%'))\n)" + }, + { + "select * from kql(Customers | where FirstName !hassuffix 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (NOT (FirstName ILIKE '%Peter')) AND (NOT (FirstName ILIKE '%Peter %'))\n)" + }, + { + "select * from kql(Customers | where FirstName !hassuffix_cs 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (NOT endsWith(FirstName, 'Peter')) AND (NOT (FirstName LIKE '%Peter %'))\n)" + }, + { + "select * from kql(Customers | where FirstName !startswith 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT (FirstName ILIKE 'Peter%')\n)" + }, + { + "select * from kql(Customers | where FirstName !startswith_cs 'Peter');", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT startsWith(FirstName, 'Peter')\n)" + }, + { + "select * from kql(print t = 'a' in~ ('A', 'b', 'c'))", + "SELECT *\nFROM\n(\n SELECT lower('a') IN (lower('A'), lower('b'), lower('c')) AS t\n)" + }, + { + "select * from kql(Customers | where FirstName in~ ('peter', 'apple'))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) IN (lower('peter'), 
lower('apple'))\n)" + }, + { + "select * from kql(Customers | where FirstName in~ ((Customers | project FirstName | where FirstName == 'Peter')))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE FirstName = 'Peter'\n )\n)" + }, + { + "select * from kql(Customers | where FirstName in~ ((Customers | project FirstName | where Age < 30)))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE Age < 30\n )\n)" + }, + { + "select * from kql(print t = 'a' !in~ ('A', 'b', 'c'))", + "SELECT *\nFROM\n(\n SELECT lower('a') NOT IN (lower('A'), lower('b'), lower('c')) AS t\n)" + }, + { + "select * from kql(print t = 'a' !in~ (dynamic(['A', 'b', 'c'])))", + "SELECT *\nFROM\n(\n SELECT lower('a') NOT IN (lower('A'), lower('b'), lower('c')) AS t\n)" + }, + { + "select * from kql(Customers | where FirstName !in~ ('peter', 'apple'))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) NOT IN (lower('peter'), lower('apple'))\n)" + }, + { + "select * from kql(Customers | where FirstName !in~ ((Customers | project FirstName | where FirstName == 'Peter')))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) NOT IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE FirstName = 'Peter'\n )\n)" + }, + { + "select * from kql(Customers | where FirstName !in~ ((Customers | project FirstName | where Age < 30)))", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) NOT IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE Age < 30\n )\n)" + }, + { + "select * from kql(Customers | where FirstName =~ 'peter' and LastName =~ 'naRA')", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (lower(FirstName) = lower('peter')) AND (lower(LastName) = lower('naRA'))\n)" + }, + { + "select * from kql(Customers | where FirstName !~ 'nEyMaR' and LastName =~ 'naRA')", + 
"SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE (lower(FirstName) != lower('nEyMaR')) AND (lower(LastName) = lower('naRA'))\n)" + }, + { + "select * from kql($$Customers | where FirstName !in ('Peter', 'Latoya')$$)", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE FirstName NOT IN ('Peter', 'Latoya')\n)" + }, + { + "select * from kql($$Customers | where FirstName !contains 'Pet'$$);", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT (FirstName ILIKE '%Pet%')\n)" + }, + { + "select * from kql($$Customers | where FirstName !contains_cs 'Pet'$$);", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE NOT (FirstName LIKE '%Pet%')\n)" + }, + { + "select * from kql($$Customers | where FirstName in~ ((Customers | project FirstName | where Age < 30))$$)", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n WHERE lower(FirstName) IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE Age < 30\n )\n)" + }, + { + "select * from kql($$print t = 'a' !in~ (dynamic(['A', 'b', 'c']))$$)", + "SELECT *\nFROM\n(\n SELECT lower('a') NOT IN (lower('A'), lower('b'), lower('c')) AS t\n)" + }, + { + "select * from kql($IBM$print t = 'a' !in~ (dynamic(['A', 'b', 'c']))$IBM$)", + "SELECT *\nFROM\n(\n SELECT lower('a') NOT IN (lower('A'), lower('b'), lower('c')) AS t\n)" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_range.cpp b/src/Parsers/tests/KQL/gtest_KQL_range.cpp new file mode 100644 index 000000000000..d52f37d82baf --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_range.cpp @@ -0,0 +1,81 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Range, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "print range(1, 10, 2)", + "SELECT kql_range(1, 10, 2) AS print_0" + }, + { + "print range(1, 10)", + "SELECT kql_range(1, 10) AS print_0" + }, + { + "print range(1.2, 10.3, 2.2)", + "SELECT kql_range(1.2, 10.3, 2.2) AS print_0" + }, + { + "print range(1.2, 
10.3, 2)", + "SELECT kql_range(1.2, 10.3, 2) AS print_0" + }, + { + "print range(1.2, 10,2.2)", + "SELECT kql_range(1.2, 10, 2.2) AS print_0" + }, + { + "print range(1, 10, 2.2)", + "SELECT kql_range(1, 10, 2.2) AS print_0" + }, + { + "print range(1, 10.5, 2.2)", + "SELECT kql_range(1, 10.5, 2.2) AS print_0" + }, + { + "print range(1.1, 10 ,2.2)", + "SELECT kql_range(1.1, 10, 2.2) AS print_0" + }, + { + "print range(1.2, 10, 2)", + "SELECT kql_range(1.2, 10, 2) AS print_0" + }, + { + "print range(datetime('2001-01-01'), datetime('2001-01-02'), 5h)", + "SELECT kql_range(kql_datetime('2001-01-01'), kql_datetime('2001-01-02'), toIntervalNanosecond(18000000000000)) AS print_0" + }, + { + "print range(datetime('2001-01-01'), datetime('2001-01-02'))", + "SELECT kql_range(kql_datetime('2001-01-01'), kql_datetime('2001-01-02')) AS print_0" + }, + { + "print range(1h, 5h, 2h)", + "SELECT kql_range(toIntervalNanosecond(3600000000000), toIntervalNanosecond(18000000000000), toIntervalNanosecond(7200000000000)) AS print_0" + }, + { + "print range(1.5h, 5h, 2h)", + "SELECT kql_range(toIntervalNanosecond(5400000000000), toIntervalNanosecond(18000000000000), toIntervalNanosecond(7200000000000)) AS print_0" + }, + { + "print range(ago(1d),now(),1d)", + "SELECT kql_range(now64(9, 'UTC') + (-1 * toIntervalNanosecond(86400000000000)), now64(9, 'UTC'), toIntervalNanosecond(86400000000000)) AS print_0" + }, + { + "print range(endofday(datetime(2017-01-01 10:10:17)), endofday(datetime(2017-01-03 10:10:17)), 1d)", + "SELECT kql_range(kql_todatetime(addDays(toStartOfDay(kql_datetime('2017-01-01 10:10:17')), 0 + 1)) - toIntervalNanosecond(100), kql_todatetime(addDays(toStartOfDay(kql_datetime('2017-01-03 10:10:17')), 0 + 1)) - toIntervalNanosecond(100), toIntervalNanosecond(86400000000000)) AS print_0" + }, + { + "range Age from 20 to 25 step 1", + "SELECT *\nFROM\n(\n SELECT kql_range(20, 25, 1) AS Age\n)\nARRAY JOIN Age" + }, + { + "range LastWeek from ago(7d) to now() step 1d", + "SELECT 
*\nFROM\n(\n SELECT kql_range(now64(9, 'UTC') + (-1 * toIntervalNanosecond(604800000000000)), now64(9, 'UTC'), toIntervalNanosecond(86400000000000)) AS LastWeek\n)\nARRAY JOIN LastWeek" + }, + { + "range FirstWeek from datetime('2023-01-01') to datetime('2023-01-07') step 1d", + "SELECT *\nFROM\n(\n SELECT kql_range(kql_datetime('2023-01-01'), kql_datetime('2023-01-07'), toIntervalNanosecond(86400000000000)) AS FirstWeek\n)\nARRAY JOIN FirstWeek" + } +}))); diff --git a/src/Parsers/tests/KQL/gtest_KQL_test_subquery.cpp b/src/Parsers/tests/KQL/gtest_KQL_test_subquery.cpp new file mode 100644 index 000000000000..8544f11f0373 --- /dev/null +++ b/src/Parsers/tests/KQL/gtest_KQL_test_subquery.cpp @@ -0,0 +1,109 @@ +#include + +#include + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Subquery, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !in ('Peter', 'Latoya')));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE FirstName NOT IN ('Peter', 'Latoya')\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName, Age | where Age !in (28, 29)));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE Age NOT IN (28, 29)\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !contains 'ste'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT (FirstName ILIKE '%ste%')\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !contains_cs 'Ste'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT (FirstName LIKE '%Ste%')\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !contains_cs 'ste'));", + 
"SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT (FirstName LIKE '%ste%')\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !endswith 'ore'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT (FirstName ILIKE '%ore')\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !endswith_cs 'Ore'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT endsWith(FirstName, 'Ore')\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName != 'Theodore'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE FirstName != 'Theodore'\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !~ 'theodore'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE lower(FirstName) != lower('theodore')\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !has 'Peter'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT ifNull(hasTokenCaseInsensitiveOrNull(FirstName, 'Peter'), hasTokenCaseInsensitive(FirstName, 'Peter') AND (positionCaseInsensitive(FirstName, 'Peter') > 0))\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !has_cs 'Peter'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT ifNull(hasTokenOrNull(FirstName, 'Peter'), hasToken(FirstName, 'Peter') AND (position(FirstName, 'Peter') > 0))\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !hasprefix 'Peter'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n 
WHERE (NOT (FirstName ILIKE 'Peter%')) AND (NOT (FirstName ILIKE '% Peter%'))\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !hasprefix_cs 'Peter'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE (NOT startsWith(FirstName, 'Peter')) AND (NOT (FirstName LIKE '% Peter%'))\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !hassuffix 'Peter'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE (NOT (FirstName ILIKE '%Peter')) AND (NOT (FirstName ILIKE '%Peter %'))\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !hassuffix_cs 'Peter'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE (NOT endsWith(FirstName, 'Peter')) AND (NOT (FirstName LIKE '%Peter %'))\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !startswith 'Peter'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT (FirstName ILIKE 'Peter%')\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName !startswith_cs 'Peter'));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE NOT startsWith(FirstName, 'Peter')\n)" + }, + { + "Customers | where FirstName !in~ ((Customers | project FirstName | where FirstName !in~ ('peter', 'apple')));", + "SELECT *\nFROM Customers\nWHERE lower(FirstName) NOT IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE lower(FirstName) NOT IN (lower('peter'), lower('apple'))\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName | where FirstName in~ ('peter', 'apple')));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n WHERE lower(FirstName) IN 
(lower('peter'), lower('apple'))\n)" + }, + { + "Customers | where substring(FirstName,0,3) in~ ((Customers | project substring(FirstName,0,3) | where FirstName in~ ('peter', 'apple')));", + "SELECT *\nFROM Customers\nWHERE lower(if(toInt64(length(FirstName)) <= 0, '', substr(FirstName, (((0 % toInt64(length(FirstName))) + toInt64(length(FirstName))) % toInt64(length(FirstName))) + 1, 3))) IN (\n SELECT lower(if(toInt64(length(FirstName)) <= 0, '', substr(FirstName, (((0 % toInt64(length(FirstName))) + toInt64(length(FirstName))) % toInt64(length(FirstName))) + 1, 3))) AS Column1\n FROM Customers\n WHERE lower(FirstName) IN (lower('peter'), lower('apple'))\n)" + }, + { + "Customers | where FirstName in~ ((Customers | where FirstName !in~ ('peter', 'apple')| project FirstName));", + "SELECT *\nFROM Customers\nWHERE lower(FirstName) IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE lower(FirstName) NOT IN (lower('peter'), lower('apple'))\n)" + }, + { + "Customers | where FirstName in ((Customers | project FirstName, LastName, Age));", + "SELECT *\nFROM Customers\nWHERE FirstName IN (\n SELECT FirstName\n FROM Customers\n)" + }, + { + "Customers | where FirstName in~ ((Customers | project FirstName, LastName, Age|where Age <30));", + "SELECT *\nFROM Customers\nWHERE lower(FirstName) IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE Age < 30\n)" + }, + { + "Customers | where FirstName !in ((Customers | project FirstName, LastName, Age |where Age <30 ));", + "SELECT *\nFROM Customers\nWHERE FirstName NOT IN (\n SELECT FirstName\n FROM Customers\n WHERE Age < 30\n)" + }, + { + "Customers | where FirstName !in~ ((Customers | project FirstName, LastName, Age |where Age <30));", + "SELECT *\nFROM Customers\nWHERE lower(FirstName) NOT IN (\n SELECT lower(FirstName)\n FROM Customers\n WHERE Age < 30\n)" + } +}))); diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 18e91c533e02..8b4055439d5f 100644 --- 
a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -1,9 +1,9 @@ +#include #include #include #include #include #include -#include #include #include #include @@ -13,11 +13,7 @@ #include #include #include -#include #include -#include -#include -#include namespace { @@ -25,13 +21,6 @@ using namespace DB; using namespace std::literals; } - -struct ParserTestCase -{ - const std::string_view input_text; - const char * expected_ast = nullptr; -}; - std::ostream & operator<<(std::ostream & ostr, const std::shared_ptr parser) { return ostr << "Parser: " << parser->getName(); @@ -42,57 +31,6 @@ std::ostream & operator<<(std::ostream & ostr, const ParserTestCase & test_case) return ostr << "ParserTestCase input: " << test_case.input_text; } -class ParserTest : public ::testing::TestWithParam, ParserTestCase>> -{}; - -TEST_P(ParserTest, parseQuery) -{ - const auto & parser = std::get<0>(GetParam()); - const auto & [input_text, expected_ast] = std::get<1>(GetParam()); - - ASSERT_NE(nullptr, parser); - - if (expected_ast) - { - if (std::string(expected_ast).starts_with("throws")) - { - EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } - else - { - ASTPtr ast; - ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); - if (std::string("CREATE USER or ALTER USER query") != parser->getName() - && std::string("ATTACH access entity query") != parser->getName()) - { - WriteBufferFromOwnString buf; - formatAST(*ast->clone(), buf, false, false); - String formatted_ast = buf.str(); - EXPECT_EQ(expected_ast, formatted_ast); - } - else - { - if (input_text.starts_with("ATTACH")) - { - auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt().value_or(""); - EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); - } - else - { - WriteBufferFromOwnString buf; - formatAST(*ast->clone(), buf, false, false); - String formatted_ast = buf.str(); - 
EXPECT_TRUE(std::regex_match(formatted_ast, std::regex(expected_ast))); - } - } - } - } - else - { - ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); - } -} - INSTANTIATE_TEST_SUITE_P(ParserOptimizeQuery, ParserTest, ::testing::Combine( ::testing::Values(std::make_shared()), @@ -302,188 +240,6 @@ INSTANTIATE_TEST_SUITE_P(ParserAttachUserQuery, ParserTest, } }))); -INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, - ::testing::Combine( - ::testing::Values(std::make_shared()), - ::testing::ValuesIn(std::initializer_list{ - { - "Customers", - "SELECT *\nFROM Customers" - }, - { - "Customers | project FirstName,LastName,Occupation", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers" - }, - { - "Customers | project FirstName,LastName,Occupation | take 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" - }, - { - "Customers | project FirstName,LastName,Occupation | limit 3", - "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" - }, - { - "Customers | project FirstName,LastName,Occupation | take 1 | take 3", - "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 1\n)\nLIMIT 3" - }, - { - "Customers | project FirstName,LastName,Occupation | take 3 | take 1", - "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)\nLIMIT 1" - }, - { - "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", - "SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" - }, - { - "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", - "SELECT\n FirstName,\n LastName,\n Education\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" - }, - { - "Customers | sort by FirstName desc", - "SELECT *\nFROM Customers\nORDER BY FirstName DESC" - 
}, - { - "Customers | take 3 | order by FirstName desc", - "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)\nORDER BY FirstName DESC" - }, - { - "Customers | sort by FirstName asc", - "SELECT *\nFROM Customers\nORDER BY FirstName ASC" - }, - { - "Customers | sort by FirstName", - "SELECT *\nFROM Customers\nORDER BY FirstName DESC" - }, - { - "Customers | order by LastName", - "SELECT *\nFROM Customers\nORDER BY LastName DESC" - }, - { - "Customers | order by Age desc, FirstName asc ", - "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC" - }, - { - "Customers | order by Age asc, FirstName desc", - "SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC" - }, - { - "Customers | sort by FirstName | order by Age ", - "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName DESC" - }, - { - "Customers | sort by FirstName nulls first", - "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST" - }, - { - "Customers | sort by FirstName nulls last", - "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST" - }, - { - "Customers | where Occupation == 'Skilled Manual'", - "SELECT *\nFROM Customers\nWHERE Occupation = 'Skilled Manual'" - }, - { - "Customers | where Occupation != 'Skilled Manual'", - "SELECT *\nFROM Customers\nWHERE Occupation != 'Skilled Manual'" - }, - { - "Customers |where Education in ('Bachelors','High School')", - "SELECT *\nFROM Customers\nWHERE Education IN ('Bachelors', 'High School')" - }, - { - "Customers | where Education !in ('Bachelors','High School')", - "SELECT *\nFROM Customers\nWHERE Education NOT IN ('Bachelors', 'High School')" - }, - { - "Customers |where Education contains_cs 'Degree'", - "SELECT *\nFROM Customers\nWHERE Education LIKE '%Degree%'" - }, - { - "Customers | where Occupation startswith_cs 'Skil'", - "SELECT *\nFROM Customers\nWHERE startsWith(Occupation, 'Skil')" - }, - { - "Customers | where FirstName endswith_cs 'le'", - "SELECT *\nFROM Customers\nWHERE endsWith(FirstName, 
'le')" - }, - { - "Customers | where Age == 26", - "SELECT *\nFROM Customers\nWHERE Age = 26" - }, - { - "Customers | where Age > 20 and Age < 30", - "SELECT *\nFROM Customers\nWHERE (Age > 20) AND (Age < 30)" - }, - { - "Customers | where Age > 30 | where Education == 'Bachelors'", - "SELECT *\nFROM Customers\nWHERE (Education = 'Bachelors') AND (Age > 30)" - }, - { - "Customers |summarize count() by Occupation", - "SELECT\n count(),\n Occupation\nFROM Customers\nGROUP BY Occupation" - }, - { - "Customers|summarize sum(Age) by Occupation", - "SELECT\n sum(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" - }, - { - "Customers|summarize avg(Age) by Occupation", - "SELECT\n avg(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" - }, - { - "Customers|summarize min(Age) by Occupation", - "SELECT\n min(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" - }, - { - "Customers |summarize max(Age) by Occupation", - "SELECT\n max(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" - }, - { - "Customers | where FirstName contains 'pet'", - "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" - }, - { - "Customers | where FirstName !contains 'pet'", - "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%pet%')" - }, - { - "Customers | where FirstName endswith 'er'", - "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%er'" - }, - { - "Customers | where FirstName !endswith 'er'", - "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%er')" - }, - { - "Customers | where Education has 'School'", - "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Education, 'School')" - }, - { - "Customers | where Education !has 'School'", - "SELECT *\nFROM Customers\nWHERE NOT hasTokenCaseInsensitive(Education, 'School')" - }, - { - "Customers | where Education has_cs 'School'", - "SELECT *\nFROM Customers\nWHERE hasToken(Education, 'School')" - }, - { - "Customers | where Education !has_cs 'School'", - "SELECT *\nFROM Customers\nWHERE NOT 
hasToken(Education, 'School')" - }, - { - "Customers | where FirstName matches regex 'P.*r'", - "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" - }, - { - "Customers | where FirstName startswith 'pet'", - "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" - }, - { - "Customers | where FirstName !startswith 'pet'", - "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" - } -}))); - static constexpr size_t kDummyMaxQuerySize = 256 * 1024; static constexpr size_t kDummyMaxParserDepth = 256; diff --git a/src/Parsers/tests/gtest_common.cpp b/src/Parsers/tests/gtest_common.cpp new file mode 100644 index 000000000000..2030ab09c801 --- /dev/null +++ b/src/Parsers/tests/gtest_common.cpp @@ -0,0 +1,74 @@ +#include "gtest_common.h" + +#include +#include +#include +#include + +#include + +#include + +TEST_P(ParserTest, parseQuery) +{ + const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + + ASSERT_NE(nullptr, parser); + + if (expected_ast) + { + if (std::string(expected_ast).starts_with("throws")) + { + EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } + else + { + DB::ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + if (std::string("CREATE USER or ALTER USER query") != parser->getName() + && std::string("ATTACH access entity query") != parser->getName()) + { + DB::WriteBufferFromOwnString buf; + formatAST(*ast->clone(), buf, false, false); + String formatted_ast = buf.str(); + EXPECT_EQ(expected_ast, formatted_ast); + } + else + { + if (input_text.starts_with("ATTACH")) + { + auto salt = (dynamic_cast(ast.get())->auth_data)->getSalt().value_or(""); + EXPECT_TRUE(std::regex_match(salt, std::regex(expected_ast))); + } + else + { + DB::WriteBufferFromOwnString buf; + formatAST(*ast->clone(), buf, false, false); + String formatted_ast = buf.str(); + 
EXPECT_TRUE(std::regex_match(formatted_ast, std::regex(expected_ast))); + } + } + } + } + else + { + ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + } +} + +TEST_P(ParserRegexTest, parseQuery) +{ + const auto & parser = std::get<0>(GetParam()); + const auto & [input_text, expected_ast] = std::get<1>(GetParam()); + + ASSERT_TRUE(parser); + ASSERT_TRUE(expected_ast); + + DB::ASTPtr ast; + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + + DB::WriteBufferFromOwnString buf; + formatAST(*ast->clone(), buf, false, false); + EXPECT_THAT(buf.str(), ::testing::MatchesRegex(expected_ast)); +} diff --git a/src/Parsers/tests/gtest_common.h b/src/Parsers/tests/gtest_common.h new file mode 100644 index 000000000000..4eca9390d92c --- /dev/null +++ b/src/Parsers/tests/gtest_common.h @@ -0,0 +1,17 @@ +#include + +#include + +#include + +struct ParserTestCase +{ + const std::string_view input_text; + const char * expected_ast = nullptr; +}; + +class ParserTest : public ::testing::TestWithParam, ParserTestCase>> +{}; + +class ParserRegexTest : public ::testing::TestWithParam, ParserTestCase>> +{}; diff --git a/src/Processors/Transforms/AddingDefaultsTransform.cpp b/src/Processors/Transforms/AddingDefaultsTransform.cpp index e6c2bcec2c85..bf8b380e7268 100644 --- a/src/Processors/Transforms/AddingDefaultsTransform.cpp +++ b/src/Processors/Transforms/AddingDefaultsTransform.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index e3edc281e831..58d227a5ae57 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -175,10 +175,13 @@ struct SocketInterruptablePollWrapper } while (rc < 0 && errno == POCO_EINTR); - if (rc >= 1 && poll_buf[0].revents & POLLIN) - socket_ready = true; - if (rc >= 2 && poll_buf[1].revents & POLLIN) - fd_ready = true; + if (rc >= 1) + 
{ + if (poll_buf[0].revents & POLLIN) + socket_ready = true; + if (poll_buf[1].revents & POLLIN) + fd_ready = true; + } #endif } diff --git a/src/Server/ServerType.cpp b/src/Server/ServerType.cpp index c6916ee39d94..4952cd1bd24c 100644 --- a/src/Server/ServerType.cpp +++ b/src/Server/ServerType.cpp @@ -40,7 +40,7 @@ const char * ServerType::serverTypeToString(ServerType::Type type) return type_name.data(); } -bool ServerType::shouldStart(Type server_type, const std::string & custom_name_) const +bool ServerType::shouldStart(Type server_type, const std::string & server_custom_name) const { if (type == Type::QUERIES_ALL) return true; @@ -77,13 +77,15 @@ bool ServerType::shouldStart(Type server_type, const std::string & custom_name_) } } - return type == server_type && custom_name == custom_name_; + if (type == Type::CUSTOM) + return server_type == type && server_custom_name == "protocols." + custom_name + ".port"; + + return server_type == type; } bool ServerType::shouldStop(const std::string & port_name) const { Type port_type; - std::string port_custom_name; if (port_name == "http_port") port_type = Type::HTTP; @@ -119,20 +121,12 @@ bool ServerType::shouldStop(const std::string & port_name) const port_type = Type::INTERSERVER_HTTPS; else if (port_name.starts_with("protocols.") && port_name.ends_with(".port")) - { - constexpr size_t protocols_size = std::string_view("protocols.").size(); - constexpr size_t port_size = std::string_view("protocols.").size(); - port_type = Type::CUSTOM; - port_custom_name = port_name.substr(protocols_size, port_name.size() - port_size); - } - else - port_type = Type::UNKNOWN; - if (port_type == Type::UNKNOWN) + else return false; - return shouldStart(type, port_custom_name); + return shouldStart(port_type, port_name); } } diff --git a/src/Server/ServerType.h b/src/Server/ServerType.h index 345d1a10119b..1fab492222ae 100644 --- a/src/Server/ServerType.h +++ b/src/Server/ServerType.h @@ -10,7 +10,6 @@ class ServerType enum Type { - 
UNKNOWN, TCP, TCP_WITH_PROXY, TCP_SECURE, @@ -34,7 +33,8 @@ class ServerType static const char * serverTypeToString(Type type); - bool shouldStart(Type server_type, const std::string & custom_name_ = "") const; + /// Checks whether provided in the arguments type should be started or stopped based on current server type. + bool shouldStart(Type server_type, const std::string & server_custom_name = "") const; bool shouldStop(const std::string & port_name) const; Type type; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index ec92f57aeda5..878fb21cceaf 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -103,6 +103,8 @@ class IStorage : public std::enable_shared_from_this, public TypePromo IStorage(const IStorage &) = delete; IStorage & operator=(const IStorage &) = delete; + ~IStorage() override = default; + /// The main name of the table type (for example, StorageMergeTree). virtual std::string getName() const = 0; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index db8cb6b42dec..41eb18ab5416 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -371,7 +371,7 @@ std::pair> StorageURLSource: for (; option != end; ++option) { bool skip_url_not_found_error = glob_url && read_settings.http_skip_not_found_url_for_globs && option == std::prev(end); - auto request_uri = Poco::URI(*option, context->getSettingsRef().disable_url_encoding); + auto request_uri = Poco::URI(*option, context->getSettingsRef().enable_url_encoding); for (const auto & [param, value] : params) request_uri.addQueryParameter(param, value); diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index 63b9a443f958..9cb03c14ab8e 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -31,13 +31,15 @@ class IStorageSystemOneBlock : public IStorage virtual void fillData(MutableColumns & res_columns, ContextPtr context, 
const SelectQueryInfo & query_info) const = 0; public: - explicit IStorageSystemOneBlock(const StorageID & table_id_) : IStorage(table_id_) + explicit IStorageSystemOneBlock(StorageID table_id_) : IStorage(std::move(table_id_)) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription(Self::getNamesAndTypes(), Self::getNamesAndAliases())); setInMemoryMetadata(storage_metadata); } + ~IStorageSystemOneBlock() override = default; + Pipe read( const Names & column_names, const StorageSnapshotPtr & storage_snapshot, diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index f84c554afc0e..031c7454ab6b 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -42,10 +42,12 @@ const char * auto_contributors[] { "Alex", "Alex Bocharov", "Alex Cao", + "Alex Cheng", "Alex Karo", "Alex Krash", "Alex Ryndin", "Alex Zatelepin", + "AlexBykovski", "Alexander Avdonkin", "Alexander Bezpiatov", "Alexander Burmak", @@ -232,6 +234,7 @@ const char * auto_contributors[] { "CheSema", "Chebarykov Pavel", "Chen Yufei", + "Chen768959", "Cheng Pan", "Chienlung Cheung", "Christian", @@ -485,6 +488,7 @@ const char * auto_contributors[] { "John", "John Hummel", "John Skopis", + "John Spurlock", "Jonatas Freitas", "Jonathan-Ackerman", "Jordi", @@ -659,6 +663,7 @@ const char * auto_contributors[] { "Mikhail Gaidamaka", "Mikhail Guzov", "Mikhail Korotov", + "Mikhail Koviazin", "Mikhail Malafeev", "Mikhail Nacharov", "Mikhail Salosin", @@ -815,6 +820,7 @@ const char * auto_contributors[] { "Roman Vasin", "Roman Vlasenko", "Roman Zhukov", + "Rory Crispin", "Roy Bellingan", "Ruslan", "Ruslan Savchenko", @@ -832,7 +838,9 @@ const char * auto_contributors[] { "Salvatore Mesoraca", "Sami Kerola", "Samuel Chou", + "Samuel Colvin", "San", + "Sanjam Panda", "Saulius Valatka", "Sean Haynes", "Sean Lafferty", @@ 
-883,6 +891,7 @@ const char * auto_contributors[] { "SmitaRKulkarni", "Snow", "Sofia Antipushina", + "Song Liyong", "Sorck", "Stanislav Dobrovolschii", "Stanislav Pavlovichev", @@ -893,6 +902,7 @@ const char * auto_contributors[] { "Stepan Herold", "Stephan", "Steve-金勇", + "StianBerger", "Stig Bakken", "Storozhuk Kostiantyn", "Stupnikov Andrey", @@ -977,6 +987,7 @@ const char * auto_contributors[] { "Vitaliy Karnienko", "Vitaliy Kozlovskiy", "Vitaliy Lyudvichenko", + "Vitaliy Pashkov", "Vitaliy Zakaznikov", "Vitaly", "Vitaly Artemyev", @@ -1029,6 +1040,7 @@ const char * auto_contributors[] { "Yakov Olkhovskiy", "YalalovSM", "Yangkuan Liu", + "Yarik Briukhovetskyi", "Yatian Xu", "Yatsishin Ilya", "Yağızcan Değirmenci", @@ -1053,6 +1065,7 @@ const char * auto_contributors[] { "Yury Karpovich", "Yury Stankevich", "Yusuke Tanaka", + "Zach Naimon", "ZhiYong Wang", "Zhichang Yu", "Zhichun Wu", @@ -1143,6 +1156,7 @@ const char * auto_contributors[] { "changvvb", "chasingegg", "chen", + "chen768959", "chen9t", "chengy8934", "chenjian", @@ -1179,6 +1193,7 @@ const char * auto_contributors[] { "detailyang", "dfenelonov", "dgrr", + "dheerajathrey", "dimarub2000", "dinosaur", "divanorama", @@ -1329,6 +1344,7 @@ const char * auto_contributors[] { "lanfz", "larryluogit", "laurieliyang", + "lcjh", "lehasm", "leosunli", "leozhang", @@ -1455,6 +1471,7 @@ const char * auto_contributors[] { "pawelsz-rb", "pdai", "pdv-ru", + "pedro.riera", "pengxiangcai", "peshkurov", "peter279k", @@ -1548,8 +1565,10 @@ const char * auto_contributors[] { "teng.ma", "terrylin", "tesw yew isal", + "therealnick233", "tianzhou", "tiger.yan", + "timfursov", "tison", "topvisor", "tpanetti", @@ -1563,6 +1582,7 @@ const char * auto_contributors[] { "usurai", "vahid-sohrabloo", "vdimir", + "velavokr", "velom", "vesslanjin", "vgocoder", @@ -1587,17 +1607,21 @@ const char * auto_contributors[] { "wuxiaobai24", "wzl", "xPoSx", + "xiao", + "xiaolei565", "xiedeyantu", "xieyichen", "xinhuitian", "xlwh", "xmy", + 
"xuelei", "yakkomajuri", "yakov-olkhovskiy", "yandd", "yang", "yangshuai", "yaqi-zhao", + "yariks5s", "yeer", "ygrek", "yhgcn", diff --git a/src/Storages/System/StorageSystemSchema.cpp b/src/Storages/System/StorageSystemSchema.cpp new file mode 100644 index 000000000000..5f356af7c194 --- /dev/null +++ b/src/Storages/System/StorageSystemSchema.cpp @@ -0,0 +1,63 @@ +#include "StorageSystemSchema.h" + +#include +#include +#include +#include +#include +#include + +namespace +{ +std::string toKQLDataTypeName(const DB::DataTypePtr & data_type) +{ + const auto nested_type = DB::removeNullable(data_type); + const auto kql_data_type = DB::toKQLDataType(nested_type->getTypeId(), DB::KQLScope::Column); + return toString(kql_data_type); +} +} + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +NamesAndTypesList StorageSystemSchema::getNamesAndTypes() +{ + return { + {"ColumnName", std::make_shared()}, + {"ColumnOrdinal", std::make_shared()}, + {"DataType", std::make_shared()}, + {"ColumnType", std::make_shared()}}; +} + +void StorageSystemSchema::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +{ + if (res_columns.size() != 4) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected number of result columns when trying to fill {}", getName()); + + const auto & column_names = res_columns[0]; + const auto & column_ordinals = res_columns[1]; + const auto & column_data_types = res_columns[2]; + const auto & column_types = res_columns[3]; + + const auto & dialect = context->getSettingsRef().dialect; + const auto is_kql = dialect == Dialect::kusto; + + const auto sample_block = InterpreterSelectWithUnionQuery::getSampleBlock(query, context); + const auto & names_and_types = sample_block.getNamesAndTypes(); + for (int i = 0; i < std::ssize(names_and_types); ++i) + { + const auto & name_and_type = names_and_types[i]; + column_names->insert(toField(name_and_type.name)); + column_ordinals->insert(toField(i)); + + 
const auto & type = name_and_type.type; + const auto & type_name = type->getName(); + column_data_types->insert(toField(type_name)); + column_types->insert(toField(is_kql ? toKQLDataTypeName(type) : type_name)); + } +} +} diff --git a/src/Storages/System/StorageSystemSchema.h b/src/Storages/System/StorageSystemSchema.h new file mode 100644 index 000000000000..69525d5006ff --- /dev/null +++ b/src/Storages/System/StorageSystemSchema.h @@ -0,0 +1,21 @@ +#pragma once + +#include "IStorageSystemOneBlock.h" + +namespace DB +{ +class StorageSystemSchema : public IStorageSystemOneBlock +{ +public: + static NamesAndTypesList getNamesAndTypes(); + + StorageSystemSchema(StorageID table_id_, ASTPtr query_) : IStorageSystemOneBlock(std::move(table_id_)), query(std::move(query_)) { } + ~StorageSystemSchema() override = default; + + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + std::string getName() const override { return "SystemSchema"; } + +private: + ASTPtr query; +}; +} diff --git a/src/TableFunctions/TableFunctionGetSchema.cpp b/src/TableFunctions/TableFunctionGetSchema.cpp new file mode 100644 index 000000000000..fa9fd5c538e4 --- /dev/null +++ b/src/TableFunctions/TableFunctionGetSchema.cpp @@ -0,0 +1,43 @@ +#include "TableFunctionGetSchema.h" + +#include "TableFunctionFactory.h" + +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +ColumnsDescription TableFunctionGetSchema::getActualTableStructure(ContextPtr) const +{ + return ColumnsDescription{StorageSystemSchema::getNamesAndTypes()}; +} + +void TableFunctionGetSchema::parseArguments(const ASTPtr &, ContextPtr) +{ + // the same parameters are available in `executeImpl`, so we don't need to do anything here +} + +StoragePtr +TableFunctionGetSchema::executeImpl(const ASTPtr & ast_function, ContextPtr, const std::string & table_name, ColumnsDescription) const +{ + const auto * function = 
ast_function->as(); + if (!function) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected a function when parsing {}", name); + + const auto * query = function->tryGetQueryArgument(); + auto res = std::make_shared(StorageID(getDatabaseName(), table_name), query->clone()); + res->startup(); + return res; +} + +void registerTableFunctionGetSchema(TableFunctionFactory & factory) +{ + factory.registerFunction({.documentation = {}, .allow_readonly = true}); +} +} diff --git a/src/TableFunctions/TableFunctionGetSchema.h b/src/TableFunctions/TableFunctionGetSchema.h new file mode 100644 index 000000000000..aaa47dc1a62f --- /dev/null +++ b/src/TableFunctions/TableFunctionGetSchema.h @@ -0,0 +1,29 @@ +#pragma once + +#include "ITableFunction.h" + +namespace DB +{ +class TableFunctionGetSchema : public ITableFunction +{ +public: + static constexpr auto name = "getschema"; + + ~TableFunctionGetSchema() override = default; + + ColumnsDescription getActualTableStructure(ContextPtr context) const override; + std::string getName() const override { return name; } + bool hasStaticStructure() const override { return true; } + + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + +private: + const char * getStorageTypeName() const override { return "GetSchema"; } + + StoragePtr executeImpl( + const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; +}; + +class TableFunctionFactory; +void registerTableFunctionGetSchema(TableFunctionFactory & factory); +} diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index eb6e0372223d..25af410ef9ec 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -1,6 +1,7 @@ #include "registerTableFunctions.h" -#include +#include "TableFunctionFactory.h" +#include "TableFunctionGetSchema.h" namespace DB { @@ -72,6 +73,7 @@ void 
registerTableFunctions() registerTableFunctionFormat(factory); registerTableFunctionExplain(factory); + registerTableFunctionGetSchema(factory); #if USE_AZURE_BLOB_STORAGE registerTableFunctionAzureBlobStorage(factory); diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index 9410b37d69fb..11c1576ae56c 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -9,6 +9,7 @@ from get_robot_token import get_parameter_from_ssm from pr_info import PRInfo from report import TestResults +from env_helper import GITHUB_REPOSITORY class InsertException(Exception): @@ -141,7 +142,7 @@ def prepare_tests_results_for_clickhouse( report_url: str, check_name: str, ) -> List[dict]: - pull_request_url = "https://github.com/ClickHouse/ClickHouse/commits/master" + pull_request_url = "https://github.com/{}/commits/master".format(GITHUB_REPOSITORY) base_ref = "master" head_ref = "master" base_repo = pr_info.repo_full_name diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index fff2975cea4e..8244e1f7930a 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -16,7 +16,13 @@ from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from commit_status_helper import format_description, get_commit, post_commit_status -from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL +from env_helper import ( + GITHUB_WORKSPACE, + RUNNER_TEMP, + GITHUB_RUN_URL, + DOCKER_USER, + DOCKER_REPO, +) from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo from report import TestResults, TestResult @@ -106,7 +112,7 @@ def get_images_dict(repo_path: str, image_file_path: str) -> ImagesDict: def get_changed_docker_images( - pr_info: PRInfo, images_dict: ImagesDict + pr_info: PRInfo, images_dict: ImagesDict, docker_repo: str ) -> Set[DockerImage]: if not images_dict: return set() @@ -140,7 +146,7 @@ def 
get_changed_docker_images( for f in files_changed: if f in dockerfile_files: - name = image_description["name"] + name = docker_repo + "/" + image_description["name"] only_amd64 = image_description.get("only_amd64", False) logging.info( "Found changed file '%s' which affects " @@ -164,7 +170,7 @@ def get_changed_docker_images( dependent, image, ) - name = images_dict[dependent]["name"] + name = docker_repo + "/" + images_dict[dependent]["name"] only_amd64 = images_dict[dependent].get("only_amd64", False) changed_images.append(DockerImage(dependent, name, only_amd64, image)) index += 1 @@ -277,6 +283,7 @@ def build_and_push_one_image( "docker buildx build --builder default " f"--label build-url={GITHUB_RUN_URL} " f"{from_tag_arg}" + f"--build-arg DOCKER_REPO={DOCKER_REPO} " # A hack to invalidate cache, grep for it in docker/ dir f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " f"--tag {image.repo}:{version_string} " @@ -411,7 +418,6 @@ def parse_args() -> argparse.Namespace: default=argparse.SUPPRESS, help="don't push images to docker hub", ) - return parser.parse_args() @@ -426,10 +432,11 @@ def main(): changed_json = os.path.join(TEMP_PATH, f"changed_images_{args.suffix}.json") else: changed_json = os.path.join(TEMP_PATH, "changed_images.json") - if args.push: subprocess.check_output( # pylint: disable=unexpected-keyword-arg - "docker login --username 'robotclickhouse' --password-stdin", + "docker login {} --username '{}' --password-stdin".format( + DOCKER_REPO, DOCKER_USER + ), input=get_parameter_from_ssm("dockerhub_robot_password"), encoding="utf-8", shell=True, @@ -453,7 +460,7 @@ def main(): # If the event does not contain diff, nothing will be built pass - changed_images = get_changed_docker_images(pr_info, images_dict) + changed_images = get_changed_docker_images(pr_info, images_dict, DOCKER_REPO) if changed_images: logging.info( "Has changed images: %s", ", ".join([im.path for im in changed_images]) diff --git a/tests/ci/docker_manifests_merge.py 
b/tests/ci/docker_manifests_merge.py index d89708b9277e..e297a515a430 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -11,7 +11,7 @@ from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from commit_status_helper import format_description, get_commit, post_commit_status -from env_helper import RUNNER_TEMP +from env_helper import RUNNER_TEMP, DOCKER_USER, DOCKER_REPO from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo from report import TestResults, TestResult @@ -61,7 +61,6 @@ def parse_args() -> argparse.Namespace: default=argparse.SUPPRESS, help="don't push images to docker hub", ) - args = parser.parse_args() if len(args.suffixes) < 2: parser.error("more than two --suffix should be given") @@ -174,7 +173,9 @@ def main(): args = parse_args() if args.push: subprocess.check_output( # pylint: disable=unexpected-keyword-arg - "docker login --username 'robotclickhouse' --password-stdin", + "docker login {} --username '{}' --password-stdin".format( + DOCKER_REPO, DOCKER_USER + ), input=get_parameter_from_ssm("dockerhub_robot_password"), encoding="utf-8", shell=True, diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index c679ab984ee4..f51baaa7204a 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -5,7 +5,7 @@ from unittest.mock import patch, MagicMock from pathlib import Path -from env_helper import GITHUB_RUN_URL +from env_helper import GITHUB_RUN_URL, DOCKER_REPO from pr_info import PRInfo from report import TestResult import docker_images_check as di @@ -18,7 +18,7 @@ class TestDockerImageCheck(unittest.TestCase): docker_images_path = os.path.join( - os.path.dirname(__file__), "tests/docker_images.json" + os.path.dirname(__file__), "tests/docker_images_for_tests.json" ) def test_get_changed_docker_images(self): @@ -31,7 +31,9 @@ def test_get_changed_docker_images(self): images = sorted( list( 
di.get_changed_docker_images( - pr_info, di.get_images_dict("/", self.docker_images_path) + pr_info, + di.get_images_dict("/", self.docker_images_path), + DOCKER_REPO, ) ) ) @@ -129,6 +131,7 @@ def test_build_and_push_one_image(self, mock_machine, mock_popen): "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " "--build-arg FROM_TAG=version " + f"--build-arg DOCKER_REPO={DOCKER_REPO} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version --cache-from type=registry,ref=name:version " "--cache-from type=registry,ref=name:latest " @@ -147,6 +150,7 @@ def test_build_and_push_one_image(self, mock_machine, mock_popen): "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " "--build-arg FROM_TAG=version2 " + f"--build-arg DOCKER_REPO={DOCKER_REPO} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " @@ -164,6 +168,7 @@ def test_build_and_push_one_image(self, mock_machine, mock_popen): self.assertIn( "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " + f"--build-arg DOCKER_REPO={DOCKER_REPO} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " @@ -183,6 +188,7 @@ def test_build_and_push_one_image(self, mock_machine, mock_popen): self.assertIn( "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " + 
f"--build-arg DOCKER_REPO={DOCKER_REPO} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index 9303f9ae2936..e0f05157dc41 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -32,6 +32,15 @@ "{pr_or_release}/{commit}/{build_name}/{artifact}" ) +DOCKER_REPO = os.getenv("DOCKER_REPO", "docker.io") +DOCKER_USER = os.getenv("DOCKER_USER", "robotclickhouse") +S3_REGION = os.getenv("S3_REGION", "us-east-1") +S3_ENDPOINT = os.getenv("S3_ENDPOINT", "https://s3.amazonaws.com") +VAULT_PATH = os.getenv("VAULT_PATH") +VAULT_TOKEN = os.getenv("VAULT_TOKEN") +VAULT_URL = os.getenv("VAULT_URL") +VAULT_MOUNT_POINT = os.getenv("VAULT_MOUNT_POINT", "secret") + # These parameters are set only on demand, and only once _GITHUB_JOB_ID = "" _GITHUB_JOB_URL = "" diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py index b41eba49cc32..3897abf89250 100644 --- a/tests/ci/get_robot_token.py +++ b/tests/ci/get_robot_token.py @@ -4,6 +4,8 @@ from typing import Optional import boto3 # type: ignore +import hvac # type: ignore # pylint: disable=import-error +from env_helper import VAULT_URL, VAULT_TOKEN, VAULT_PATH, VAULT_MOUNT_POINT from github import Github from github.AuthenticatedUser import AuthenticatedUser @@ -16,9 +18,22 @@ class Token: def get_parameter_from_ssm(name, decrypt=True, client=None): - if not client: - client = boto3.client("ssm", region_name="us-east-1") - return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"] + if VAULT_URL: + if not client: + client = hvac.Client(url=VAULT_URL, token=VAULT_TOKEN) + parameter = client.secrets.kv.v2.read_secret_version( + mount_point=VAULT_MOUNT_POINT, path=VAULT_PATH + )["data"]["data"][name] + else: + if not client: + client = boto3.client("ssm", region_name="us-east-1") + parameter = 
client.get_parameter(Name=name, WithDecryption=decrypt)[ + "Parameter" + ]["Value"] + return parameter + + +ROBOT_TOKEN = None # type: Optional[Token] ROBOT_TOKEN = None # type: Optional[Token] @@ -28,12 +43,28 @@ def get_best_robot_token(token_prefix_env_name="github_robot_token_"): global ROBOT_TOKEN if ROBOT_TOKEN is not None: return ROBOT_TOKEN.value - client = boto3.client("ssm", region_name="us-east-1") - parameters = client.describe_parameters( - ParameterFilters=[ - {"Key": "Name", "Option": "BeginsWith", "Values": [token_prefix_env_name]} + client = None + if VAULT_URL: + client = hvac.Client(url=VAULT_URL, token=VAULT_TOKEN) + response = client.secrets.kv.read_secret_version( + path=VAULT_PATH, mount_point=VAULT_MOUNT_POINT + ) + parameters = [ + {"Name": p} + for p in response["data"]["data"] + if p.startswith(token_prefix_env_name) ] - )["Parameters"] + else: + client = boto3.client("ssm", region_name="us-east-1") + parameters = client.describe_parameters( + ParameterFilters=[ + { + "Key": "Name", + "Option": "BeginsWith", + "Values": [token_prefix_env_name], + } + ] + )["Parameters"] assert parameters for token_name in [p["Name"] for p in parameters]: diff --git a/tests/ci/lambda_shared_package/lambda_shared/pr.py b/tests/ci/lambda_shared_package/lambda_shared/pr.py index ef47eacc082c..afdb07988932 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/pr.py +++ b/tests/ci/lambda_shared_package/lambda_shared/pr.py @@ -63,6 +63,20 @@ "MikhailBurdukov", "tsolodov", # ClickHouse Employee "kitaisreal", + "ch-devops", + "larryluogit", + "bkuschel", + "SadiHassan", + "kashwy", + "HeenaBansal2009", + "umang8223", + "HarryLeeIBM", + "ltrk2", + "MeenaRenganathan22", + "mcmajam", + "bemitc", + "vibhaKulka", + "bhavnajindal", ] } diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 330a1309016c..0b647d4157f8 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -28,6 +28,7 @@ TRUSTED_ORG_IDS = { 54801242, # clickhouse + 96197510, # 
ClibMouse } OK_SKIP_LABELS = {"release", "pr-backport", "pr-cherrypick"} diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index 9ee0a4312947..ee1afdd58fe3 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -19,6 +19,8 @@ CI, S3_URL, S3_DOWNLOAD, + S3_REGION, + S3_ENDPOINT, ) from compress_files import compress_file_fast @@ -47,8 +49,9 @@ class S3Helper: def __init__(self): config = botocore.config.Config(max_pool_connections=self.max_pool_size) - self.session = boto3.session.Session(region_name="us-east-1") - self.client = self.session.client("s3", endpoint_url=S3_URL, config=config) + self.session = boto3.session.Session(region_name=S3_REGION) + self.client = self.session.client("s3", endpoint_url=S3_ENDPOINT, config=config) + self.endpoint = S3_ENDPOINT self.host = S3_URL self.download_host = S3_DOWNLOAD @@ -113,8 +116,13 @@ def _upload_file_to_s3(self, bucket_name: str, file_path: str, s3_path: str) -> logging.info("Upload %s to %s. Meta: %s", file_path, s3_path, metadata) # last two replacements are specifics of AWS urls: # https://jamesd3142.wordpress.com/2018/02/28/amazon-s3-and-the-plus-symbol/ - url = f"{self.download_host}/{bucket_name}/{s3_path}" - return url.replace("+", "%2B").replace(" ", "%20") + return ( + "{host}/{bucket}/{path}".format( + host=self.endpoint, bucket=bucket_name, path=s3_path + ) + .replace("+", "%2B") + .replace(" ", "%20") + ) def upload_test_report_to_s3(self, file_path: str, s3_path: str) -> str: if CI: @@ -181,7 +189,9 @@ def upload_task(file_path: str) -> str: t = time.time() except Exception as ex: logging.critical("Failed to upload file, expcetion %s", ex) - return f"{self.download_host}/{bucket_name}/{s3_path}" + return "{host}/{bucket}/{path}".format( + host=self.endpoint, bucket=bucket_name, path=s3_path + ) p = Pool(self.max_pool_size) diff --git a/tests/ci/tests/docker_images.json b/tests/ci/tests/docker_images_for_tests.json similarity index 100% rename from tests/ci/tests/docker_images.json 
rename to tests/ci/tests/docker_images_for_tests.json diff --git a/tests/integration/test_system_start_stop_listen/configs/cluster.xml b/tests/integration/test_system_start_stop_listen/configs/cluster.xml index 93d8f890f40f..34b6c32c6d02 100644 --- a/tests/integration/test_system_start_stop_listen/configs/cluster.xml +++ b/tests/integration/test_system_start_stop_listen/configs/cluster.xml @@ -3,11 +3,11 @@ - node1 + main_node 9000 - node2 + backup_node 9000 diff --git a/tests/integration/test_system_start_stop_listen/configs/protocols.xml b/tests/integration/test_system_start_stop_listen/configs/protocols.xml new file mode 100644 index 000000000000..1d8608bcaca3 --- /dev/null +++ b/tests/integration/test_system_start_stop_listen/configs/protocols.xml @@ -0,0 +1,23 @@ + + 0.0.0.0 + + + 9000 + 8123 + 9004 + + + + + tcp + 0.0.0.0 + 9001 + native protocol (tcp) + + + http + 8124 + http protocol + + + diff --git a/tests/integration/test_system_start_stop_listen/test.py b/tests/integration/test_system_start_stop_listen/test.py index ec1a000c5998..1925685af031 100644 --- a/tests/integration/test_system_start_stop_listen/test.py +++ b/tests/integration/test_system_start_stop_listen/test.py @@ -2,20 +2,18 @@ import pytest -import time from helpers.cluster import ClickHouseCluster -from helpers.network import PartitionManager -from helpers.test_tools import assert_eq_with_retry -import random -import string -import json +from helpers.client import Client +import requests cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance( - "node1", main_configs=["configs/cluster.xml"], with_zookeeper=True +main_node = cluster.add_instance( + "main_node", + main_configs=["configs/cluster.xml", "configs/protocols.xml"], + with_zookeeper=True, ) -node2 = cluster.add_instance( - "node2", main_configs=["configs/cluster.xml"], with_zookeeper=True +backup_node = cluster.add_instance( + "backup_node", main_configs=["configs/cluster.xml"], with_zookeeper=True ) @@ -30,11 +28,118 @@ 
def started_cluster(): cluster.shutdown() -def test_system_start_stop_listen_queries(started_cluster): - node1.query("SYSTEM STOP LISTEN QUERIES ALL") +def http_works(port=8123): + try: + response = requests.post(f"http://{main_node.ip_address}:{port}/ping") + if response.status_code == 400: + return True + except: + pass + + return False + + +def assert_everything_works(): + custom_client = Client(main_node.ip_address, 9001, command=cluster.client_bin_path) + main_node.query(QUERY) + main_node.query(MYSQL_QUERY) + custom_client.query(QUERY) + assert http_works() + assert http_works(8124) + + +QUERY = "SELECT 1" +MYSQL_QUERY = "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', 'default', '', SETTINGS connect_timeout = 100, connection_wait_timeout = 100)" + + +def test_default_protocols(started_cluster): + # TCP + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN TCP") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + backup_node.query("SYSTEM START LISTEN ON CLUSTER default TCP") + + # HTTP + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN HTTP") + assert http_works() == False + main_node.query("SYSTEM START LISTEN HTTP") + + # MySQL + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN MYSQL") + assert "Connections to mysql failed" in main_node.query_and_get_error(MYSQL_QUERY) + main_node.query("SYSTEM START LISTEN MYSQL") + + assert_everything_works() + + +def test_custom_protocols(started_cluster): + # TCP + custom_client = Client(main_node.ip_address, 9001, command=cluster.client_bin_path) + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN CUSTOM 'tcp'") + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + main_node.query("SYSTEM START LISTEN CUSTOM 'tcp'") + + # HTTP + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN CUSTOM 'http'") + assert http_works(8124) == False + main_node.query("SYSTEM START LISTEN CUSTOM 'http'") + + 
assert_everything_works() + + +def test_all_protocols(started_cluster): + custom_client = Client(main_node.ip_address, 9001, command=cluster.client_bin_path) + assert_everything_works() + + # STOP LISTEN QUERIES ALL + main_node.query("SYSTEM STOP LISTEN QUERIES ALL") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + assert http_works() == False + assert http_works(8124) == False + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") + + # STOP LISTEN QUERIES DEFAULT + assert_everything_works() + + main_node.query("SYSTEM STOP LISTEN QUERIES DEFAULT") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + custom_client.query(QUERY) + assert http_works() == False + assert http_works(8124) + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES DEFAULT") + + # STOP LISTEN QUERIES CUSTOM + assert_everything_works() + + main_node.query("SYSTEM STOP LISTEN QUERIES CUSTOM") + main_node.query(QUERY) + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + assert http_works() + assert http_works(8124) == False + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES CUSTOM") + + # Disable all protocols, check first START LISTEN QUERIES DEFAULT then START LISTEN QUERIES CUSTOM + assert_everything_works() + + main_node.query("SYSTEM STOP LISTEN QUERIES ALL") + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES DEFAULT") + main_node.query(QUERY) + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + assert http_works() + assert http_works(8124) == False - assert "Connection refused" in node1.query_and_get_error("SELECT 1", timeout=3) + main_node.query("SYSTEM STOP LISTEN QUERIES ALL") + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES CUSTOM") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + custom_client.query(QUERY) + assert 
http_works() == False + assert http_works(8124) - node2.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") - node1.query("SELECT 1") + assert_everything_works() diff --git a/tests/queries/0_stateless/02366_kql_count.reference b/tests/queries/0_stateless/02366_kql_count.reference new file mode 100644 index 000000000000..dde58001af97 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_count.reference @@ -0,0 +1,5 @@ +6 +4 +2 +2 +4 diff --git a/tests/queries/0_stateless/02366_kql_count.sql b/tests/queries/0_stateless/02366_kql_count.sql new file mode 100644 index 000000000000..2d630316d6b0 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_count.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +set dialect = 'kusto'; + +Customers | count; +Customers | where Age< 30 | count; +Customers | where Age< 30 | limit 2 | count; +Customers | where Age< 30 | limit 2 | count | project Count; +Customers |project FirstName|where FirstName != 'Peter'|sort by FirstName asc nulls first|count; \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_create_table.reference b/tests/queries/0_stateless/02366_kql_create_table.reference new file mode 100644 index 000000000000..35136b5ff425 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_create_table.reference @@ -0,0 +1,4 @@ +-- test create table -- +Theodore +Diaz +Theodore Diaz 28 diff --git 
a/tests/queries/0_stateless/02366_kql_create_table.sql b/tests/queries/0_stateless/02366_kql_create_table.sql new file mode 100644 index 000000000000..b266679b06aa --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_create_table.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); +Select '-- test create table --' ; +Select * from kql(Customers|project FirstName) limit 1;; +DROP TABLE IF EXISTS kql_table1; +CREATE TABLE kql_table1 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName | filter LastName=='Diaz'); +select LastName from kql_table1 limit 1; +DROP TABLE IF EXISTS kql_table2; +CREATE TABLE kql_table2 +( + FirstName Nullable(String), + LastName String, + Age Nullable(UInt8) +) ENGINE = Memory; +INSERT INTO kql_table2 select * from kql(Customers|project FirstName,LastName,Age | filter FirstName=='Theodore'); +select * from kql_table2 limit 1; +-- select * from kql(Customers | where FirstName !in ("test", "test2")); +DROP TABLE IF EXISTS Customers; +DROP TABLE IF EXISTS kql_table1; +DROP TABLE IF EXISTS kql_table2; \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_datatype.reference b/tests/queries/0_stateless/02366_kql_datatype.reference new file mode 100644 index 000000000000..5fbdad8f0810 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_datatype.reference @@ -0,0 +1,146 @@ +-- bool +true +\N +-- int +123 +\N +-2147483648 +2147483647 +-- long +123 +255 +-1 +\N +-9223372036854775808 
+9223372036854775807 +456 +-- real +0.01 +\N +nan +inf +-inf +-- datetime +2015-12-31 23:59:59.900000000 +2015-12-31 00:00:00.000000000 +2014-05-25 08:20:03.123456000 +2014-11-08 15:55:55.000000000 +2014-11-08 15:55:00.000000000 +2014-11-08 00:00:00.000000000 +\N +2014-05-25 08:20:03.123456000 +2014-11-08 15:55:55.123456000 +365.00:00:00 +1970-05-11 13:45:07.456345700 +-- time +\N +937830000000 +937831230000 +-937831230000 +937830000000 +937800000000 +73800000000 +73840000000 +73845678901 +12169841234500 +450551230000 +1.00:00:00 +-1.00:00:00 +00:00:00 +00:00:00.0000006 +2.00:00:00 +3.00:00:00 +-- timespan (time) +\N +2.00:00:00 +01:30:00 +00:30:00 +00:00:10 +00:00:00.1000000 +00:00:00.1000000 +00:00:00.0000100 +00:00:00.0000001 +3 +1120343 +1.12:00:00 +2.00:00:00 04:00:00 00:08:00 00:00:16 00:00:00.1230000 00:00:00.0004560 00:00:00.0000007 2.04:08:16.1234567 +false +true +864000000000 +864000000000 +1.00:00:00 +2.04:08:16.1234567 +331.08:12:40 +165.16:06:20 +-1.01:01:01.1234567 +864000000000 +-- guid +\N +-- null +1 +\N \N \N \N \N +-- decimal +\N +123.345 +100000 +-- dynamic +\N +1 +1.00:00:00 +[1,2,3] +[[1],[2],[3]] +['a','b','c'] +-- cast functions +true +1 +-- tobool("false") +false +1 +-- tobool(1) +true +1 +-- tobool(123) +true +1 +-- tobool("abc") +\N +\N +-- todouble() +123.4 +\N +-- toreal() +123.4 +\N +-- toint() +1 +\N +-- tostring() +123 + +-- todatetime() +1 +1 +1 +-- totimespan() +\N +00:00:00.0000001 +00:01:00 +\N +1120343 +1120343 +16:30:00 +\N +-- tolong() +123 +\N +638081280000000000 +636449221237654321 +-- todecimal() +123.345 +\N +\N +100000 +0.00001 +123.561 +653.4 +9999999999999999999999999999999999 diff --git a/tests/queries/0_stateless/02366_kql_datatype.sql b/tests/queries/0_stateless/02366_kql_datatype.sql new file mode 100644 index 000000000000..1c13c23465f1 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_datatype.sql @@ -0,0 +1,194 @@ + +DROP TABLE IF EXISTS tb1; +create table tb1 ( +str String +)ENGINE = Memory; +INSERT INTO 
tb1 VALUES ('123.561') , ('653.4'); + +-- datatable(s:string, i:long) [ +-- '0', 0, +-- '1899', 1899, +-- '1900', 1900, +-- '2261', 2261, +-- '2262', 2262, +-- '10000', 10000 +-- ] + +drop table if exists datetime_test; +create table datetime_test(s String, i Int64) engine = Memory; +insert into datetime_test values ('0', 0), ('1899', 1899), ('1900', 1900), ('2261', 2261), ('2262', 2262), ('10000', 10000); + +set dialect = 'kusto'; +set interval_output_format = 'kusto'; + +print '-- bool'; +print bool(true); +print bool(null); +print bool('false'); -- { clientError BAD_ARGUMENTS } +print '-- int'; +print int(123); +print int(null); +print int(-2147483648); +print int(2147483647); +print int('4'); -- { clientError BAD_ARGUMENTS } +print int(-2147483649); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print int(2147483648); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print '-- long'; +print long(123); +print long(0xff); +print long(-1); +print long(null); +print long(-9223372036854775808); +print long(9223372036854775807); +print 456; +-- print long(-9223372036854775809); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print long(9223372036854775808); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print long('9023'); -- { clientError BAD_ARGUMENTS } +print '-- real'; +print real(0.01); +print real(null); +print real(nan); +print real(+inf); +print real(-inf); +print double('4.2'); -- { clientError BAD_ARGUMENTS } +print '-- datetime'; +print datetime(2015-12-31 23:59:59.9); +print datetime(2015-12-31); +print datetime('2014-05-25T08:20:03.123456'); +print datetime('2014-11-08 15:55:55'); +print datetime('2014-11-08 15:55'); +print datetime('2014-11-08'); +print datetime(null); +print datetime('2014-05-25T08:20:03.123456Z'); +print datetime('2014-11-08 15:55:55.123456Z'); +print datetime('2022') - datetime('2021'); +print datetime('1970-05-11 13:45:07.456345672'); +print '-- time'; +print tolong(time(null)); +print 
tolong(time(1.2:3:3)); +print tolong(time(1.2:3:3.123)); +print tolong(time(-1.2:3:3.123)); +print tolong(time(001.02:03:03)); +print tolong(time(001.02:03)); +print tolong(time(02:03)); +print tolong(time(02:03:04)); +print tolong(time(02:03:04.5678901)); +print time(24:03:04.5678901); -- { clientError BAD_ARGUMENTS } +print time(02:60:04.5678901); -- { clientError BAD_ARGUMENTS } +print time(02:03:60.5678901); -- { clientError BAD_ARGUMENTS } +print time(02:-03:04.5678901); -- { clientError BAD_ARGUMENTS } +print time(02:03:-04.5678901); -- { clientError BAD_ARGUMENTS } +print time(02:03:04.-5678901); -- { clientError BAD_ARGUMENTS } +print time(1.-02:03:04.5678901); -- { clientError BAD_ARGUMENTS } +print time(1.23); -- { clientError BAD_ARGUMENTS } +print time(02:03:04.56789012); -- { clientError BAD_ARGUMENTS } +print time(03:04.56789012); -- { clientError BAD_ARGUMENTS } +print tolong(time('14.02:03:04.12345')); +print tolong(time('12:30:55.123')); +print time(1d); +print time(-1d); +print time(6nanoseconds); +print time(6tick); +print time(2); +print time(2) + 1d; +print '-- timespan (time)'; +print timespan(null); +print timespan(2d); -- 2 days +print timespan(1.5h); -- 1.5 hour +print timespan(30m); -- 30 minutes +print timespan(10s); -- 10 seconds +print timespan(0.1s); -- 0.1 second +print timespan(100ms); -- 100 millisecond +print timespan(10microsecond); -- 10 microseconds +print timespan(1tick); -- 100 nanoseconds +print timespan(1.5h) / timespan(30m); +print timespan('12.23:12:23') / timespan(1s); +print (timespan(1.5d) / timespan(0.6d)) * timespan(0.6d); +print a = timespan(2d), b = timespan(4h), c = timespan(8m), d = timespan(16s), e = timespan(123millis), f = timespan(456micros), g = timespan(789nanos) | extend x = a + b + c + d + e + f + g; +print tobool(timespan(0s)); +print tobool(timespan(1d)); +print todouble(timespan(1d)); +-- print toint(timespan(1d)); -> 711573504 +print tolong(timespan(1d)); +print tostring(timespan(1d)); +print 
tostring(timespan(2d) + timespan(4h) + timespan(8m) + timespan(16s) + timespan(123millis) + timespan(456micros) + timespan(789nanos)); +print tostring((1h + 90d) * 2 + (6h + 32s + 30d + 2m) * 5); +print tostring(((1h + 90d) * 2 + (6h + 32s + 30d + 2m) * 5) / 2); +print tostring(-timespan(1d) - timespan(1h) - timespan(1m) - timespan(1s) - timespan(123456789nanos)); +print todecimal(timespan(1d)); +print 49h + (1h + 1m) * 999999h + 1s; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print 1h * 1h; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print 2h + 2; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print 2h - 2; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- guid' +print guid(74be27de-1e4e-49d9-b579-fe0b331d3642); +print guid(null); +print '-- null'; +print isnull(null); +print bool(null), int(null), long(null), real(null), double(null); +print '-- decimal'; +print decimal(null); +print decimal(123.345); +print decimal(1e5); +print '-- dynamic'; -- no support for mixed types and bags for now +print dynamic(null); +print dynamic(1); +print dynamic(timespan(1d)); -- should be 864000000000, `print gettype(dynamic(timespan(1d)));` returns `long` in ADX +print dynamic([1,2,3]); +print dynamic([[1], [2], [3]]); +print dynamic(['a', "b", 'c']); +print '-- cast functions' +print '--tobool("true")'; -- == true +print tobool('true'); -- == true +print tobool('true') == toboolean('true'); -- == true +print '-- tobool("false")'; -- == false +print tobool('false'); -- == false +print tobool('false') == toboolean('false'); -- == false +print '-- tobool(1)'; -- == true +print tobool(1); -- == true +print tobool(1) == toboolean(1); -- == true +print '-- tobool(123)'; -- == true +print tobool(123); -- == true +print tobool(123) == toboolean(123); -- == true +print '-- tobool("abc")'; -- == null +print tobool('abc'); -- == null +print tobool('abc') == toboolean('abc'); -- == null +print '-- todouble()'; +print todouble('123.4'); +print todouble('abc') == null; +print '-- 
toreal()'; +print toreal("123.4"); +print toreal('abc') == null; +print '-- toint()'; +print toint("123") == int(123); +print toint('abc'); +print '-- tostring()'; +print tostring(123); +print tostring(null); +print '-- todatetime()'; +print todatetime("2015-12-24") == datetime(2015-12-24); +print isnull(todatetime('abc')); +print todatetime('1970-05-11 13:45:07.456345672') == datetime('1970-05-11 13:45:07.456345672'); +print '-- totimespan()'; +print totimespan(null); +print totimespan(1tick); +print totimespan('0.00:01:00'); +print totimespan('abc'); +print totimespan('12.23:12:23') / totimespan(1s); +print totimespan(strcat('12.', '23', ':12:', '23')) / timespan(1s); +print totimespan(timespan(16:30)); +print totimespan("'asdadsasd"); +print '-- tolong()'; +print tolong('123'); +print tolong('abc'); +print tolong(datetime('2023-01-01')); +print tolong(datetime('2017-10-30 01:02:03.7654321')); +print '-- todecimal()'; +print todecimal(123.345); +print todecimal(null); +print todecimal('abc'); +print todecimal(1e5); +print todecimal(1e-5); +tb1 | project todecimal(str); +print todecimal('9999999999999999999999999999999999'); +-- print todecimal(4 * 2 + 3); -> 11 diff --git a/tests/queries/0_stateless/02366_kql_distinct.reference b/tests/queries/0_stateless/02366_kql_distinct.reference new file mode 100644 index 000000000000..74035603adfc --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_distinct.reference @@ -0,0 +1,30 @@ +-- distinct * -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +\N why Professional Partial College 38 +-- distinct one column -- +Skilled Manual +Management abcd defg +Professional +-- distinct two column -- +Skilled Manual Bachelors +Management abcd defg Bachelors +Skilled Manual Graduate Degree +Professional Graduate Degree +Professional Partial College +-- distinct 
with where -- +Skilled Manual Bachelors +Skilled Manual Graduate Degree +Professional Graduate Degree +-- distinct with where, order -- +Skilled Manual Bachelors +Skilled Manual Graduate Degree +Professional Graduate Degree +-- distinct with alias -- +8 +3 +6 +5 diff --git a/tests/queries/0_stateless/02366_kql_distinct.sql b/tests/queries/0_stateless/02366_kql_distinct.sql new file mode 100644 index 000000000000..04ef94b0e416 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_distinct.sql @@ -0,0 +1,31 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +set dialect = 'kusto'; + +print '-- distinct * --'; +Customers | distinct *; + +print '-- distinct one column --'; +Customers | distinct Occupation; + +print '-- distinct two column --'; +Customers | distinct Occupation, Education; + +print '-- distinct with where --'; +Customers | where Age <30 | distinct Occupation, Education; + +print '-- distinct with where, order --'; +Customers | where Age <30 | order by Age| distinct Occupation, Education; + +print '-- distinct with alias --'; +Customers | project a = (Age % 10) | distinct a; \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_extend.reference b/tests/queries/0_stateless/02366_kql_extend.reference new file mode 100644 index 000000000000..ea841b6fb2ec --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_extend.reference @@ -0,0 +1,32 @@ +-- extend #1 -- +Aldi Apple 4 2016-09-10 400 +Costco Apple 2 2016-09-11 200 +-- extend #2 -- +Apple 200 
+Apple 400 +-- extend #3 -- +Apple cost 480 on average based on 5 samples. +Snargaluff cost 28080 on average based on 5 samples. +-- extend #4 -- +1 +-- extend #5 -- +Aldi Apple 4 2016-09-10 Apple was purchased from Aldi for $4 on 2016-09-10T00:00:00.0000000Z 400 +Costco Apple 2 2016-09-11 Apple was purchased from Costco for $2 on 2016-09-11T00:00:00.0000000Z 200 +-- extend #6 -- +Aldi Apple 2016-09-10 400 +Costco Apple 2016-09-11 200 +Aldi Apple 2016-09-10 600 +Costco Snargaluff 2016-09-12 10000 +Aldi Apple 2016-09-12 700 +Aldi Snargaluff 2016-09-11 40000 +Costco Snargaluff 2016-09-12 10400 +Aldi Apple 2016-09-12 500 +Aldi Snargaluff 2016-09-11 60000 +Costco Snargaluff 2016-09-10 20000 +-- extend #7 -- +5 +-- extend #8 -- +-- extend #9 -- +-- extend #10 -- +-- extend #11 -- +5 [2,1] diff --git a/tests/queries/0_stateless/02366_kql_extend.sql b/tests/queries/0_stateless/02366_kql_extend.sql new file mode 100644 index 000000000000..9325a7662405 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_extend.sql @@ -0,0 +1,58 @@ +-- datatable(Supplier:string, Fruit:string, Price: real, Purchase:datetime) +-- [ +-- 'Aldi','Apple',4,'2016-09-10', +-- 'Costco','Apple',2,'2016-09-11', +-- 'Aldi','Apple',6,'2016-09-10', +-- 'Costco','Snargaluff',100,'2016-09-12', +-- 'Aldi','Apple',7,'2016-09-12', +-- 'Aldi','Snargaluff',400,'2016-09-11', +-- 'Costco','Snargaluff',104,'2016-09-12', +-- 'Aldi','Apple',5,'2016-09-12', +-- 'Aldi','Snargaluff',600,'2016-09-11', +-- 'Costco','Snargaluff',200,'2016-09-10', +-- ] + +DROP TABLE IF EXISTS Ledger; +CREATE TABLE Ledger +( + Supplier Nullable(String), + Fruit String , + Price Float64, + Purchase Date +) ENGINE = Memory; +INSERT INTO Ledger VALUES ('Aldi','Apple',4,'2016-09-10'), ('Costco','Apple',2,'2016-09-11'), ('Aldi','Apple',6,'2016-09-10'), ('Costco','Snargaluff',100,'2016-09-12'), ('Aldi','Apple',7,'2016-09-12'), 
('Aldi','Snargaluff',400,'2016-09-11'),('Costco','Snargaluff',104,'2016-09-12'),('Aldi','Apple',5,'2016-09-12'),('Aldi','Snargaluff',600,'2016-09-11'),('Costco','Snargaluff',200,'2016-09-10'); + +set dialect = 'kusto'; + +print '-- extend #1 --'; +Ledger | extend PriceInCents = 100 * Price | take 2; + +print '-- extend #2 --'; +Ledger | extend PriceInCents = 100 * Price | sort by PriceInCents asc | project Fruit, PriceInCents | take 2; + +print '-- extend #3 --'; +Ledger | extend PriceInCents = 100 * Price | sort by PriceInCents asc | project Fruit, PriceInCents | summarize AveragePrice = avg(PriceInCents), Purchases = count() by Fruit | extend Sentence = strcat(Fruit, ' cost ', tostring(AveragePrice), ' on average based on ', tostring(Purchases), ' samples.') | project Sentence; + +print '-- extend #4 --'; +Ledger | extend a = Price | extend b = a | extend c = a, d = b + 500 | extend Pass = bool(b == a and c == a and d == b + 500) | summarize binary_all_and(Pass); + +print '-- extend #5 --'; +Ledger | take 2 | extend strcat(Fruit, ' was purchased from ', Supplier, ' for $', tostring(Price), ' on ', tostring(Purchase)) | extend PriceInCents = 100 * Price; + +print '-- extend #6 --'; +Ledger | extend Price = 100 * Price; + +print '-- extend #7 --'; +print a = 4 | extend a = 5; + +print '-- extend #8 --'; +-- print x = 5 | extend array_sort_desc(range(0, x), range(1, x + 1)) + +print '-- extend #9 --'; +print x = 19 | extend = 4 + ; -- { clientError SYNTAX_ERROR } + +print '-- extend #10 --'; +Ledger | extend PriceInCents = * Price | sort by PriceInCents asc | project Fruit, PriceInCents | summarize AveragePrice = avg(PriceInCents), Purchases = count() by Fruit | extend Sentence = strcat(Fruit, ' cost ', tostring(AveragePrice), ' on average based on ', tostring(Purchases), ' samples.') | project Sentence; -- { clientError SYNTAX_ERROR } + +print '-- extend #11 --'; +print x = 5 | extend ex = array_sort_desc(dynamic([1, 2]), dynamic([3, 4])); diff --git 
a/tests/queries/0_stateless/02366_kql_func_binary.reference b/tests/queries/0_stateless/02366_kql_func_binary.reference new file mode 100644 index 000000000000..6276cd6d8675 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_binary.reference @@ -0,0 +1,7 @@ + -- binary functions +4 7 +1 +1 +1 +7 3 +1 diff --git a/tests/queries/0_stateless/02366_kql_func_binary.sql b/tests/queries/0_stateless/02366_kql_func_binary.sql new file mode 100644 index 000000000000..824022b564ce --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_binary.sql @@ -0,0 +1,8 @@ +set dialect='kusto'; +print ' -- binary functions'; +print binary_and(4,7), binary_or(4,7); +print binary_shift_left(1, 1) == binary_shift_left(1, 65); +print binary_shift_right(2, 1) == binary_shift_right(2, 65); +print binary_shift_right(binary_shift_left(1, 65), 65) == 1; +print binary_xor(2, 5), bitset_count_ones(42); +print bitset_count_ones(binary_shift_left(binary_and(4,7), 1)); diff --git a/tests/queries/0_stateless/02366_kql_func_datetime.reference b/tests/queries/0_stateless/02366_kql_func_datetime.reference new file mode 100644 index 000000000000..62d7b70c4d5f --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_datetime.reference @@ -0,0 +1,99 @@ +-- dayofmonth() +31 +-- dayofweek() +4.00:00:00 +2.00:00:00 +4.00:00:00 +-- dayofyear() +365 +-- getmonth() +10 +-- getyear() +2015 +-- hoursofday() +23 +-- startofday() +2017-01-01 00:00:00.000000000 +2016-12-31 00:00:00.000000000 +2017-01-02 00:00:00.000000000 +-- endofday() +2017-01-01 23:59:59.999999900 +2016-12-31 23:59:59.999999900 +2017-01-02 23:59:59.999999900 +-- endofmonth() +2017-01-31 23:59:59.999999900 +2016-12-31 23:59:59.999999900 +2017-02-28 23:59:59.999999900 +2022-09-30 23:59:59.999999900 +-- startofweek() +2017-01-01 00:00:00.000000000 +2016-12-25 00:00:00.000000000 +2017-01-08 00:00:00.000000000 +-- endofweek() +2017-01-07 23:59:59.999999900 +2016-12-31 23:59:59.999999900 +2017-01-14 23:59:59.999999900 +-- startofyear() 
+2017-01-01 00:00:00.000000000 +2016-01-01 00:00:00.000000000 +2018-01-01 00:00:00.000000000 +-- endofyear() +2017-12-31 23:59:59.999999900 +2016-12-31 23:59:59.999999900 +2018-12-31 23:59:59.999999900 +-- unixtime_seconds_todatetime() +2019-01-01 00:00:00.000000000 +1970-01-02 00:00:00.000000000 +1969-12-31 00:00:00.000000000 +-- unixtime_microseconds_todatetime +2019-01-01 00:00:00.000000000 +-- unixtime_milliseconds_todatetime() +2019-01-01 00:00:00.000000000 +-- unixtime_nanoseconds_todatetime() +2019-01-01 00:00:00.000000000 +-- weekofyear() +52 +-- monthofyear() +12 +-- weekofyear() +52 +-- now() +1 +-- make_datetime() +2017-10-01 12:10:00.000000000 +\N +\N +\N +2017-10-01 12:10:00.000000000 +2017-10-01 12:11:00.123456700 +-- format_datetime +15-12-14 02:03:04.1234500 +17-01-29 [09:00:05] +2017-01-29 [09:00:05] +17-01-29 [09:00:05 AM] +-- format_timespan() +2:3:4.1234500 +29.09:00:05:12 +029.9:00:05 [1234500] +05/5-29:29,029.29_9[12]121234512 09 0 00 +-- make_timespan() +01:12:00 +01:12:30 +1.12:30:55.1230000 +-- ago() +-1.00:00:00 +-- datetime_diff() +17 2 13 4 29 2 5 10 +-86400000000700 +86400000000 +100 +-- datetime_part() +2017 4 10 44 30 303 01 02 03 +765 +765432 +765432100 +-- datetime_add() +2018-01-01 00:00:00.000000000 2017-04-01 00:00:00.000000000 2017-02-01 00:00:00.000000000 2017-01-08 00:00:00.000000000 2017-01-02 00:00:00.000000000 2017-01-01 01:00:00.000000000 2017-01-01 00:01:00.000000000 2017-01-01 00:00:01.000000000 +2017-01-01 00:00:00.000000000 +2017-01-01 00:00:00.001000000 +2017-01-01 00:00:00.000001000 diff --git a/tests/queries/0_stateless/02366_kql_func_datetime.sql b/tests/queries/0_stateless/02366_kql_func_datetime.sql new file mode 100644 index 000000000000..4aa00b61e6aa --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_datetime.sql @@ -0,0 +1,102 @@ +set dialect = 'kusto'; +set interval_output_format = 'kusto'; + +print '-- dayofmonth()'; +print dayofmonth(datetime(2015-12-31)); +print '-- dayofweek()'; +print 
dayofweek(datetime(2015-12-31)); +print dayofweek(datetime(2015-12-14 18:54:00)) + 1d; +print dayofweek(datetime(2015-12-18 18:54:00)) - dayofweek(datetime(2015-12-14 18:54:00)); +print '-- dayofyear()'; +print dayofyear(datetime(2015-12-31)); +print '-- getmonth()'; +print getmonth(datetime(2015-10-12)); +print '-- getyear()'; +print getyear(datetime(2015-10-12)); +print '-- hoursofday()'; +print hourofday(datetime(2015-12-31 23:59:59.9)); +print '-- startofday()'; +print startofday(datetime(2017-01-01 10:10:17)); +print startofday(datetime(2017-01-01 10:10:17), -1); +print startofday(datetime(2017-01-01 10:10:17), 1); +print '-- endofday()'; +print endofday(datetime(2017-01-01 10:10:17)); +print endofday(datetime(2017-01-01 10:10:17), -1); +print endofday(datetime(2017-01-01 10:10:17), 1); +print '-- endofmonth()'; +print endofmonth(datetime(2017-01-01 10:10:17)); +print endofmonth(datetime(2017-01-01 10:10:17), -1); +print endofmonth(datetime(2017-01-01 10:10:17), 1); +print endofmonth(datetime(2022-09-23)); +print '-- startofweek()'; +print startofweek(datetime(2017-01-01 10:10:17)); +print startofweek(datetime(2017-01-01 10:10:17), -1); +print startofweek(datetime(2017-01-01 10:10:17), 1); +print '-- endofweek()'; +print endofweek(datetime(2017-01-01 10:10:17)); +print endofweek(datetime(2017-01-01 10:10:17), -1); +print endofweek(datetime(2017-01-01 10:10:17), 1); +print '-- startofyear()'; +print startofyear(datetime(2017-01-01 10:10:17)); +print startofyear(datetime(2017-01-01 10:10:17), -1); +print startofyear(datetime(2017-01-01 10:10:17), 1); +print '-- endofyear()'; +print endofyear(datetime(2017-01-01 10:10:17)); +print endofyear(datetime(2017-01-01 10:10:17), -1); +print endofyear(datetime(2017-01-01 10:10:17), 1); +print '-- unixtime_seconds_todatetime()'; +print unixtime_seconds_todatetime(1546300800); +print unixtime_seconds_todatetime(86400); +print unixtime_seconds_todatetime(-86400); +print '-- unixtime_microseconds_todatetime'; +print 
unixtime_microseconds_todatetime(1546300800000000); +print '-- unixtime_milliseconds_todatetime()'; +print unixtime_milliseconds_todatetime(1546300800000); +print '-- unixtime_nanoseconds_todatetime()'; +print unixtime_nanoseconds_todatetime(1546300800000000000); +print '-- weekofyear()'; +print week_of_year(datetime(2000-01-01)); +print '-- monthofyear()'; +print monthofyear(datetime(2015-12-31)); +print '-- weekofyear()'; +print week_of_year(datetime(2000-01-01)); +print '-- now()'; +print getyear(now(-2d))>1900; +print '-- make_datetime()'; +print make_datetime(2017,10,01,12,10); +print make_datetime(2300,10,01,12,10); +print make_datetime(2020,14,30,12,10); +print make_datetime(2020,10,35,12,10); +print year_month_day_hour_minute = make_datetime(2017,10,01,12,10); +print year_month_day_hour_minute_second = make_datetime(2017,10,01,12,11,0.1234567); +print '-- format_datetime'; +print format_datetime(datetime(2015-12-14 02:03:04.12345), 'y-M-d h:m:s.fffffff'); +print format_datetime(datetime(2017-01-29 09:00:05),'yy-MM-dd [HH:mm:ss]'); +print format_datetime(datetime(2017-01-29 09:00:05), 'yyyy-M-dd [H:mm:ss]'); +print format_datetime(datetime(2017-01-29 09:00:05), 'yy-MM-dd [hh:mm:ss tt]'); +print '-- format_timespan()'; +print format_timespan(time('14.02:03:04.12345'), 'h:m:s.fffffff'); +print format_timespan(time('29.09:00:05.12345'), 'dd.hh:mm:ss:FF'); +print format_timespan(time('29.09:00:05.12345'), 'ddd.h:mm:ss [fffffff]'); +print format_timespan(time('29.09:00:05.12345'), 'ss/s-d:dd,ddd.dd_h[ff]FFfffffFF HH m mm'); +print '-- make_timespan()'; +print make_timespan(1,12); +print make_timespan(1,12,30); +print make_timespan(1,12,30,55.123); +print '-- ago()'; +print ago(1d) - now(); +print '-- datetime_diff()'; +print year = datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31)), quarter = datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30)), month = datetime_diff('month',datetime(2017-01-01),datetime(2015-12-30)), week = 
datetime_diff('week',datetime(2017-10-29 00:00),datetime(2017-09-30 23:59)), day = datetime_diff('day',datetime(2017-10-29 00:00),datetime(2017-09-30 23:59)), hour = datetime_diff('hour',datetime(2017-10-31 01:00),datetime(2017-10-30 23:59)), minute = datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59)), second = datetime_diff('second',datetime(2017-10-30 23:00:10.100),datetime(2017-10-30 23:00:00.900)); +print datetime_diff('nanosecond',datetime(2017-10-29 23:00:00.0000000),datetime(2017-10-30 23:00:00.0000007)); +print datetime_diff('microsecond',datetime(2017-10-30 23:00:00.0000000),datetime(2017-10-29 23:00:00.0000007)); +print datetime_diff('millisecond',datetime(2017-10-30 23:00:00.200100),datetime(2017-10-30 23:00:00.10090)); +print '-- datetime_part()'; +print year = datetime_part("year", datetime(2017-10-30 01:02:03.7654321)),quarter = datetime_part("quarter", datetime(2017-10-30 01:02:03.7654321)),month = datetime_part("month", datetime(2017-10-30 01:02:03.7654321)),weekOfYear = datetime_part("week_of_year", datetime(2017-10-30 01:02:03.7654321)),day = datetime_part("day", datetime(2017-10-30 01:02:03.7654321)),dayOfYear = datetime_part("dayOfYear", datetime(2017-10-30 01:02:03.7654321)),hour = datetime_part("hour", datetime(2017-10-30 01:02:03.7654321)),minute = datetime_part("minute", datetime(2017-10-30 01:02:03.7654321)),second = datetime_part("second", datetime(2017-10-30 01:02:03.7654321)); +print datetime_part("millisecond", datetime(2017-10-30 01:02:03.7654321)); +print datetime_part("microsecond", datetime(2017-10-30 01:02:03.7654321)); +print datetime_part("nanosecond", datetime(2017-10-30 01:02:03.7654321)); +print '-- datetime_add()'; +print year = datetime_add('year',1,make_datetime(2017,1,1)),quarter = datetime_add('quarter',1,make_datetime(2017,1,1)),month = datetime_add('month',1,make_datetime(2017,1,1)),week = datetime_add('week',1,make_datetime(2017,1,1)),day = 
datetime_add('day',1,make_datetime(2017,1,1)),hour = datetime_add('hour',1,make_datetime(2017,1,1)),minute = datetime_add('minute',1,make_datetime(2017,1,1)),second = datetime_add('second',1,make_datetime(2017,1,1)); +print datetime_add('nanosecond',1,make_datetime(2017,1,1)); +print datetime_add('millisecond',1,make_datetime(2017,1,1)); +print datetime_add('microsecond',1,make_datetime(2017,1,1)); diff --git a/tests/queries/0_stateless/02366_kql_func_dynamic.reference b/tests/queries/0_stateless/02366_kql_func_dynamic.reference new file mode 100644 index 000000000000..a459d94adb60 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_dynamic.reference @@ -0,0 +1,189 @@ +-- constant index value +1 c ['A',NULL,'C'] +-- array_length() +4 +3 +-- array_sum() +10 +11 +1 +\N +0 +4 +-- array_index_of() +3 +1 +-- array_iif() +[1,5,3] +[1,5,3] +[1,5,NULL] +[NULL,NULL,NULL] +[1,NULL] +['1','2',NULL,'2',NULL] +['1','2',NULL,'2',NULL] +['1','2',NULL,'2',NULL] +[1.1,999.99,3.3,999.99,5.5] +[90,3,90,NULL,90] +[1,4,5,8,9] +-- array_concat() +[1,2,3,4,5,6] +-- array_reverse() +[] +[1] +[4,3,2,1] +['example','an','is','this'] +-- array_rotate_left() +[] +[] +[] +[3,4,5,1,2] +[1,2,3,4,5] +[3,4,5,1,2] +[4,5,1,2,3] +[1,2,3,4,5] +[4,5,1,2,3] +-- array_rotate_right() +[] +[] +[] +[4,5,1,2,3] +[1,2,3,4,5] +[4,5,1,2,3] +[3,4,5,1,2] +[1,2,3,4,5] +[3,4,5,1,2] +-- array_shift_left() +[] +[] +[] +[3,4,5,NULL,NULL] +[NULL,NULL,1,2,3] +[3,4,5,-1,-1] +['c','',''] +-- array_shift_right() +[] +[] +[] +[3,4,5,NULL,NULL] +[NULL,NULL,1,2,3] +[3,4,5,-1,-1] +['c','',''] +-- array_slice() +[2,3] +[3,4] +-- array_split() +[[1],[2,3],[4,5]] +[[1,2],[3,4,5]] +[[1],[2,3],[4,5]] +[[1,2,3,4],[],[4,5]] +-- array_sort_asc() +(['a','c','c','d',NULL]) +([1,2,3,4]) +['a','b','c'] +(['p','q','r'],['hello','clickhouse','world']) +([NULL,'a','c','c','d']) +([NULL,'a','c','c','d']) +([NULL,NULL,NULL]) +[1,2,3,NULL,NULL] +['a','e','b','c','d'] +(['George','John','Paul','Ringo']) 
+(['blue','green','yellow',NULL,NULL]) +([NULL,NULL,'blue','green','yellow']) +-- array_sort_desc() +(['d','c','c','a',NULL]) +([4,3,2,1]) +['c','b','a'] +(['r','q','p'],['world','clickhouse','hello']) +([NULL,'d','c','c','a']) +([NULL,'d','c','c','a']) +([NULL,NULL,NULL]) +[3,2,1,NULL,NULL] +['d','c','b','e','a'] +(['Ringo','Paul','John','George']) +(['yellow','green','blue',NULL,NULL]) +([NULL,NULL,'yellow','green','blue']) +-- jaccard_index() +0.75 +0 +0 +nan +0 +0.75 +0.25 +-- pack_array() +1 2 4 [1,2,4] +['ab','0.0.0.42','4.2'] +-- repeat() +[] +[1,1,1] +['asd','asd','asd'] +['1.00:00:00','1.00:00:00','1.00:00:00'] +[true,true,true] +[NULL] +[NULL] +-- set_difference() +[] +[] +[] +[] +[4,5,6] +[4] +[1,3] +[1,2,3] +['d','s'] +['Chewbacca','Han Solo'] +-- set_has_element() +0 +1 +0 +1 +0 +-- set_intersect() +[] +[1,2,3] +[1,2,3] +[] +[5] +[] +['a'] +['Darth Vader'] +-- set_union() +[] +[1,2,3] +[1,2,3,4,5,6] +[1,2,3,4] +[1,2,3,4,5] +[1,2,3] +['a','d','f','s'] +['Chewbacca','Darth Sidious','Darth Vader','Han Solo'] +-- zip() +[] +[[1,2],[3,4],[5,6]] +[['Darth','Vader','has a suit'],['Master','Yoda','doesn\'t have a suit']] +[[1,10],[2,20],[3,NULL]] +[[NULL,1],[NULL,2],[NULL,3]] +-- array_sort in table() +1 (['CA','Eng','FR','US'],[11,20,12,16],[100,200,300,500]) +2 (['Eng','FR','Gem','Japan'],[10,33,22,31],[210,310,410,510]) +3 (['CA','Eng','Gem','Japan'],[25,11,10,23],[120,0,110,130]) +-- array_sort in table() with condition +1 (['CA','Eng','FR','US'],[11,20,12,16],[100,200,300,500]) +2 (['Eng','FR','Gem','Japan'],[10,33,22,31],[210,310,410,510]) +3 (['CA','Eng','Gem','Japan'],[25,11,10,23],[120,0,110,130]) +-- array_sort as condition +-- array_sort with single alias +1 ['CA','Eng','FR','US'] +2 ['Eng','FR','Gem','Japan'] +3 ['CA','Eng','Gem','Japan'] +1 ['CA','Eng','FR','US'] +2 ['Eng','FR','Gem','Japan'] +3 ['CA','Eng','Gem','Japan'] +-- array_sort with partial alias +1 ['CA','Eng','FR','US'] [11,20,12,16] +2 ['Eng','FR','Gem','Japan'] [10,33,22,31] +3 
['CA','Eng','Gem','Japan'] [25,11,10,23] +-- array_sort with all alias +1 ['CA','Eng','FR','US'] [11,20,12,16] [100,200,300,500] +2 ['Eng','FR','Gem','Japan'] [10,33,22,31] [210,310,410,510] +3 ['CA','Eng','Gem','Japan'] [25,11,10,23] [120,0,110,130] +[[1,2],[1,2],[1,2],[1,2]] diff --git a/tests/queries/0_stateless/02366_kql_func_dynamic.sql b/tests/queries/0_stateless/02366_kql_func_dynamic.sql new file mode 100644 index 000000000000..d96b5628b5d1 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_dynamic.sql @@ -0,0 +1,199 @@ +DROP TABLE IF EXISTS array_test; +CREATE TABLE array_test (floats Array(Float64), + strings Array(String), + nullable_strings Array(Nullable(String)) + ) ENGINE=Memory; +INSERT INTO array_test VALUES([1.0, 2.5], ['a', 'c'], ['A', NULL, 'C']); +DROP TABLE IF EXISTS visit; +CREATE TABLE visit(pageid UInt8, ip_country Array(Nullable(String)), hit Array(Int64),duration Array(Int64)) ENGINE = Memory; +INSERT INTO visit VALUES (1,['CA', 'US','FR','Eng'], [11,16,12,20],[100,500,300,200]); +INSERT INTO visit VALUES (2,['Japan', 'Gem','FR','Eng'], [31,22,33,10],[510,410,310,210]); +INSERT INTO visit VALUES (3,['CA', 'Gem','Japan','Eng'], [25,10,23,11],[120,110,130,000]); +--INSERT INTO visit VALUES (4,['CA', 'Gem',null,'Eng'], [5,10,3,2],[220,320,310,150]); +--INSERT INTO visit VALUES (5,['FR', null,'US','Eng'], [16,12,23,10],[210,250,110,260]); +set dialect = 'kusto'; +set interval_output_format = 'kusto'; + +print '-- constant index value'; +array_test | project floats[0], strings[1], nullable_strings; +print '-- array_length()'; +print array_length(dynamic(['John', 'Denver', 'Bob', 'Marley'])); +print array_length(dynamic([1, 2, 3])); +print array_length(42); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print array_length('a'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- print array_length(dynamic(42)); -> NULL +-- print array_length(dynamic('a')); -> NULL +print '-- array_sum()'; +print array_sum(dynamic([2, 5, 3])); +print 
array_sum(dynamic([2.5, 5.5, 3])); +print array_sum(dynamic([true, false, null])); +print array_sum(dynamic(['Alice', 'Bob'])); +print array_sum(dynamic([null, null, null])); +print array_sum(repeat(1, 4)); +print '-- array_index_of()'; +print array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley'); +print array_index_of(dynamic([1, 2, 3]), 2); +print '-- array_iif()'; +print array_iif(dynamic([true,false,true]), dynamic([1,2,3]), dynamic([4,5,6])); +print array_iif(dynamic([1,0,1]), dynamic([1,2,3]), dynamic([4,5,6])); +print array_iif(dynamic([true,false,true]), dynamic([1,2]), dynamic([4,5,6])); +print array_iif(dynamic(['a','b','c']), dynamic([1,2,3]), dynamic([4,5,6])); +print array_iif(dynamic([true,null]), dynamic([1, 2]), repeat(4, 2)); +print t = array_iif(dynamic([true, false, true, false, true]), dynamic(['1', '3']), '2'); +print t = array_iif(dynamic([10, 0, 5, 0, -4]), dynamic(['1', '3']), '2'); +print t = array_iif(dynamic([2.2, 0, 4.4, 0, 66.7]), dynamic(['1', '3']), '2'); +print t = array_iif(dynamic([true, false, true, false, true]), dynamic([1.1, 2.2, 3.3, 4.4, 5.5]), 999.99); +print t = array_iif(dynamic([true, false, true, false, true]), 90, dynamic([1, 3])); +print t = array_iif(dynamic([true, false, true, false, true]), dynamic([1, 3, 5, 7, 9]), dynamic([2, 4, 6, 8, 10])); +print '-- array_concat()'; +print array_concat(dynamic([1,2,3]),dynamic([4,5,6])); +print '-- array_reverse()'; +print array_reverse(dynamic([])); +print array_reverse(dynamic([1])); +print array_reverse(dynamic([1,2,3,4])); +print array_reverse(dynamic(["this", "is", "an", "example"])); +print '-- array_rotate_left()'; +print array_rotate_left(dynamic([]), 0); +print array_rotate_left(dynamic([]), 500); +print array_rotate_left(dynamic([]), -500); +print array_rotate_left(dynamic([1,2,3,4,5]), 2); +print array_rotate_left(dynamic([1,2,3,4,5]), 5); +print array_rotate_left(dynamic([1,2,3,4,5]), 7); +print array_rotate_left(dynamic([1,2,3,4,5]), -2); +print 
array_rotate_left(dynamic([1,2,3,4,5]), -5); +print array_rotate_left(dynamic([1,2,3,4,5]), -7); +print '-- array_rotate_right()'; +print array_rotate_right(dynamic([]), 0); +print array_rotate_right(dynamic([]), 500); +print array_rotate_right(dynamic([]), -500); +print array_rotate_right(dynamic([1,2,3,4,5]), 2); +print array_rotate_right(dynamic([1,2,3,4,5]), 5); +print array_rotate_right(dynamic([1,2,3,4,5]), 7); +print array_rotate_right(dynamic([1,2,3,4,5]), -2); +print array_rotate_right(dynamic([1,2,3,4,5]), -5); +print array_rotate_right(dynamic([1,2,3,4,5]), -7); +print '-- array_shift_left()'; +print array_shift_left(dynamic([]), 0); +print array_shift_left(dynamic([]), 555); +print array_shift_left(dynamic([]), -555); +print array_shift_left(dynamic([1,2,3,4,5]), 2); +print array_shift_left(dynamic([1,2,3,4,5]), -2); +print array_shift_left(dynamic([1,2,3,4,5]), 2, -1); +print array_shift_left(dynamic(['a', 'b', 'c']), 2); +print '-- array_shift_right()'; +print array_shift_right(dynamic([]), 0); +print array_shift_right(dynamic([]), 555); +print array_shift_right(dynamic([]), -555); +print array_shift_right(dynamic([1,2,3,4,5]), -2); +print array_shift_right(dynamic([1,2,3,4,5]), 2); +print array_shift_right(dynamic([1,2,3,4,5]), -2, -1); +print array_shift_right(dynamic(['a', 'b', 'c']), -2); +print '-- array_slice()'; +print array_slice(dynamic([1,2,3]), 1, 2); +print array_slice(dynamic([1,2,3,4,5]), -3, -2); +print '-- array_split()'; +print array_split(dynamic([1,2,3,4,5]), dynamic([1,-2])); +print array_split(dynamic([1,2,3,4,5]), 2); +print array_split(dynamic([1,2,3,4,5]), dynamic([1,3])); +print array_split(dynamic([1,2,3,4,5]), dynamic([-1,-2])); +print '-- array_sort_asc()'; +print array_sort_asc(dynamic([null, 'd', 'a', 'c', 'c'])); +print array_sort_asc(dynamic([4, 1, 3, 2])); +print array_sort_asc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))[0]; +print array_sort_asc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world']));
+print array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , false); +print array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , 1 > 2); +print array_sort_asc( dynamic([null, null, null]) , false); +print array_sort_asc(dynamic([2, 1, null,3, null]), dynamic([20, 10, 40, 30, 50]), 1 < 2)[0]; +print array_sort_asc(dynamic(['1','3','4','5','2']),dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]))[3]; +print array_sort_asc(split("John,Paul,George,Ringo", ",")); +print array_sort_asc(dynamic([null,"blue","yellow","green",null])); +print array_sort_asc(dynamic([null,"blue","yellow","green",null]), false); +print '-- array_sort_desc()'; +print array_sort_desc(dynamic([null, 'd', 'a', 'c', 'c'])); +print array_sort_desc(dynamic([4, 1, 3, 2])); +print array_sort_desc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))[0]; +print array_sort_desc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world'])); +print array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , false); +print array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , 1 > 2); +print array_sort_desc( dynamic([null, null, null]) , false); +print array_sort_desc(dynamic([2, 1, null,3, null]), dynamic([20, 10, 40, 30, 50]), 1 < 2)[0]; +print array_sort_desc(dynamic(['1','3','4','5','2']),dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]))[3]; +print array_sort_desc(split("John,Paul,George,Ringo", ",")); +print array_sort_desc(dynamic([null,"blue","yellow","green",null])); +print array_sort_desc(dynamic([null,"blue","yellow","green",null]), false); +print '-- jaccard_index()'; +print jaccard_index(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3, 4, 4, 4])); +print jaccard_index(dynamic([1, 2, 3]), dynamic([])); +print jaccard_index(dynamic([]), dynamic([1, 2, 3, 4])); +print jaccard_index(dynamic([]), dynamic([])); +print jaccard_index(dynamic([1, 2, 3]), dynamic([4, 5, 6, 7])); +print jaccard_index(dynamic(['a', 's', 
'd']), dynamic(['f', 'd', 's', 'a'])); +print jaccard_index(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])); +print '-- pack_array()'; +print pack_array(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print x = 1 | extend y = x * 2 | extend z = y * 2 | extend pack_array(x,y,z); +print pack_array(strcat('a', 'b'), format_ipv4(42), tostring(4.2)); +print '-- repeat()'; +print repeat(1, 0); +print repeat(1, 3); +print repeat("asd", 3); +print repeat(timespan(1d), 3); +print repeat(true, 3); +print repeat(1, -3); +print repeat(6.7,-4); +print '-- set_difference()'; +print set_difference(dynamic([]), dynamic([])); +print set_difference(dynamic([]), dynamic([9])); +print set_difference(dynamic([]), dynamic(["asd"])); +print set_difference(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])); +print array_sort_asc(set_difference(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; +print set_difference(dynamic([4]), dynamic([1, 2, 3])); +print array_sort_asc(set_difference(dynamic([1, 2, 3, 4, 5]), dynamic([5]), dynamic([2, 4])))[0]; +print array_sort_asc(set_difference(dynamic([1, 2, 3]), dynamic([])))[0]; +print array_sort_asc(set_difference(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[0]; +print array_sort_asc(set_difference(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[0]; +print '-- set_has_element()'; +print set_has_element(dynamic([]), 9); +print set_has_element(dynamic(["this", "is", "an", "example"]), "example"); +print set_has_element(dynamic(["this", "is", "an", "example"]), "examplee"); +print set_has_element(dynamic([1, 2, 3]), 2); +print set_has_element(dynamic([1, 2, 3, 4.2]), 4); +print '-- set_intersect()'; +print set_intersect(dynamic([]), dynamic([])); +print array_sort_asc(set_intersect(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[0]; +print array_sort_asc(set_intersect(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; +print 
set_intersect(dynamic([4]), dynamic([1, 2, 3])); +print set_intersect(dynamic([1, 2, 3, 4, 5]), dynamic([1, 3, 5]), dynamic([2, 5])); +print set_intersect(dynamic([1, 2, 3]), dynamic([])); +print set_intersect(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])); +print set_intersect(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])); +print '-- set_union()'; +print set_union(dynamic([]), dynamic([])); +print array_sort_asc(set_union(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[0]; +print array_sort_asc(set_union(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; +print array_sort_asc(set_union(dynamic([4]), dynamic([1, 2, 3])))[0]; +print array_sort_asc(set_union(dynamic([1, 3, 4]), dynamic([5]), dynamic([2, 4])))[0]; +print array_sort_asc(set_union(dynamic([1, 2, 3]), dynamic([])))[0]; +print array_sort_asc(set_union(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[0]; +print array_sort_asc(set_union(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[0]; +print '-- zip()'; +print zip(dynamic([]), dynamic([])); +print zip(dynamic([1,3,5]), dynamic([2,4,6])); +print zip(dynamic(['Darth','Master']), dynamic(['Vader','Yoda']), dynamic(['has a suit','doesn\'t have a suit'])); +print zip(dynamic([1,2,3]), dynamic([10,20])); +print zip(dynamic([]), dynamic([1,2,3])); +print '-- array_sort in table()'; +visit | project pageid, array_sort_asc(ip_country, hit, duration) | order by pageid asc; +print '-- array_sort in table() with condition'; +visit | project pageid, array_sort_asc(ip_country, hit, duration, pageid > 4) | order by pageid asc; +print '-- array_sort as condition'; +visit | where isnull(array_sort_asc(ip_country, hit, duration)[2][0]); +print '-- array_sort with single alias'; +visit | project pageid, a = array_sort_asc(ip_country, hit, duration) | order by pageid asc; +visit | project pageid, (a) = array_sort_asc(ip_country, hit, duration) | order by pageid asc; +print 
'-- array_sort with partial alias'; +visit | project pageid, (a,b) = array_sort_asc(ip_country, hit, duration) | order by pageid asc; +print '-- array_sort with all alias'; +visit | project pageid, (a,b,c) = array_sort_asc(ip_country, hit, duration) | order by pageid asc; +print zip(repeat(1,4), repeat(2,4)); diff --git a/tests/queries/0_stateless/02366_kql_func_general.reference b/tests/queries/0_stateless/02366_kql_func_general.reference new file mode 100644 index 000000000000..ed3d0041abe6 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_general.reference @@ -0,0 +1,83 @@ +-- case +Theodore Diaz Skilled Manual Bachelors 28 C +Stephanie Cox Management Bachelors 33 D +Peter Nara Skilled Manual Graduate Degree 26 C +Latoya Shen Professional Graduate Degree 25 C +Joshua Lee Professional Partial College 26 C +Edward Hernandez Skilled Manual High School 36 D +Dalton Wood Professional Partial College 42 D +Christine Nara Skilled Manual Partial College 33 D +Cameron Rodriguez Professional Partial College 28 C +Angel Stewart Professional Partial College 46 D +-- iff +Theodore Diaz Skilled Manual Bachelors 28 bigger +Stephanie Cox Management Bachelors 33 bigger +Peter Nara Skilled Manual Graduate Degree 26 bigger +Latoya Shen Professional Graduate Degree 25 bigger +Joshua Lee Professional Partial College 26 bigger +Edward Hernandez Skilled Manual High School 36 bigger +Dalton Wood Professional Partial College 42 bigger +Christine Nara Skilled Manual Partial College 33 bigger +Cameron Rodriguez Professional Partial College 28 bigger +Angel Stewart Professional Partial College 46 bigger +-- iif +Theodore Diaz Skilled Manual Bachelors 28 bigger +Stephanie Cox Management Bachelors 33 bigger +Peter Nara Skilled Manual Graduate Degree 26 bigger +Latoya Shen Professional Graduate Degree 25 bigger +Joshua Lee Professional Partial College 26 bigger +Edward Hernandez Skilled Manual High School 36 bigger +Dalton Wood Professional Partial College 42 bigger +Christine Nara 
Skilled Manual Partial College 33 bigger +Cameron Rodriguez Professional Partial College 28 bigger +Angel Stewart Professional Partial College 46 bigger +-- lookup +First +default +10 First 1 +11 First 2 +12 First 3 +-- gettype +string +int +array +datetime +-- toscalar #1 -- +5 5asd +-- toscalar #2 -- +1 +3 +5 +7 +9 +-- toscalar #3 -- +2 +-- toscalar #4 -- +1 +-- toscalar #5 -- +Stephanie Cox Management Bachelors 33 +-- toscalar #6 -- +Angel Stewart Professional Partial College 46 +Dalton Wood Professional Partial College 42 +-- toscalar #7 -- +2 +-- toscalar #8 -- +10 +-- not -- +false +true +false +false +false +false +false +false +false +false +false +false +false +NULL +NULL +false +true diff --git a/tests/queries/0_stateless/02366_kql_func_general.sql b/tests/queries/0_stateless/02366_kql_func_general.sql new file mode 100644 index 000000000000..b7f17bbd65ae --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_general.sql @@ -0,0 +1,94 @@ +-- let Customers = datatable (FirstName:string, LastName:string, Occupation:string, Education:string, Age:int) [ +-- 'Theodore', 'Diaz', 'Skilled Manual', 'Bachelors', 28, +-- 'Stephanie', 'Cox', 'Management', 'Bachelors', 33, +-- 'Peter', 'Nara', 'Skilled Manual', 'Graduate Degree', 26, +-- 'Latoya', 'Shen', 'Professional', 'Graduate Degree', 25, +-- 'Joshua', 'Lee', 'Professional', 'Partial College', 26, +-- 'Edward', 'Hernandez', 'Skilled Manual', 'High School', 36, +-- 'Dalton', 'Wood', 'Professional', 'Partial College', 42, +-- 'Christine', 'Nara', 'Skilled Manual', 'Partial College', 33, +-- 'Cameron', 'Rodriguez', 'Professional', 'Partial College', 28, +-- 'Angel', 'Stewart', 'Professional', 'Partial College', 46 +-- ]; + +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), 
('Stephanie','Cox','Management','Bachelors',33), ('Peter','Nara','Skilled Manual','Graduate Degree',26), ('Latoya','Shen','Professional','Graduate Degree',25), ('Joshua','Lee','Professional','Partial College',26), ('Edward','Hernandez','Skilled Manual','High School',36), ('Dalton','Wood','Professional','Partial College',42), ('Christine','Nara','Skilled Manual','Partial College',33), ('Cameron','Rodriguez','Professional','Partial College',28), ('Angel','Stewart','Professional','Partial College',46); + +DROP TABLE IF EXISTS dictionary_source_table; +CREATE TABLE dictionary_source_table +( + key String, + start_range UInt64, + end_range UInt64, + value String, + value_nullable Nullable(String) +) +ENGINE = Memory; +INSERT INTO dictionary_source_table VALUES('1', 10, 20, 'First', 'First'), ('2', 11, 21, 'Second', NULL), ('3', 12, 22, 'Third', 'Third'); + +CREATE DICTIONARY dictionary_table +( + key String, + start_range UInt64, + end_range UInt64, + value String, + value_nullable Nullable(String) +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'dictionary_source_table')) +LIFETIME(MIN 1 MAX 1000) +LAYOUT(FLAT()); + +set dialect='kusto'; + +print '-- case'; +Customers | extend t = case(Age <= 10, "A", Age <= 20, "B", Age <= 30, "C", "D"); +print '-- iff'; +Customers | extend t = iff(Age <= 10, "smaller", "bigger"); +print '-- iif'; +Customers | extend t = iif(Age <= 10, "smaller", "bigger"); +print '-- lookup'; +print lookup('dictionary_table', 'value', '1'); +print lookup('dictionary_table', 'value', '100', 'default'); +dictionary_source_table | project start_range, t = lookup('dictionary_table', 'value', '1'), key; +print '-- gettype'; +Customers | project t = gettype(FirstName) | limit 1; +Customers | project t = gettype(Age) | limit 1; +print t = gettype(range(1, 10)); +print t = gettype(todatetime('2023-09-08')); + +print '-- toscalar #1 --'; +print x = 5 | extend a = toscalar(print 5, 'asd' | project y = strcat(print_0, print_1)); 
+print '-- toscalar #2 --'; +range z from toscalar(print x=1) to toscalar(range x from 1 to 9 step 1 | count) step toscalar(2); +print '-- toscalar #3 --'; +range x from 1 to 2 step 1 | extend x=toscalar(print new_guid()), y=new_guid() | count; +print '-- toscalar #4 --'; +range x from 1 to 2 step 1 | extend x=toscalar(new_guid()), y=new_guid() | distinct x | count; +print '-- toscalar #5 --'; +Customers | where FirstName == toscalar(Customers | where Age > 30 | order by Age asc, FirstName); +print '-- toscalar #6 --'; +Customers | order by Age | limit toscalar(Customers | where Age == 33 | count); +print '-- toscalar #7 --'; +Customers | where Age == toscalar(print 33, 'asd') | count; +print '-- toscalar #8 --'; +Customers | limit toscalar(Customers | where Age > toscalar(toscalar(print 5, 'asd')) | count) | count; + +print '-- not --'; +print t = not(1); +print t = not(0); +print t = not(strlen('abc')); +Customers | project not(Age); +print t = not('hello'); +print t = not(dynamic([1, 2, 3])); +print t = not(true); +print t = not(false); diff --git a/tests/queries/0_stateless/02366_kql_func_has_any_ipv4.reference b/tests/queries/0_stateless/02366_kql_func_has_any_ipv4.reference new file mode 100644 index 000000000000..aaaf086a3c67 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_has_any_ipv4.reference @@ -0,0 +1,48 @@ +-- #1 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 10.0.0.1 1 +09:46:00 10.0.0.300 1.2.3.4 GET /favicon.ico 404 10.0.0.300 0 +09:46:0010.0.0.1 GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1 192.168.1.1 GET /favicon.ico 404 192.168.1.1 1 +-- #2 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 10.0.0.1 1 +09:46:00 10.0.0.300 1.2.3.4 GET /favicon.ico 404 10.0.0.300 0 +09:46:0010.0.0.1 GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1 192.168.1.1 GET /favicon.ico 404 192.168.1.1 1 +-- #3 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 10.0.0.1 1 
+09:46:00 10.0.0.300 1.2.3.4 GET /favicon.ico 404 10.0.0.300 1 +09:46:0010.0.0.1 GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1 192.168.1.1 GET /favicon.ico 404 192.168.1.1 1 +-- #4 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 10.0.0.1 1 +09:46:00 10.0.0.300 1.2.3.4 GET /favicon.ico 404 10.0.0.300 0 +09:46:0010.0.0.1 GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1 192.168.1.1 GET /favicon.ico 404 192.168.1.1 1 +-- #5 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 10.0.0.1 1 +09:46:00 10.0.0.300 1.2.3.4 GET /favicon.ico 404 10.0.0.300 0 +09:46:0010.0.0.1 GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1 192.168.1.1 GET /favicon.ico 404 192.168.1.1 1 +-- #6 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 10.0.0.1 1 +09:46:00 10.0.0.300 1.2.3.4 GET /favicon.ico 404 10.0.0.300 0 +09:46:0010.0.0.1 GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1 192.168.1.1 GET /favicon.ico 404 192.168.1.1 1 +-- #7 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.300 1.2.3.4 GET /favicon.ico 404 10.0.0.300 0 +09:46:0010.0.0.1 GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1GET /favicon.ico 404 10.0.0.1 0 +09:46:00 10.0.0.1 192.168.1.1 GET /favicon.ico 404 192.168.1.1 0 +-- #8 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 ['1.2.3.4','2.3.4.5','10.0.0.1'] 1 +09:46:00 10.0.0.1 GET /favicon.ico 404 ['1.2.3.','2.3.4.','10.0.0.'] 0 +-- #9 -- +09:46:00 10.0.0.1 GET /favicon.ico 404 ['1.2.3.4','2.3.4.5','10.0.0.1'] 1 +09:46:00 10.0.0.1 GET /favicon.ico 404 ['1.2.3.','2.3.4.','10.0.0.'] 1 diff --git a/tests/queries/0_stateless/02366_kql_func_has_any_ipv4.sql b/tests/queries/0_stateless/02366_kql_func_has_any_ipv4.sql new file mode 100644 index 000000000000..214281cb0dea --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_has_any_ipv4.sql @@ -0,0 +1,46 @@ +DROP TABLE IF 
EXISTS IP_STRING; +DROP TABLE IF EXISTS IP_ARRAY; + +CREATE TABLE IP_STRING (haystack String, needle String) ENGINE = Memory; +CREATE TABLE IP_ARRAY (haystack String, needle Array(String)) ENGINE = Memory; +INSERT INTO IP_STRING (haystack, needle) VALUES ('09:46:00 10.0.0.1 GET /favicon.ico 404', '10.0.0.1'), ('09:46:00 10.0.0.300 1.2.3.4 GET /favicon.ico 404', '10.0.0.300'), ('09:46:0010.0.0.1 GET /favicon.ico 404', '10.0.0.1'), ('09:46:00 10.0.0.1GET /favicon.ico 404', '10.0.0.1'), ('09:46:00 10.0.0.1 192.168.1.1 GET /favicon.ico 404', '192.168.1.1'); +INSERT INTO IP_ARRAY (haystack, needle) VALUES ('09:46:00 10.0.0.1 GET /favicon.ico 404', ['1.2.3.4', '2.3.4.5', '10.0.0.1']), ('09:46:00 10.0.0.1 GET /favicon.ico 404', ['1.2.3.', '2.3.4.', '10.0.0.']); + +set dialect='kusto'; +print has_ipv4('X'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print has_any_ipv4('X'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print has_ipv4_prefix('X'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print has_any_ipv4_prefix('X'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +print has_ipv4(1, 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_any_ipv4(1,2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_ipv4_prefix(1,2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_any_ipv4_prefix(1,2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +print has_ipv4('X', 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_any_ipv4('X', 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_ipv4_prefix('X', 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_any_ipv4_prefix('X', 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +print "-- #1 --"; +IP_STRING | extend has_ipv4(haystack, needle); +print "-- #2 --"; +IP_STRING | extend has_any_ipv4(haystack, needle); +print "-- #3 --"; +IP_STRING | extend has_any_ipv4(haystack, needle, '1.2.3.4'); +print "-- #4 --"; +IP_STRING | extend has_any_ipv4(haystack, 'X', needle); 
+print "-- #5 --"; +IP_STRING | extend has_ipv4_prefix(haystack, needle); +print "-- #6 --"; +IP_STRING | extend has_ipv4_prefix(haystack, substring(needle, 0, strlen(needle)-1)); +print "-- #7 --"; +IP_STRING | extend has_ipv4_prefix(haystack, substring(needle, 0, strlen(needle)-2)); +print "-- #8 --"; +IP_ARRAY | extend has_any_ipv4(haystack, dynamic(needle)); +print "-- #9 --"; +IP_ARRAY | extend has_any_ipv4_prefix(haystack, dynamic(needle)); +set dialect='clickhouse'; +DROP TABLE IP_STRING; +DROP TABLE IP_ARRAY; + diff --git a/tests/queries/0_stateless/02366_kql_func_has_any_ipv6.reference b/tests/queries/0_stateless/02366_kql_func_has_any_ipv6.reference new file mode 100644 index 000000000000..f84f7dcff20d --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_has_any_ipv6.reference @@ -0,0 +1,48 @@ +-- #1 -- +09:46:00 2600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +09:46:00 ::ffff:1.2.3.4 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 0 +09:46:002600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 0 +09:46:00 2600:1404:6400:1695::1e89GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 0 +09:46:00 2600:1404:6400:168a::1e89 2600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +-- #2 -- +09:46:00 2600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +09:46:00 ::ffff:1.2.3.4 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 0 +09:46:002600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 0 +09:46:00 2600:1404:6400:1695::1e89GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 0 +09:46:00 2600:1404:6400:168a::1e89 2600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +-- #3 -- +09:46:00 2600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +09:46:00 ::ffff:1.2.3.4 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 0 +09:46:002600:1404:6400:1695::1e89 GET /favicon.ico 
404 2600:1404:6400:1695:0:0:0:1e89 0 +09:46:00 2600:1404:6400:1695::1e89GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 0 +09:46:00 2600:1404:6400:168a::1e89 2600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +-- #4 -- +09:46:00 2600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +09:46:00 ::ffff:1.2.3.4 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 0 +09:46:002600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 0 +09:46:00 2600:1404:6400:1695::1e89GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 0 +09:46:00 2600:1404:6400:168a::1e89 2600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +-- #5 -- +09:46:00 2600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +09:46:00 ::ffff:1.2.3.4 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +09:46:002600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +09:46:00 2600:1404:6400:1695::1e89GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +09:46:00 2600:1404:6400:168a::1e89 2600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +-- #6 -- +09:46:00 2600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +09:46:00 ::ffff:1.2.3.4 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +09:46:002600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +09:46:00 2600:1404:6400:1695::1e89GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +09:46:00 2600:1404:6400:168a::1e89 2600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 1 +-- #7 -- +09:46:00 2600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 0 +09:46:00 ::ffff:1.2.3.4 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 0 +09:46:002600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 0 +09:46:00 2600:1404:6400:1695::1e89GET /favicon.ico 404 
2600:1404:6400:1695:0:0:0:1e89 0 +09:46:00 2600:1404:6400:168a::1e89 2600:1404:6400:1695::1e89 GET /favicon.ico 404 2600:1404:6400:1695:0:0:0:1e89 0 +-- #8 -- +09:46:00 2600:1404:6400:1695::1e89 GET /favicon.ico 404 ['2500:1404:6400:1695:0:0:0:1e89','2600:1404:6400:1695:0:0:0:1e89'] 1 +09:46:00 2600:1404:6400:1695:0:0:0:1e89 GET /favicon.ico 404 ['2400:1404:6400:1695:','2500:1404:6400:1695','2600:1404:6400:1695:'] 0 +-- #9 -- +09:46:00 2600:1404:6400:1695::1e89 GET /favicon.ico 404 ['2500:1404:6400:1695:0:0:0:1e89','2600:1404:6400:1695:0:0:0:1e89'] 1 +09:46:00 2600:1404:6400:1695:0:0:0:1e89 GET /favicon.ico 404 ['2400:1404:6400:1695:','2500:1404:6400:1695','2600:1404:6400:1695:'] 1 diff --git a/tests/queries/0_stateless/02366_kql_func_has_any_ipv6.sql b/tests/queries/0_stateless/02366_kql_func_has_any_ipv6.sql new file mode 100644 index 000000000000..1cfcd438155b --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_has_any_ipv6.sql @@ -0,0 +1,46 @@ +DROP TABLE IF EXISTS IP_STRING; +DROP TABLE IF EXISTS IP_ARRAY; + +CREATE TABLE IP_STRING (haystack String, needle String) ENGINE = Memory; +CREATE TABLE IP_ARRAY (haystack String, needle Array(String)) ENGINE = Memory; +INSERT INTO IP_STRING (haystack, needle) VALUES ('09:46:00 2600:1404:6400:1695::1e89 GET /favicon.ico 404', '2600:1404:6400:1695:0:0:0:1e89'), ('09:46:00 ::ffff:1.2.3.4 GET /favicon.ico 404', '2600:1404:6400:1695:0:0:0:1e89'), ('09:46:002600:1404:6400:1695::1e89 GET /favicon.ico 404', '2600:1404:6400:1695:0:0:0:1e89'), ('09:46:00 2600:1404:6400:1695::1e89GET /favicon.ico 404', '2600:1404:6400:1695:0:0:0:1e89'), ('09:46:00 2600:1404:6400:168a::1e89 2600:1404:6400:1695::1e89 GET /favicon.ico 404', '2600:1404:6400:1695:0:0:0:1e89'); +INSERT INTO IP_ARRAY (haystack, needle) VALUES ('09:46:00 2600:1404:6400:1695::1e89 GET /favicon.ico 404', ['2500:1404:6400:1695:0:0:0:1e89', '2600:1404:6400:1695:0:0:0:1e89']), ('09:46:00 2600:1404:6400:1695:0:0:0:1e89 GET /favicon.ico 404', ['2400:1404:6400:1695:', 
'2500:1404:6400:1695', '2600:1404:6400:1695:']); + +set dialect='kusto'; +print has_ipv6('X'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print has_any_ipv6('X'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print has_ipv6_prefix('X'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print has_any_ipv6_prefix('X'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +print has_ipv6(1, 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_any_ipv6(1,2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_ipv6_prefix(1,2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_any_ipv6_prefix(1,2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +print has_ipv6('X', 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_any_ipv6('X', 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_ipv6_prefix('X', 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print has_any_ipv6_prefix('X', 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +print "-- #1 --"; +IP_STRING | extend has_ipv6(haystack, needle); +print "-- #2 --"; +IP_STRING | extend has_any_ipv6(haystack, needle); +print "-- #3 --"; +IP_STRING | extend has_any_ipv6(haystack, needle, '0:0:0:0:0:ffff:1.2.4.5'); +print "-- #4 --"; +IP_STRING | extend has_any_ipv6(haystack, 'X', needle); +print "-- #5 --"; +IP_STRING | extend has_ipv6_prefix(haystack, needle); +print "-- #6 --"; +IP_STRING | extend has_ipv6_prefix(haystack, substring(needle, 0, strlen(needle)-4)); +print "-- #7 --"; +IP_STRING | extend has_ipv6_prefix(haystack, substring(needle, 0, strlen(needle)-5)); +print "-- #8 --"; +IP_ARRAY | extend has_any_ipv6(haystack, dynamic(needle)); +print "-- #9 --"; +IP_ARRAY | extend has_any_ipv6_prefix(haystack, dynamic(needle)); +set dialect='clickhouse'; +DROP TABLE IP_STRING; +DROP TABLE IP_ARRAY; + diff --git a/tests/queries/0_stateless/02366_kql_func_hash.reference b/tests/queries/0_stateless/02366_kql_func_hash.reference new file mode 100644 index 000000000000..05f4c6121eae 
--- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_hash.reference @@ -0,0 +1,17 @@ + -- hash -- +1846988464401551951 +51 +1380966698541616202 +-8804195676797548855 +9185342943168159635 +-8804195676797548855 +-6832119211494701554 +61 +61 +61 + -- hash_sha256 -- +78ae647dc5544d227130a0682a51e30bc7777fbb6d8a8f17007463a3ecd1d524 +ba666752dc1a20eb750b0eb64e780cc4c968bc9fb8813461c1d7e750f302d71d +6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b +\N +1bad6b8cf97131fceab8543e81f7757195fbb1d36b376ee994ad1cf17699c464 diff --git a/tests/queries/0_stateless/02366_kql_func_hash.sql b/tests/queries/0_stateless/02366_kql_func_hash.sql new file mode 100644 index 000000000000..44fcd29f543d --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_hash.sql @@ -0,0 +1,20 @@ +set dialect='kusto'; +print ' -- hash --'; +print hash('World'); +print hash('World', 100); +print hash(datetime("2015-01-01")); +print hash(-1); +print hash(int(-1)); +print hash(long(-1)); +print hash(real(-1)); +print hash(-1, 100); +print hash(-1, -1); -- { serverError ARGUMENT_OUT_OF_BOUND } +print hash(-1, 'World'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print hash(-1, long(100)); +print hash(-1, int(100)); +print ' -- hash_sha256 --'; +print hash_sha256('World'); +print hash_sha256(datetime(2020-01-01)); +print hash_sha256(1); +print hash_sha256(''); +print hash_sha256(-1); diff --git a/tests/queries/0_stateless/02366_kql_func_ip.reference b/tests/queries/0_stateless/02366_kql_func_ip.reference new file mode 100644 index 000000000000..fdba4622c9a9 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_ip.reference @@ -0,0 +1,123 @@ +-- ipv4_is_private(\'127.0.0.1\') +0 +-- ipv4_is_private(\'10.1.2.3\') +1 +-- ipv4_is_private(\'192.168.1.1/24\') +1 +ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\')) +1 +-- ipv4_is_private(\'abc\') +\N +-- ipv4_netmask_suffix(\'192.168.1.1/24\') +24 +-- ipv4_netmask_suffix(\'192.168.1.1\') +32 +-- 
ipv4_netmask_suffix(\'127.0.0.1/16\') +16 +-- ipv4_netmask_suffix(\'abc\') +\N +ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\')) +16 +-- ipv4_is_in_range(\'127.0.0.1\', \'127.0.0.1\') +1 +-- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\') +1 +-- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\') +0 +-- ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\') +0 +-- ipv4_is_in_range(\'abc\', \'127.0.0.1\') +\N +-- parse_ipv6(127.0.0.1) +0000:0000:0000:0000:0000:ffff:7f00:0001 +-- parse_ipv6(fe80::85d:e82c:9446:7994) +fe80:0000:0000:0000:085d:e82c:9446:7994 +-- parse_ipv4(\'127.0.0.1\') +2130706433 +-- parse_ipv4(\'192.1.168.1\') < parse_ipv4(\'192.1.168.2\') +1 +-- parse_ipv4(arrayStringConcat([\'127\', \'0\', \'0\', \'1\'], \'.\')) +-- parse_ipv4_mask(\'127.0.0.1\', 24) == 2130706432 +1 +-- parse_ipv4_mask(\'abc\', 31) +\N +-- parse_ipv4_mask(\'192.1.168.2\', 1000) +\N +-- parse_ipv4_mask(\'192.1.168.2\', 31) == parse_ipv4_mask(\'192.1.168.3\', 31) +1 +-- ipv4_is_match(\'127.0.0.1\', \'127.0.0.1\') +1 +-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\') +0 +-- ipv4_is_match(\'192.168.1.1/24\', \'192.168.1.255/24\') +1 +-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\', 24) +1 +-- ipv4_is_match(\'abc\', \'def\', 24) +\N +-- ipv4_compare() +0 +-1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +-- format_ipv4() +192.168.1.0 +192.168.1.1 +192.168.1.0 +192.168.1.0 +1 +1 +127.0.0.0 +-- format_ipv4_mask() +192.168.1.0/24 +192.168.1.0/24 +192.168.1.0/24 +192.168.1.1/32 +192.168.1.0/24 +1 +1 +127.0.0.0/24 +-- parse_ipv6_mask() +0000:0000:0000:0000:0000:0000:0000:0000 +fe80:0000:0000:0000:085d:e82c:9446:7900 +0000:0000:0000:0000:0000:ffff:c0a8:ff00 +0000:0000:0000:0000:0000:ffff:c0a8:ff00 +0000:0000:0000:0000:0000:ffff:ffff:ffff +fe80:0000:0000:0000:085d:e82c:9446:7994 +fe80:0000:0000:0000:085d:e82c:9446:7900 +0000:0000:0000:0000:0000:ffff:c0a8:ffff +0000:0000:0000:0000:0000:ffff:c0a8:ff00 +-- ipv6_is_match() +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git 
a/tests/queries/0_stateless/02366_kql_func_ip.sql b/tests/queries/0_stateless/02366_kql_func_ip.sql new file mode 100644 index 000000000000..8123bd6a3d11 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_ip.sql @@ -0,0 +1,129 @@ +set dialect='kusto'; +print '-- ipv4_is_private(\'127.0.0.1\')'; +print ipv4_is_private('127.0.0.1'); +print '-- ipv4_is_private(\'10.1.2.3\')'; +print ipv4_is_private('10.1.2.3'); +print '-- ipv4_is_private(\'192.168.1.1/24\')'; +print ipv4_is_private('192.168.1.1/24'); +print 'ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\'))'; +print ipv4_is_private(strcat('192.','168.','1.','1','/24')); +print '-- ipv4_is_private(\'abc\')'; +print ipv4_is_private('abc'); -- == null + +print '-- ipv4_netmask_suffix(\'192.168.1.1/24\')'; +print ipv4_netmask_suffix('192.168.1.1/24'); -- == 24 +print '-- ipv4_netmask_suffix(\'192.168.1.1\')'; +print ipv4_netmask_suffix('192.168.1.1'); -- == 32 +print '-- ipv4_netmask_suffix(\'127.0.0.1/16\')'; +print ipv4_netmask_suffix('127.0.0.1/16'); -- == 16 +print '-- ipv4_netmask_suffix(\'abc\')'; +print ipv4_netmask_suffix('abc'); -- == null +print 'ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\'))'; +print ipv4_netmask_suffix(strcat('127.', '0.', '0.1/16')); -- == 16 + +print '-- ipv4_is_in_range(\'127.0.0.1\', \'127.0.0.1\')'; +print ipv4_is_in_range('127.0.0.1', '127.0.0.1'); -- == true +print '-- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\')'; +print ipv4_is_in_range('192.168.1.6', '192.168.1.1/24'); -- == true +print '-- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\')'; +print ipv4_is_in_range('192.168.1.1', '192.168.2.1/24'); -- == false +print '-- ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\')'; +print ipv4_is_in_range(strcat('192.','168.', '1.1'), '192.168.2.1/24'); -- == false +print '-- ipv4_is_in_range(\'abc\', \'127.0.0.1\')'; -- == null +print ipv4_is_in_range('abc', '127.0.0.1'); + +print '-- parse_ipv6(127.0.0.1)'; +print 
parse_ipv6('127.0.0.1'); +print '-- parse_ipv6(fe80::85d:e82c:9446:7994)'; +print parse_ipv6('fe80::85d:e82c:9446:7994'); +print '-- parse_ipv4(\'127.0.0.1\')'; +print parse_ipv4('127.0.0.1'); +print '-- parse_ipv4(\'192.1.168.1\') < parse_ipv4(\'192.1.168.2\')'; +print parse_ipv4('192.1.168.1') < parse_ipv4('192.1.168.2'); +print '-- parse_ipv4(arrayStringConcat([\'127\', \'0\', \'0\', \'1\'], \'.\'))'; +print parse_ipv4(arrayStringConcat(['127', '0', '0', '1'], '.')); -- { clientError UNKNOWN_FUNCTION } + +print '-- parse_ipv4_mask(\'127.0.0.1\', 24) == 2130706432'; +print parse_ipv4_mask('127.0.0.1', 24) == 2130706432; +print '-- parse_ipv4_mask(\'abc\', 31)'; +print parse_ipv4_mask('abc', 31); -- == null +print '-- parse_ipv4_mask(\'192.1.168.2\', 1000)'; +print parse_ipv4_mask('192.1.168.2', 1000); -- == null +print '-- parse_ipv4_mask(\'192.1.168.2\', 31) == parse_ipv4_mask(\'192.1.168.3\', 31)'; +print parse_ipv4_mask('192.1.168.2', 31) == parse_ipv4_mask('192.1.168.3', 31); +print '-- ipv4_is_match(\'127.0.0.1\', \'127.0.0.1\')'; +print ipv4_is_match('127.0.0.1', '127.0.0.1'); +print '-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\')'; +print ipv4_is_match('192.168.1.1', '192.168.1.255'); +print '-- ipv4_is_match(\'192.168.1.1/24\', \'192.168.1.255/24\')'; +print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24'); +print '-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\', 24)'; +print ipv4_is_match('192.168.1.1', '192.168.1.255', 24); +print '-- ipv4_is_match(\'abc\', \'def\', 24)'; +print ipv4_is_match('abc', 'def', 24); +print '-- ipv4_compare()'; +print ipv4_compare('127.0.0.1', '127.0.0.1'); +print ipv4_compare('192.168.1.1', '192.168.1.255'); +print ipv4_compare('192.168.1.255', '192.168.1.1'); +print ipv4_compare('192.168.1.1/24', '192.168.1.255/24'); +print ipv4_compare('192.168.1.1', '192.168.1.255', 24); +print ipv4_compare('192.168.1.1/24', '192.168.1.255'); +print ipv4_compare('192.168.1.1', '192.168.1.255/24'); +print ipv4_compare('192.168.1.1/30', 
'192.168.1.255/24'); +print ipv4_compare('192.168.1.1', '192.168.1.0', 31); +print ipv4_compare('192.168.1.1/24', '192.168.1.255', 31); +print ipv4_compare('192.168.1.1', '192.168.1.255', 24); +print '-- format_ipv4()'; +print format_ipv4('192.168.1.255', 24); +print format_ipv4('192.168.1.1', 32); +print format_ipv4('192.168.1.1/24', 32); +print format_ipv4(3232236031, 24); +print format_ipv4('192.168.1.1/24', -1) == ''; +print format_ipv4('abc', 24) == ''; +print format_ipv4(strcat('127.0', '.0.', '1', '/32'), 12 + 12); +print '-- format_ipv4_mask()'; +print format_ipv4_mask('192.168.1.255', 24); +print format_ipv4_mask(3232236031, 24); +print format_ipv4_mask('192.168.1.1', 24); +print format_ipv4_mask('192.168.1.1', 32); +print format_ipv4_mask('192.168.1.1/24', 32); +print format_ipv4_mask('192.168.1.1/24', -1) == ''; +print format_ipv4_mask('abc', 24) == ''; +print format_ipv4_mask(strcat('127.0', '.0.', '1', '/32'), 12 + 12); +print '-- parse_ipv6_mask()'; +print parse_ipv6_mask("127.0.0.1", 24); +print parse_ipv6_mask("fe80::85d:e82c:9446:7994", 120); +print parse_ipv6_mask("192.168.255.255", 120); +print parse_ipv6_mask("192.168.255.255/24", 124); +print parse_ipv6_mask("255.255.255.255", 128); +print parse_ipv6_mask("fe80::85d:e82c:9446:7994", 128); +print parse_ipv6_mask("fe80::85d:e82c:9446:7994/120", 124); +print parse_ipv6_mask("::192.168.255.255", 128); +print parse_ipv6_mask("::192.168.255.255/24", 128); +print '-- ipv6_is_match()'; +print ipv6_is_match('::ffff:7f00:1', '127.0.0.1') == true; +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') == false; +print ipv6_is_match('192.168.1.1/24', '192.168.1.255/24') == true; +print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == true; +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == true; +print ipv6_is_match('192.168.1.1', '192.168.1.1'); -- // Equal IPs +print ipv6_is_match('192.168.1.1/24', '192.168.1.255'); 
-- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.255/24'); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1/30', '192.168.1.255/24'); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7994'); -- // Equal IPs +print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7998/120'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998/120'); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '::ffff:c0a8:0101'); -- // Equal IPs +print ipv6_is_match('192.168.1.1/24', '::ffff:c0a8:01ff'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('::ffff:c0a8:0101', '192.168.1.255/24'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('::192.168.1.1/30', '192.168.1.255/24'); -- // 24 bit IP-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.0', 31); -- // 31 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1/24', '192.168.1.255', 31); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('192.168.1.1', '192.168.1.255', 24); -- // 24 bit IP4-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127); -- // 127 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7998', 120); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998', 127); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('192.168.1.1/24', '::ffff:c0a8:01ff', 127); -- // 127 bit IP6-prefix is used for comparison +print 
ipv6_is_match('::ffff:c0a8:0101', '192.168.1.255', 120); -- // 120 bit IP6-prefix is used for comparison +print ipv6_is_match('::192.168.1.1/30', '192.168.1.255/24', 127); -- // 120 bit IP6-prefix is used for comparison \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_func_math.reference b/tests/queries/0_stateless/02366_kql_func_math.reference new file mode 100644 index 000000000000..abcf6931d42d --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_math.reference @@ -0,0 +1,100 @@ +-- isnan -- +1 +0 +0 +-- abs -- +5 +-- ceiling -- +-1 +0 +1 +-- exp -- +7.389056098924109 +1.6487212707014907 +0.3678794411711252 +-- exp2 -- +4 +1.4142135623730951 +0.5 +-- exp10 -- +1000 +3.162277660168379 +0.001 +-- log -- +1.6094379132876024 +-0.6931471805599453 +nan +-- log2 -- +2.321928094887362 +-1 +nan +-- log10 -- +0.6989700043360189 +-0.3010299956639812 +nan +-- pow -- +8 +0.7071067811865476 +-1 +-- sqrt -- +16 +nan +0.7071067811865476 +-- acos -- +0 +2.037561665842193 +-- asin -- +1.5707963267948966 +0.5235987755982989 +-- atan -- +0.7853981633974483 +0.4636476090008061 +-- atan2 -- +2.356194490192345 +-0.7853981633974483 +-- cos -- +0.5403023058681398 +0.9004471023526769 +-- cot -- +0.6420926159343306 +-2.0701573613012125 +inf +-- degrees -- +45 +-- gamma -- +1 +-3.591387263852389 +-- isfinite -- +0 +-- isinf -- +1 +-- loggamma -- +3.1780538303479458 +1.2785385523288975 +-- max_of -- +17 +test +-- min_of -- +-3 +abc +-- pi -- +3.141592653589793 +-- radians -- +1.5707963267948966 +3.141592653589793 +6.283185307179586 +-- rand -- +1 +1 +-- round -- +2.2 +-- sign -- +-1 +0 +1 +-- sin -- +0.8414709848078965 +-0.43496553411123023 +-- tan -- +1.5574077246549023 +-0.4830550656165784 diff --git a/tests/queries/0_stateless/02366_kql_func_math.sql b/tests/queries/0_stateless/02366_kql_func_math.sql new file mode 100644 index 000000000000..e5f3585ec8f7 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_math.sql @@ -0,0 +1,135 @@ +set 
dialect = 'kusto'; +print '-- isnan --'; +print isnan(double(nan)); +print isnan(4.2); +print isnan(4); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print isnan(real(+inf)); +print isnan(dynamic(null)); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print '-- abs --'; +print abs(-5); +print abs('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print abs(1d); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- ceiling --'; +print ceiling(-1.1); +print ceiling(0); +print ceiling(0.9); +print ceiling('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- exp --'; +print exp(2); +print exp(0.5); +print exp(-1); +print exp('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- exp2 --'; +print exp2(2); +print exp2(0.5); +print exp2(-1); +print exp2('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- exp10 --'; +print exp10(3); +print exp10(0.5); +print exp10(-3); +print exp10('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- log --'; +print log(5); +print log(0.5); +print log(-5); +print log('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- log2 --'; +print log2(5); +print log2(0.5); +print log2(-5); +print log2('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- log10 --'; +print log10(5); +print log10(0.5); +print log10(-5); +print log10('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- pow --'; +print pow(2, 3); +print pow(0.5, 0.5); +print pow(-1, -1); +print pow('test', 'test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- sqrt --'; +print sqrt(256); +print sqrt(-1); +print sqrt(0.5); +print sqrt('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- acos --'; +print acos(1); +print acos(-0.45); +print acos('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- asin --'; +print asin(1); +print asin(0.5); +print asin('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- atan --'; +print atan(1); +print atan(0.5); +print 
atan('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- atan2 --'; +print atan2(1, -1); +print atan2(-0.5, 0.5); +print atan2('test', 'test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- cos --'; +print cos(1); +print cos(-0.45); +print cos('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- cot --'; +print cot(1); +print cot(-0.45); +print cot(0); +print cot('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- degrees --'; +print degrees(pi()/4); +print degrees('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- gamma --'; +print gamma(1); +print gamma(-0.45); +print gamma('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- isfinite --'; +print isfinite(1.0/0.0); +print isfinite('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- isinf --'; +print isinf(1.0/0.0); +print isinf('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- loggamma --'; +print loggamma(5); +print loggamma(-0.45); +print loggamma('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- max_of --'; +print max_of(10, 1, -3, 17); +print max_of('test', 'abc'); +print max_of(1); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print max_of(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print '-- min_of --'; +print min_of(10, 1, -3, 17); +print min_of('test', 'abc'); +print min_of(1); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print min_of(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print '-- pi --'; +print pi(); +print pi('any'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +print '-- radians --'; +print radians(90); +print radians(180); +print radians(360); +print radians('test'); -- { serverError 
ILLEGAL_TYPE_OF_ARGUMENT } +print '-- rand --'; +print x = rand() | project x >= 0 and x <= 1; +print x = rand(1234) | project x >= 0 and x <= 1233; +print '-- round --'; +print round(2.15, 1); +print round('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- sign --'; +print sign(-42); +print sign(0); +print sign(11.2); +print sign('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- sin --'; +print sin(1); +print sin(-0.45); +print sin('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- tan --'; +print tan(1); +print tan(-0.45); +print tan('test'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02366_kql_func_scalar.reference b/tests/queries/0_stateless/02366_kql_func_scalar.reference new file mode 100644 index 000000000000..b3df8bca60e1 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_scalar.reference @@ -0,0 +1,27 @@ +-- bin() +4 +3 +1970-05-11 00:00:00.000000000 +14.00:00:00 +1970-05-11 13:45:07.345000000 +2022-09-26 10:13:23.982000000 +1970-05-11 13:45:07.345623000 +2022-09-26 10:13:23.987232000 +1970-05-11 13:45:07.456336000 +1970-05-11 13:45:07.456345700 +2022-09-26 10:13:23.987234100 +2022-09-26 10:13:23.987234100 +\N +25 1 +26 1 +28 2 +33 1 +38 1 +-- bin_at() +4.5 +-12:00:00 +2017-05-14 12:00:00.000000000 +2017-05-14 00:00:00.000000000 +2018-02-26 15:14:00.000000000 5 +2018-02-24 15:14:00.000000000 3 +2018-02-23 15:14:00.000000000 4 diff --git a/tests/queries/0_stateless/02366_kql_func_scalar.sql b/tests/queries/0_stateless/02366_kql_func_scalar.sql new file mode 100644 index 000000000000..996fab5764f3 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_scalar.sql @@ -0,0 +1,50 @@ +-- datatable (Date:datetime, Num:int) [ +-- '2018-02-24T15:14:00', 3, +-- '2018-02-23T16:14:00', 4, +-- '2018-02-26T15:14:00', 5 +-- ] | summarize sum(Num) by d = todatetime(bin_at(Date, 1d, datetime('2018-02-24 15:14:00'))) | order by d; + +DROP TABLE IF EXISTS Bin_at_test; +CREATE TABLE 
Bin_at_test +( + `Date` DateTime64(9, 'UTC'), + Num Nullable(UInt8) +) ENGINE = Memory; +INSERT INTO Bin_at_test VALUES ('2018-02-24T15:14:00', 3), ('2018-02-23T16:14:00', 4), ('2018-02-26T15:14:00', 5); + +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +set dialect = 'kusto'; +set interval_output_format = 'kusto'; + +print '-- bin()'; +print bin(4.5, 1); +print floor(4.5, 3); +print bin(datetime(1970-05-11 13:45:07), 1d); +print bin(16d, 7d); +print bin(datetime(1970-05-11 13:45:07.345623), 1ms); +print bin(datetime(2022-09-26 10:13:23.987234), 6ms); +print bin(datetime(1970-05-11 13:45:07.345623), 1microsecond); +print bin(datetime(2022-09-26 10:13:23.987234), 6microseconds); +print bin(datetime(1970-05-11 13:45:07.456345672), 16microseconds); +print bin(datetime('1970-05-11 13:45:07.456345672'), 1tick); +print bin(datetime(2022-09-26 10:13:23.987234128), 100nanosecond); +print bin(datetime(2022-09-26 10:13:23.987234128), 1tick); +print bin(datetime(2022-09-26 10:13:23.987234128), 99nanosecond); +Customers | summarize count() by bin(Age, Age) | sort by Age asc; + +print '-- bin_at()'; +print bin_at(6.5, 2.5, 7); +print bin_at(1h, 1d, 12h); +print bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0)); +print bin_at(datetime(2017-05-17 10:20:00.0), 7d, datetime(2017-06-04 00:00:00.0)); +Bin_at_test | summarize sum(Num) by d = todatetime(bin_at(Date, 1d, datetime('2018-02-24 15:14:00'))) | order by d; diff --git 
a/tests/queries/0_stateless/02366_kql_func_string.reference b/tests/queries/0_stateless/02366_kql_func_string.reference new file mode 100644 index 000000000000..46a6b299a39b --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_string.reference @@ -0,0 +1,522 @@ +-- test String Functions -- +-- Customers |where Education contains \'degree\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers |where Education !contains \'degree\' +\N why Professional Partial College 38 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers |where Education contains \'Degree\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers |where Education !contains \'Degree\' +\N why Professional Partial College 38 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where FirstName endswith \'RE\' +Theodore Diaz Skilled Manual Bachelors 28 + +-- Customers | where ! 
FirstName endswith \'RE\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +--Customers | where FirstName endswith_cs \'re\' +Theodore Diaz Skilled Manual Bachelors 28 + +-- Customers | where FirstName !endswith_cs \'re\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation == \'Skilled Manual\' +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation != \'Skilled Manual\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation has \'skilled\' +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation !has \'skilled\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation has \'Skilled\' +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation !has \'Skilled\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation hasprefix_cs \'Ab\' + +-- Customers | where Occupation !hasprefix_cs \'Ab\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- 
Customers | where Occupation hasprefix_cs \'ab\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation !hasprefix_cs \'ab\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation hassuffix \'Ent\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation !hassuffix \'Ent\' +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Occupation hassuffix \'ent\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers | where Occupation hassuffix \'ent\' +Stephanie Cox Management abcd defg Bachelors 33 + +-- Customers |where Education in (\'Bachelors\',\'High School\') +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where Education !in (\'Bachelors\',\'High School\') +\N why Professional Partial College 38 +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where FirstName matches regex \'P.*r\' +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where FirstName startswith \'pet\' +Peter Nara Skilled Manual Graduate Degree 26 + +-- Customers | where FirstName !startswith \'pet\' +Latoya Shen Professional Graduate Degree 25 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where FirstName startswith_cs \'pet\' + +-- Customers | where FirstName !startswith_cs \'pet\' +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 
28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where isempty(LastName) +Apple Skilled Manual Bachelors 28 + +-- print isempty(1.2345) +0 + +-- Customers | where isnotempty(LastName) +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +\N why Professional Partial College 38 + +-- print isnotempty(1.2345) +1 + +-- Customers | where isnotnull(FirstName) +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 + +-- Customers | where isnull(FirstName) +\N why Professional Partial College 38 + +-- Customers | project url_decode(\'https%3A%2F%2Fwww.test.com%2Fhello%20word\') | take 1 +https://www.test.com/hello word + +-- Customers | project url_encode(\'https://www.test.com/hello word\') | take 1 +https%3A%2F%2Fwww.test.com%2Fhello%20word + +-- Customers | project name_abbr = strcat(substring(FirstName,0,3), \' \', substring(LastName,2)) + y +Lat en +Pet ra +The az +Ste x +App + +-- Customers | project name = strcat(FirstName, \' \', LastName) + why +Latoya Shen +Peter Nara +Theodore Diaz +Stephanie Cox +Apple + +-- Customers | project FirstName, strlen(FirstName) +\N \N +Latoya 6 +Peter 5 +Theodore 8 +Stephanie 9 +Apple 5 + +-- Customers | project strrep(FirstName,2,\'_\') +_ +Latoya_Latoya +Peter_Peter +Theodore_Theodore +Stephanie_Stephanie +Apple_Apple + +--print from_str = strrep("ABC", 2) +ABCABC +--print from_int = strrep(123,3,".") +123.123.123 +--print from_time = strrep(3s,2," ") +00:00:03 00:00:03 + +-- Customers | project toupper(FirstName) +\N +LATOYA +PETER +THEODORE +STEPHANIE +APPLE + +-- Customers | project tolower(FirstName) +\N +latoya +peter +theodore +stephanie +apple + + +-- has_all 
(https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator); TODO: subquery not supported yet +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 + +-- has_any (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator); TODO: subquery not supported yet +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Apple Skilled Manual Bachelors 28 + +-- countof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction) +3 +3 +1 +3 +3 +2 +2 +1 +2 + +-- extract ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction) +PINEAPPLE ice cream is 20 +PINEAPPLE +20 + +20 +\N +\N +\N +\N +\N +45.6 +45.6 +alert + +-- extract_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction) +[['T','h','e'],['p','ric','e'],['P','INEAPPL','E'],['i','c','e'],['c','rea','m']] +[['8','d'],['d','7'],['4','1'],['8','3'],['2','9']] + +-- extract_json (https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/extractjsonfunction) + + +John +iPhone +\N +26 +26 +26 +true +26 +\N + +-- split (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction) +['aa','bb'] +['bbb'] +[''] +['a','','b'] +['aa','cc'] +['aabbcc'] +['aaa','bbb','ccc'] +[NULL] + +-- strcat_delim (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction); TODO: only support string now. 
+1-2-Ab +1-2-A-00:00:01 +1-2-A-55 +1-2-A-7.99 +qqqqq fffffff \'asd bcd\' "moo moo " + +-- base64_encode_fromguid() +8jMxriJurkmwahbmqbIS6w== +-- base64_decode_toarray() +[NULL] +[75,117,115,116,111] +[NULL] +-- base64_decode_toguid() +10e99626-bc2b-4c75-bb3e-fe606de25700 +1 +-- base64_encode_tostring + +S3VzdG8x +-- base64_decode_tostring + +Kusto1 +\N +\N +-- parse_url() same as ADX +{"Scheme":"scheme","Host":"host","Port":"1234","Path":"/this/is/a/path","Username":"username","Password":"password","Query Parameters":{"k1":"v1","k2":"v2"},"Fragment":"fragment"} +{"Scheme":"","Host":"","Port":"","Path":"","Username":"","Password":"","Query Parameters":{},"Fragment":""} +{"Scheme":"http","Host":"2001:db8:3333:4444:5555:6666:7777:8888","Port":"1234","Path":"/filepath/index.htm","Username":"","Password":"","Query Parameters":{},"Fragment":""} +{"Scheme":"http","Host":"host","Port":"1234","Path":"","Username":"","Password":"","Query Parameters":{},"Fragment":""} +{"Scheme":"http","Host":"","Port":"","Path":"/this/is/a/path/index.htm","Username":"","Password":"","Query Parameters":{},"Fragment":""} +{"Scheme":"http","Host":"","Port":"","Path":"","Username":"","Password":"","Query Parameters":{},"Fragment":"fragment"} +{"Scheme":"http","Host":"host","Port":"abcd","Path":"","Username":"","Password":"","Query Parameters":{},"Fragment":""} +{"Scheme":"http","Host":"host","Port":"","Path":"/filepath","Username":"","Password":"","Query Parameters":{"arg":":bogus@some"},"Fragment":""} +{"Scheme":"http","Host":"","Port":"","Path":"","Username":"username","Password":"password","Query Parameters":{},"Fragment":""} +-- parse_url() differs from ADX +{"Scheme":"http","Host":"host","Port":"1234","Path":"/","Username":"","Password":"","Query Parameters":{},"Fragment":""} +{"Scheme":"http","Host":"","Port":"1234","Path":"/","Username":"","Password":"","Query Parameters":{},"Fragment":""} +{"Scheme":"http","Host":"","Port":"","Path":"","Username":"","Password":"","Query 
Parameters":{"arg":"value"},"Fragment":""} +{"Scheme":"http","Host":"host","Port":"1234","Path":"","Username":"","Password":"","Query Parameters":{"arg":"value"},"Fragment":""} +{"Scheme":"http","Host":"","Port":"","Path":"/","Username":"","Password":"","Query Parameters":{},"Fragment":""} +{"Scheme":"http","Host":"","Port":"","Path":"/filepath","Username":"","Password":"","Query Parameters":{},"Fragment":""} +{"Scheme":"http","Host":"","Port":"port:","Path":"/anything","Username":"","Password":"","Query Parameters":{"arg":"value"},"Fragment":""} +{"Scheme":"http","Host":"","Port":"port:port","Path":"/anything","Username":"","Password":"","Query Parameters":{"arg":"value"},"Fragment":""} +{"Scheme":"http","Host":"host","Port":"","Path":"/","Username":"","Password":"","Query Parameters":{},"Fragment":""} +-- parse_url() invalid IPV6 +{"Scheme":"http","Host":"2001","Port":"db8:3333:4444:5555:6666:7777:8888:1234","Path":"/filepath/index.htm","Username":"","Password":"","Query Parameters":{},"Fragment":""} +{"Scheme":"http","Host":"2001","Port":"db8:3333:4444:5555:6666:7777:8888","Path":"/filepath/index.htm","Username":"","Password":"","Query Parameters":{},"Fragment":""} +-- parse_urlquery() +{"Query Parameters":{"k1":"v1","k2":"v2","k3":"v3"}} +-- strcat -- +a1235.00:00:00 +a111.01:00:00 +ab\'c +-- strcmp() +0 1 -1 1 +-- substring() +CD +-- translate() +kusto xxx +-- trim() +https://www.ibm.com +Te st1 + asd +asd +sd +-- trim_start() +www.ibm.com +Te st1// $ +asdw + +asd +-- trim_end() +https +- Te st1 +wasd + +asd +-- trim, trim_start, trim_end all at once +--https://bing.com-- -- https://bing.com-- --https://bing.com https://bing.com +-- replace_regex +Number was: 1 +-- has_any_index() +0 1 -1 -1 +-- parse_version() +1000000020000000300000040 +1000000020000000000000000 +1000000020000000000000000 +\N +\N +\N +\N +\N +\N +1000000020000000300000004 +1000000020000000000000000 +1000000020000000300000000 +1000000000000000000000000 +-- parse_json() +[1,2,3] 
+[{"a":123.5,"b":"{\\"c\\":456}"}] +-- parse_command_line() +[NULL] +[NULL] +-- reverse() +321 +43.321 + +dsa +][ +]3,2,1[ +]\'redaV\',\'htraD\'[ +Z0000000.00:00:21T51-01-7102 +00:00:30 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +-- parse_csv() +[''] +['aaa'] +['aa','b','cc'] +['record1','a','b','c'] +-- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction) +9 +2 +2 +2 +-1 +2 +4 +2 +9 +-1 +-1 +-1 +9 +2 +1 +-- indexof_regex -- +0 +3 +-1 +-1 +\N +2 +\N +-1 +2 +\N +-1 +-1 +2 +2 +2 +\N +7 +-1 +5 +6 +6 +3 +1 +34 +0 +0 +-- indexof_regex tabular #1 -- +-1 +-1 +0 +-1 +-1 +-1 +-- has -- +1 +0 +1 +0 +0 +1 +1 +0 +1 +1 +0 + +asdf +asdf.ghkj +asdf.qwer + +asdf.qwer + +asdf.qwer +qwer +-- !has -- +asdfghkj +qwer +qwerqwer + +asdf +asdf.ghkj +asdfghkj +qwer +qwerqwer +-- has_all -- +asdf.qwer +-- has_any -- +asdf +asdf.ghkj +asdf.qwer +qwer +-- string_size -- +5 +15 +-- to_utf8 -- +[9382,9392,9390,9391,9386] +[1511,1493,1505,1496,1493,32,45,32,75,117,115,116,111] +-- make_string -- +Kusto +Kusto +Kusto +Kusto +QRST{م +UV>؄ +WXY$অ +{-م-P-QRST +>-؄-P-UV +$-অ-P-WXY +PQRSTUVWXYZ +PQ +{م爱🐂অ +-- isutf8 -- +1 +1 +-- isascii -- +false +true diff --git a/tests/queries/0_stateless/02366_kql_func_string.sql b/tests/queries/0_stateless/02366_kql_func_string.sql new file mode 100644 index 000000000000..6f4f338fe48a --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_func_string.sql @@ -0,0 +1,509 @@ +-- datatable(FirstName:string, LastName:string, Occupation:string, Education:string, Age:int) [ +-- 'Theodore', 'Diaz', 'Skilled Manual', 'Bachelors', 28, +-- 'Stephanie', 'Cox', 'Management abcd defg', 'Bachelors', 33, +-- 'Peter', 'Nara', 'Skilled Manual', 'Graduate Degree', 26, +-- 'Latoya', 'Shen', 'Professional', 'Graduate Degree', 25, +-- 'Apple', '', 'Skilled Manual', 'Bachelors', 28, +-- '', 'why', 'Professional', 'Partial College', 38 +-- ] + +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers 
+( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +-- datatable (Version:string) [ +-- '1.2.3.4', +-- '1.2', +-- '1.2.3', +-- '1' +-- ] + +DROP TABLE IF EXISTS Versions; +CREATE TABLE Versions +( + Version String +) ENGINE = Memory; +INSERT INTO Versions VALUES ('1.2.3.4'),('1.2'),('1.2.3'),('1'); + +-- datatable (Text:string) [ +-- 'asdf', +-- 'asdf.ghkj', +-- 'asdf.qwer', +-- 'asdfghkj', +-- 'qwer', +-- 'qwerqwer' +-- ] + +drop table if exists StringTest; +create table StringTest +( + Text String +) engine = Memory; + +insert into StringTest values ('asdf'), ('asdf.ghkj'), ('asdf.qwer'), ('asdfghkj'), ('qwer'), ('qwerqwer'); + +DROP TABLE IF EXISTS MyTable; +CREATE TABLE MyTable +( + `col_arr` Array(UInt64), + `col1` Int8, + `col2` Int16, + `col3` Int32, + `col4` Int64, +) ENGINE = Memory; + +INSERT INTO MyTable VALUES (array(81,82,83,84), 123, 1605, 29233 ,128002 ), (array(85, 86), 62, 1540 ,25151 , 49856), (array(87,88,89), 36, 2437, 127801, 50509) + +set dialect = 'kusto'; +set interval_output_format = 'kusto'; + +print '-- test String Functions --'; + +print '-- Customers |where Education contains \'degree\''; +Customers |where Education contains 'degree' | order by LastName; +print ''; +print '-- Customers |where Education !contains \'degree\''; +Customers |where Education !contains 'degree' | order by LastName; +print ''; +print '-- Customers |where Education contains \'Degree\''; +Customers |where Education contains 'Degree' | order by LastName; +print ''; +print '-- Customers |where Education !contains \'Degree\''; +Customers 
|where Education !contains 'Degree' | order by LastName; +print ''; +print '-- Customers | where FirstName endswith \'RE\''; +Customers | where FirstName endswith 'RE' | order by LastName; +print ''; +print '-- Customers | where ! FirstName endswith \'RE\''; +Customers | where FirstName ! endswith 'RE' | order by LastName; +print ''; +print '--Customers | where FirstName endswith_cs \'re\''; +Customers | where FirstName endswith_cs 're' | order by LastName; +print ''; +print '-- Customers | where FirstName !endswith_cs \'re\''; +Customers | where FirstName !endswith_cs 're' | order by LastName; +print ''; +print '-- Customers | where Occupation == \'Skilled Manual\''; +Customers | where Occupation == 'Skilled Manual' | order by LastName; +print ''; +print '-- Customers | where Occupation != \'Skilled Manual\''; +Customers | where Occupation != 'Skilled Manual' | order by LastName; +print ''; +print '-- Customers | where Occupation has \'skilled\''; +Customers | where Occupation has 'skilled' | order by LastName; +print ''; +print '-- Customers | where Occupation !has \'skilled\''; +Customers | where Occupation !has 'skilled' | order by LastName; +print ''; +print '-- Customers | where Occupation has \'Skilled\''; +Customers | where Occupation has 'Skilled'| order by LastName; +print ''; +print '-- Customers | where Occupation !has \'Skilled\''; +Customers | where Occupation !has 'Skilled'| order by LastName; +print ''; +print '-- Customers | where Occupation hasprefix_cs \'Ab\''; +Customers | where Occupation hasprefix_cs 'Ab'| order by LastName; +print ''; +print '-- Customers | where Occupation !hasprefix_cs \'Ab\''; +Customers | where Occupation !hasprefix_cs 'Ab'| order by LastName; +print ''; +print '-- Customers | where Occupation hasprefix_cs \'ab\''; +Customers | where Occupation hasprefix_cs 'ab'| order by LastName; +print ''; +print '-- Customers | where Occupation !hasprefix_cs \'ab\''; +Customers | where Occupation !hasprefix_cs 'ab'| order by LastName; 
+print ''; +print '-- Customers | where Occupation hassuffix \'Ent\''; +Customers | where Occupation hassuffix 'Ent'| order by LastName; +print ''; +print '-- Customers | where Occupation !hassuffix \'Ent\''; +Customers | where Occupation !hassuffix 'Ent'| order by LastName; +print ''; +print '-- Customers | where Occupation hassuffix \'ent\''; +Customers | where Occupation hassuffix 'ent'| order by LastName; +print ''; +print '-- Customers | where Occupation hassuffix \'ent\''; +Customers | where Occupation hassuffix 'ent'| order by LastName; +print ''; +print '-- Customers |where Education in (\'Bachelors\',\'High School\')'; +Customers |where Education in ('Bachelors','High School')| order by LastName; +print ''; +print '-- Customers | where Education !in (\'Bachelors\',\'High School\')'; +Customers | where Education !in ('Bachelors','High School')| order by LastName; +print ''; +print '-- Customers | where FirstName matches regex \'P.*r\''; +Customers | where FirstName matches regex 'P.*r'| order by LastName; +print ''; +print '-- Customers | where FirstName startswith \'pet\''; +Customers | where FirstName startswith 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName !startswith \'pet\''; +Customers | where FirstName !startswith 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName startswith_cs \'pet\''; +Customers | where FirstName startswith_cs 'pet'| order by LastName; +print ''; +print '-- Customers | where FirstName !startswith_cs \'pet\''; +Customers | where FirstName !startswith_cs 'pet'| order by LastName; +print ''; +print '-- Customers | where isempty(LastName)'; +Customers | where isempty(LastName); +print ''; +print '-- print isempty(1.2345)'; +print isempty(1.2345); +print ''; +print '-- Customers | where isnotempty(LastName)'; +Customers | where isnotempty(LastName); +print ''; +print '-- print isnotempty(1.2345)'; +print isnotempty(1.2345); +print ''; +print '-- Customers | where 
isnotnull(FirstName)'; +Customers | where isnotnull(FirstName)| order by LastName; +print ''; +print '-- Customers | where isnull(FirstName)'; +Customers | where isnull(FirstName)| order by LastName; +print ''; +print '-- Customers | project url_decode(\'https%3A%2F%2Fwww.test.com%2Fhello%20word\') | take 1'; +Customers | project url_decode('https%3A%2F%2Fwww.test.com%2Fhello%20word') | take 1; +print ''; +print '-- Customers | project url_encode(\'https://www.test.com/hello word\') | take 1'; +Customers | project url_encode('https://www.test.com/hello word') | take 1; +print ''; +print '-- Customers | project name_abbr = strcat(substring(FirstName,0,3), \' \', substring(LastName,2))'; +Customers | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2))| order by LastName; +print ''; +print '-- Customers | project name = strcat(FirstName, \' \', LastName)'; +Customers | project name = strcat(FirstName, ' ', LastName)| order by LastName; +print ''; +print '-- Customers | project FirstName, strlen(FirstName)'; +Customers | project FirstName, strlen(FirstName)| order by LastName; +print ''; +print '-- Customers | project strrep(FirstName,2,\'_\')'; +Customers | project strrep(FirstName,2,'_')| order by LastName; +print ''; +print '--print from_str = strrep("ABC", 2)'; +print from_str = strrep('ABC', 2); +print '--print from_int = strrep(123,3,".")'; +print from_int = strrep(123, 3, '.'); +print '--print from_time = strrep(3s,2," ")'; +print from_time = strrep(3s, 2, ' '); +print ''; +print '-- Customers | project toupper(FirstName)'; +Customers | project toupper(FirstName)| order by LastName; +print ''; +print '-- Customers | project tolower(FirstName)'; +Customers | project tolower(FirstName)| order by LastName; +print ''; +-- Customer | where LastName in~ ("diaz", "cox") +print ''; +print '-- has_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator); TODO: subquery not supported yet'; +Customers | where 
Occupation has_all ('manual', 'skilled') | order by LastName; +print ''; +print '-- has_any (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator); TODO: subquery not supported yet'; +Customers | where Occupation has_any ('Skilled', 'abcd'); +print ''; +print '-- countof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction)'; +Customers | project countof('The cat sat on the mat', 'at') | take 1; +Customers | project countof('The cat sat on the mat', 'at', 'normal') | take 1; +Customers | project countof('The cat sat on the mat', '\\s.he', 'regex') | take 1; +print countof("aaa", "a"); +print countof("aaaa", "aa"); +print countof("ababa", "ab", "normal"); +print countof("ababa", "aba"); +print countof("ababa", "aba", "regex"); +print countof("abcabc", "a.c", "regex"); +print ''; +print '-- extract ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction)'; +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 'The price of PINEAPPLE ice cream is 20'); +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of PINEAPPLE ice cream is 20'); +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20'); +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 3, 'The price of PINEAPPLE ice cream is 20'); +print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20', typeof(real)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(bool)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(date)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(guid)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(int)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(long)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(real)); +print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(decimal)); +print extract(".*Action=(\\w+)",1, "dstPostNATPort=80 proto=tcp Action=alert"); +print ''; +print '-- 
extract_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction)'; +Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 20') | take 1; +print extract_all("(\\w)(\\w+)(\\w)", dynamic([1,3]), "82b8be2d-dfa7-4bd1-8f63-24ad26d31449"); +print ''; +print '-- extract_json (https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/extractjsonfunction)'; +print extract_json('', ''); -- { serverError BAD_ARGUMENTS } +print extract_json('a', ''); -- { serverError BAD_ARGUMENTS } +print extract_json('$.firstName', ''); +print extract_json('$.phoneNumbers[0].type', ''); +print extractjson('$.firstName', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}'); +print extract_json('$.phoneNumbers[0].type', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(string)); +print extract_json('$.phoneNumbers[0].type', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(int)); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}'); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist 
street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(int)); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(long)); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(bool)); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(double)); +print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(guid)); +-- print extract_json('$.phoneNumbers', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(dynamic)); we won't be able to handle this particular case for a while, because it should return a dictionary +print ''; +print '-- split (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction)'; +Customers | project split('aa_bb', '_') | take 1; +Customers | project split('aaa_bbb_ccc', '_', 1) | take 1; +Customers | project split('', '_') | take 1; +Customers | project split('a__b', '_') | take 1; 
+Customers | project split('aabbcc', 'bb') | take 1; +Customers | project split('aabbcc', '') | take 1; +Customers | project split('aaa_bbb_ccc', '_', -1) | take 1; +Customers | project split('aaa_bbb_ccc', '_', 10) | take 1; +print ''; +print '-- strcat_delim (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction); TODO: only support string now.'; +Customers | project strcat_delim('-', '1', '2', strcat('A','b')) | take 1; +Customers | project strcat_delim('-', '1', '2', 'A' , 1s) | take 1; +Customers | project strcat_delim('-', '1', '2', 'A' , 55) | take 1; +Customers | project strcat_delim('-', '1', '2', 'A' , 7.99) | take 1; +print strcat_delim(' ', "qqqqq", "fffffff", "'asd bcd'", "\"moo moo \""); +print ''; +print '-- base64_encode_fromguid()'; +-- print base64_encode_fromguid(guid(null)); +print base64_encode_fromguid(guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb')); +print base64_encode_fromguid(dynamic(null)); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print base64_encode_fromguid("abcd1231"); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +print '-- base64_decode_toarray()'; +print base64_decode_toarray(''); +print base64_decode_toarray('S3VzdG8='); +print base64_decode_toarray('S3VzdG8==='); +print '-- base64_decode_toguid()'; +print base64_decode_toguid("JpbpECu8dUy7Pv5gbeJXAA=="); +print base64_decode_toguid(base64_encode_fromguid(guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb'))) == guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb'); +print '-- base64_encode_tostring'; +print base64_encode_tostring(''); +print base64_encode_tostring('Kusto1'); +print '-- base64_decode_tostring'; +print base64_decode_tostring(''); +print base64_decode_tostring('S3VzdG8x'); +print base64_decode_tostring('S3VzdG8===='); +print base64_decode_tostring('U3RyaW5n0KHR0tGA0L7Rh9C60LA='); +print '-- parse_url() same as ADX'; +print parse_url('scheme://username:password@host:1234/this/is/a/path?k1=v1&k2=v2#fragment'); +print parse_url(''); 
+print parse_url("http://[2001:db8:3333:4444:5555:6666:7777:8888]:1234/filepath/index.htm") +print parse_url("http://host"); +print parse_url("http://host:1234"); +print parse_url("http:///this/is/a/path/index.htm"); +print parse_url("http://#fragment"); +print parse_url("http://host:abcd"); +print parse_url('http://host/filepath?arg=:bogus@some'); +print parse_url("http://username:password@"); +print parse_url(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print '-- parse_url() differs from ADX'; +print parse_url("http://host:1234/"); +print parse_url("http://:1234/"); +print parse_url("http://?arg=value"); +print parse_url("http://host:1234?arg=value"); +print parse_url("http:///"); +print parse_url("http:///filepath"); +print parse_url("http://:port:/anything?arg=value"); +print parse_url("http://:port:port/anything?arg=value"); +print parse_url("http://host/"); +print '-- parse_url() invalid IPV6'; +print parse_url("http://2001:db8:3333:4444:5555:6666:7777:8888:1234/filepath/index.htm"); +print parse_url("http://2001:db8:3333:4444:5555:6666:7777:8888/filepath/index.htm"); +print '-- parse_urlquery()'; +print parse_urlquery('k1=v1&k2=v2&k3=v3'); +print '-- strcat --'; +print strcat('a', 1, 2, 3, timespan(5d)); +print strcat('a', null, 9 + 2, 1h + 1d); +print strcat('a', "b", "'c"); +print '-- strcmp()'; +print strcmp('ABC','ABC'), strcmp('abc','ABC'), strcmp('ABC','abc'), strcmp('abcde','abc'); +print '-- substring()'; +print substring("ABCD", -2, 2); +print '-- translate()'; +print translate('krasp', 'otsku', 'spark'), translate('abc', '', 'ab'), translate('abc', 'x', 'abc'); +print '-- trim()'; +print trim("--", "--https://www.ibm.com--"); +print trim("[^\w]+", strcat("- ","Te st", "1", "// $")); +print trim("", " asd "); +print trim("a$", "asd"); +print trim("^a", "asd"); +print '-- trim_start()'; +print trim_start("https://", "https://www.ibm.com"); +print trim_start("[^\w]+", strcat("- ","Te st", "1", "// $")); +print trim_start("asd$", "asdw"); +print 
trim_start("asd$", "asd"); +print trim_start("d$", "asd"); +print '-- trim_end()'; +print trim_end("://www.ibm.com", "https://www.ibm.com"); +print trim_end("[^\w]+", strcat("- ","Te st", "1", "// $")); +print trim_end("^asd", "wasd"); +print trim_end("^asd", "asd"); +print trim_end("^a", "asd"); +print '-- trim, trim_start, trim_end all at once'; +print str = "--https://bing.com--", pattern = '--' | extend start = trim_start(pattern, str), end = trim_end(pattern, str), both = trim(pattern, str); +print '-- replace_regex'; +print replace_regex(strcat('Number is ', '1'), 'is (\d+)', 'was: \1'); +print '-- has_any_index()'; +print has_any_index('this is an example', dynamic(['this', 'example'])), has_any_index("this is an example", dynamic(['not', 'example'])), has_any_index("this is an example", dynamic(['not', 'found'])), has_any_index("this is an example", dynamic([])); +print '-- parse_version()'; +print parse_version(42); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- print parse_version(''); -> NULL +print parse_version('1.2.3.40'); +print parse_version('1.2'); +print parse_version(strcat('1.', '2')); +print parse_version('1.2.4.5.6'); +print parse_version('moo'); +print parse_version('moo.boo.foo'); +print parse_version(strcat_delim('.', 'moo', 'boo', 'foo')); +print parse_version(''); +print parse_version('....'); +Versions | project parse_version(Version); +print '-- parse_json()'; +print parse_json(dynamic([1, 2, 3])); +print parse_json('{"a":123.5, "b":"{\\"c\\":456}"}'); +print '-- parse_command_line()'; +print parse_command_line(55, 'windows'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- print parse_command_line((52 + 3) * 4 % 2, 'windows'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print parse_command_line('', 'windows'); +print parse_command_line(strrep(' ', 6), 'windows'); +-- print parse_command_line('echo \"hello world!\" print$?', 'windows'); -> ["echo","hello world!","print$?"] +-- print parse_command_line("yolo swag 'asd bcd' \"moo moo 
\"", 'windows'); -> ["yolo","swag","'asd","bcd'","moo moo "] +-- print parse_command_line(strcat_delim(' ', "yolo", "swag", "\'asd bcd\'", "\"moo moo \""), 'windows'); -> ["yolo","swag","'asd","bcd'","moo moo "] +print '-- reverse()'; +print reverse(123); +print reverse(123.34); +print reverse(''); +print reverse("asd"); +print reverse(dynamic([])); +print reverse(dynamic([1, 2, 3])); +print reverse(dynamic(['Darth', "Vader"])); +print reverse(datetime(2017-10-15 12:00)); +print reverse(timespan(3h)); +Customers | where Education contains 'degree' | order by reverse(FirstName); +print '-- parse_csv()'; +print parse_csv(''); +print parse_csv(65); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +print parse_csv('aaa'); +print result=parse_csv('aa,b,cc'); +print result_multi_record=parse_csv('record1,a,b,c\nrecord2,x,y,z'); +-- print result=parse_csv('aa,"b,b,b",cc,"Escaping quotes: ""Title""","line1\nline2"'); -> ["aa","b,b,b","cc","Escaping quotes: \"Title\"","line1\nline2"] +-- print parse_csv(strcat(strcat_delim(',', 'aa', '"b,b,b"', 'cc', '"Escaping quotes: ""Title"""', '"line1\nline2"'), '\r\n', strcat_delim(',', 'asd', 'qcf'))); -> ["aa","b,b,b","cc","Escaping quotes: \"Title\"","line1\nline2"] +print '-- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction)'; + +Customers | project indexof('abcdefgabcdefg', 'cde', 1, 10, 2) | take 1; +print indexof('abcdefg','cde'); +print idx2 = indexof('abcdefg','cde',0,3); +print idx3 = indexof('abcdefg','cde',1,2); +print idx4 = indexof('abcdefg','cde',3,4); +print idx5 = indexof('abcdefg','cde',-5); +print idx6 = indexof(1234567,5,1,4); +print idx7 = indexof('abcdefg','cde',2,-1); +print idx8 = indexof('abcdefgabcdefg', 'cde', 1, 10, 2); +print idx9 = indexof('abcdefgabcdefg', 'cde', 1, -1, 3); +print idx10 = indexof('abcdefgabcdefg','cde', -1); +print idx11 = indexof('abcdefgabcdefg','cde', -4); +print idx12 = indexof('abcdefgabcdefg','cde', -5); +print idx13 = 
indexof('abcdefgabcdefg','cde', -105); +print idx14 = indexof(1d, '.'); + +print '-- indexof_regex --'; +print idx1 = indexof_regex("abcabc", "a.c"); +print idx2 = indexof_regex("abcabcdefg", "a.c", 0, 9, 2); +print idx3 = indexof_regex("abcabc", "a.c", 1, -1, 2); +print idx4 = indexof_regex("ababaa", "a.a", 0, -1, 2); +print idx5 = indexof_regex("abcabc", "a|ab", -1); +print indexof_regex('adsasdasasd', 'sas'); +print indexof_regex('adsasdasasd', 'sas', -1); +print indexof_regex('adsasdasasd', 'sas', 99); +print indexof_regex('adsasdasasd', 'sas', 0, -1); +print indexof_regex('adsasdasasd', 'sas', 0, -2); +print indexof_regex('adsasdasasd', 'sas', 0, 0); +print indexof_regex('adsasdasasd', 'sas', 0, 4); +print indexof_regex('adsasdasasd', 'sas', 0, 5); +print indexof_regex('adsasdasasd', 'sas', 0, 99); +print indexof_regex('adsasdasasd', 'sas', 0, -1, 1); +-- the following case differs from ADX, but conforms to the specification (https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/indexofregexfunction#returns) +print indexof_regex('adsasdasasd', 'sas', 0, -1, 0); +print indexof_regex('adsasdasasd', 'sas', 0, -1, 2); +print indexof_regex('adsasdasasd', 'sas', 0, -1, 3); +print indexof_regex(123456789, '67'); +print indexof_regex(12345.6789, 67); +print now = now() | project indexof_regex(strcat('blabla', now, 'blablabla'), now); +print indexof_regex(dynamic([1, 2, 3]), 2); +print indexof_regex(true, 'rue'); +print indexof_regex(guid(74be27de-1e4e-49d9-b579-fe0b331d3642), 42); +print indexof_regex(1d + 1h + 1m + 1s, '\\d?\\..*:\\d+:\\d{2}'); +print indexof_regex("abcabc", "*a|ab", -1); -- { serverError CANNOT_COMPILE_REGEXP } +print indexof_regex("abcabc", strcat("a", "b", "c")); +Customers | project indexof_regex(LastName, Occupation); -- { serverError ILLEGAL_COLUMN } +Customers | project indexof_regex(LastName, "Diaz", Age, Age, Age); -- { serverError ILLEGAL_COLUMN } + +print '-- indexof_regex tabular #1 --'; +Customers | order by LastName asc | 
project indexof_regex(LastName, "Diaz", Age * 0, -1 * int(Age / Age), 1); + +print '-- has --'; +print 'svchost.exe1' has ''; +print 'svchost.exe1' has 'svchost.exe'; +print 'svchost.exe' has 'svchost.exe'; +print 'svchost.exe' has 'svchost.exe1'; +print '' has 'svchost.exe1'; +print '' has ''; +print '.' has ''; +print '.' has ','; +print '.' has '.'; +print '.ex.e' has 'ex'; +print '.ex.e' has 'exe'; +print ''; +StringTest | where Text has 'asdf'; +print ''; +StringTest | where Text has 'asdf.qwer'; +print ''; +StringTest | where Text has 'qwer'; + +print '-- !has --'; +StringTest | where Text !has 'asdf'; +print ''; +StringTest | where Text !has 'asdf.qwer'; + +print '-- has_all --'; +StringTest | where Text has_all ('asdf', 'qwer'); + +print '-- has_any --'; +StringTest | where Text has_any ('asdf', 'qwer'); + +print '-- string_size --'; +print t = string_size('Kusto'); +print t = string_size('⒦⒰⒮⒯⒪'); + +print '-- to_utf8 --'; +print arr = to_utf8("⒦⒰⒮⒯⒪"); +print arr = to_utf8("קוסטו - Kusto"); + +print '-- make_string --'; +print str = make_string(75, 117, 115, 116, 111); +print str = make_string(to_utf8("Kusto")); +print str = make_string(dynamic([75, 117, 115]), 116, 111); +print str = make_string(dynamic([75, 117, 115, 116, 111])); +MyTable | project t = make_string(col_arr, col1, col2); +MyTable | project t = strcat(make_string(col1), '-', make_string(col2), '-', make_string(strlen('abcd') * 20), '-', make_string(col_arr)); +print str = make_string(range(80, 85), 86, 87, range(88, 90)); +print str = make_string(dynamic([]), 80, 81); +print str = make_string(123, 1605, 29233 ,128002, 2437); + +print '-- isutf8 --'; +print t = isutf8('🐂'); +print t = isutf8('؄'); + +print '-- isascii --'; +print str = isascii('ab১২ufghi🐂🐇🐒'); +print str = isascii('abc'); diff --git a/tests/queries/0_stateless/02366_kql_getschema.reference b/tests/queries/0_stateless/02366_kql_getschema.reference new file mode 100644 index 000000000000..d2eeb638bedd --- /dev/null +++ 
b/tests/queries/0_stateless/02366_kql_getschema.reference @@ -0,0 +1,53 @@ +-- #1 -- +a 0 Array(String) Array(String) +s 1 String String +i 2 Int64 Int64 +-- #2 -- +a 0 Array(String) Array(String) +s 1 String String +i 2 Int64 Int64 +x 3 UInt64 UInt64 +-- #3 -- +a Array(String) +s String +-- #4 -- +ColumnName 0 String String +ColumnOrdinal 1 Int32 Int32 +DataType 2 String String +ColumnType 3 String String +-- #5 -- +a 0 Array(String) dynamic +s 1 String string +i 2 Int64 long +-- #6 -- +a 0 Array(String) dynamic +s 1 String string +i 2 Int64 long +x 3 UInt64 long +-- #7 -- +a dynamic +s string +-- #8 -- +ColumnName 0 String string +ColumnOrdinal 1 Int32 int +DataType 2 String string +ColumnType 3 String string +-- #9 -- +a 0 Array(String) dynamic +s 1 String string +-- #10 -- +avg_i 0 Float64 real +-- #11 -- +s 0 String string +approximate_sum_i 1 Int64 long +-- #12 -- +Count 0 UInt64 long +-- #13 -- +s 0 String string +aggregated_s 1 Int64 long +-- #14 -- +x 0 IntervalNanosecond timespan +l 1 Nullable(Int64) long +a 2 String string +s 3 String string +i 4 Int64 long diff --git a/tests/queries/0_stateless/02366_kql_getschema.sql b/tests/queries/0_stateless/02366_kql_getschema.sql new file mode 100644 index 000000000000..074266f95ad4 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_getschema.sql @@ -0,0 +1,47 @@ +-- datatable (a:dynamic, s:string, i:long) [] + +drop table if exists test; +create table test (a Array(String), s String, i Int64) engine = Memory; + +select '-- #1 --'; +select * from getschema(select * from test); + +select '-- #2 --'; +select * from getschema(select *, length(s) as x from test); + +select '-- #3 --'; +select ColumnName, ColumnType from getschema(select * from test) where DataType like '%String%'; + +select '-- #4 --'; +select * from getschema(select * from getschema(select 1)); + +set dialect = 'kusto'; +print '-- #5 --'; +test | getschema; + +print '-- #6 --'; +test | extend x = strlen(s) | getschema; + +print '-- #7 --'; +test 
| getschema | where DataType like '%String%' | project ColumnName, ColumnType; + +print '-- #8 --'; +print 1 | getschema | getschema; + +print '-- #9 --'; +test | project a, s | getschema; + +print '-- #10 --'; +test | summarize avg(i) | getschema; + +print '-- #11 --'; +test | take 1000 | extend x = strlen(s) | top 500 by x asc | where x > 0 or i > 0 | distinct s, i, x | top-hitters 100 of s by i | getschema; + +print '-- #12 --'; +test | project a, i | count | getschema; + +print '-- #13 --'; +test | top-nested 50 of s with others = "Others" by sum(i) | getschema; + +print '-- #14 --'; +range x from 0d to 365d step 1d | extend l = tolong(x) | lookup kind = inner (test) on $left.l == $right.i | mv-expand a | getschema; diff --git a/tests/queries/0_stateless/02366_kql_lookup_join.reference b/tests/queries/0_stateless/02366_kql_lookup_join.reference new file mode 100644 index 000000000000..397be3efd901 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_lookup_join.reference @@ -0,0 +1,74 @@ +-- lookup 1 -- +1 Bill Gates billg +2 Bill Clinton billc +3 Bill Clinton billc +4 Steve Ballmer steveb +5 Tim Cook timc +-- lookup 2 -- +1 Bill Gates billg +2 Bill Clinton billc +3 Bill Clinton billc +4 Steve Ballmer steveb +5 Tim Cook timc +-- lookup 3 -- +1 Bill Gates billg +2 Bill Clinton billc +3 Bill Clinton billc +4 Steve Ballmer +5 Tim Cook +-- lookup 4 -- +1 Bill Gates billg +2 Bill Clinton billc +3 Bill Clinton billc +4 Steve Ballmer steveb +5 Tim Cook timc +-- lookup 5 -- +1 Bill Gates billg billg +2 Bill Clinton billc billc +3 Bill Clinton billc billc +4 Steve Ballmer steveb +5 Tim Cook timc +-- Default join -- +c 4 30 +c 4 20 +b 3 10 +-- Default join 2-- +c 4 30 +c 4 20 +b 3 10 +-- Inner-join -- +c 4 30 +c 4 20 +b 3 10 +b 2 10 +-- Left outer-join -- +c 4 30 +c 4 20 +b 3 10 +b 2 10 +a 1 0 +-- Right outer-join -- +d 0 40 +c 4 30 +c 4 20 +b 3 10 +b 2 10 +-- Full outer-join -- +d 0 40 +c 4 30 +c 4 20 +b 3 10 +b 2 10 +a 1 0 +-- Left anti-join -- +a 1 0 +-- Right 
anti-join -- +d 0 40 +-- Left semi-join -- +c 4 +b 3 +b 2 +-- Right semi-join -- +c 30 +c 20 +b 10 diff --git a/tests/queries/0_stateless/02366_kql_lookup_join.sql b/tests/queries/0_stateless/02366_kql_lookup_join.sql new file mode 100644 index 000000000000..11f557265f06 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_lookup_join.sql @@ -0,0 +1,78 @@ +DROP TABLE IF EXISTS FactTable; +CREATE TABLE FactTable (Row String, Personal String, Family String) ENGINE = Memory; +INSERT INTO FactTable VALUES ('1', 'Bill', 'Gates'); +INSERT INTO FactTable VALUES ('2', 'Bill', 'Clinton'); +INSERT INTO FactTable VALUES ('3', 'Bill', 'Clinton'); +INSERT INTO FactTable VALUES ('4', 'Steve', 'Ballmer'); +INSERT INTO FactTable VALUES ('5', 'Tim', 'Cook'); + +DROP TABLE IF EXISTS DimTable; +CREATE TABLE DimTable (Personal String, Family String, Alias String) ENGINE = Memory; +INSERT INTO DimTable VALUES ('Bill', 'Gates', 'billg'); +INSERT INTO DimTable VALUES ('Bill', 'Clinton', 'billc'); +INSERT INTO DimTable VALUES ('Steve', 'Ballmer', 'steveb'); +INSERT INTO DimTable VALUES ('Tim', 'Cook', 'timc'); + +-- datatable (Key:string, Value1:long) [ +-- 'a', 1, +-- 'b', 2, +-- 'b', 3, +-- 'c', 4 +-- ] + +DROP TABLE IF EXISTS X; +CREATE TABLE X (Key String, Value1 Int64) ENGINE = Memory; +INSERT INTO X VALUES ('a',1); +INSERT INTO X VALUES ('b',2); +INSERT INTO X VALUES ('b',3); +INSERT INTO X VALUES ('c',4); + +-- datatable (Key:string, Value2:long) [ +-- 'b', 10, +-- 'c', 20, +-- 'c', 30, +-- 'd', 40 +-- ] + +DROP TABLE IF EXISTS Y; +CREATE TABLE Y (Key String, Value2 Int64) ENGINE = Memory; +INSERT INTO Y VALUES ('b',10); +INSERT INTO Y VALUES ('c',20); +INSERT INTO Y VALUES ('c',30); +INSERT INTO Y VALUES ('d',40); + +set dialect='kusto'; + +print '-- lookup 1 --'; +FactTable | lookup kind=leftouter DimTable on Personal, Family | order by Row asc; +print '-- lookup 2 --'; +FactTable | lookup kind=inner DimTable on Personal, Family | order by Row asc; +print '-- lookup 3 --'; 
+FactTable | lookup kind=leftouter (DimTable | where Personal == 'Bill') on Personal, Family | order by Row asc; +print '-- lookup 4 --'; +FactTable | project Row, Personal , Family| lookup kind=leftouter DimTable on Personal, Family | order by Row asc; +print '-- lookup 5 --'; +FactTable |project Row, Personal , Family| lookup kind=leftouter (DimTable | where Personal == 'Bill') on Personal, Family| lookup kind=inner DimTable on Personal, Family | order by Row asc; + +print '-- Default join --'; +X | order by Key, Value1 | join ( Y | order by Key, Value2 ) on $left.Key == $right.Key | order by Key, Value1, Value2; +print '-- Default join 2--'; +X | order by Key, Value1 | join kind=innerunique ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2; +print '-- Inner-join --'; +X | order by Key, Value1 | join kind=inner ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2; +print '-- Left outer-join --'; +X | order by Key, Value1 | join kind=leftouter ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2; +print '-- Right outer-join --'; +X | order by Key, Value1 | join kind=rightouter ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2; +print '-- Full outer-join --'; +X | order by Key, Value1 | join kind=fullouter ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2; +print '-- Left anti-join --'; +X | order by Key, Value1 | join kind=leftanti ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2; +print '-- Right anti-join --'; +X | order by Key, Value1 | join kind=rightanti ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2; +print '-- Left semi-join --'; +-- projecting at the end shouldn't be necessary, since Value2 shouldn't make it into the result set in the first place as per KQL specification +X | order by Key, Value1 | join kind=leftsemi ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2 | project Key, Value1; +print '-- Right semi-join --'; +-- 
projecting at the end shouldn't be necessary, since Value1 shouldn't make it into the result set in the first place as per KQL specification +X | order by Key, Value1 | join kind=rightsemi ( Y | order by Key, Value2 ) on Key | order by Key, Value1, Value2 | project Key, Value2; \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_makeseries.reference b/tests/queries/0_stateless/02366_kql_makeseries.reference new file mode 100644 index 000000000000..7f196516bd98 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_makeseries.reference @@ -0,0 +1,91 @@ +-- from to +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Costco Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,2,0] +Aldi Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,500,0] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +-- from +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Costco Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000'] [0,2] +Aldi Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000'] [0,500] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +-- to +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Costco Apple ['2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [2,0] +Aldi Snargaluff ['2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [500,0] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +-- without from/to +Costco Snargaluff 
['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Costco Apple ['2016-09-11 00:00:00.000000000'] [2] +Aldi Snargaluff ['2016-09-11 00:00:00.000000000'] [500] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +-- without by +['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [70,334,54] +-- without aggregation alias +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Aldi Snargaluff ['2016-09-11 00:00:00.000000000'] [500] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +Costco Apple ['2016-09-11 00:00:00.000000000'] [2] +-- assign group alias +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] +Aldi Snargaluff ['2016-09-11 00:00:00.000000000'] [500] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] +Costco Apple ['2016-09-11 00:00:00.000000000'] [2] +-- 3d step +Costco Snargaluff ['2016-09-10 00:00:00.000000000'] [134.66666666666666] +Costco Apple ['2016-09-10 00:00:00.000000000'] [2] +Aldi Snargaluff ['2016-09-10 00:00:00.000000000'] [500] +Aldi Apple ['2016-09-10 00:00:00.000000000'] [5.5] +-- numeric column +-- from to +Costco Snargaluff [10,11,12,13,14] [200,0,102,0,0] +Aldi Snargaluff [10,11,12,13,14] [0,500,0,0,0] +Aldi Apple [10,11,12,13,14] [5,0,6,0,0] +Costco Apple [10,11,12,13,14] [0,2,0,0,0] +-- from +Costco Snargaluff [10,11,12] [200,0,102] +Aldi Snargaluff [10,11] [0,500] +Aldi Apple [10,11,12] [5,0,6] +Costco Apple [10,11] [0,2] +-- to +Costco Snargaluff [8,12,16] [200,102,0] +Aldi Snargaluff [8,12,16] [500,0,0] +Aldi Apple [8,12,16] [5,6,0] +Costco Apple [8,12,16] [2,0,0] +-- without from/to 
+Costco Snargaluff [10,12] [200,102] +Aldi Snargaluff [10] [500] +Aldi Apple [10,12] [5,6] +Costco Apple [10] [2] +-- without by +[10,12] [202,54] +['2017-01-01 00:00:00.000000000','2017-01-02 00:00:00.000000000','2017-01-03 00:00:00.000000000','2017-01-04 00:00:00.000000000','2017-01-05 00:00:00.000000000','2017-01-06 00:00:00.000000000','2017-01-07 00:00:00.000000000','2017-01-08 00:00:00.000000000','2017-01-09 00:00:00.000000000'] [4,3,5,0,10.5,4,3,8,7] +--aggregation function 1 without aggregation alias +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [201,0,103] +Costco Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,3,0] +Aldi Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,501,0] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [6,0,7] +--aggregation function 1 without aggregation alias +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [202,0,104] +Costco Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,4,0] +Aldi Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,502,0] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [7,0,8] +--other function and aggregation function 1 without aggregation alias +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [202,0,104] +Costco Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,4,0] +Aldi Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,502,0] +Aldi 
Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [7,0,8] +-- defaut with expression +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [202,4.5,104] +Costco Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [4.5,4,4.5] +Aldi Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [4.5,502,4.5] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [7,4.5,8] +--other function and aggregation function 1 without aggregation alias +Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [202,0,104] +Costco Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,4,0] +Aldi Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,502,0] +Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [7,0,8] +--from , to expression +Costco Snargaluff ['2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000','2016-09-13 00:00:00.000000000'] [0,102,0] +Costco Apple ['2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000','2016-09-13 00:00:00.000000000'] [2,0,0] +Aldi Snargaluff ['2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000','2016-09-13 00:00:00.000000000'] [500,0,0] +Aldi Apple ['2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000','2016-09-13 00:00:00.000000000'] [0,6,0] diff --git a/tests/queries/0_stateless/02366_kql_makeseries.sql b/tests/queries/0_stateless/02366_kql_makeseries.sql new file mode 100644 index 000000000000..f22507f16a16 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_makeseries.sql @@ -0,0 +1,84 @@ +-- Azure Data 
Explore Test Data +-- let make_series_test_table = datatable (Supplier:string, Fruit:string, Price: real, Purchase:datetime) +-- [ +-- 'Aldi','Apple',4,'2016-09-10', +-- 'Costco','Apple',2,'2016-09-11', +-- 'Aldi','Apple',6,'2016-09-10', +-- 'Costco','Snargaluff',100,'2016-09-12', +-- 'Aldi','Apple',7,'2016-09-12', +-- 'Aldi','Snargaluff',400,'2016-09-11', +-- 'Costco','Snargaluff',104,'2016-09-12', +-- 'Aldi','Apple',5,'2016-09-12', +-- 'Aldi','Snargaluff',600,'2016-09-11', +-- 'Costco','Snargaluff',200,'2016-09-10', +-- ]; +DROP TABLE IF EXISTS make_series_test_table; +CREATE TABLE make_series_test_table +( + Supplier Nullable(String), + Fruit String, + Price Float64, + Purchase Date +) ENGINE = Memory; +INSERT INTO make_series_test_table VALUES ('Aldi','Apple',4,'2016-09-10'), ('Costco','Apple',2,'2016-09-11'), ('Aldi','Apple',6,'2016-09-10'), ('Costco','Snargaluff',100,'2016-09-12'), ('Aldi','Apple',7,'2016-09-12'), ('Aldi','Snargaluff',400,'2016-09-11'),('Costco','Snargaluff',104,'2016-09-12'),('Aldi','Apple',5,'2016-09-12'),('Aldi','Snargaluff',600,'2016-09-11'),('Costco','Snargaluff',200,'2016-09-10'); +DROP TABLE IF EXISTS make_series_test_table2; +CREATE TABLE make_series_test_table2 +( + Supplier Nullable(String), + Fruit String , + Price Int32, + Purchase Int32 +) ENGINE = Memory; +INSERT INTO make_series_test_table2 VALUES ('Aldi','Apple',4,10),('Costco','Apple',2,11),('Aldi','Apple',6,10),('Costco','Snargaluff',100,12),('Aldi','Apple',7,12),('Aldi','Snargaluff',400,11),('Costco','Snargaluff',104,12),('Aldi','Apple',5,12),('Aldi','Snargaluff',600,11),('Costco','Snargaluff',200,10); +DROP TABLE IF EXISTS make_series_test_table3; +CREATE TABLE make_series_test_table3 +( + timestamp datetime, + metric Float64, +) ENGINE = Memory; +INSERT INTO make_series_test_table3 VALUES (parseDateTimeBestEffort('2016-12-31T06:00'), 50), (parseDateTimeBestEffort('2017-01-01'), 4), (parseDateTimeBestEffort('2017-01-02'), 3), (parseDateTimeBestEffort('2017-01-03'), 4), 
(parseDateTimeBestEffort('2017-01-03T03:00'), 6), (parseDateTimeBestEffort('2017-01-05'), 8), (parseDateTimeBestEffort('2017-01-05T13:40'), 13), (parseDateTimeBestEffort('2017-01-06'), 4), (parseDateTimeBestEffort('2017-01-07'), 3), (parseDateTimeBestEffort('2017-01-08'), 8), (parseDateTimeBestEffort('2017-01-08T21:00'), 8), (parseDateTimeBestEffort('2017-01-09'), 2), (parseDateTimeBestEffort('2017-01-09T12:00'), 11), (parseDateTimeBestEffort('2017-01-10T05:00'), 5); + +set dialect = 'kusto'; +print '-- from to'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit; +print '-- from'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) step 1d by Supplier, Fruit | order by Supplier, Fruit; +print '-- to'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit; +print '-- without from/to'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d by Supplier, Fruit | order by Supplier, Fruit; +print '-- without by'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d; +print '-- without aggregation alias'; +make_series_test_table | make-series avg(Price) default=0 on Purchase step 1d by Supplier, Fruit; +print '-- assign group alias'; +make_series_test_table | make-series avg(Price) default=0 on Purchase step 1d by Supplier_Name = Supplier, Fruit; +print '-- 3d step'; +make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 3d by Supplier, Fruit | order by Supplier, Fruit; + +print '-- numeric column'; +print '-- from to'; +make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 to 15 step 1.0 by 
Supplier, Fruit; +print '-- from'; +make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 step 1.0 by Supplier, Fruit; +print '-- to'; +make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase to 18 step 4.0 by Supplier, Fruit; +print '-- without from/to'; +make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0 by Supplier, Fruit; +print '-- without by'; +make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0; + +make_series_test_table3 | make-series avg(metric) default=0 on timestamp from datetime(2017-01-01) to datetime(2017-01-10) step 1d; + +print '--aggregation function 1 without aggregation alias'; +make_series_test_table | make-series avg(Price+1) on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit; +print '--aggregation function 1 without aggregation alias'; +make_series_test_table | make-series avg(Price+1)+1 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit; +print '--other function and aggregation function 1 without aggregation alias'; +make_series_test_table | make-series ceiling(avg(Price+1)+1) on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit; +print '-- defaut with expression'; +make_series_test_table | make-series ceiling(avg(Price+1)+1) default = strlen('123')+1.5 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit; +print '--other function and aggregation function 1 without aggregation alias'; +make_series_test_table | make-series ceiling(avg(Price+1)+1) on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit; +print '--from , to expression'; +make_series_test_table | make-series avg(Price) on Purchase from 
datetime(2016-09-10)+1d to datetime(2016-09-13)+1d step 1d by Supplier, Fruit | order by Supplier, Fruit; diff --git a/tests/queries/0_stateless/02366_kql_mvexpand.reference b/tests/queries/0_stateless/02366_kql_mvexpand.reference new file mode 100644 index 000000000000..25be070eb0b7 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_mvexpand.reference @@ -0,0 +1,65 @@ +-- mv-expand -- +-- mv_expand_test_table | mv-expand c -- +1 ['Salmon','Steak','Chicken'] 1 [5,6,7,8] +1 ['Salmon','Steak','Chicken'] 2 [5,6,7,8] +1 ['Salmon','Steak','Chicken'] 3 [5,6,7,8] +1 ['Salmon','Steak','Chicken'] 4 [5,6,7,8] +-- mv_expand_test_table | mv-expand c, d -- +1 ['Salmon','Steak','Chicken'] 1 5 +1 ['Salmon','Steak','Chicken'] 2 6 +1 ['Salmon','Steak','Chicken'] 3 7 +1 ['Salmon','Steak','Chicken'] 4 8 +-- mv_expand_test_table | mv-expand b | mv-expand c -- +1 Salmon 1 [5,6,7,8] +1 Salmon 2 [5,6,7,8] +1 Salmon 3 [5,6,7,8] +1 Salmon 4 [5,6,7,8] +1 Steak 1 [5,6,7,8] +1 Steak 2 [5,6,7,8] +1 Steak 3 [5,6,7,8] +1 Steak 4 [5,6,7,8] +1 Chicken 1 [5,6,7,8] +1 Chicken 2 [5,6,7,8] +1 Chicken 3 [5,6,7,8] +1 Chicken 4 [5,6,7,8] +-- mv_expand_test_table | mv-expand with_itemindex=index b, c, d -- +0 1 Salmon 1 5 +1 1 Steak 2 6 +2 1 Chicken 3 7 +3 1 4 8 +-- mv_expand_test_table | mv-expand array_concat(c,d) -- +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 3 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 4 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 5 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 6 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 7 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 8 +-- mv_expand_test_table | mv-expand x = c, y = d -- +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 5 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 6 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 3 7 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] 
[5,6,7,8] 4 8 +-- mv_expand_test_table | mv-expand xy = array_concat(c, d) -- +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 3 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 4 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 5 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 6 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 7 +1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 8 +-- mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy -- +1 1 +2 1 +-- mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool) -- +0 1 ['Salmon','Steak','Chicken'] 1 true +1 1 ['Salmon','Steak','Chicken'] 2 true +2 1 ['Salmon','Steak','Chicken'] 3 true +3 1 ['Salmon','Steak','Chicken'] 4 true +-- mv_expand_test_table | mv-expand c to typeof(bool) -- +1 ['Salmon','Steak','Chicken'] [5,6,7,8] true +1 ['Salmon','Steak','Chicken'] [5,6,7,8] true +1 ['Salmon','Steak','Chicken'] [5,6,7,8] true +1 ['Salmon','Steak','Chicken'] [5,6,7,8] true diff --git a/tests/queries/0_stateless/02366_kql_mvexpand.sql b/tests/queries/0_stateless/02366_kql_mvexpand.sql new file mode 100644 index 000000000000..e77986096463 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_mvexpand.sql @@ -0,0 +1,35 @@ +-- datatable(a: int, b: dynamic, c: dynamic, d: dynamic) [ +-- 1, dynamic(['Salmon', 'Steak', 'Chicken']), dynamic([1, 2, 3, 4]), dynamic([5, 6, 7, 8]) +-- ] + +DROP TABLE IF EXISTS mv_expand_test_table; +CREATE TABLE mv_expand_test_table +( + a UInt8, + b Array(String), + c Array(Int8), + d Array(Int8) +) ENGINE = Memory; +INSERT INTO mv_expand_test_table VALUES (1, ['Salmon', 'Steak','Chicken'],[1,2,3,4],[5,6,7,8]); +set dialect='kusto'; +print '-- mv-expand --'; +print '-- mv_expand_test_table | mv-expand c --'; +mv_expand_test_table | mv-expand c; +print '-- mv_expand_test_table | mv-expand c, d --'; +mv_expand_test_table | 
mv-expand c, d; +print '-- mv_expand_test_table | mv-expand b | mv-expand c --'; +mv_expand_test_table | mv-expand b | mv-expand c; +print '-- mv_expand_test_table | mv-expand with_itemindex=index b, c, d --'; +mv_expand_test_table | mv-expand with_itemindex=index b, c, d; +print '-- mv_expand_test_table | mv-expand array_concat(c,d) --'; +mv_expand_test_table | mv-expand array_concat(c,d); +print '-- mv_expand_test_table | mv-expand x = c, y = d --'; +mv_expand_test_table | mv-expand x = c, y = d; +print '-- mv_expand_test_table | mv-expand xy = array_concat(c, d) --'; +mv_expand_test_table | mv-expand xy = array_concat(c, d); +print '-- mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy --'; +mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy; +print '-- mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool) --'; +mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool); +print '-- mv_expand_test_table | mv-expand c to typeof(bool) --'; +mv_expand_test_table | mv-expand c to typeof(bool); diff --git a/tests/queries/0_stateless/02366_kql_native_bin.reference b/tests/queries/0_stateless/02366_kql_native_bin.reference new file mode 100644 index 000000000000..901fc2c687db --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_native_bin.reference @@ -0,0 +1,33 @@ +-- kql_bin -- +-- Numbers -- +4.5 +4 +3 +4.5 +\N +nan +-- Intervals -- +0 Nullable(IntervalWeek) +2000000000 +2000000000 Nullable(IntervalNanosecond) +\N +-- DateTime64 -- +2022-11-08 12:34:56.789012300 +2022-11-08 12:34:56.789012000 +2022-11-08 12:34:56.789000000 +2022-11-08 12:34:56.000000000 +2022-11-08 12:34:56.000000000 +2022-11-08 12:34:00.000000000 +2022-11-08 12:34:00.000000000 +\N +-- Date -- +2022-11-06 00:00:00.000000000 +-- Date32 -- +2022-11-06 00:00:00.000000000 +-- DateTime -- +2022-11-08 12:34:00.000000000 +-- kql_bin_at -- +4.5 +-43200000000000 +2017-05-14 12:00:00.000000000 
+2017-05-14 00:00:00.000000000 diff --git a/tests/queries/0_stateless/02366_kql_native_bin.sql b/tests/queries/0_stateless/02366_kql_native_bin.sql new file mode 100644 index 000000000000..0b82dbcd03e7 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_native_bin.sql @@ -0,0 +1,47 @@ +select '-- kql_bin --'; +select '-- Numbers --'; +select kql_bin(4.5, 1.5); +select kql_bin(4.5, 2); +select kql_bin(4, 3); +select kql_bin(5, 1.5); +select kql_bin(5, 0); +select kql_bin(4.5, 0); + +select kql_bin(5, toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select kql_bin(5, toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select '-- Intervals --'; +select kql_bin(toIntervalWeek(1), toIntervalWeek(2)) as result, toTypeName(result); +select kql_bin(toIntervalNanosecond(2500000000), toIntervalNanosecond(1000000000)); +select kql_bin(toIntervalNanosecond(2500000000), 1) as result, toTypeName(result); +select kql_bin(toIntervalNanosecond(2500000000), toIntervalNanosecond(0)); + +select kql_bin(toIntervalWeek(2), toIntervalHour(3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select kql_bin(toIntervalWeek(2), toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select '-- DateTime64 --'; +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalNanosecond(100)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalNanosecond(1000)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalNanosecond(1000000)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalNanosecond(1000000000)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), 1); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalNanosecond(60000000000)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), 
toIntervalMinute(1)); +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toIntervalMinute(0)); + +select kql_bin(toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC'), toDateTime64('2022-11-08 12:34:56.7890123', 7, 'UTC')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select '-- Date --'; +select kql_bin(toDate('2022-11-07'), toIntervalNanosecond(172800000000000)); + +select '-- Date32 --'; +select kql_bin(toDate32('2022-11-07'), toIntervalNanosecond(172800000000000)); + +select '-- DateTime --'; +select kql_bin(toDateTime('2022-11-08 12:34:56', 'UTC'), toIntervalNanosecond(60000000000)); + +select '-- kql_bin_at --'; +select kql_bin_at(6.5, 2.5, 7); +select kql_bin_at(toIntervalNanosecond(3600000000000), toIntervalNanosecond(86400000000000), toIntervalNanosecond(43200000000000)); +select kql_bin_at(toDateTime64('2017-05-15 10:20:00.123', 5, 'UTC'), toIntervalNanosecond(86400000000000), toDateTime('1970-01-01 12:00:00', 'UTC')); +select kql_bin_at(toDate('2017-05-17'), toIntervalNanosecond(604800000000000), toDate32('2017-06-04')); diff --git a/tests/queries/0_stateless/02366_kql_operator_in_sql.reference b/tests/queries/0_stateless/02366_kql_operator_in_sql.reference new file mode 100644 index 000000000000..0f03d87cb72a --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_operator_in_sql.reference @@ -0,0 +1,246 @@ +-- #1 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 +-- #2 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #3 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #4 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg 
Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #5 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #6 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #7 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #8 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #9 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #10 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #11 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #12 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #13 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #14 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #15 -- +Theodore Diaz Skilled 
Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #16 -- +1 +-- #17 -- +Peter Nara Skilled Manual Graduate Degree 26 +Apple Skilled Manual Bachelors 28 +-- #18 -- +Peter Nara Skilled Manual Graduate Degree 26 +-- #19 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #20 -- +0 +-- #21 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +-- #22 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #23 -- +Stephanie Cox Management abcd defg Bachelors 33 +-- #24 -- +Peter Nara Skilled Manual Graduate Degree 26 +-- #25 -- +Peter Nara Skilled Manual Graduate Degree 26 +-- #26 -- +2.00:00:00 +-- #27 -- +4 +-- #28 -- +Peter Nara Skilled Manual Graduate Degree 26 +-- #29 -- +Peter Nara Skilled Manual Graduate Degree 26 +-- #30 -- +4 +-- operator has, !has, has_cs, !has_cs, has_all, has_any -- +50000 +0 +20000 +20000 +0 +30000 +-- HereDoc -- +-- #31 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Apple Skilled Manual Bachelors 28 +-- #32 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #33 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #34 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled 
Manual Bachelors 28 +-- #35 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #36 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #37 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #38 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #39 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #40 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #41 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #42 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #43 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #44 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #45 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 
+Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #46 -- +1 +-- #47 -- +Peter Nara Skilled Manual Graduate Degree 26 +Apple Skilled Manual Bachelors 28 +-- #48 -- +Peter Nara Skilled Manual Graduate Degree 26 +-- #49 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #50 -- +0 +-- #51 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +-- #52 -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors 28 +-- #53 -- +Stephanie Cox Management abcd defg Bachelors 33 +-- #54 -- +Peter Nara Skilled Manual Graduate Degree 26 +-- #55 -- +Peter Nara Skilled Manual Graduate Degree 26 +-- #56 -- +2.00:00:00 +-- #57 -- +4 +-- #58 -- +Peter Nara Skilled Manual Graduate Degree 26 +-- #59 -- +Peter Nara Skilled Manual Graduate Degree 26 +-- #60 -- +4 +-- HereDoc operator has, !has, has_cs, !has_cs, has_all, has_any -- +50000 +0 +20000 +20000 +0 +30000 +30000 diff --git a/tests/queries/0_stateless/02366_kql_operator_in_sql.sql b/tests/queries/0_stateless/02366_kql_operator_in_sql.sql new file mode 100644 index 000000000000..58efa5901e93 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_operator_in_sql.sql @@ -0,0 +1,177 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled 
Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +drop table if exists StormEventsLite; +create table StormEventsLite +( + Id UUID materialized generateUUIDv4(), + EventType String, + index EventTypeIndex EventType TYPE tokenbf_v1(512, 3, 0) GRANULARITY 1, + primary key(Id) +) engine = MergeTree; + +insert into StormEventsLite select 'iddqd strong wind iddqd' from numbers(10000); +insert into StormEventsLite select 'Strong Wind' from numbers(10000); +insert into StormEventsLite select 'strong wind' from numbers(10000); +insert into StormEventsLite select 'iddqd Strong wind iddqd' from numbers(10000); +insert into StormEventsLite select 'iddqd Strong Wind iddqd' from numbers(10000); + +DROP TABLE IF EXISTS TableWithVariousDataTypes; +CREATE TABLE TableWithVariousDataTypes +( + Name String, + Age Nullable(UInt8), + Height Float64, + JoinDate DateTime64(9, 'UTC') +) engine = Memory; + +INSERT INTO TableWithVariousDataTypes VALUES ('A', 12, 5.2, '2020-01-01'), ('B', 22, 7.2, '2020-01-02'), ('C', 32, 9.3, '2021-12-31'); +-- explain indexes = 1 select count(*) from StormEventsLite where hasToken(EventType, 'strong'); + +select '-- #1 --' ; +select * from kql(Customers | where FirstName !in ('Peter', 'Latoya')); +select '-- #2 --' ; +select * from kql(Customers | where FirstName !in ("test", "test2")); +select '-- #3 --' ; +select * from kql(Customers | where FirstName !contains 'Pet'); +select '-- #4 --' ; +select * from kql(Customers | where FirstName !contains_cs 'Pet'); +select '-- #5 --' ; +select * from kql(Customers | where FirstName !endswith 'ter'); +select '-- #6 --' ; +select * from kql(Customers | where FirstName !endswith_cs 'ter'); +select '-- #7 --' ; +select * from kql(Customers | where FirstName != 'Peter'); +select '-- #8 --' ; +select * from kql(Customers | where FirstName !has 'Peter'); +select '-- #9 --' ; +select * from kql(Customers | where FirstName !has_cs 'peter'); +select '-- #10 --' ; +select * from kql(Customers | 
where FirstName !hasprefix 'Peter'); +select '-- #11 --' ; +select * from kql(Customers | where FirstName !hasprefix_cs 'Peter'); +select '-- #12 --' ; +select * from kql(Customers | where FirstName !hassuffix 'Peter'); +select '-- #13 --' ; +select * from kql(Customers | where FirstName !hassuffix_cs 'Peter'); +select '-- #14 --' ; +select * from kql(Customers | where FirstName !startswith 'Peter'); +select '-- #15 --' ; +select * from kql(Customers | where FirstName !startswith_cs 'Peter'); +select '-- #16 --' ; +select * from kql(print t = 'a' in~ ('A', 'b', 'c')); +select '-- #17 --' ; +select * from kql(Customers | where FirstName in~ ('peter', 'apple')); +select '-- #18 --' ; +select * from kql(Customers | where FirstName in~ ((Customers | project FirstName | where FirstName == 'Peter'))); +select '-- #19 --' ; +select * from kql(Customers | where FirstName in~ ((Customers | project FirstName | where Age < 30))); +select '-- #20 --' ; +select * from kql(print t = 'a' !in~ ('A', 'b', 'c')); +select '-- #21 --' ; +select * from kql(Customers | where FirstName !in~ ('peter', 'apple')); +select '-- #22 --' ; +select * from kql(Customers | where FirstName !in~ ((Customers | project FirstName | where FirstName == 'Peter'))); +select '-- #23 --' ; +select * from kql(Customers | where FirstName !in~ ((Customers | project FirstName | where Age < 30))); +select '-- #24 --' ; +select * from kql(Customers | where FirstName =~ 'peter' and LastName =~ 'naRA'); +select '-- #25 --' ; +select * from kql(Customers | where FirstName !~ 'nEyMaR' and LastName =~ 'naRA'); +select '-- #26 --' ; +select * from kql(print x = tostring(1d + 1d) | where tostring(x) !~ "asd"); +select '-- #27 --' ; +select * from kql(Customers | where FirstName !in~ ('nEyMaR', 'Peter') | count); +select '-- #28 --' ; +select * from kql(Customers | where FirstName =~ "peter" and LastName =~ "naRA"); +select '-- #29 --' ; +select * from kql(Customers | where FirstName !~ "nEyMaR" and LastName =~ "naRA"); 
+select '-- #30 --' ; +select * from kql(Customers | where FirstName !in~ ("nEyMaR", "Peter") | count); +select '-- operator has, !has, has_cs, !has_cs, has_all, has_any --'; +select * from kql(StormEventsLite | where EventType has 'strong' | count); +select * from kql(StormEventsLite | where EventType !has 'strong wind' | count); +select * from kql(StormEventsLite | where EventType has_cs 'Strong Wind' | count); +select * from kql(StormEventsLite | where EventType !has_cs 'iddqd' | count); +select * from kql(StormEventsLite | where EventType has_all ('iddqd', 'string') | count); +select * from kql(StormEventsLite | where EventType has_any ('iddqd', 'string') | count); +Select '-- HereDoc --'; +select '-- #31 --' ; +select * from kql($$Customers | where FirstName !in ('Peter', 'Latoya')$$); +select '-- #32 --' ; +select * from kql($$Customers | where FirstName !in ("test", "test2")$$); +select '-- #33 --' ; +select * from kql($$Customers | where FirstName !contains 'Pet'$$); +select '-- #34 --' ; +select * from kql($$Customers | where FirstName !contains_cs 'Pet'$$); +select '-- #35 --' ; +select * from kql($$Customers | where FirstName !endswith 'ter'$$); +select '-- #36 --' ; +select * from kql($$Customers | where FirstName !endswith_cs 'ter'$$); +select '-- #37 --' ; +select * from kql($$Customers | where FirstName != 'Peter'$$); +select '-- #38 --' ; +select * from kql($$Customers | where FirstName !has 'Peter'$$); +select '-- #39 --' ; +select * from kql($$Customers | where FirstName !has_cs 'peter'$$); +select '-- #40 --' ; +select * from kql($$Customers | where FirstName !hasprefix 'Peter'$$); +select '-- #41 --' ; +select * from kql($$Customers | where FirstName !hasprefix_cs 'Peter'$$); +select '-- #42 --' ; +select * from kql($$Customers | where FirstName !hassuffix 'Peter'$$); +select '-- #43 --' ; +select * from kql($$Customers | where FirstName !hassuffix_cs 'Peter'$$); +select '-- #44 --' ; +select * from kql($$Customers | where FirstName !startswith 
'Peter'$$); +select '-- #45 --' ; +select * from kql($$Customers | where FirstName !startswith_cs 'Peter'$$); +select '-- #46 --' ; +select * from kql($$print t = 'a' in~ ('A', 'b', 'c')$$); +select '-- #47 --' ; +select * from kql($$Customers | where FirstName in~ ('peter', 'apple')$$); +select '-- #48 --' ; +select * from kql($$Customers | where FirstName in~ ((Customers | project FirstName | where FirstName == 'Peter'))$$); +select '-- #49 --' ; +select * from kql($$Customers | where FirstName in~ ((Customers | project FirstName | where Age < 30))$$); +select '-- #50 --' ; +select * from kql($$print t = 'a' !in~ ('A', 'b', 'c')$$); +select '-- #51 --' ; +select * from kql($$Customers | where FirstName !in~ ('peter', 'apple')$$); +select '-- #52 --' ; +select * from kql($$Customers | where FirstName !in~ ((Customers | project FirstName | where FirstName == 'Peter'))$$); +select '-- #53 --' ; +select * from kql($$Customers | where FirstName !in~ ((Customers | project FirstName | where Age < 30))$$); +select '-- #54 --' ; +select * from kql($$Customers | where FirstName =~ 'peter' and LastName =~ 'naRA'$$); +select '-- #55 --' ; +select * from kql($$Customers | where FirstName !~ 'nEyMaR' and LastName =~ 'naRA'$$); +select '-- #56 --' ; +select * from kql($$print x = tostring(1d + 1d) | where tostring(x) !~ "asd"$$); +select '-- #57 --' ; +select * from kql($$Customers | where FirstName !in~ ('nEyMaR', 'Peter') | count$$); +select '-- #58 --' ; +select * from kql($$Customers | where FirstName =~ "peter" and LastName =~ "naRA"$$); +select '-- #59 --' ; +select * from kql($$Customers | where FirstName !~ "nEyMaR" and LastName =~ "naRA"$$); +select '-- #60 --' ; +select * from kql($$Customers | where FirstName !in~ ("nEyMaR", "Peter") | count$$); +select '-- HereDoc operator has, !has, has_cs, !has_cs, has_all, has_any --'; +select * from kql($$StormEventsLite | where EventType has 'strong' | count$$); +select * from kql($$StormEventsLite | where EventType !has 
'strong wind' | count$$); +select * from kql($$StormEventsLite | where EventType has_cs 'Strong Wind' | count$$); +select * from kql($$StormEventsLite | where EventType !has_cs 'iddqd' | count$$); +select * from kql($$StormEventsLite | where EventType has_all ('iddqd', 'string') | count$$); +select * from kql($$StormEventsLite | where EventType has_any ('iddqd', 'string') | count$$); +select * from kql($IBM$StormEventsLite | where EventType has_any ('iddqd', 'string') | count$IBM$); +DROP TABLE IF EXISTS Customers; +drop table if exists StormEventsLite; diff --git a/tests/queries/0_stateless/02366_kql_operators.reference b/tests/queries/0_stateless/02366_kql_operators.reference new file mode 100644 index 000000000000..d9fe7ea46b9b --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_operators.reference @@ -0,0 +1,22 @@ +-- operator between, !between -- +12 +32 +5.2 +5.2 +9.3 +2020-01-01 00:00:00.000000000 +2020-01-02 00:00:00.000000000 +2021-12-31 00:00:00.000000000 +2021-12-31 00:00:00.000000000 +2020-01-01 00:00:00.000000000 +2020-01-02 00:00:00.000000000 +12 +32 +12 +32 +19 +20 +0.4 +0.5 +0.6 +0.7 diff --git a/tests/queries/0_stateless/02366_kql_operators.sql b/tests/queries/0_stateless/02366_kql_operators.sql new file mode 100644 index 000000000000..fa76a6be5b68 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_operators.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS TableWithVariousDataTypes; +CREATE TABLE TableWithVariousDataTypes +( + Name String, + Age Nullable(UInt8), + Height Float64, + JoinDate DateTime64(9, 'UTC') +) engine = Memory; + +INSERT INTO TableWithVariousDataTypes VALUES ('A', 12, 5.2, '2020-01-01'), ('B', 22, 7.2, '2020-01-02'), ('C', 32, 9.3, '2021-12-31'); + +set dialect='kusto'; + +print '-- operator between, !between --'; +TableWithVariousDataTypes | project Age | where Age between (10 .. 12); +TableWithVariousDataTypes | project Age | where Age !between (10 .. 
30); +TableWithVariousDataTypes | project Height | where Height between (5.2 .. 6.6); +TableWithVariousDataTypes | project Height | where Height !between (5.3 .. 7.6); +TableWithVariousDataTypes | project JoinDate | where todatetime(JoinDate) between (datetime('2020-01-01') .. 2d); +TableWithVariousDataTypes | project JoinDate | where JoinDate !between (datetime('2020-01-01') .. 2d); +TableWithVariousDataTypes | project JoinDate | where JoinDate between (datetime('2020-06-30') .. datetime('2025-06-30')); +TableWithVariousDataTypes | project JoinDate | where JoinDate !between (datetime('2020-06-30') .. datetime('2025-06-30')); +TableWithVariousDataTypes | project Age | where Age between (10 .. 12) or Age > 30; +TableWithVariousDataTypes | project Age | where Age between (10 .. 12) or Age between (30 .. 50); +range x from 1 to 100 step 1 | where x between ( 8 + (4 * 2) / 8 + ((5 * 5 - 20) * 2) .. 20); +range x from 1 to 100 step 1 | where x between ( 8 + (4 * 2) / 8 + ((5 * 5 - 20) * 2) ... 20); -- { clientError SYNTAX_ERROR } +range x from 1 to 100 step 1 | where x between ( 8 + (4 * 2) / 8 + ((5 * 5 - 20) * 2) . 20); -- { clientError SYNTAX_ERROR } +range x from 1 to 100 step 1 | where x between ( 8 + (4 * 2) / 8 + ((5 * 5 - 20) * 2) 20); -- { clientError SYNTAX_ERROR } +range x from 1 to 100 step 1 | where x between (50 .. '58'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +range x from 1 to 100 step 1 | where x between ('50' .. 58); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +range x from 1 to 100 step 1 | where x between ('50' .. '58'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +range x from 0.1 to 1 step 0.1 | where x between (0.4 .. .7); +range x from 1 to 100 step 1 | where x between (50 .. datetime(2007-07-27)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +range x from 1 to 100 step 1 | where x between (datetime(2007-07-27) .. 
datetime(2007-07-30)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02366_kql_print.reference b/tests/queries/0_stateless/02366_kql_print.reference new file mode 100644 index 000000000000..d5efdf199fe8 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_print.reference @@ -0,0 +1,10 @@ +-- #1 -- +1 2 3 +print_0 0 UInt8 int +print_1 1 UInt8 int +print_2 2 UInt8 int +-- #2 -- +1 2 3 +print_0 0 UInt8 int +y 1 UInt8 int +print_2 2 UInt8 int diff --git a/tests/queries/0_stateless/02366_kql_print.sql b/tests/queries/0_stateless/02366_kql_print.sql new file mode 100644 index 000000000000..f22c2099e668 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_print.sql @@ -0,0 +1,9 @@ +set dialect='kusto'; + +print '-- #1 --'; +print 1, 2, 3; +print 1, 2, 3 | getschema; + +print '-- #2 --'; +print 1, y = 2, 3; +print 1, y = 2, 3 | getschema; diff --git a/tests/queries/0_stateless/02366_kql_project.reference b/tests/queries/0_stateless/02366_kql_project.reference new file mode 100644 index 000000000000..da5aac3a1f3c --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_project.reference @@ -0,0 +1,11 @@ +-- #1 -- +Column2 +Column3 +Column4 +-- #2 -- +Column3 +-- #3 -- +Column3 +-- #4 -- +Column1 +s diff --git a/tests/queries/0_stateless/02366_kql_project.sql b/tests/queries/0_stateless/02366_kql_project.sql new file mode 100644 index 000000000000..44732bccaae5 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_project.sql @@ -0,0 +1,17 @@ +drop table if exists test; +create table test (s String) engine = Memory; +insert into test values ('127.0.0.1'), ('127.0.0.0'); + +set dialect = 'kusto'; + +print '-- #1 --'; +print 1 | extend 2 | project ipv4_compare('127.0.0.1', '127.0.0.0'), 4, 5 | getschema | project ColumnName; + +print '-- #2 --'; +print parse_ipv4('1.2.3.4') | project 2 | extend 4 | project 3 | getschema | project ColumnName; + +print '-- #3 --'; +test | extend ipv4_compare(s, '127.0.0.0') | extend Column2 = 'c' | 
project ipv4_is_in_range(s, '127.1.2.3/8') | getschema | project ColumnName; + +print '-- #4 --'; +test | project ipv4_is_in_range(s, '127.1.2.3/8'), s | getschema | project ColumnName; diff --git a/tests/queries/0_stateless/02366_kql_projectaway.reference b/tests/queries/0_stateless/02366_kql_projectaway.reference new file mode 100644 index 000000000000..b40ead1fa206 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_projectaway.reference @@ -0,0 +1,38 @@ +1-- remove one column +Diaz Skilled Manual Bachelors 28 + +2-- remove two columns +Skilled Manual Bachelors 28 + +3-- remove columns by one wildcard +Skilled Manual Bachelors 28 + +4-- remove columns by two wildcards +28 + +5-- remove columns by one wildcard, one regular column +Skilled Manual Bachelors + +6-- remove columns by one wildcard, two regular column +Skilled Manual + +7-- remove columns by two wildcard, two regular column +Skilled Manual + +8-- remove one column from previous piple result +Diaz Skilled Manual Bachelors 28 + +9-- remove one column from summized piple result +Theodore 28 + +10-- remove columns after extend +Skilled Manual Bachelors 28 Theodore Diaz + +11-- remove two columns by one wildcard in postfix +abc_Qwerty abc_Qwerty_xyz 1 + +12-- remove two columns by one wildcard in prefix +Qwerty_xyz abc_Qwerty_xyz 1 + +13-- remove four columns by wildcard in prefix and postfix +1 diff --git a/tests/queries/0_stateless/02366_kql_projectaway.sql b/tests/queries/0_stateless/02366_kql_projectaway.sql new file mode 100644 index 000000000000..879452cba90b --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_projectaway.sql @@ -0,0 +1,63 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28); + +DROP TABLE IF EXISTS Events; +CREATE TABLE Events +( + Test Nullable(String), 
+ abc_Test String, + Test_xyz String, + abc_Test_xyz String, + test_lowercase Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Events VALUES ('Qwerty','abc_Qwerty','Qwerty_xyz','abc_Qwerty_xyz',1); + +set dialect = 'kusto'; +print '1-- remove one column'; +Customers | project-away FirstName; +print ''; +print '2-- remove two columns'; +Customers | project-away FirstName, LastName; +print ''; +print '3-- remove columns by one wildcard'; +Customers | project-away *Name; +print ''; +print '4-- remove columns by two wildcards'; +Customers | project-away *Name, *tion; +print ''; +print '5-- remove columns by one wildcard, one regular column'; +Customers | project-away *Name, Age; +print ''; +print '6-- remove columns by one wildcard, two regular column'; +Customers | project-away *Name, Age, Education; +print ''; +print '7-- remove columns by two wildcard, two regular column'; +Customers | project-away *irstName, Age, *astName, Education; +print ''; +print '8-- remove one column from previous piple result'; +Customers | where Age< 30 | limit 2 | project-away FirstName; +print ''; +print '9-- remove one column from summized piple result'; +Customers|summarize sum(Age), avg(Age) by FirstName | project-away sum_Age; +print ''; +print '10-- remove columns after extend'; +Customers|extend FullName = strcat(FirstName,' ',LastName) | project-away FirstName, LastName; +print ''; +print '11-- remove two columns by one wildcard in postfix'; +Events | project-away Test*; +print ''; +print '12-- remove two columns by one wildcard in prefix'; +Events | project-away *Test; +print ''; +print '13-- remove four columns by wildcard in prefix and postfix'; +Events | project-away *Test*; diff --git a/tests/queries/0_stateless/02366_kql_projectrename.reference b/tests/queries/0_stateless/02366_kql_projectrename.reference new file mode 100644 index 000000000000..3ecc769d34a6 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_projectrename.reference @@ -0,0 +1,8 @@ +1-- rename one 
column +Diaz Skilled Manual Bachelors 28 Theodore +2-- rename two columns +Skilled Manual Bachelors 28 Theodore Diaz +3-- rename printed columns +Theodore Diaz +4-- nested query +9 8 diff --git a/tests/queries/0_stateless/02366_kql_projectrename.sql b/tests/queries/0_stateless/02366_kql_projectrename.sql new file mode 100644 index 000000000000..3f2ac6cee1d0 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_projectrename.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28); + +set dialect = 'kusto'; + +print '1-- rename one column'; +Customers | project-rename FN=FirstName; + +print '2-- rename two columns'; +Customers | project-rename FN=FirstName, LN=LastName; + +print '3-- rename printed columns'; +print FN='Theodore', LN='Diaz' | project-rename FirstName=FN, LastName=LN; + +print '4-- nested query'; +print a = 9 | project-rename b = a | extend c = toscalar(print d = 8 | project-rename e = d) | project-rename f = c; + diff --git a/tests/queries/0_stateless/02366_kql_range.reference b/tests/queries/0_stateless/02366_kql_range.reference new file mode 100644 index 000000000000..62a4e7a7f581 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_range.reference @@ -0,0 +1,112 @@ +-- range function int1 < int2, default int3 -- +[1,2,3,4,5,6,7,8,9,10] +-- range function int1 < int2, int3 > 0 -- +[1,3,5,7,9] +-- range function int1 < int2, int3 == 0 -- +[] +-- range function int1 < int2, int3 < 0 -- +[] +-- range function -int1 < int2, int3 > 0 -- +[-1,1,3,5,7,9] +-- range function int1 > int2 default int3-- +[] +-- range function int1 > int2, int3 > 0 -- +[] +-- range function int1 > int2, int3 == 0 -- +[] +-- range function int1 > int2, int3 < 0 -- +[17,15,13,11] +-- range function int1 > -int2, int3 < 0 -- 
+[17,15,13,11,9,7,5,3,1,-1,-3,-5,-7,-9] +-- range function int1 == int2 default int3-- +[10] +-- range function int1 == int2, int3 > 0 -- +[10] +-- range function int1 == int2, int3 == 0 -- +[] +-- range function int1 == int2, int3 < 0 -- +[10] +-- range function int1 == int2, float3 > 0 -- +[10] +-- range function float1 < float2, default step -- +[1.2,2.2,3.2,4.2,5.2,6.2,7.2,8.2,9.2,10.2] +-- range function float1 < float2, float3 > 0-- +[1.2,3.4000000000000004,5.6000000000000005,7.800000000000001,10] +-- range function float1 < float2, float3 == 0 -- +[] +-- range function float1 < float2, float3 < 0 -- +[] +-- range function float1 > float2, default step -- +[] +-- range function float1 > float2, float3 > 0-- +[] +-- range function float1 > float2, float3 == 0 -- +[] +-- range function float1 > float2, float3 < 0 -- +[21.2,19,16.8,14.600000000000001,12.400000000000002] +-- range function float1 == float2, default step -- +[21.2] +-- range function float1 == float2, float3 > 0-- +[21.2] +-- range function float1 == float2, float3 == 0 -- +[] +-- range function float1 == float2, float3 < 0 -- +[21.2] +-- range function postive float, int, float -- +[1.2,3.4000000000000004,5.6000000000000005,7.800000000000001,10] +-- range function postive integer, int, float -- +[1,3.2,5.4,7.6000000000000005,9.8] +-- range function postive intger, float, float -- +[1,3.2,5.4,7.6000000000000005,9.8] +-- range function postive float, int, int -- +[1.2,3.2,5.2,7.2,9.2] +-- range function postive int, int, negative int -- +[12,10,8,6,4] +-- range function postive float, int, negative float -- +[12.8,10.5,8.2,5.8999999999999995,3.5999999999999996] +-- range function datetime, datetime, timespan -- +['2001-01-01 00:00:00.000000000','2001-01-01 05:00:00.000000000','2001-01-01 10:00:00.000000000','2001-01-01 15:00:00.000000000','2001-01-01 20:00:00.000000000'] +-- range function datetime, datetime, negative timespan -- +['2001-01-03 00:00:00.000000000','2001-01-02 
19:00:00.000000000','2001-01-02 14:00:00.000000000','2001-01-02 09:00:00.000000000','2001-01-02 04:00:00.000000000'] +-- range function datetime, datetime -- +['2001-01-01 00:00:00.000000000','2001-01-01 01:00:00.000000000','2001-01-01 02:00:00.000000000','2001-01-01 03:00:00.000000000','2001-01-01 04:00:00.000000000','2001-01-01 05:00:00.000000000','2001-01-01 06:00:00.000000000','2001-01-01 07:00:00.000000000','2001-01-01 08:00:00.000000000','2001-01-01 09:00:00.000000000','2001-01-01 10:00:00.000000000','2001-01-01 11:00:00.000000000','2001-01-01 12:00:00.000000000','2001-01-01 13:00:00.000000000','2001-01-01 14:00:00.000000000','2001-01-01 15:00:00.000000000','2001-01-01 16:00:00.000000000','2001-01-01 17:00:00.000000000','2001-01-01 18:00:00.000000000','2001-01-01 19:00:00.000000000','2001-01-01 20:00:00.000000000','2001-01-01 21:00:00.000000000','2001-01-01 22:00:00.000000000','2001-01-01 23:00:00.000000000','2001-01-02 00:00:00.000000000'] +-- range function timespan, timespan, timespan -- +['01:00:00','03:00:00','05:00:00'] +-- range function -timespan, timespan, timespan -- +['-01:00:00','01:00:00','03:00:00','05:00:00'] +-- range function timespan, timespan -- +['01:00:00','02:00:00','03:00:00','04:00:00','05:00:00'] +-- range function timespan1 > timespan2, negative timespan3 < 0 -- +['11:00:00','09:00:00','07:00:00','05:00:00'] +-- range function float timespan1 < timespan2, timespan3 > 0 -- +['01:30:00','03:30:00'] +-- range function timespan1 < timespan2, timespan3 = 0 -- +[] +-- range function timespan1 == timespan2, timespan3 = 0 -- +['05:00:00'] +-- range function datetime1 < datetime2 , timespan > 0 -- +['2017-01-01 23:59:59.999999900','2017-01-02 23:59:59.999999900','2017-01-03 23:59:59.999999900'] +-- range function datetime1 > datetime2 , timespan > 0 -- +[] +-- range function datetime1 > datetime2 , timespan < 0 -- +['2017-01-05 23:59:59.999999900','2017-01-04 23:59:59.999999900','2017-01-03 23:59:59.999999900'] +-- range orerator int, int, 
int -- +20 +21 +22 +23 +24 +25 +-- range orerator float, float, float -- +20.5 +22 +23.5 +25 +-- range orerator datetime, datetime, timespan -- +2023-01-01 00:00:00.000000000 +2023-01-02 00:00:00.000000000 +2023-01-03 00:00:00.000000000 +2023-01-04 00:00:00.000000000 +2023-01-05 00:00:00.000000000 +2023-01-06 00:00:00.000000000 +2023-01-07 00:00:00.000000000 diff --git a/tests/queries/0_stateless/02366_kql_range.sql b/tests/queries/0_stateless/02366_kql_range.sql new file mode 100644 index 000000000000..c1f31d953a93 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_range.sql @@ -0,0 +1,149 @@ +set dialect = 'kusto'; +set interval_output_format = 'kusto'; + +print '-- range function int1 < int2, default int3 --'; +print range(1, 10); + +print '-- range function int1 < int2, int3 > 0 --'; +print range(1, 10, 2); + +print '-- range function int1 < int2, int3 == 0 --'; +print range(1, 10, 0); + +print '-- range function int1 < int2, int3 < 0 --'; +print range(1, 10, -2); + +print '-- range function -int1 < int2, int3 > 0 --'; +print range(-1, 10, 2); + +print '-- range function int1 > int2 default int3--'; +print range(17, 10); + +print '-- range function int1 > int2, int3 > 0 --'; +print range(17, 10, 2); + +print '-- range function int1 > int2, int3 == 0 --'; +print range(17, 10, 0); + +print '-- range function int1 > int2, int3 < 0 --'; +print range(17, 10, -2); + +print '-- range function int1 > -int2, int3 < 0 --'; +print range(17, -10, -2); + +print '-- range function int1 == int2 default int3--'; +print range(10, 10); + +print '-- range function int1 == int2, int3 > 0 --'; +print range(10, 10, 2); + +print '-- range function int1 == int2, int3 == 0 --'; +print range(10, 10, 0); + +print '-- range function int1 == int2, int3 < 0 --'; +print range(10, 10, -2); + +print '-- range function int1 == int2, float3 > 0 --'; +print range(10, 10, 2.2); + +print '-- range function float1 < float2, default step --'; +print range(1.2, 10.3); + +print '-- range function 
float1 < float2, float3 > 0--'; +print range(1.2, 10.3, 2.2); + +print '-- range function float1 < float2, float3 == 0 --'; +print range(1.2, 10.3, 0); + +print '-- range function float1 < float2, float3 < 0 --'; +print range(1.2, 10.3, -2.2); + +print '-- range function float1 > float2, default step --'; +print range(21.2, 10.3); + +print '-- range function float1 > float2, float3 > 0--'; +print range(21.2, 10.3, 2.2); + +print '-- range function float1 > float2, float3 == 0 --'; +print range(21.2, 10.3, 0); + +print '-- range function float1 > float2, float3 < 0 --'; +print range(21.2, 10.3, -2.2); + +print '-- range function float1 == float2, default step --'; +print range(21.2, 21.2); + +print '-- range function float1 == float2, float3 > 0--'; +print range(21.2, 21.2, 2.2); + +print '-- range function float1 == float2, float3 == 0 --'; +print range(21.2, 21.2, 0); + +print '-- range function float1 == float2, float3 < 0 --'; +print range(21.2, 21.2, -2.2); + +print '-- range function postive float, int, float --'; +print range(1.2, 10, 2.2); + +print '-- range function postive integer, int, float --'; +print range(1, 10, 2.2); + +print '-- range function postive intger, float, float --'; +print range(1, 10.5, 2.2); + +print '-- range function postive float, int, int --'; +print range(1.2, 10, 2); + +print '-- range function postive int, int, negative int --'; +print range(12, 3, -2); + +print '-- range function postive float, int, negative float --'; +print range(12.8, 3, -2.3); + +print '-- range function datetime, datetime, timespan --'; +print range(datetime('2001-01-01'), datetime('2001-01-02'), 5h); + +print '-- range function datetime, datetime, negative timespan --'; +print range(datetime('2001-01-03'), datetime('2001-01-02'), -5h); + +print '-- range function datetime, datetime --'; +print range(datetime('2001-01-01'), datetime('2001-01-02')); + +print '-- range function timespan, timespan, timespan --'; +print range(1h, 5h, 2h); + +print '-- range 
function -timespan, timespan, timespan --'; +print range(-1h, 5h, 2h); + +print '-- range function timespan, timespan --'; +print range(1h, 5h); + +print '-- range function timespan1 > timespan2, negative timespan3 < 0 --'; +print range(11h, 5h, -2h); + +print '-- range function float timespan1 < timespan2, timespan3 > 0 --'; +print range(1.5h, 5h, 2h); + +print '-- range function timespan1 < timespan2, timespan3 = 0 --'; +print range(1.5h, 5h, 0h); + +print '-- range function timespan1 == timespan2, timespan3 = 0 --'; +print range(5h, 5h, 2h); + +print '-- range function datetime1 < datetime2 , timespan > 0 --'; +print range(endofday(datetime(2017-01-01 10:10:17)), endofday(datetime(2017-01-03 10:10:17)), 1d); + +print '-- range function datetime1 > datetime2 , timespan > 0 --'; +print range(endofday(datetime(2017-01-05 10:10:17)), endofday(datetime(2017-01-03 10:10:17)), 1d); + +print '-- range function datetime1 > datetime2 , timespan < 0 --'; +print range(endofday(datetime(2017-01-05 10:10:17)), endofday(datetime(2017-01-03 10:10:17)), -1d); + +print '-- range orerator int, int, int --'; +range Age from 20 to 25 step 1; + +print '-- range orerator float, float, float --'; +range temp from 20.5 to 25.5 step 1.5; + +print '-- range orerator datetime, datetime, timespan --'; +range FirstWeek from datetime('2023-01-01') to datetime('2023-01-07') step 1d; \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_subquery.reference b/tests/queries/0_stateless/02366_kql_subquery.reference new file mode 100644 index 000000000000..51fd985276b8 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_subquery.reference @@ -0,0 +1,5 @@ +-- support subquery for in orerator (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) (subquery need to be wraped with bracket inside bracket); TODO: case-insensitive not supported yet +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Theodore Diaz 
Skilled Manual Bachelors 28 +Apple Skilled Manual Bachelors 28 diff --git a/tests/queries/0_stateless/02366_kql_subquery.sql b/tests/queries/0_stateless/02366_kql_subquery.sql new file mode 100644 index 000000000000..68aa58de26c6 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_subquery.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +set dialect='kusto'; + +print '-- support subquery for in orerator (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) (subquery need to be wraped with bracket inside bracket); TODO: case-insensitive not supported yet'; +Customers | where Age in ((Customers|project Age|where Age < 30)) | order by LastName; + diff --git a/tests/queries/0_stateless/02366_kql_summarize.reference b/tests/queries/0_stateless/02366_kql_summarize.reference new file mode 100644 index 000000000000..25c8673ba394 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_summarize.reference @@ -0,0 +1,141 @@ +-- test summarize -- +12 25 46 32.416666666666664 389 +Skilled Manual 5 26 36 30.2 151 +Professional 6 25 46 34.166666666666664 205 +Management abcd defg 1 33 33 33 33 +Skilled Manual 0 +Professional 2 +Management abcd defg 0 +Skilled Manual 36 +Professional 38 +Management abcd defg 33 +Skilled Manual 26 +Professional 25 +Management abcd defg 33 +Skilled Manual 30.2 +Professional 29.25 +Management abcd defg 33 +Skilled Manual 151 +Professional 117 +Management abcd defg 33 +4 +4 +2 +2 +40 2 +30 4 +20 6 +Skilled Manual 5 
+Professional 6 +Management abcd defg 1 +Cameron Rodriguez 28 +Christine Nara 33 +Latoya Shen 25 +Joshua Lee 26 +Dalton Wood 42 +Peter Nara 26 +Edward Hernandez 36 +\N why 38 +Apple 28 +Stephanie Cox 33 +Angel Stewart 46 +Theodore Diaz 28 +-- make_list() -- +Skilled Manual ['Bachelors','Graduate Degree','High School','Partial College','Bachelors'] +Professional ['Graduate Degree','Partial College','Partial College','Partial College','Partial College','Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['Bachelors','Graduate Degree'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +-- make_list_if() -- +Skilled Manual ['Edward','Christine'] +Professional ['Dalton','Angel'] +Management abcd defg ['Stephanie'] +Skilled Manual ['Edward'] +Professional ['Dalton'] +Management abcd defg ['Stephanie'] +-- make_set() -- +Skilled Manual ['Graduate Degree','High School','Partial College','Bachelors'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['Graduate Degree','Bachelors'] +Professional ['Graduate Degree','Partial College'] +Management abcd defg ['Bachelors'] +-- make_set_if() -- +Skilled Manual ['Partial College','High School'] +Professional ['Partial College'] +Management abcd defg ['Bachelors'] +Skilled Manual ['High School'] +Professional ['Partial College'] +Management abcd defg ['Bachelors'] +-- stdev() -- +6.855102059227432 +-- stdevif() -- +7.557189365836421 +-- binary_all_and -- +42 +-- binary_all_or -- +46 +-- binary_all_xor -- +4 +43.8 +[25.549999999999997,30.5,43.8] +30.5 +35 +[25,35,45] +-- Summarize following sort -- +Skilled Manual 5 +Professional 6 +Management abcd defg 1 +-- summarize with bin -- +0 1 +245000 2 +1970-01-01 00:00:00.000000000 1 +1970-01-01 00:04:05.000000000 2 +00:00:00 1 +00:04:05 2 +2015-10-12 00:00:00.000000000 +2016-10-12 00:00:00.000000000 +-- make_list_with_nulls -- 
+['Theodore','Stephanie','Peter','Latoya','Joshua','Edward','Dalton','Christine','Cameron','Angel','Apple',NULL] +Skilled Manual ['Theodore','Peter','Edward','Christine','Apple'] +Professional ['Latoya','Joshua','Dalton','Cameron','Angel',NULL] +Management abcd defg ['Stephanie'] +Skilled Manual ['Theodore','Peter','Edward','Christine','Apple'] [28,26,36,33,28] +Professional ['Latoya','Joshua','Dalton','Cameron','Angel',NULL] [25,26,42,28,46,38] +Management abcd defg ['Stephanie'] [33] +-- count_distinct -- +4 +-- count_distinctif -- +3 +-- format_datetime -- +70-01-01 1 +70-01-03 2 +-- take_any -- +Theodore +Theodore Diaz +Cameron Rodriguez Cameron Rodriguez +Christine Nara Christine Nara +TheodoreDiaz +-- take_anyif -- +Theodore +Theodore 11 +-- variance/variancep/varianceif -- +46.992424242424185 +43.076388888888836 +1.766666666666606 +-- arg_max -- +Stewart 46 +Skilled Manual Edward Hernandez 36 +Professional Angel Stewart 46 +Management abcd defg Stephanie Cox 33 +-- arg_min -- +Shen 25 +Skilled Manual Peter Nara 26 +Professional Latoya Shen 25 +Management abcd defg Stephanie Cox 33 +-- hll, hll_if, hll_merge, dcount_hll -- +4 +3 +7 +7 diff --git a/tests/queries/0_stateless/02366_kql_summarize.sql b/tests/queries/0_stateless/02366_kql_summarize.sql new file mode 100644 index 000000000000..111dcee793d3 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_summarize.sql @@ -0,0 +1,153 @@ +-- datatable(FirstName:string, LastName:string, Occupation:string, Education:string, Age:int) [ +-- 'Theodore', 'Diaz', 'Skilled Manual', 'Bachelors', 28, +-- 'Stephanie', 'Cox', 'Management abcd defg', 'Bachelors', 33, +-- 'Peter', 'Nara', 'Skilled Manual', 'Graduate Degree', 26, +-- 'Latoya', 'Shen', 'Professional', 'Graduate Degree', 25, +-- 'Joshua', 'Lee', 'Professional', 'Partial College', 26, +-- 'Edward', 'Hernandez', 'Skilled Manual', 'High School', 36, +-- 'Dalton', 'Wood', 'Professional', 'Partial College', 42, +-- 'Christine', 'Nara', 'Skilled Manual', 'Partial 
College', 33, +-- 'Cameron', 'Rodriguez', 'Professional', 'Partial College', 28, +-- 'Angel', 'Stewart', 'Professional', 'Partial College', 46, +-- 'Apple', '', 'Skilled Manual', 'Bachelors', 28, +-- dynamic(null), 'why', 'Professional', 'Partial College', 38 +-- ] + +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Joshua','Lee','Professional','Partial College',26),('Edward','Hernandez','Skilled Manual','High School',36),('Dalton','Wood','Professional','Partial College',42),('Christine','Nara','Skilled Manual','Partial College',33),('Cameron','Rodriguez','Professional','Partial College',28),('Angel','Stewart','Professional','Partial College',46),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); + +-- datatable (LogEntry:string, Created:long) [ +-- 'Darth Vader has entered the room.', 546, +-- 'Rambo is suspciously looking at Darth Vader.', 245234, +-- 'Darth Sidious electrocutes both using Force Lightning.', 245554 +-- ] + +drop table if exists EventLog; +create table EventLog +( + LogEntry String, + Created Int64 +) ENGINE = Memory; + +insert into EventLog values ('Darth Vader has entered the room.', 546), ('Rambo is suspciously looking at Darth Vader.', 245234), ('Darth Sidious electrocutes both using Force Lightning.', 245554); + +drop table if exists Dates; +create table Dates +( + EventTime DateTime('UTC'), +) ENGINE = Memory; + +insert into Dates values ('2015-10-12'), ('2016-10-12'); + +select '-- test summarize --'; +set dialect = 'kusto'; +set interval_output_format = 'kusto'; + +Customers | summarize count(), min(Age), 
max(Age), avg(Age), sum(Age); +Customers | summarize count(), min(Age), max(Age), avg(Age), sum(Age) by Occupation; +Customers | summarize countif(Age>40) by Occupation; +Customers | summarize MyMax = maxif(Age, Age<40) by Occupation; +Customers | summarize MyMin = minif(Age, Age<40) by Occupation; +Customers | summarize MyAvg = avgif(Age, Age<40) by Occupation; +Customers | summarize MySum = sumif(Age, Age<40) by Occupation; +Customers | summarize dcount(Education); +Customers | summarize dcount(Education, 2); +Customers | summarize dcount(Education, 10); -- { clientError 36 } +Customers | summarize dcountif(Education, Occupation=='Professional'); +Customers | summarize dcountif(Education, Occupation=='Professional', 2); +Customers | summarize dcountif(Education, Occupation=='Professional', -1); -- { clientError 36 } +Customers | summarize count_ = count() by bin(Age, 10) | order by count_ asc; +Customers | summarize job_count = count() by Occupation | where job_count > 0; +Customers | summarize 'Edu Count'=count() by Education | sort by 'Edu Count' desc; -- { clientError 62 } +Customers | summarize by FirstName, LastName, Age; + +print '-- make_list() --'; +Customers | summarize f_list = make_list(Education) by Occupation; +Customers | summarize f_list = make_list(Education, 2) by Occupation; +print '-- make_list_if() --'; +Customers | summarize f_list = make_list_if(FirstName, Age>30) by Occupation; +Customers | summarize f_list = make_list_if(FirstName, Age>30, 1) by Occupation; +print '-- make_set() --'; +Customers | summarize f_list = make_set(Education) by Occupation; +Customers | summarize f_list = make_set(Education, 2) by Occupation; +print '-- make_set_if() --'; +Customers | summarize f_list = make_set_if(Education, Age>30) by Occupation; +Customers | summarize f_list = make_set_if(Education, Age>30, 1) by Occupation; +print '-- stdev() --'; +Customers | project Age | summarize stdev(Age); +print '-- stdevif() --'; +Customers | project Age | summarize 
stdevif(Age, Age%2==0); +print '-- binary_all_and --'; +Customers | project Age | where Age > 40 | summarize binary_all_and(Age); +print '-- binary_all_or --'; +Customers | project Age | where Age > 40 | summarize binary_all_or(Age); +print '-- binary_all_xor --'; +Customers | project Age | where Age > 40 | summarize binary_all_xor(Age); + +Customers | project Age | summarize percentile(Age, 95); +Customers | project Age | summarize percentiles(Age, 5, 50, 95); +Customers | project Age | summarize percentiles(Age, 5, 50, 95)[1]; +Customers | summarize w=count() by AgeBucket=bin(Age, 5) | summarize percentilew(AgeBucket, w, 75); +Customers | summarize w=count() by AgeBucket=bin(Age, 5) | summarize percentilesw(AgeBucket, w, 50, 75, 99.9); + +print '-- Summarize following sort --'; +Customers | sort by FirstName | summarize count() by Occupation; + +print '-- summarize with bin --'; +EventLog | summarize count=count() by bin(Created, 1000); +EventLog | summarize count=count() by bin(unixtime_seconds_todatetime(Created/1000), 1s); +EventLog | summarize count=count() by time_label=bin(Created / 1000 * 1s, 1s); +Dates | project bin(EventTime, 1m); +print '-- make_list_with_nulls --'; +Customers | summarize t = make_list_with_nulls(FirstName); +Customers | summarize f_list = make_list_with_nulls(FirstName) by Occupation; +Customers | summarize f_list = make_list_with_nulls(FirstName), a_list = make_list_with_nulls(Age) by Occupation; +print '-- count_distinct --'; +Customers | summarize count_distinct(Education); +print '-- count_distinctif --'; +Customers | summarize count_distinctif(Education, Age > 30); + +print '-- format_datetime --'; +EventLog | summarize count() by dt = format_datetime(bin(unixtime_seconds_todatetime(Created), 1d), 'yy-MM-dd') | order by dt asc; + +print '-- take_any --'; +Customers | summarize take_any(FirstName); +Customers | summarize take_any(FirstName), take_any(LastName); +Customers | where FirstName startswith 'C' | summarize 
take_any(FirstName, LastName) by FirstName, LastName; +Customers | summarize take_any(strcat(FirstName,LastName)); +print '-- take_anyif --'; +Customers | summarize take_anyif(FirstName, LastName has 'Diaz'); +Customers | summarize take_anyif(FirstName, LastName has 'Diaz'), dcount(FirstName); + +print '-- variance/variancep/varianceif --'; +Customers | summarize variance(Age); +Customers | summarize variancep(Age); +Customers | summarize varianceif(Age, Age < 30); +Customers | summarize variance(null); -- { clientError Code: 395 } +Customers | summarize variancep(null); -- { clientError Code: 395 } +Customers | summarize varianceif(null, Age < 30); -- { clientError Code: 395 } + +print '-- arg_max --'; +Customers | summarize arg_max(Age); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +Customers | summarize arg_max(Age, LastName); +Customers | summarize z=arg_max(Age, FirstName, LastName) by Occupation; + +print '-- arg_min --'; +Customers | summarize arg_min(Age); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +Customers | summarize arg_min(Age, LastName); +Customers | summarize z=arg_min(Age, FirstName, LastName) by Occupation; + +print '-- hll, hll_if, hll_merge, dcount_hll --'; +Customers | summarize x = hll(Education) | project dcount_hll(x); +Customers | summarize y = hll(Occupation) | project dcount_hll(y); +Customers | summarize x = hll(Education), y = hll(Occupation) | project xy = hll_merge(x, y) | project dcount_hll(xy); +Customers | summarize x = hll(Education), y = hll(Occupation) | summarize xy = hll_merge(x, y) | project dcount_hll(xy); diff --git a/tests/queries/0_stateless/02366_kql_tabular.reference b/tests/queries/0_stateless/02366_kql_tabular.reference new file mode 100644 index 000000000000..dea0e8878bb9 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_tabular.reference @@ -0,0 +1,140 @@ +-- test Query only has table name: -- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management Bachelors 33 +Peter Nara Skilled 
Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Joshua Lee Professional Partial College 26 +Edward Hernandez Skilled Manual High School 36 +Dalton Wood Professional Partial College 42 +Christine Nara Skilled Manual Partial College 33 +Cameron Rodriguez Professional Partial College 28 +Angel Stewart Professional Partial College 46 +-- Query has Column Selection -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +Edward Hernandez Skilled Manual +Dalton Wood Professional +Christine Nara Skilled Manual +Cameron Rodriguez Professional +Angel Stewart Professional +-- Query has limit -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +-- Query has second limit with bigger value -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +Latoya Shen Professional +Joshua Lee Professional +-- Query has second limit with smaller value -- +Theodore Diaz Skilled Manual +Stephanie Cox Management +Peter Nara Skilled Manual +-- Query has second Column selection -- +Theodore Diaz +Stephanie Cox +Peter Nara +-- Query has second Column selection with extra column -- +-- Query with desc sort -- +Theodore +Stephanie +Peter +Latoya +Joshua +Skilled Manual +Skilled Manual +Professional +Professional +Management +-- Query with asc sort -- +Management +Professional +Professional +Skilled Manual +Skilled Manual +-- Query with sort (without keyword asc desc) -- +Theodore +Stephanie +Peter +Latoya +Joshua +Skilled Manual +Skilled Manual +Professional +Professional +Management +-- Query with sort 2 Columns with different direction -- +Stephanie Cox Management +Latoya Shen Professional +Joshua Lee Professional +Peter Nara Skilled Manual 
+Theodore Diaz Skilled Manual +-- Query with second sort -- +Stephanie Cox Management +Latoya Shen Professional +Joshua Lee Professional +Peter Nara Skilled Manual +Theodore Diaz Skilled Manual +-- Test String Equals (==) -- +Theodore Diaz Skilled Manual +Peter Nara Skilled Manual +Edward Hernandez Skilled Manual +Christine Nara Skilled Manual +-- Test String Not equals (!=) -- +Stephanie Cox Management +Latoya Shen Professional +Joshua Lee Professional +Dalton Wood Professional +Cameron Rodriguez Professional +Angel Stewart Professional +-- Test Filter using a list (in) -- +Theodore Diaz Skilled Manual Bachelors +Stephanie Cox Management Bachelors +Edward Hernandez Skilled Manual High School +-- Test Filter using a list (!in) -- +Peter Nara Skilled Manual Graduate Degree +Latoya Shen Professional Graduate Degree +Joshua Lee Professional Partial College +Dalton Wood Professional Partial College +Christine Nara Skilled Manual Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional Partial College +-- Test Filter using common string operations (contains_cs) -- +Joshua Lee Professional Partial College +Dalton Wood Professional Partial College +Christine Nara Skilled Manual Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional Partial College +-- Test Filter using common string operations (startswith_cs) -- +Latoya Shen Professional Graduate Degree +Joshua Lee Professional Partial College +Dalton Wood Professional Partial College +Cameron Rodriguez Professional Partial College +Angel Stewart Professional Partial College +-- Test Filter using common string operations (endswith_cs) -- +Latoya Shen Professional Graduate Degree +Joshua Lee Professional Partial College +-- Test Filter using numerical equal (==) -- +Peter Nara Skilled Manual Graduate Degree 26 +Joshua Lee Professional Partial College 26 +-- Test Filter using numerical great and less (> , <) -- +Stephanie Cox Management Bachelors 33 
+Edward Hernandez Skilled Manual High School 36 +Christine Nara Skilled Manual Partial College 33 +-- Test Filter using multi where -- +Dalton Wood Professional Partial College 42 +Angel Stewart Professional Partial College 46 +-- Complex query with unknown function -- +-- Missing column in front of startsWith -- +-- Missing table in the pipeline -- diff --git a/tests/queries/0_stateless/02366_kql_tabular.sql b/tests/queries/0_stateless/02366_kql_tabular.sql new file mode 100644 index 000000000000..95751df8df30 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_tabular.sql @@ -0,0 +1,91 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management','Bachelors',33), ('Peter','Nara','Skilled Manual','Graduate Degree',26), ('Latoya','Shen','Professional','Graduate Degree',25), ('Joshua','Lee','Professional','Partial College',26), ('Edward','Hernandez','Skilled Manual','High School',36), ('Dalton','Wood','Professional','Partial College',42), ('Christine','Nara','Skilled Manual','Partial College',33), ('Cameron','Rodriguez','Professional','Partial College',28), ('Angel','Stewart','Professional','Partial College',46); + +set dialect='kusto'; +print '-- test Query only has table name: --'; +Customers; + +print '-- Query has Column Selection --'; +Customers | project FirstName,LastName,Occupation; + +print '-- Query has limit --'; +Customers | project FirstName,LastName,Occupation | take 5; +Customers | project FirstName,LastName,Occupation | limit 5; + +print '-- Query has second limit with bigger value --'; +Customers | project FirstName,LastName,Occupation | take 5 | take 7; + +print '-- Query has second limit with smaller value --'; +Customers | project FirstName,LastName,Occupation | take 5 | take 3; + +print '-- Query 
has second Column selection --'; +Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName; + +print '-- Query has second Column selection with extra column --'; +Customers| project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education;-- { serverError 47 } + +print '-- Query with desc sort --'; +Customers | project FirstName | take 5 | sort by FirstName desc; +Customers | project Occupation | take 5 | order by Occupation desc; + +print '-- Query with asc sort --'; +Customers | project Occupation | take 5 | sort by Occupation asc; + +print '-- Query with sort (without keyword asc desc) --'; +Customers | project FirstName | take 5 | sort by FirstName; +Customers | project Occupation | take 5 | order by Occupation; + +print '-- Query with sort 2 Columns with different direction --'; +Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation asc, LastName desc; + +print '-- Query with second sort --'; +Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation desc |sort by Occupation asc, LastName desc; + +print '-- Test String Equals (==) --'; +Customers | project FirstName,LastName,Occupation | where Occupation == 'Skilled Manual'; + +print '-- Test String Not equals (!=) --'; +Customers | project FirstName,LastName,Occupation | where Occupation != 'Skilled Manual'; + +print '-- Test Filter using a list (in) --'; +Customers | project FirstName,LastName,Occupation,Education | where Education in ('Bachelors','High School'); + +print '-- Test Filter using a list (!in) --'; +set dialect='kusto'; +Customers | project FirstName,LastName,Occupation,Education | where Education !in ('Bachelors','High School'); + +print '-- Test Filter using common string operations (contains_cs) --'; +Customers | project FirstName,LastName,Occupation,Education | where Education contains_cs 'Coll'; + +print '-- Test Filter using common string operations (startswith_cs) --'; +Customers | project 
FirstName,LastName,Occupation,Education | where Occupation startswith_cs 'Prof'; + +print '-- Test Filter using common string operations (endswith_cs) --'; +Customers | project FirstName,LastName,Occupation,Education | where FirstName endswith_cs 'a'; + +print '-- Test Filter using numerical equal (==) --'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age == 26; + +print '-- Test Filter using numerical great and less (> , <) --'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 and Age < 40; + +print '-- Test Filter using multi where --'; +Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 | where Occupation == 'Professional'; + +print '-- Complex query with unknown function --'; +hits | where CounterID == 62 and EventDate >= '2013-07-14' and EventDate <= '2013-07-15' and IsRefresh == 0 and DontCountHits == 0 | summarize count() by d=bin(poopoo(EventTime), 1m) | order by d | limit 10; -- { clientError UNKNOWN_FUNCTION } + +print '-- Missing column in front of startsWith --'; +StormEvents | where startswith "W" | summarize Count=count() by State; -- { clientError SYNTAX_ERROR } + +print '-- Missing table in the pipeline --'; +| where State contains "enn" | where event_count > 10 | project State, event_count; -- { clientError SYNTAX_ERROR } \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_test_sort.reference b/tests/queries/0_stateless/02366_kql_test_sort.reference new file mode 100644 index 000000000000..aef53864c00e --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_test_sort.reference @@ -0,0 +1,140 @@ +--1-- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors \N +\N why Professional Partial College 38 +--2-- +\N why Professional Partial College 38 +Apple Skilled Manual Bachelors \N 
+Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Theodore Diaz Skilled Manual Bachelors 28 +--3-- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors \N +\N why Professional Partial College 38 +--4-- +\N why Professional Partial College 38 +Apple Skilled Manual Bachelors \N +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Theodore Diaz Skilled Manual Bachelors 28 +--5-- +Apple Skilled Manual Bachelors \N +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Theodore Diaz Skilled Manual Bachelors 28 +\N why Professional Partial College 38 +--6-- +\N why Professional Partial College 38 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors \N +--7-- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors \N +\N why Professional Partial College 38 +--8-- +\N why Professional Partial College 38 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors \N +--9-- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors \N 
+\N why Professional Partial College 38 +--10-- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors \N +\N why Professional Partial College 38 +--11-- +\N why Professional Partial College 38 +Apple Skilled Manual Bachelors \N +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Theodore Diaz Skilled Manual Bachelors 28 +--12-- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors \N +\N why Professional Partial College 38 +--13-- +\N why Professional Partial College 38 +Apple Skilled Manual Bachelors \N +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Theodore Diaz Skilled Manual Bachelors 28 +--14-- +Apple Skilled Manual Bachelors \N +Latoya Shen Professional Graduate Degree 25 +Peter Nara Skilled Manual Graduate Degree 26 +Stephanie Cox Management abcd defg Bachelors 33 +Theodore Diaz Skilled Manual Bachelors 28 +\N why Professional Partial College 38 +--15-- +\N why Professional Partial College 38 +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors \N +--16-- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors \N +\N why Professional Partial College 38 +--17-- +\N why Professional Partial College 38 +Theodore Diaz Skilled Manual Bachelors 28 
+Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors \N +--18-- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors \N +\N why Professional Partial College 38 +--19-- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors \N +\N why Professional Partial College 38 +--20-- +Theodore Diaz Skilled Manual Bachelors 28 +Stephanie Cox Management abcd defg Bachelors 33 +Peter Nara Skilled Manual Graduate Degree 26 +Latoya Shen Professional Graduate Degree 25 +Apple Skilled Manual Bachelors \N +\N why Professional Partial College 38 diff --git a/tests/queries/0_stateless/02366_kql_test_sort.sql b/tests/queries/0_stateless/02366_kql_test_sort.sql new file mode 100644 index 000000000000..d438156febdd --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_test_sort.sql @@ -0,0 +1,53 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',NULL),(NULL,'why','Professional','Partial College',38); + +set dialect = 'kusto'; +print '--1--'; +Customers | order by FirstName; +print '--2--'; +Customers | order by FirstName asc; +print '--3--'; +Customers | order by FirstName desc; +print '--4--'; +Customers | order by FirstName 
asc nulls first; +print '--5--'; +Customers | order by FirstName asc nulls last; +print '--6--'; +Customers | order by FirstName desc nulls first; +print '--7--'; +Customers | order by FirstName desc nulls last; +print '--8--'; +Customers | order by FirstName nulls first; +print '--9--'; +Customers | order by FirstName nulls last; +print '--10--'; +Customers | order by FirstName, Age; +print '--11--'; +Customers | order by FirstName asc, Age desc; +print '--12--'; +Customers | order by FirstName desc, Age asc ; +print '--13--'; +Customers | order by FirstName asc nulls first, Age asc nulls first; +print '--14--'; +Customers | order by FirstName asc nulls last, Age asc nulls last; +print '--15--'; +Customers | order by FirstName desc nulls first, Age desc nulls first; +print '--16--'; +Customers | order by FirstName desc nulls last, Age desc nulls last; +print '--17--'; +Customers | order by FirstName nulls first, Age nulls first; +print '--18--'; +Customers | order by FirstName nulls last, Age nulls last; +print '--19--'; +Customers | order by FirstName , Age asc nulls last, LastName nulls first; +print '--20--'; +Customers | order by strcat(FirstName, ' ',LastName), Age asc nulls last, LastName nulls first; diff --git a/tests/queries/0_stateless/02366_kql_test_subquery.reference b/tests/queries/0_stateless/02366_kql_test_subquery.reference new file mode 100644 index 000000000000..492c5ca3d595 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_test_subquery.reference @@ -0,0 +1,84 @@ +-- test negetivate operator in kql subuquery -- +-- #1 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #2 -- +Peter Cox Management abcd defg Bachelors 33 +-- #3 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 +-- #4 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 +-- #5 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 +-- #6 -- +Peter Cox Management 
abcd defg Bachelors 33 +-- #7 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 +Peter Cox Management abcd defg Bachelors 33 +-- #8 -- +Peter Cox Management abcd defg Bachelors 33 +Peter Cox Management abcd defg Bachelors 33 +-- #9 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #10 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #11 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 +-- #12 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #13 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #14 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 +-- #15 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #16 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #17 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 +-- #18 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #19 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #20 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 + +-- test case-insensitive in operator kql subuquery -- +-- #21 -- +Peter Cox Management abcd defg Bachelors 33 +-- #22 -- +Peter Cox Management abcd defg Bachelors 33 +-- #23 -- +Peter Cox Management abcd defg Bachelors 33 +-- #24 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #25 -- +Peter Cox Management abcd defg Bachelors 33 +-- #26 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #27 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #28 -- +Peter Cox Management abcd defg Bachelors 33 +-- #29 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #30 -- +Theodore Diaz Skilled Manual Bachelors 28 + +-- test multi columns in operator kql subuquery -- +-- #32 -- +Theodore Diaz Skilled Manual Bachelors 28 +Peter Cox Management abcd defg Bachelors 33 +-- #33 -- +Theodore Diaz Skilled Manual Bachelors 28 +-- #34 -- +Peter Cox Management abcd defg Bachelors 33 +-- #35 -- +Peter Cox Management 
abcd defg Bachelors 33 diff --git a/tests/queries/0_stateless/02366_kql_test_subquery.sql b/tests/queries/0_stateless/02366_kql_test_subquery.sql new file mode 100644 index 000000000000..9976d22d4cd7 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_test_subquery.sql @@ -0,0 +1,88 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8) +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Peter','Cox','Management abcd defg','Bachelors',33); + +set dialect = 'kusto'; +print '-- test negetivate operator in kql subuquery --' ; +print '-- #1 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !in ('Peter', 'Latoya'))); +print '-- #2 --' ; +Customers | where FirstName in ((Customers | project FirstName, Age | where Age !in (28, 29))); +print '-- #3 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !contains 'ste')); +print '-- #4 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !contains_cs 'Ste')); +print '-- #5 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !contains_cs 'ste')); +print '-- #6 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !endswith 'ore')); +print '-- #7 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !endswith_cs 'Ore')); +Customers | where FirstName in ((Customers | project FirstName | where FirstName !endswith_cs 'ore')); +print '-- #8 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName != 'Theodore')); +Customers | where FirstName in ((Customers | project FirstName | where FirstName !~ 'theodore')); +print '-- #9 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !has 
'Peter')); +print '-- #10 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !has_cs 'Peter')); +print '-- #11 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !has_cs 'peter')); +print '-- #12 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !hasprefix 'Peter')); +print '-- #13 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !hasprefix_cs 'Peter')); +print '-- #14 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !hasprefix_cs 'peter')); +print '-- #15 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !hassuffix 'Peter')); +print '-- #16 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !hassuffix_cs 'Peter')); +print '-- #17 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !hassuffix_cs 'peter')); +print '-- #18 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !startswith 'Peter')); +print '-- #19 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !startswith_cs 'Peter')); +print '-- #20 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !startswith_cs 'peter')); +print ''; +print '-- test case-insensitive in operator kql subuquery --' ; +print '-- #21 --' ; +Customers | where FirstName !in~ ((Customers | project FirstName | where FirstName !in~ ('peter', 'apple'))); +print '-- #22 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName in~ ('peter', 'apple'))); +print '-- #23 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName in~ ((Customers | project FirstName | where FirstName == 'Peter')))); +print '-- #24 --' ; +Customers | where FirstName in ((Customers | project FirstName | 
where FirstName in~ ((Customers | project FirstName, Age | where Age < 30)))); +print '-- #25 --' ; +Customers | where substring(FirstName,0,3) in~ ((Customers | project substring(FirstName,0,3) | where FirstName in~ ('peter', 'apple'))); +print '-- #26 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !in~ ('peter', 'apple'))); +print '-- #27 --' ; +Customers | where FirstName in ((Customers |where Age <30 | project FirstName | where FirstName !in~ ((Customers | project FirstName | where FirstName =~ 'peter')))); +print '-- #28 --' ; +Customers | where FirstName in ((Customers | project FirstName | where FirstName !in~ ((Customers | project FirstName, Age | where Age < 30)))); +print '-- #29 --' ; +Customers | where FirstName in~ ((Customers | project FirstName | where FirstName !in~ ('peter', 'apple'))); +print '-- #30 --' ; +Customers | where FirstName in~ ((Customers | where FirstName !in~ ('peter', 'apple')| project FirstName)); +print ''; +print '-- test multi columns in operator kql subuquery --' ; +print '-- #32 --' ; +Customers | where FirstName in ((Customers | project FirstName, LastName, Age)); +print '-- #33 --' ; +Customers | where FirstName in~ ((Customers | project FirstName, LastName, Age|where Age <30)); +print '-- #34 --' ; +Customers | where FirstName !in ((Customers | project FirstName, LastName, Age |where Age <30 )); +print '-- #35 --' ; +Customers | where FirstName !in~ ((Customers | project FirstName, LastName, Age |where Age <30)); \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_top_hitters.reference b/tests/queries/0_stateless/02366_kql_top_hitters.reference new file mode 100644 index 000000000000..72e1e27c88b1 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_top_hitters.reference @@ -0,0 +1,38 @@ +--top 1-- +Angel Stewart Professional Partial College 46 100 +Dalton Wood Professional Partial College 42 70 +\N why Professional Partial College 38 120 +--top 2-- +Angel 
Stewart Professional Partial College 46 100 +Dalton Wood Professional Partial College 42 70 +\N why Professional Partial College 38 120 +--top 3-- +Peter Nara Skilled Manual Graduate Degree 26 30 +Latoya Shen Professional Graduate Degree 25 40 +Joshua Lee Professional Partial College 26 50 +--top 4-- +\N why Professional Partial College 38 120 +Theodore Diaz Skilled Manual Bachelors 28 10 +Stephanie Cox Management Bachelors 31 20 +--top 5-- +Theodore Diaz Skilled Manual Bachelors 28 10 +Stephanie Cox Management Bachelors 31 20 +Peter Nara Skilled Manual Graduate Degree 26 30 +--top 6-- +Dalton Wood Professional Partial College 42 70 +Angel Stewart Professional Partial College 46 100 +--top hitters 1-- +28 210 +38 120 +--top hitters 2-- +28 3 +26 2 +--top hitters 3-- +38 1 +28 1 +--top hitters 4-- +38 120 +--top hitters 5-- +38 120 +--top hitters 6-- +28 3 diff --git a/tests/queries/0_stateless/02366_kql_top_hitters.sql b/tests/queries/0_stateless/02366_kql_top_hitters.sql new file mode 100644 index 000000000000..60ac6a537f6f --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_top_hitters.sql @@ -0,0 +1,39 @@ +DROP TABLE IF EXISTS Customers; +CREATE TABLE Customers +( + FirstName Nullable(String), + LastName String, + Occupation String, + Education String, + Age Nullable(UInt8), + extra Int16 +) ENGINE = Memory; + +INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28,10),('Stephanie','Cox','Management','Bachelors',31,20),('Peter','Nara','Skilled Manual','Graduate Degree',26,30),('Latoya','Shen','Professional','Graduate Degree',25,40),('Joshua','Lee','Professional','Partial College',26,50),('Edward','Hernandez','Skilled Manual','High School',36,60),('Dalton','Wood','Professional','Partial College',42,70),('Christine','Nara','Skilled Manual','Partial College',33,80),('Cameron','Rodriguez','Professional','Partial College',28,90),('Angel','Stewart','Professional','Partial College',46,100),('Apple','B','Skilled 
Manual','Bachelors',28,110),(NULL,'why','Professional','Partial College',38,120); + +set dialect = 'kusto'; +print '--top 1--'; +Customers | top 3 by Age; +print '--top 2--'; +Customers | top 3 by Age desc; +print '--top 3--'; +Customers | top 3 by Age asc | order by FirstName; +print '--top 4--'; +Customers | top 3 by FirstName desc nulls first; +print '--top 5--'; +Customers | top 3 by FirstName desc nulls last; +print '--top 6--'; +Customers | top 3 by Age | top 2 by FirstName; +print '--top hitters 1--'; +Customers | top-hitters a = 2 of Age by extra; +print '--top hitters 2--'; +Customers | top-hitters 2 of Age; +print '--top hitters 3--'; +Customers | top-hitters 2 of Age by extra | top-hitters 2 of Age | order by Age; +print '--top hitters 4--'; +Customers | top-hitters 2 of Age by extra | where Age > 30; +print '--top hitters 5--'; +Customers | top-hitters 2 of Age by extra | where approximate_sum_extra < 200; +print '--top hitters 6--'; +Customers | top-hitters 2 of Age | where approximate_count_Age > 2; + diff --git a/tests/queries/0_stateless/02366_kql_topnested.reference b/tests/queries/0_stateless/02366_kql_topnested.reference new file mode 100644 index 000000000000..6a3e4f0ae124 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_topnested.reference @@ -0,0 +1,108 @@ +-- top nested 1 layer-- +QC 125 +ON 140 +MA 145 +--top nested 2 layers-- +QC 125 Steven 41 +QC 125 Joseph 33 +ON 140 Steven 64 +ON 140 Joseph 34 +MA 145 Steven 38 +MA 145 Robert 42 +--top nested 3 layers-- +QC 125 Steven 41 03/30/1996 23 +QC 125 Steven 41 03/29/1996 13 +QC 125 Joseph 33 03/30/1996 20 +QC 125 Joseph 33 03/29/1996 10 +ON 140 Steven 64 03/31/1996 30 +ON 140 Steven 64 03/30/1996 22 +ON 140 Joseph 34 03/30/1996 18 +ON 140 Joseph 34 03/29/1996 14 +MA 145 Steven 38 03/30/1996 24 +MA 145 Steven 38 03/29/1996 14 +MA 145 Robert 42 03/31/1996 25 +MA 145 Robert 42 03/30/1996 17 +--top nested 1 layer with others-- +all other region 55 +QC 125 +ON 140 +MA 145 +--top nested 2 
layers with 2 others-- +all other region 55 all other person 55 +QC 125 all other person 51 +QC 125 Steven 41 +QC 125 Joseph 33 +ON 140 all other person 42 +ON 140 Steven 64 +ON 140 Joseph 34 +MA 145 all other person 65 +MA 145 Steven 38 +MA 145 Robert 42 +--top nested 2 layers with 1st others-- +all other region 55 \N \N +QC 125 Steven 41 +QC 125 Joseph 33 +QC 125 \N \N +ON 140 Steven 64 +ON 140 Joseph 34 +ON 140 \N \N +MA 145 Steven 38 +MA 145 Robert 42 +MA 145 \N \N +--top nested 2 layer with 2nd others-- +QC 125 all other person 51 +QC 125 Steven 41 +QC 125 Joseph 33 +ON 140 all other person 42 +ON 140 Steven 64 +ON 140 Joseph 34 +MA 145 all other person 65 +MA 145 Steven 38 +MA 145 Robert 42 +\N \N all other person 55 +--top nested 3 layers with 3 others-- +all other region 55 all other person 55 all other date 55 +QC 125 all other person 51 all other date 51 +QC 125 Steven 41 all other date 5 +QC 125 Steven 41 03/30/1996 23 +QC 125 Steven 41 03/29/1996 13 +QC 125 Joseph 33 all other date 3 +QC 125 Joseph 33 03/30/1996 20 +QC 125 Joseph 33 03/29/1996 10 +ON 140 all other person 42 all other date 42 +ON 140 Steven 64 all other date 12 +ON 140 Steven 64 03/31/1996 30 +ON 140 Steven 64 03/30/1996 22 +ON 140 Joseph 34 all other date 2 +ON 140 Joseph 34 03/30/1996 18 +ON 140 Joseph 34 03/29/1996 14 +MA 145 all other person 65 all other date 65 +MA 145 Steven 38 all other date 0 +MA 145 Steven 38 03/30/1996 24 +MA 145 Steven 38 03/29/1996 14 +MA 145 Robert 42 all other date 0 +MA 145 Robert 42 03/31/1996 25 +MA 145 Robert 42 03/30/1996 17 +--top nested use expression as aggregation-- +QC 255 +ON 285 +MA 295 +--top nested use expression as top n-- +QC 125 +ON 140 +MA 145 +--top nested use expression as others-- +all other region 55 +QC 125 +ON 140 +MA 145 +--top nested use expression as column-- +Q 125 +O 140 +M 145 +B 55 +--top nested without top n-- +QC 125 +ON 140 +MA 145 +BC 55 diff --git a/tests/queries/0_stateless/02366_kql_topnested.sql 
b/tests/queries/0_stateless/02366_kql_topnested.sql new file mode 100644 index 000000000000..11b9d7e37338 --- /dev/null +++ b/tests/queries/0_stateless/02366_kql_topnested.sql @@ -0,0 +1,76 @@ +DROP TABLE IF EXISTS sales; +CREATE TABLE sales +(salesdate String,salesperson String,region String,amount UInt32) ENGINE = Memory; + +INSERT INTO sales VALUES ( '12/31/1995','Robert','ON',1); +INSERT INTO sales VALUES ( '12/31/1995','Joseph','ON',2); +INSERT INTO sales VALUES ( '12/31/1995','Joseph','QC',3); +INSERT INTO sales VALUES ( '12/31/1995','Joseph','MA',4); +INSERT INTO sales VALUES ( '12/31/1995','Steven','QC',5); +INSERT INTO sales VALUES ( '03/29/1996','Joseph','ON',6); +INSERT INTO sales VALUES ( '03/29/1996','Robert','QC',7); +INSERT INTO sales VALUES ( '03/29/1996','Joseph','ON',8); +INSERT INTO sales VALUES ( '03/29/1996','Joseph','BC',9); +INSERT INTO sales VALUES ( '03/29/1996','Joseph','QC',10); +INSERT INTO sales VALUES ( '03/29/1996','Joseph','MA',11); +INSERT INTO sales VALUES ( '03/29/1996','Steven','ON',12); +INSERT INTO sales VALUES ( '03/29/1996','Steven','QC',13); +INSERT INTO sales VALUES ( '03/29/1996','Steven','MA',14); +INSERT INTO sales VALUES ( '03/30/1996','Robert','ON',15); +INSERT INTO sales VALUES ( '03/30/1996','Robert','QC',16); +INSERT INTO sales VALUES ( '03/30/1996','Robert','MA',17); +INSERT INTO sales VALUES ( '03/30/1996','Joseph','ON',18); +INSERT INTO sales VALUES ( '03/30/1996','Joseph','BC',19); +INSERT INTO sales VALUES ( '03/30/1996','Joseph','QC',20); +INSERT INTO sales VALUES ( '03/30/1996','Joseph','MA',21); +INSERT INTO sales VALUES ( '03/30/1996','Steven','ON',22); +INSERT INTO sales VALUES ( '03/30/1996','Steven','QC',23); +INSERT INTO sales VALUES ( '03/30/1996','Steven','MA',24); +INSERT INTO sales VALUES ( '03/31/1996','Robert','MA',25); +INSERT INTO sales VALUES ( '03/31/1996','Thomas','ON',26); +INSERT INTO sales VALUES ( '03/31/1996','Thomas','BC',27); +INSERT INTO sales VALUES ( '03/31/1996','Thomas','QC',28); 
+INSERT INTO sales VALUES ( '03/31/1996','Thomas','MA',29); +INSERT INTO sales VALUES ( '03/31/1996','Steven','ON',30); + + +set dialect = 'kusto'; + +print '-- top nested 1 layer--'; +sales | top-nested 3 of region by sum(amount)|order by region; + +print '--top nested 2 layers--'; +sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson by sum(amount)|order by region, salesperson; + +print '--top nested 3 layers--'; +sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson by sum(amount), top-nested 2 of salesdate by sum(amount)|order by region, salesperson, salesdate; + +print '--top nested 1 layer with others--'; +sales | top-nested 3 of region with others = 'all other region' by sum(amount)|order by region; + +print '--top nested 2 layers with 2 others--'; +sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount)|order by region, salesperson; + +print '--top nested 2 layers with 1st others--'; +sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson by sum(amount)|order by region, salesperson; + +print '--top nested 2 layer with 2nd others--'; +sales | top-nested 3 of region by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount)|order by region, salesperson; + +print '--top nested 3 layers with 3 others--'; +sales | top-nested 3 of region with others = 'all other region' by sum(amount), top-nested 2 of salesperson with others = 'all other person' by sum(amount), top-nested 2 of salesdate with others = 'all other date' by sum(amount)|order by region, salesperson, salesdate; + +print '--top nested use expression as aggregation--'; +sales | top-nested 3 of region by sum(amount)*2 + 5|order by region; + +print '--top nested use expression as top n--'; +sales | top-nested strlen('abc') of region by sum(amount)|order by region; + +print '--top 
nested use expression as others--'; +sales | top-nested 3 of region with others = strcat("all other"," region") by sum(amount)|order by region; + +print '--top nested use expression as column--'; +sales | top-nested of substring(region,0,1) by sum(amount)|order by Column1; + +print '--top nested without top n--'; +sales | top-nested of region by sum(amount)|order by region; \ No newline at end of file diff --git a/tests/queries/0_stateless/02455_dateTime64Diff.reference b/tests/queries/0_stateless/02455_dateTime64Diff.reference new file mode 100644 index 000000000000..db9adebf1b3a --- /dev/null +++ b/tests/queries/0_stateless/02455_dateTime64Diff.reference @@ -0,0 +1,5 @@ +-- dateTime64Diff -- +28200397123456789 +-28200397123456789 +-- DateTime64 arithmetic -- +28200397123456789 diff --git a/tests/queries/0_stateless/02455_dateTime64Diff.sql b/tests/queries/0_stateless/02455_dateTime64Diff.sql new file mode 100644 index 000000000000..06e13df465f1 --- /dev/null +++ b/tests/queries/0_stateless/02455_dateTime64Diff.sql @@ -0,0 +1,11 @@ +-- dateTime64Diff +select '-- dateTime64Diff --'; +select dateTime64Diff(toDateTime64('2022-11-23 09:26:37.123456789', 9), toDateTime64('2022-01-01', 0)); +select dateTime64Diff(toDateTime64('2022-01-01', 0), toDateTime64('2022-11-23 09:26:37.123456789', 9)); +select dateTime64Diff(toDateTime64('2022-11-23 09:26:37.123456789', 9), toDate('2022-01-01')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select dateTime64Diff(toDateTime64('2022-11-23 09:26:37.123456789', 9), toDate32('2022-01-01')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select dateTime64Diff(toDateTime64('2022-11-23 09:26:37.123456789', 9), toDateTime('2022-01-01 01:02:03')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- DateTime64 - DateTime64 +select '-- DateTime64 arithmetic --'; +select toDateTime64('2022-11-23 09:26:37.123456789', 9) - toDateTime64('2022-01-01', 0); diff --git a/tests/queries/0_stateless/02455_interval.reference 
b/tests/queries/0_stateless/02455_interval.reference new file mode 100644 index 000000000000..83c880e8aca9 --- /dev/null +++ b/tests/queries/0_stateless/02455_interval.reference @@ -0,0 +1,21 @@ +-- Interval -- +1000 IntervalNanosecond +-- Unary Operations -- +-1000 IntervalNanosecond +-1 Int8 +-- Binary Operations -- +-1000 Nullable(IntervalNanosecond) +3000 Nullable(IntervalNanosecond) +2.5 Nullable(Float64) +500 Nullable(IntervalNanosecond) +\N +inf +2500 IntervalNanosecond +2 Nullable(Int64) +0 Nullable(Int64) +1000 IntervalNanosecond +0 Nullable(IntervalNanosecond) +-- Conversion -- +1000 +1000 +1000 diff --git a/tests/queries/0_stateless/02455_interval.sql b/tests/queries/0_stateless/02455_interval.sql new file mode 100644 index 000000000000..9cc110106417 --- /dev/null +++ b/tests/queries/0_stateless/02455_interval.sql @@ -0,0 +1,51 @@ +select '-- Interval --'; +select toIntervalNanosecond(1000) as i, toTypeName(i); + +select '-- Unary Operations --'; +select -toIntervalNanosecond(1000) as i, toTypeName(i); + +select sign(toIntervalNanosecond(-1000)) as i, toTypeName(i); + +select abs(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitCount(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitNot(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select __bitSwapLastTwo(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select __bitWrapperFunc(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select intExp2(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select intExp10(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select roundAge(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select 
roundDuration(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select roundToExp2(toIntervalNanosecond(-1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select '-- Binary Operations --'; +select (toIntervalNanosecond(1000) - toIntervalNanosecond(2000)) as i, toTypeName(i); +select (toIntervalNanosecond(1000) + toIntervalNanosecond(2000)) as i, toTypeName(i); +select (toIntervalNanosecond(2500) / toIntervalNanosecond(1000)) as i, toTypeName(i); +select (toIntervalNanosecond(2500) % toIntervalNanosecond(1000)) as i, toTypeName(i); +select toIntervalNanosecond(1000) / 0; +select toIntervalNanosecond(1000) / toIntervalNanosecond(0); + +select (toIntervalNanosecond(2500) * toIntervalNanosecond(1000)) as i, toTypeName(i); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select greatest(toIntervalNanosecond(2500), toIntervalNanosecond(1000)) as i, toTypeName(i); +select intDiv(toIntervalNanosecond(2500), toIntervalNanosecond(1000)) as i, toTypeName(i); +select intDivOrZero(toIntervalNanosecond(2500), toIntervalNanosecond(0)) as i, toTypeName(i); +select least(toIntervalNanosecond(2500), toIntervalNanosecond(1000)) as i, toTypeName(i); +select moduloOrZero(toIntervalNanosecond(2500), toIntervalNanosecond(0)) as i, toTypeName(i); + +select bitAnd(toIntervalNanosecond(1000), toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select __bitBoolMaskAnd(toIntervalNanosecond(1000), toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select __bitBoolMaskOr(toIntervalNanosecond(1000), toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitHammingDistance(toIntervalNanosecond(1000), toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitOr(toIntervalNanosecond(1000), toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitRotateLeft(toIntervalNanosecond(1000), 1); -- { serverError 
ILLEGAL_TYPE_OF_ARGUMENT } +select bitRotateRight(toIntervalNanosecond(1000), 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitShiftLeft(toIntervalNanosecond(1000), 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitShiftRight(toIntervalNanosecond(1000), 4); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitTest(toIntervalNanosecond(1000), 5); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select bitXor(toIntervalNanosecond(1000), toIntervalNanosecond(1000)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select '-- Conversion --'; +select toFloat64(toIntervalNanosecond(1000)); +select toInt64(toIntervalNanosecond(1000)); +select toString(toIntervalNanosecond(1000)); diff --git a/tests/queries/0_stateless/02531_ipv4_arithmetic.reference b/tests/queries/0_stateless/02531_ipv4_arithmetic.reference index 6f03e4e6903e..28d6f76e9e9d 100644 --- a/tests/queries/0_stateless/02531_ipv4_arithmetic.reference +++ b/tests/queries/0_stateless/02531_ipv4_arithmetic.reference @@ -1,3 +1,5 @@ +-- { echoOn } +SELECT number, ip, ip % number FROM (SELECT number, toIPv4('1.2.3.4') as ip FROM numbers(10, 20)); 10 1.2.3.4 0 11 1.2.3.4 3 12 1.2.3.4 4 @@ -18,3 +20,24 @@ 27 1.2.3.4 13 28 1.2.3.4 0 29 1.2.3.4 1 +SELECT number, ip, number % ip FROM (SELECT number, toIPv4OrNull('0.0.0.3') as ip FROM numbers(10, 20)); +10 0.0.0.3 1 +11 0.0.0.3 2 +12 0.0.0.3 0 +13 0.0.0.3 1 +14 0.0.0.3 2 +15 0.0.0.3 0 +16 0.0.0.3 1 +17 0.0.0.3 2 +18 0.0.0.3 0 +19 0.0.0.3 1 +20 0.0.0.3 2 +21 0.0.0.3 0 +22 0.0.0.3 1 +23 0.0.0.3 2 +24 0.0.0.3 0 +25 0.0.0.3 1 +26 0.0.0.3 2 +27 0.0.0.3 0 +28 0.0.0.3 1 +29 0.0.0.3 2 diff --git a/tests/queries/0_stateless/02531_ipv4_arithmetic.sql b/tests/queries/0_stateless/02531_ipv4_arithmetic.sql index 59a99842d61a..88c8cf936dde 100644 --- a/tests/queries/0_stateless/02531_ipv4_arithmetic.sql +++ b/tests/queries/0_stateless/02531_ipv4_arithmetic.sql @@ -1 +1,4 @@ -SELECT number, ip, ip % number FROM (SELECT number, toIPv4('1.2.3.4') as ip FROM numbers(10, 20)); 
\ No newline at end of file +-- { echoOn } +SELECT number, ip, ip % number FROM (SELECT number, toIPv4('1.2.3.4') as ip FROM numbers(10, 20)); +SELECT number, ip, number % ip FROM (SELECT number, toIPv4OrNull('0.0.0.3') as ip FROM numbers(10, 20)); + diff --git a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh index a43cd6deb9e2..333bc1bc25d5 100755 --- a/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh +++ b/tests/queries/0_stateless/02808_custom_disk_with_user_defined_name.sh @@ -12,7 +12,7 @@ $CLICKHOUSE_CLIENT -nm --query """ DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk_s3_disk(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); +SETTINGS disk = disk(name = 's3_disk', type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); """ 2>&1 | grep -q "Disk with name \`s3_disk\` already exist" && echo 'OK' || echo 'FAIL' disk_name="${CLICKHOUSE_TEST_UNIQUE_NAME}" @@ -25,7 +25,7 @@ $CLICKHOUSE_CLIENT -nm --query """ DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk_$disk_name(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); +SETTINGS disk = disk(name = '$disk_name', type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); """ $CLICKHOUSE_CLIENT -nm --query """ diff --git a/tests/queries/0_stateless/02811_ip_dict_attribute.reference b/tests/queries/0_stateless/02811_ip_dict_attribute.reference new file mode 100644 index 000000000000..a36cf2e3d8aa --- /dev/null +++ b/tests/queries/0_stateless/02811_ip_dict_attribute.reference @@ -0,0 +1,2 @@ +('::2.2.2.2','2.2.2.2') +('::1.1.1.1','1.1.1.1') diff --git a/tests/queries/0_stateless/02811_ip_dict_attribute.sql 
b/tests/queries/0_stateless/02811_ip_dict_attribute.sql new file mode 100644 index 000000000000..0ffff6e4a530 --- /dev/null +++ b/tests/queries/0_stateless/02811_ip_dict_attribute.sql @@ -0,0 +1,13 @@ +CREATE TABLE src ( id UInt64, ip4 IPv4, ip6 IPv6 ) Engine=Memory AS + SELECT * FROM VALUES( (1, '1.1.1.1', '::1.1.1.1'), (2, '2.2.2.2', '::2.2.2.2') ); + +CREATE DICTIONARY dict ( id UInt64, ip4 IPv4, ip6 IPv6 ) + PRIMARY KEY id + LAYOUT(HASHED()) + SOURCE (CLICKHOUSE ( table src)) + lifetime ( 10); + +SELECT dictGet('dict', ('ip6', 'ip4'), arrayJoin([2,1])); + +DROP DICTIONARY dict; +DROP TABLE src; diff --git a/tests/queries/0_stateless/02833_sparse_columns_tuple_function.reference b/tests/queries/0_stateless/02833_sparse_columns_tuple_function.reference new file mode 100644 index 000000000000..85573e2ed496 --- /dev/null +++ b/tests/queries/0_stateless/02833_sparse_columns_tuple_function.reference @@ -0,0 +1,4 @@ +(0,0) +(0,0) +(0,1) +(0,NULL) diff --git a/tests/queries/0_stateless/02833_sparse_columns_tuple_function.sql b/tests/queries/0_stateless/02833_sparse_columns_tuple_function.sql new file mode 100644 index 000000000000..776dd35ddba6 --- /dev/null +++ b/tests/queries/0_stateless/02833_sparse_columns_tuple_function.sql @@ -0,0 +1,14 @@ +drop table if exists t_tuple_sparse; + +create table t_tuple_sparse (a UInt64, b UInt64) +ENGINE = MergeTree ORDER BY tuple() +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.0; + +insert into t_tuple_sparse values (0, 0); + +select (a, b) from t_tuple_sparse; +select (a, 0) from t_tuple_sparse; +select (a, 1) from t_tuple_sparse; +select (a, NULL) from t_tuple_sparse; + +drop table if exists t_tuple_sparse; diff --git a/tests/queries/0_stateless/02833_url_without_path_encoding.reference b/tests/queries/0_stateless/02833_url_without_path_encoding.reference new file mode 100644 index 000000000000..d5626230d716 --- /dev/null +++ b/tests/queries/0_stateless/02833_url_without_path_encoding.reference @@ -0,0 +1,2 @@ +4 
+test%2Fa.tsv diff --git a/tests/queries/0_stateless/02833_url_without_path_encoding.sh b/tests/queries/0_stateless/02833_url_without_path_encoding.sh new file mode 100755 index 000000000000..b71586099cf8 --- /dev/null +++ b/tests/queries/0_stateless/02833_url_without_path_encoding.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "select count() from url('http://localhost:11111/test%2Fa.tsv') settings enable_url_encoding=1" + +# Grep 'test%2Fa.tsv' to ensure that path wasn't encoded/decoded +$CLICKHOUSE_CLIENT -q "select count() from url('http://localhost:11111/test%2Fa.tsv') settings enable_url_encoding=0" 2>&1 | grep -o "test%2Fa.tsv" -m1 + diff --git a/tests/queries/0_stateless/uuid b/tests/queries/0_stateless/uuid new file mode 100644 index 000000000000..8b23e066a9ca --- /dev/null +++ b/tests/queries/0_stateless/uuid @@ -0,0 +1 @@ +896c3cde-455c-4bf1-94d5-31f12f21de9c \ No newline at end of file diff --git a/utils/check-style/codespell-ignore-words.list b/utils/check-style/codespell-ignore-words.list index 27e08de80eee..48abdbafcb82 100644 --- a/utils/check-style/codespell-ignore-words.list +++ b/utils/check-style/codespell-ignore-words.list @@ -32,3 +32,5 @@ nam ubuntu toolchain vie +Iif +iif