From b75274f24c3e813877fc32b8369cdad6aa8b5ded Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Tue, 7 Oct 2025 15:38:00 +0100 Subject: [PATCH 01/18] GH-47705: [R][CI] Migrate rhub debian-gcc-release to equivalent supported image (#47730) ### Rationale for this change Old image fails due to debian update ### What changes are included in this PR? Use newer image ### Are these changes tested? Will submit crossbow run ### Are there any user-facing changes? No * GitHub Issue: #47705 Authored-by: Nic Crane Signed-off-by: Nic Crane --- dev/tasks/tasks.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 11831e646c3..9d225e3b702 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -733,12 +733,12 @@ tasks: ci: github template: r/github.linux.offline.build.yml - test-r-rhub-debian-gcc-release-custom-ccache: + test-r-rhub-ubuntu-gcc12-custom-ccache: ci: azure template: r/azure.linux.yml params: r_org: rhub - r_image: debian-gcc-release + r_image: ubuntu-gcc12 r_tag: latest r_custom_ccache: true From d57e4d46ecf7ccc2e8ec1fb613ba38b453c87c95 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 8 Oct 2025 00:43:29 +0900 Subject: [PATCH 02/18] GH-47704: [R] Update paths in nightly libarrow upload job (#47727) ### Rationale for this change #45964 changed paths of pre-built Apache Arrow C++ binaries for R. But we forgot to update the nightly upload job. ### What changes are included in this PR? Update paths in the nightly upload job. ### Are these changes tested? No... ### Are there any user-facing changes? Yes. 
* GitHub Issue: #47704 Authored-by: Sutou Kouhei Signed-off-by: Nic Crane --- .github/workflows/r_nightly.yml | 408 +++++++++++++++++--------------- 1 file changed, 216 insertions(+), 192 deletions(-) diff --git a/.github/workflows/r_nightly.yml b/.github/workflows/r_nightly.yml index 7c52672c1cd..4e12fce9545 100644 --- a/.github/workflows/r_nightly.yml +++ b/.github/workflows/r_nightly.yml @@ -1,193 +1,217 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Upload R Nightly builds -# This workflow downloads the (nightly) binaries created in crossbow and uploads them -# to nightlies.apache.org. Due to authorization requirements, this upload can't be done -# from the crossbow repository. - -on: - workflow_dispatch: - inputs: - prefix: - description: Job prefix to use. - required: false - default: '' - keep: - description: Number of versions to keep. 
- required: false - default: 14 - - schedule: - #Crossbow packaging runs at 0 8 * * * - - cron: '0 14 * * *' - -permissions: - contents: read - -jobs: - upload: - if: github.repository == 'apache/arrow' - runs-on: ubuntu-latest - steps: - - name: Checkout Arrow - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - with: - fetch-depth: 1 - path: arrow - repository: apache/arrow - ref: main - submodules: recursive - - name: Checkout Crossbow - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - with: - fetch-depth: 0 - path: crossbow - repository: ursacomputing/crossbow - ref: main - - name: Set up Python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Upload R Nightly builds +# This workflow downloads the (nightly) binaries created in crossbow and uploads them +# to nightlies.apache.org. Due to authorization requirements, this upload can't be done +# from the crossbow repository. + +on: + workflow_dispatch: + inputs: + prefix: + description: Job prefix to use. + required: false + default: '' + keep: + description: Number of versions to keep. 
+ required: false + default: 14 + + schedule: + #Crossbow packaging runs at 0 8 * * * + - cron: '0 14 * * *' + +permissions: + contents: read + +jobs: + upload: + if: github.repository == 'apache/arrow' + runs-on: ubuntu-latest + steps: + - name: Checkout Arrow + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + fetch-depth: 1 + path: arrow + repository: apache/arrow + ref: main + submodules: recursive + - name: Checkout Crossbow + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + fetch-depth: 0 + path: crossbow + repository: ursacomputing/crossbow + ref: main + - name: Set up Python uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 - with: - cache: 'pip' - python-version: 3.12 - - name: Install Archery - shell: bash - run: pip install -e arrow/dev/archery[all] - - run: mkdir -p binaries - - name: Download Artifacts - env: - PREFIX: ${{ github.event.inputs.prefix || ''}} - run: | - if [ -z $PREFIX ]; then - PREFIX=nightly-packaging-$(date +%Y-%m-%d)-0 - fi - echo $PREFIX - - archery crossbow download-artifacts -f r-binary-packages -t binaries $PREFIX - - if [ -n "$(ls -A binaries/*/*/)" ]; then - echo "Found files!" - else - echo "No files found. Stopping upload." 
- exit 1 - fi - - name: Cache Repo - uses: actions/cache@v4 - with: - path: repo - key: r-nightly-${{ github.run_id }} - restore-keys: r-nightly- - - name: Sync from Remote - uses: ./arrow/.github/actions/sync-nightlies - with: - switches: -avzh --update --delete --progress - local_path: repo - remote_path: ${{ secrets.NIGHTLIES_RSYNC_PATH }}/arrow/r - remote_host: ${{ secrets.NIGHTLIES_RSYNC_HOST }} - remote_port: ${{ secrets.NIGHTLIES_RSYNC_PORT }} - remote_user: ${{ secrets.NIGHTLIES_RSYNC_USER }} - remote_key: ${{ secrets.NIGHTLIES_RSYNC_KEY }} - remote_host_key: ${{ secrets.NIGHTLIES_RSYNC_HOST_KEY }} - - run: tree repo - - uses: r-lib/actions/setup-r@v2 - - name: Build Repository - shell: Rscript {0} - run: | - # folder that we sync to nightlies.apache.org - repo_root <- "repo" - # The binaries are in a nested dir - # so we need to find the correct path. - art_path <- list.files("binaries", - recursive = TRUE, - include.dirs = TRUE, - pattern = "r-binary-packages$", - full.names = TRUE - ) - - current_path <- list.files(art_path, full.names = TRUE, recursive = TRUE) - files <- sub("r-(pkg|lib)", repo_root, current_path) - - # decode contrib.url from artifact name: - # bin__windows__contrib__4.1 -> bin/windows/contrib/4.1 - new_paths <- gsub("__", "/", files) - # strip superfluous nested dirs - new_paths <- sub(art_path, ".", new_paths) - dirs <- dirname(new_paths) - sapply(dirs, dir.create, recursive = TRUE, showWarnings = FALSE) - - # overwrite allows us to "force push" a new version with the same name - copy_result <- file.copy(current_path, new_paths, overwrite = TRUE) - - if (!all(copy_result)) { - stop("There was an issue while copying the files!") - } - - name: Prune Repository - shell: bash - env: - KEEP: ${{ github.event.inputs.keep || 14 }} - run: | - prune() { - # list files | retain $KEEP newest files | delete everything else - ls -t $1/arrow* | tail -n +$((KEEP + 1)) | xargs --no-run-if-empty rm - } - - # find leaf sub dirs - repo_dirs=$(find repo 
-type d -links 2) - - # We want to retain $keep (14) versions of each pkg/lib so we call - # prune on each leaf dir and not on repo/. - for dir in ${repo_dirs[@]}; do - prune $dir - done - - name: Update Repository Index - shell: Rscript {0} - run: | - # folder that we sync to nightlies.apache.org - repo_root <- "repo" - tools::write_PACKAGES(file.path(repo_root, "src/contrib"), - type = "source", - verbose = TRUE, - latestOnly = FALSE - ) - - repo_dirs <- list.dirs(repo_root) - # find dirs with binary R packages: e.g. */contrib/4.1 - pkg_dirs <- grep(".+contrib\\/\\d.+", repo_dirs, value = TRUE) - - - for (dir in pkg_dirs) { - on_win <- grepl("windows", dir) - tools::write_PACKAGES(dir, - type = ifelse(on_win, "win.binary", "mac.binary"), - verbose = TRUE, - latestOnly = FALSE - ) - } - - name: Show repo contents - run: tree repo - - name: Sync to Remote - uses: ./arrow/.github/actions/sync-nightlies - with: - upload: true - switches: -avzh --update --delete --progress - local_path: repo - remote_path: ${{ secrets.NIGHTLIES_RSYNC_PATH }}/arrow/r - remote_host: ${{ secrets.NIGHTLIES_RSYNC_HOST }} - remote_port: ${{ secrets.NIGHTLIES_RSYNC_PORT }} - remote_user: ${{ secrets.NIGHTLIES_RSYNC_USER }} - remote_key: ${{ secrets.NIGHTLIES_RSYNC_KEY }} - remote_host_key: ${{ secrets.NIGHTLIES_RSYNC_HOST_KEY }} + with: + cache: 'pip' + python-version: 3.12 + - name: Install Archery + shell: bash + run: pip install -e arrow/dev/archery[all] + - run: mkdir -p binaries + - name: Download Artifacts + env: + PREFIX: ${{ github.event.inputs.prefix || ''}} + run: | + if [ -z $PREFIX ]; then + PREFIX=nightly-packaging-$(date +%Y-%m-%d)-0 + fi + echo $PREFIX + + archery crossbow download-artifacts -f r-binary-packages -t binaries $PREFIX + + if [ -n "$(ls -A binaries/*/*/)" ]; then + echo "Found files!" + else + echo "No files found. Stopping upload." 
+ exit 1 + fi + - name: Cache Repo + uses: actions/cache@v4 + with: + path: repo + key: r-nightly-${{ github.run_id }} + restore-keys: r-nightly- + - name: Sync from Remote + uses: ./arrow/.github/actions/sync-nightlies + with: + switches: -avzh --update --delete --progress + local_path: repo + remote_path: ${{ secrets.NIGHTLIES_RSYNC_PATH }}/arrow/r + remote_host: ${{ secrets.NIGHTLIES_RSYNC_HOST }} + remote_port: ${{ secrets.NIGHTLIES_RSYNC_PORT }} + remote_user: ${{ secrets.NIGHTLIES_RSYNC_USER }} + remote_key: ${{ secrets.NIGHTLIES_RSYNC_KEY }} + remote_host_key: ${{ secrets.NIGHTLIES_RSYNC_HOST_KEY }} + - run: tree repo + - uses: r-lib/actions/setup-r@v2 + - name: Build Repository + shell: Rscript {0} + run: | + # folder that we sync to nightlies.apache.org + repo_root <- "repo" + # The binaries are in a nested dir + # so we need to find the correct path. + art_path <- list.files("binaries", + recursive = TRUE, + include.dirs = TRUE, + pattern = "r-binary-packages$", + full.names = TRUE + ) + + current_pkg_path <- list.files(art_path, + full.names = TRUE, + pattern = "r-pkg", + recursive = TRUE + ) + current_lib_path <- list.files(art_path, + full.names = TRUE, + pattern = "r-lib", + recursive = TRUE + ) + files <- c( + sub("r-pkg", repo_root, current_pkg_path), + sub("r-lib", paste0(repo_root, "__r-lib"), current_lib_path), + ) + + # decode contrib.url from artifact name: + # bin__windows__contrib__4.1 -> bin/windows/contrib/4.1 + new_paths <- gsub("__", "/", files) + # strip superfluous nested dirs + new_paths <- sub(art_path, ".", new_paths) + dirs <- dirname(new_paths) + sapply(dirs, dir.create, recursive = TRUE, showWarnings = FALSE) + + # overwrite allows us to "force push" a new version with the same name + copy_result <- file.copy(current_path, new_paths, overwrite = TRUE) + + if (!all(copy_result)) { + stop("There was an issue while copying the files!") + } + - name: Prune Repository + shell: bash + env: + KEEP: ${{ github.event.inputs.keep || 14 }} + 
run: | + prune() { + # list files | retain $KEEP newest files | delete everything else + ls -t "$@" | tail -n +$((KEEP + 1)) | xargs --no-run-if-empty rm + } + + # find leaf sub dirs + repo_dirs=$(find repo -type d -links 2) + + # Old packages: repo/libarrow/bin/${TARGET}/arrow-${VERSION}.zip + # + # We want to retain $keep (14) versions of each pkg/lib so we call + # prune on each leaf dir and not on repo/. + for dir in "${repo_dirs[@]}"; do + prune $dir/arrow* + done + + # New packages: repo/libarrow/${TARGET}-arrow-${VERSION}.zip + prune repo/libarrow/r-libarrow-darwin-arm64-openssl-1.1-* || : + prune repo/libarrow/r-libarrow-darwin-arm64-openssl-3.0-* || : + prune repo/libarrow/r-libarrow-darwin-x86_64-openssl-1.1-* || : + prune repo/libarrow/r-libarrow-darwin-x86_64-openssl-3.0-* || : + prune repo/libarrow/r-libarrow-linux-x86_64-openssl-1.0-* || : + prune repo/libarrow/r-libarrow-linux-x86_64-openssl-1.1-* || : + prune repo/libarrow/r-libarrow-linux-x86_64-openssl-3.0-* || : + prune repo/libarrow/r-libarrow-windows-x86_64-* || : + - name: Update Repository Index + shell: Rscript {0} + run: | + # folder that we sync to nightlies.apache.org + repo_root <- "repo" + tools::write_PACKAGES(file.path(repo_root, "src/contrib"), + type = "source", + verbose = TRUE, + latestOnly = FALSE + ) + + repo_dirs <- list.dirs(repo_root) + # find dirs with binary R packages: e.g. 
*/contrib/4.1 + pkg_dirs <- grep(".+contrib\\/\\d.+", repo_dirs, value = TRUE) + + + for (dir in pkg_dirs) { + on_win <- grepl("windows", dir) + tools::write_PACKAGES(dir, + type = ifelse(on_win, "win.binary", "mac.binary"), + verbose = TRUE, + latestOnly = FALSE + ) + } + - name: Show repo contents + run: tree repo + - name: Sync to Remote + uses: ./arrow/.github/actions/sync-nightlies + with: + upload: true + switches: -avzh --update --delete --progress + local_path: repo + remote_path: ${{ secrets.NIGHTLIES_RSYNC_PATH }}/arrow/r + remote_host: ${{ secrets.NIGHTLIES_RSYNC_HOST }} + remote_port: ${{ secrets.NIGHTLIES_RSYNC_PORT }} + remote_user: ${{ secrets.NIGHTLIES_RSYNC_USER }} + remote_key: ${{ secrets.NIGHTLIES_RSYNC_KEY }} + remote_host_key: ${{ secrets.NIGHTLIES_RSYNC_HOST_KEY }} From 314348a88328fa7ac10c694906ca33f3389656df Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 7 Oct 2025 22:18:01 +0200 Subject: [PATCH 03/18] GH-47742: [C++][CI] Silence Valgrind leak on protobuf initialization (#47743) ### Rationale for this change Valgrind would report memory leaks induced by protobuf initialization on library load, for example: ``` ==14628== 414 bytes in 16 blocks are possibly lost in loss record 22 of 26 ==14628== at 0x4914EFF: operator new(unsigned long) (vg_replace_malloc.c:487) ==14628== by 0x8D0B6CA: void std::__cxx11::basic_string, std::allocator >::_M_construct(char*, char*, std::forward_iterator_tag) [clone .isra.0] (in /opt/conda/envs/arrow/lib/libprotobuf.so.25.3.0) ==14628== by 0x8D33E62: google::protobuf::DescriptorPool::Tables::Tables() (in /opt/conda/envs/arrow/lib/libprotobuf.so.25.3.0) ==14628== by 0x8D340E2: google::protobuf::DescriptorPool::DescriptorPool(google::protobuf::DescriptorDatabase*, google::protobuf::DescriptorPool::ErrorCollector*) (in /opt/conda/envs/arrow/lib/libprotobuf.so.25.3.0) ==14628== by 0x8D341A2: google::protobuf::DescriptorPool::internal_generated_pool() (in /opt/conda/envs/arrow/lib/libprotobuf.so.25.3.0) 
==14628== by 0x8D34277: google::protobuf::DescriptorPool::InternalAddGeneratedFile(void const*, int) (in /opt/conda/envs/arrow/lib/libprotobuf.so.25.3.0) ==14628== by 0x8D9C56F: google::protobuf::internal::AddDescriptorsRunner::AddDescriptorsRunner(google::protobuf::internal::DescriptorTable const*) (in /opt/conda/envs/arrow/lib/libprotobuf.so.25.3.0) ==14628== by 0x40D147D: call_init.part.0 (dl-init.c:70) ==14628== by 0x40D1567: call_init (dl-init.c:33) ==14628== by 0x40D1567: _dl_init (dl-init.c:117) ==14628== by 0x40EB2C9: ??? (in /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2) ``` This was triggered by the `libprotobuf` upgrade on conda-forge from 3.21.12 to 4.25.3. ### What changes are included in this PR? Add a Valgrind suppression for these leak reports, as there is probably not much we can do about them. ### Are these changes tested? Yes, by existing CI test. ### Are there any user-facing changes? No. * GitHub Issue: #47742 Authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/valgrind.supp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cpp/valgrind.supp b/cpp/valgrind.supp index 2289e819e3d..6a2ad3f6b3b 100644 --- a/cpp/valgrind.supp +++ b/cpp/valgrind.supp @@ -82,3 +82,15 @@ fun:curl_share_init fun:*Azure*CurlConnection* } +{ + :leak on library initialization + Memcheck:Leak + ... + fun:*google*protobuf*AddDescriptorsRunner* +} +{ + :leak on library initialization + Memcheck:Leak + ... 
+ fun:*google*protobuf*InternalRegisterGeneratedFile* +} From 7b4dd0511162ca985365fbde9e633ca33de57215 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 8 Oct 2025 08:43:12 +0200 Subject: [PATCH 04/18] GH-47740: [C++][Parquet] Fix undefined behavior when reading invalid Parquet data (#47741) ### Rationale for this change Fix issues found by OSS-Fuzz when invalid Parquet data is fed to the Parquet reader: * https://issues.oss-fuzz.com/issues/447262173 * https://issues.oss-fuzz.com/issues/447480433 * https://issues.oss-fuzz.com/issues/447490896 * https://issues.oss-fuzz.com/issues/447693724 * https://issues.oss-fuzz.com/issues/447693728 * https://issues.oss-fuzz.com/issues/449498800 ### Are these changes tested? Yes, using the updated fuzz regression files from https://github.com/apache/arrow-testing/pull/115 ### Are there any user-facing changes? No. **This PR contains a "Critical Fix".** (If the changes fix either (a) a security vulnerability, (b) a bug that caused incorrect or invalid data to be produced, or (c) a bug that causes a crash (even when the API contract is upheld), please provide explanation. If not, you can remove this.) 
* GitHub Issue: #47740 Authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/CMakePresets.json | 3 ++- cpp/src/arrow/util/rle_encoding_internal.h | 16 ++++++++++------ cpp/src/parquet/decoder.cc | 7 +++++-- testing | 2 +- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index c9e2444389f..0c3f85d0917 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -444,7 +444,8 @@ "CMAKE_CXX_COMPILER": "clang++", "ARROW_IPC": "ON", "ARROW_PARQUET": "ON", - "ARROW_FUZZING": "ON" + "ARROW_FUZZING": "ON", + "ARROW_WITH_SNAPPY": "ON" } }, { diff --git a/cpp/src/arrow/util/rle_encoding_internal.h b/cpp/src/arrow/util/rle_encoding_internal.h index c231c9a63eb..a7917483bbc 100644 --- a/cpp/src/arrow/util/rle_encoding_internal.h +++ b/cpp/src/arrow/util/rle_encoding_internal.h @@ -657,13 +657,14 @@ auto RleBitPackedParser::PeekImpl(Handler&& handler) const const auto header_bytes = bit_util::ParseLeadingLEB128(data_, kMaxSize, &run_len_type); if (ARROW_PREDICT_FALSE(header_bytes == 0)) { - // Malfomrmed LEB128 data + // Malformed LEB128 data return {0, ControlFlow::Break}; } const bool is_bit_packed = run_len_type & 1; const uint32_t count = run_len_type >> 1; if (is_bit_packed) { + // Bit-packed run constexpr auto kMaxCount = bit_util::CeilDiv(internal::max_size_for_v, 8); if (ARROW_PREDICT_FALSE(count == 0 || count > kMaxCount)) { // Illegal number of encoded values @@ -672,17 +673,21 @@ auto RleBitPackedParser::PeekImpl(Handler&& handler) const ARROW_DCHECK_LT(static_cast(count) * 8, internal::max_size_for_v); + // Count Already divided by 8 for byte size calculations + const auto bytes_read = header_bytes + static_cast(count) * value_bit_width_; + if (ARROW_PREDICT_FALSE(bytes_read > data_size_)) { + // Bit-packed run would overflow data buffer + return {0, ControlFlow::Break}; + } const auto values_count = static_cast(count * 8); - // Count Already divided by 8 - const auto bytes_read = - 
header_bytes + static_cast(count) * value_bit_width_; auto control = handler.OnBitPackedRun( BitPackedRun(data_ + header_bytes, values_count, value_bit_width_)); - return {bytes_read, control}; + return {static_cast(bytes_read), control}; } + // RLE run if (ARROW_PREDICT_FALSE(count == 0)) { // Illegal number of encoded values return {0, ControlFlow::Break}; @@ -1079,7 +1084,6 @@ auto RleBitPackedDecoder::GetSpaced(Converter converter, // There may be remaining null if they are not greedily filled by either decoder calls check_and_handle_fully_null_remaining(); - ARROW_DCHECK(batch.is_done() || exhausted()); return batch.total_read(); } diff --git a/cpp/src/parquet/decoder.cc b/cpp/src/parquet/decoder.cc index 46d1c201e92..b6d79665621 100644 --- a/cpp/src/parquet/decoder.cc +++ b/cpp/src/parquet/decoder.cc @@ -2082,9 +2082,12 @@ class DeltaByteArrayDecoderImpl : public TypedDecoderImpl { int64_t valid_bits_offset, typename EncodingTraits::Accumulator* out, int* out_num_values) { - std::vector values(num_values); + std::vector values(num_values - null_count); const int num_valid_values = GetInternal(values.data(), num_values - null_count); - DCHECK_EQ(num_values - null_count, num_valid_values); + if (ARROW_PREDICT_FALSE(num_values - null_count != num_valid_values)) { + throw ParquetException("Expected to decode ", num_values - null_count, + " values, but decoded ", num_valid_values, " values."); + } auto visit_binary_helper = [&](auto* helper) { auto values_ptr = reinterpret_cast(values.data()); diff --git a/testing b/testing index 6a7b02fac93..abf6d7ebde7 160000 --- a/testing +++ b/testing @@ -1 +1 @@ -Subproject commit 6a7b02fac93d8addbcdbb213264e58bfdc3068e4 +Subproject commit abf6d7ebde7ab70b541c51859dad2bef71a0151e From 9beb0e0de31c3644c00463871203515e3269c172 Mon Sep 17 00:00:00 2001 From: Yibo Cai Date: Thu, 9 Oct 2025 16:00:19 +0800 Subject: [PATCH 05/18] GH-47229: [C++][Arm] Force mimalloc to generate armv8.0 binary (#47766) ### Rationale for this change 
Mimalloc default generates LSE atomic instructions only work on armv8.1. This causes illegal instruction on armv8.0 platforms like Raspberry4. This PR sets mimalloc build flag -DMI_NO_OPT_ARCH=ON to disable LSE instruction. Please note even with flag set, compiler and libc will replace the atmoic call with an ifunc that matches hardware best at runtime. That means LSE is used only if the running platform supports it. ### What changes are included in this PR? Force mimalloc build flag -DMI_NO_OPT_ARCH=ON. ### Are these changes tested? Manually tested. ### Are there any user-facing changes? No. **This PR contains a "Critical Fix".** Fixes crashes on Armv8.0 platform. * GitHub Issue: #47229 Lead-authored-by: Yibo Cai Co-authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 5b8b039357e..b815fb7f356 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2294,7 +2294,9 @@ if(ARROW_MIMALLOC) -DMI_LOCAL_DYNAMIC_TLS=ON -DMI_BUILD_OBJECT=OFF -DMI_BUILD_SHARED=OFF - -DMI_BUILD_TESTS=OFF) + -DMI_BUILD_TESTS=OFF + # GH-47229: Force mimalloc to generate armv8.0 binary + -DMI_NO_OPT_ARCH=ON) externalproject_add(mimalloc_ep ${EP_COMMON_OPTIONS} From 0bd7faa476003d0176b514824829664670e74b86 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 9 Oct 2025 11:47:04 +0200 Subject: [PATCH 06/18] GH-47588: [C++] Bump mimalloc version to 3.1.5 (#47589) ### Rationale for this change According to https://github.com/microsoft/mimalloc/issues/1073 , mimalloc v3 is preferred over v2 for production usage. There are reports of higher than expected memory consumption with mimalloc 2.2.x, notably when reading Parquet data (example: GH-47266). ### What changes are included in this PR? 
Bump to mimalloc 3.1.5, which is the latest mimalloc 3.1.x release as of this writing. ### Are these changes tested? Yes, by existing tests and CI. ### Are there any user-facing changes? Hopefully not, besides a potential reduction in memory usage due to improvements in mimalloc v3. * GitHub Issue: #47588 Authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- ci/conda_env_cpp.txt | 1 + ci/docker/alpine-linux-3.22-cpp.dockerfile | 1 + ci/docker/debian-12-cpp.dockerfile | 1 + ci/docker/debian-13-cpp.dockerfile | 1 + ci/docker/debian-experimental-cpp.dockerfile | 1 + ci/docker/fedora-42-cpp.dockerfile | 1 + ci/docker/ubuntu-22.04-cpp-minimal.dockerfile | 1 + ci/docker/ubuntu-22.04-cpp.dockerfile | 1 + ci/docker/ubuntu-24.04-cpp-minimal.dockerfile | 1 + ci/docker/ubuntu-24.04-cpp.dockerfile | 1 + ci/scripts/msys2_setup.sh | 1 + cpp/cmake_modules/ThirdpartyToolchain.cmake | 13 ++++++-- cpp/cmake_modules/mimalloc-1138.patch | 33 +++++++++++++++++++ cpp/thirdparty/versions.txt | 4 +-- 14 files changed, 57 insertions(+), 4 deletions(-) create mode 100644 cpp/cmake_modules/mimalloc-1138.patch diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index 6e23e920a40..52b4cdbba81 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -41,6 +41,7 @@ meson ninja nodejs orc<2.1.0 +patch pkg-config python rapidjson diff --git a/ci/docker/alpine-linux-3.22-cpp.dockerfile b/ci/docker/alpine-linux-3.22-cpp.dockerfile index f03e1913e27..48907e61a4a 100644 --- a/ci/docker/alpine-linux-3.22-cpp.dockerfile +++ b/ci/docker/alpine-linux-3.22-cpp.dockerfile @@ -48,6 +48,7 @@ RUN apk add \ musl-locales \ nlohmann-json \ openssl-dev \ + patch \ perl \ pkgconfig \ protobuf-dev \ diff --git a/ci/docker/debian-12-cpp.dockerfile b/ci/docker/debian-12-cpp.dockerfile index 15716151fce..44c845bb17e 100644 --- a/ci/docker/debian-12-cpp.dockerfile +++ b/ci/docker/debian-12-cpp.dockerfile @@ -85,6 +85,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch 
\ pkg-config \ protobuf-compiler-grpc \ python3-dev \ diff --git a/ci/docker/debian-13-cpp.dockerfile b/ci/docker/debian-13-cpp.dockerfile index 3e5c645c81a..ca96b4177ff 100644 --- a/ci/docker/debian-13-cpp.dockerfile +++ b/ci/docker/debian-13-cpp.dockerfile @@ -87,6 +87,7 @@ RUN apt-get update -y -q && \ nlohmann-json3-dev \ npm \ opentelemetry-cpp-dev \ + patch \ pkg-config \ protobuf-compiler-grpc \ python3-dev \ diff --git a/ci/docker/debian-experimental-cpp.dockerfile b/ci/docker/debian-experimental-cpp.dockerfile index 2721b1d5f20..743f5ddd3be 100644 --- a/ci/docker/debian-experimental-cpp.dockerfile +++ b/ci/docker/debian-experimental-cpp.dockerfile @@ -79,6 +79,7 @@ RUN if [ -n "${gcc}" ]; then \ nlohmann-json3-dev \ npm \ opentelemetry-cpp-dev \ + patch \ pkg-config \ protobuf-compiler-grpc \ python3-dev \ diff --git a/ci/docker/fedora-42-cpp.dockerfile b/ci/docker/fedora-42-cpp.dockerfile index 82e3fa9f7aa..cabb066fec3 100644 --- a/ci/docker/fedora-42-cpp.dockerfile +++ b/ci/docker/fedora-42-cpp.dockerfile @@ -53,6 +53,7 @@ RUN dnf update -y && \ make \ ninja-build \ openssl-devel \ + patch \ protobuf-devel \ python \ python-devel \ diff --git a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile index 2a90a5637d4..d38dd418e29 100644 --- a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile @@ -33,6 +33,7 @@ RUN apt-get update -y -q && \ git \ libssl-dev \ libcurl4-openssl-dev \ + patch \ python3-pip \ python3-venv \ tzdata \ diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index 44c1a452c17..88a27efe335 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -107,6 +107,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch \ pkg-config \ protobuf-compiler \ protobuf-compiler-grpc \ diff --git a/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile 
b/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile index a1fd178a2c7..5e114d5dcd9 100644 --- a/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile @@ -33,6 +33,7 @@ RUN apt-get update -y -q && \ git \ libssl-dev \ libcurl4-openssl-dev \ + patch \ python3-pip \ python3-venv \ tzdata \ diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile index 6bc49a4c842..0347d452d7b 100644 --- a/ci/docker/ubuntu-24.04-cpp.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp.dockerfile @@ -108,6 +108,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch \ pkg-config \ protobuf-compiler \ protobuf-compiler-grpc \ diff --git a/ci/scripts/msys2_setup.sh b/ci/scripts/msys2_setup.sh index 0ce9343a7f7..b4634070a87 100755 --- a/ci/scripts/msys2_setup.sh +++ b/ci/scripts/msys2_setup.sh @@ -51,6 +51,7 @@ case "${target}" in packages+=("${MINGW_PACKAGE_PREFIX}-xsimd") packages+=("${MINGW_PACKAGE_PREFIX}-uriparser") packages+=("${MINGW_PACKAGE_PREFIX}-zstd") + packages+=("patch") if [ "${target}" != "ruby" ]; then # We don't update the exiting packages for Ruby because diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index b815fb7f356..7b8cef5fb5e 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2275,9 +2275,9 @@ if(ARROW_MIMALLOC) endif() set(MIMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/mimalloc_ep/src/mimalloc_ep") - set(MIMALLOC_INCLUDE_DIR "${MIMALLOC_PREFIX}/include/mimalloc-2.2") + set(MIMALLOC_INCLUDE_DIR "${MIMALLOC_PREFIX}/include") set(MIMALLOC_STATIC_LIB - "${MIMALLOC_PREFIX}/lib/mimalloc-2.2/${CMAKE_STATIC_LIBRARY_PREFIX}${MIMALLOC_LIB_BASE_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" + "${MIMALLOC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${MIMALLOC_LIB_BASE_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" ) set(MIMALLOC_C_FLAGS ${EP_C_FLAGS}) @@ -2286,10 +2286,18 @@ if(ARROW_MIMALLOC) 
set(MIMALLOC_C_FLAGS "${MIMALLOC_C_FLAGS} -DERROR_COMMITMENT_MINIMUM=635") endif() + set(MIMALLOC_PATCH_COMMAND "") + if(${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG") + find_program(PATCH patch REQUIRED) + set(MIMALLOC_PATCH_COMMAND ${PATCH} -p1 -i + ${CMAKE_CURRENT_LIST_DIR}/mimalloc-1138.patch) + endif() + set(MIMALLOC_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_C_FLAGS=${MIMALLOC_C_FLAGS}" "-DCMAKE_INSTALL_PREFIX=${MIMALLOC_PREFIX}" + -DMI_INSTALL_TOPLEVEL=ON -DMI_OVERRIDE=OFF -DMI_LOCAL_DYNAMIC_TLS=ON -DMI_BUILD_OBJECT=OFF @@ -2302,6 +2310,7 @@ if(ARROW_MIMALLOC) ${EP_COMMON_OPTIONS} URL ${MIMALLOC_SOURCE_URL} URL_HASH "SHA256=${ARROW_MIMALLOC_BUILD_SHA256_CHECKSUM}" + PATCH_COMMAND ${MIMALLOC_PATCH_COMMAND} CMAKE_ARGS ${MIMALLOC_CMAKE_ARGS} BUILD_BYPRODUCTS "${MIMALLOC_STATIC_LIB}") diff --git a/cpp/cmake_modules/mimalloc-1138.patch b/cpp/cmake_modules/mimalloc-1138.patch new file mode 100644 index 00000000000..1ffa4bffbba --- /dev/null +++ b/cpp/cmake_modules/mimalloc-1138.patch @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +Fix for https://github.com/microsoft/mimalloc/issues/1138 + +diff --git a/src/arena.c b/src/arena.c +index b26f4442..d7e99b55 100644 +--- a/src/arena.c ++++ b/src/arena.c +@@ -797,6 +797,9 @@ mi_page_t* _mi_arenas_page_alloc(mi_heap_t* heap, size_t block_size, size_t bloc + else { + page = mi_arenas_page_singleton_alloc(heap, block_size, block_alignment); + } ++ if mi_unlikely(page == NULL) { ++ return NULL; ++ } + // mi_assert_internal(page == NULL || _mi_page_segment(page)->subproc == tld->subproc); + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + mi_assert_internal(_mi_ptr_page(page)==page); diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 9f16db79f12..10aad92785a 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -82,8 +82,8 @@ ARROW_JEMALLOC_BUILD_VERSION=5.3.0 ARROW_JEMALLOC_BUILD_SHA256_CHECKSUM=2db82d1e7119df3e71b7640219b6dfe84789bc0537983c3b7ac4f7189aecfeaa ARROW_LZ4_BUILD_VERSION=v1.10.0 ARROW_LZ4_BUILD_SHA256_CHECKSUM=537512904744b35e232912055ccf8ec66d768639ff3abe5788d90d792ec5f48b -ARROW_MIMALLOC_BUILD_VERSION=v2.2.4 -ARROW_MIMALLOC_BUILD_SHA256_CHECKSUM=754a98de5e2912fddbeaf24830f982b4540992f1bab4a0a8796ee118e0752bda +ARROW_MIMALLOC_BUILD_VERSION=v3.1.5 +ARROW_MIMALLOC_BUILD_SHA256_CHECKSUM=1c6949032069d5ebea438ec5cedd602d06f40a92ddf0f0d9dcff0993e5f6635c ARROW_NLOHMANN_JSON_BUILD_VERSION=v3.12.0 ARROW_NLOHMANN_JSON_BUILD_SHA256_CHECKSUM=4b92eb0c06d10683f7447ce9406cb97cd4b453be18d7279320f7b2f025c10187 ARROW_OPENTELEMETRY_BUILD_VERSION=v1.21.0 From f1ff2e122ad807e31ab3555af169dad4ba98344e Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 10 Oct 2025 18:44:58 +0900 Subject: [PATCH 07/18] GH-47748: [C++][Dataset] Fix link error on macOS (#47749) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change There are link errors with build options for JNI on macOS. ### What changes are included in this PR? 
`ARROW_BUNDLED_STATIC_LIBS` has CMake target names defined in Apache Arrow not `find_package()`-ed target names. So we should use `aws-c-common` not `AWS::aws-c-common`. Recent aws-c-common or something use the Network framework. So add `Network` to `Arrow::arrow_bundled_dependencies` dependencies. Don't use `compute/kernels/temporal_internal.cc` in `libarrow.dylib` and `libarrow_compute.dylib` to avoid duplicated symbols error. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. * GitHub Issue: #47748 Authored-by: Sutou Kouhei Signed-off-by: Raúl Cumplido --- cpp/src/arrow/ArrowConfig.cmake.in | 4 +++- cpp/src/arrow/CMakeLists.txt | 1 - cpp/src/arrow/compute/kernels/temporal_internal.h | 2 +- cpp/src/arrow/meson.build | 1 - 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/ArrowConfig.cmake.in b/cpp/src/arrow/ArrowConfig.cmake.in index ee462663d6b..cbadad4d742 100644 --- a/cpp/src/arrow/ArrowConfig.cmake.in +++ b/cpp/src/arrow/ArrowConfig.cmake.in @@ -124,11 +124,13 @@ if(TARGET Arrow::arrow_static AND NOT TARGET Arrow::arrow_bundled_dependencies) # https://cmake.org/cmake/help/latest/policy/CMP0057.html cmake_policy(PUSH) cmake_policy(SET CMP0057 NEW) - if("AWS::aws-c-common" IN_LIST ARROW_BUNDLED_STATIC_LIBS) + if("aws-c-common" IN_LIST ARROW_BUNDLED_STATIC_LIBS) if(APPLE) find_library(CORE_FOUNDATION CoreFoundation) target_link_libraries(Arrow::arrow_bundled_dependencies INTERFACE ${CORE_FOUNDATION}) + find_library(NETWORK Network) + target_link_libraries(Arrow::arrow_bundled_dependencies INTERFACE ${NETWORK}) find_library(SECURITY Security) target_link_libraries(Arrow::arrow_bundled_dependencies INTERFACE ${SECURITY}) elseif(WIN32) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 771505f8c90..e299e8f6167 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -780,7 +780,6 @@ if(ARROW_COMPUTE) compute/kernels/scalar_temporal_binary.cc 
compute/kernels/scalar_temporal_unary.cc compute/kernels/scalar_validity.cc - compute/kernels/temporal_internal.cc compute/kernels/util_internal.cc compute/kernels/vector_array_sort.cc compute/kernels/vector_cumulative_ops.cc diff --git a/cpp/src/arrow/compute/kernels/temporal_internal.h b/cpp/src/arrow/compute/kernels/temporal_internal.h index 68e09e91d72..3674c233dc9 100644 --- a/cpp/src/arrow/compute/kernels/temporal_internal.h +++ b/cpp/src/arrow/compute/kernels/temporal_internal.h @@ -80,7 +80,7 @@ inline int64_t GetQuarter(const year_month_day& ymd) { return static_cast((static_cast(ymd.month()) - 1) / 3); } -Result LocateZone(const std::string_view timezone); +ARROW_EXPORT Result LocateZone(const std::string_view timezone); static inline const std::string& GetInputTimezone(const DataType& type) { static const std::string no_timezone = ""; diff --git a/cpp/src/arrow/meson.build b/cpp/src/arrow/meson.build index 1872f96df6a..5590ba41c91 100644 --- a/cpp/src/arrow/meson.build +++ b/cpp/src/arrow/meson.build @@ -525,7 +525,6 @@ if needs_compute 'compute/kernels/scalar_temporal_binary.cc', 'compute/kernels/scalar_temporal_unary.cc', 'compute/kernels/scalar_validity.cc', - 'compute/kernels/temporal_internal.cc', 'compute/kernels/util_internal.cc', 'compute/kernels/vector_array_sort.cc', 'compute/kernels/vector_cumulative_ops.cc', From 1f4910b2028c50f028ade6df280938f28dd944d4 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sun, 12 Oct 2025 23:57:32 +0900 Subject: [PATCH 08/18] GH-47632: [CI][C++] Add a CI job for JNI on Linux (#47746) ### Rationale for this change This is for preventing to break Apache Arrow Java JNI use case on Linux. ### What changes are included in this PR? * Add a CI job that uses build options for JNI use case * Install more packages in manylinux image that is also used by JNI build ### Are these changes tested? Yes. ### Are there any user-facing changes? No. 
* GitHub Issue: #47632 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- .env | 4 + .github/workflows/cpp.yml | 8 +- .github/workflows/cpp_extra.yml | 63 +++++++++++++- ci/docker/cpp-jni.dockerfile | 111 ++++++++++++++++++++++++ ci/scripts/cpp_build.sh | 15 +++- ci/scripts/cpp_test.sh | 41 +++++++++ ci/vcpkg/vcpkg.json | 1 + cpp/CMakePresets.json | 31 +++++++ cpp/src/arrow/testing/process.cc | 2 +- cpp/src/gandiva/tests/projector_test.cc | 4 + docker-compose.yml | 34 ++++++++ 11 files changed, 305 insertions(+), 9 deletions(-) create mode 100644 ci/docker/cpp-jni.dockerfile diff --git a/.env b/.env index 1735b7163b4..e9c92c1a357 100644 --- a/.env +++ b/.env @@ -37,6 +37,10 @@ DOCKER_BUILDKIT=1 ARCH=amd64 ARCH_ALIAS=x86_64 ARCH_SHORT=amd64 +# For aarch64 +# ARCH=arm64v8 +# ARCH_ALIAS=aarch64 +# ARCH_SHORT=arm64 # Default repository to pull and push images from REPO=apache/arrow-dev diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index b92c6fe8437..0122f01e757 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -321,7 +321,6 @@ jobs: BOOST_SOURCE: BUNDLED CMAKE_CXX_STANDARD: "17" CMAKE_GENERATOR: Ninja - CMAKE_INSTALL_LIBDIR: bin CMAKE_INSTALL_PREFIX: /usr CMAKE_UNITY_BUILD: ON steps: @@ -369,11 +368,12 @@ jobs: call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 bash -c "ci/scripts/cpp_build.sh $(pwd) $(pwd)/build" - name: Test - shell: bash + shell: cmd run: | + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 # For ORC - export TZDIR=/c/msys64/usr/share/zoneinfo - ci/scripts/cpp_test.sh $(pwd) $(pwd)/build + set TZDIR=C:\msys64\usr\share\zoneinfo + bash -c "ci/scripts/cpp_test.sh $(pwd) $(pwd)/build" windows-mingw: name: AMD64 Windows MinGW ${{ matrix.msystem_upper }} C++ diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index 97dbadd25d5..5b054ddfb58 100644 --- 
a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -164,6 +164,67 @@ jobs: continue-on-error: true run: archery docker push ${{ matrix.image }} + jni-linux: + needs: check-labels + name: JNI ${{ matrix.platform.runs-on }} ${{ matrix.platform.arch }} + runs-on: ${{ matrix.platform.runs-on }} + if: >- + needs.check-labels.outputs.force == 'true' || + contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra') || + contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra: C++') + timeout-minutes: 240 + permissions: + # This is for using GitHub Packages for vcpkg cache + packages: write + strategy: + fail-fast: false + matrix: + platform: + - arch: "amd64" + runs-on: ubuntu-latest + - arch: "arm64v8" + runs-on: ubuntu-24.04-arm + env: + ARCH: ${{ matrix.platform.arch }} + REPO: ghcr.io/${{ github.repository }}-dev + steps: + - name: Checkout Arrow + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + fetch-depth: 0 + submodules: recursive + - name: Cache Docker Volumes + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: .docker + key: jni-${{ matrix.platform.runs-on }}-${{ hashFiles('cpp/**') }} + restore-keys: jni-${{ matrix.platform.runs-on }}- + - name: Setup Python + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + with: + python-version: 3 + - name: Setup Archery + run: python3 -m pip install -e dev/archery[docker] + - name: Execute Docker Build + env: + ARCHERY_DOCKER_USER: ${{ github.actor }} + ARCHERY_DOCKER_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + VCPKG_BINARY_SOURCES: "clear;nuget,GitHub,readwrite" + run: | + source ci/scripts/util_enable_core_dumps.sh + archery docker run cpp-jni + - name: Docker Push + if: >- + success() && + github.event_name == 'push' && + github.ref_name == 'main' + env: + ARCHERY_DOCKER_USER: ${{ github.actor }} + 
ARCHERY_DOCKER_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + continue-on-error: true + run: archery docker push cpp-jni + jni-macos: needs: check-labels name: JNI macOS @@ -177,7 +238,7 @@ jobs: MACOSX_DEPLOYMENT_TARGET: "14.0" steps: - name: Checkout Arrow - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: fetch-depth: 0 submodules: recursive diff --git a/ci/docker/cpp-jni.dockerfile b/ci/docker/cpp-jni.dockerfile new file mode 100644 index 00000000000..b21ec762d67 --- /dev/null +++ b/ci/docker/cpp-jni.dockerfile @@ -0,0 +1,111 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base +FROM ${base} + +ARG arch +ARG arch_short + +SHELL ["/bin/bash", "-i", "-c"] +ENTRYPOINT ["/bin/bash", "-i", "-c"] + +# Install basic dependencies +RUN dnf install -y \ + autoconf \ + curl \ + flex \ + gdb \ + git \ + perl-IPC-Cmd \ + wget \ + zip + +# A system Python is required for Ninja and vcpkg in this Dockerfile. +# On manylinux_2_28 base images, no system Python is installed. +# We therefore override the PATH with Python 3.10 in /opt/python +# so that we have a consistent Python version across base images. 
+ENV CPYTHON_VERSION=cp310 +ENV PATH=/opt/python/${CPYTHON_VERSION}-${CPYTHON_VERSION}/bin:${PATH} + +# Install CMake +ARG cmake=3.29.2 +COPY ci/scripts/install_cmake.sh arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_cmake.sh ${cmake} /usr/local + +# Install Ninja +ARG ninja=1.10.2 +COPY ci/scripts/install_ninja.sh arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_ninja.sh ${ninja} /usr/local + +# Install ccache +ARG ccache=4.1 +COPY ci/scripts/install_ccache.sh arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_ccache.sh ${ccache} /usr/local + +# Install vcpkg +ARG vcpkg +COPY ci/vcpkg/*.patch \ + ci/vcpkg/*linux*.cmake \ + ci/vcpkg/vcpkg.json \ + arrow/ci/vcpkg/ +COPY ci/scripts/install_vcpkg.sh \ + arrow/ci/scripts/ +ENV VCPKG_ROOT=/opt/vcpkg +ARG build_type=release +ENV CMAKE_BUILD_TYPE=${build_type} \ + PATH="${PATH}:${VCPKG_ROOT}" \ + VCPKG_DEFAULT_TRIPLET=${arch_short}-linux-static-${build_type} \ + VCPKG_FEATURE_FLAGS="manifests" \ + VCPKG_FORCE_SYSTEM_BINARIES=1 \ + VCPKG_OVERLAY_TRIPLETS=/arrow/ci/vcpkg +# For --mount=type=secret: The GITHUB_TOKEN is the only real secret but we use +# --mount=type=secret for GITHUB_REPOSITORY_OWNER and +# VCPKG_BINARY_SOURCES too because we don't want to store them +# into the built image in order to easily reuse the built image cache. 
+# +# For vcpkg install: cannot use the S3 feature here because while +# aws-sdk-cpp=1.9.160 contains ssl related fixes as well as we can +# patch the vcpkg portfile to support arm machines it hits ARROW-15141 +# where we would need to fall back to 1.8.186 but we cannot patch +# those portfiles since vcpkg-tool handles the checkout of previous +# versions => use bundled S3 build +RUN --mount=type=secret,id=github_repository_owner \ + --mount=type=secret,id=github_token \ + --mount=type=secret,id=vcpkg_binary_sources \ + export GITHUB_REPOSITORY_OWNER=$(cat /run/secrets/github_repository_owner); \ + export GITHUB_TOKEN=$(cat /run/secrets/github_token); \ + export VCPKG_BINARY_SOURCES=$(cat /run/secrets/vcpkg_binary_sources); \ + arrow/ci/scripts/install_vcpkg.sh ${VCPKG_ROOT} ${vcpkg} && \ + vcpkg install \ + --clean-after-build \ + --x-install-root=${VCPKG_ROOT}/installed \ + --x-manifest-root=/arrow/ci/vcpkg \ + --x-feature=azure \ + --x-feature=dev \ + --x-feature=flight \ + --x-feature=gandiva \ + --x-feature=gcs \ + --x-feature=json \ + --x-feature=orc \ + --x-feature=parquet \ + --x-feature=s3 && \ + rm -rf ~/.config/NuGet/ + +ENV ARROW_BUILD_TESTS=ON \ + ARROW_CMAKE_ARGS="-DARROW_BUILD_TESTS=ON" \ + CMAKE_PRESET=ninja-${CMAKE_BUILD_TYPE}-jni-linux diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 3a4431239f1..fd550d8fb08 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -174,6 +174,11 @@ elif [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ ${ARROW_CMAKE_ARGS} \ ${source_dir} +elif [ -n "${CMAKE_PRESET}" ]; then + cmake \ + --preset="${CMAKE_PRESET}" \ + ${ARROW_CMAKE_ARGS} \ + ${source_dir} else cmake \ -Dabsl_SOURCE=${absl_SOURCE:-} \ @@ -308,10 +313,14 @@ fi popd if [ -x "$(command -v ldconfig)" ]; then - if [ -x "$(command -v sudo)" ]; then - SUDO=sudo - else + if [ "$(id --user)" -eq 0 ]; then SUDO= + else + if [ -x "$(command -v sudo)" ]; then + SUDO=sudo + else + SUDO= + 
fi fi ${SUDO} ldconfig ${ARROW_HOME}/${CMAKE_INSTALL_LIBDIR:-lib} fi diff --git a/ci/scripts/cpp_test.sh b/ci/scripts/cpp_test.sh index 3d88b6f1cd5..4243e78bca7 100755 --- a/ci/scripts/cpp_test.sh +++ b/ci/scripts/cpp_test.sh @@ -47,6 +47,12 @@ ctest_options=() if ! type azurite >/dev/null 2>&1; then exclude_tests+=("arrow-azurefs-test") fi +if ! type storage-testbench >/dev/null 2>&1; then + exclude_tests+=("arrow-gcsfs-test") +fi +if ! type minio >/dev/null 2>&1; then + exclude_tests+=("arrow-s3fs-test") +fi case "$(uname)" in Linux) n_jobs=$(nproc) @@ -114,6 +120,41 @@ else "$@" fi +# This is for testing find_package(Arrow). +# +# Note that this is not a perfect solution. We should improve this +# later. +# +# * This is ad-hoc +# * This doesn't test other CMake packages such as ArrowDataset +if [ "${ARROW_USE_MESON:-OFF}" = "OFF" ] && \ + [ "${ARROW_EMSCRIPTEN:-OFF}" = "OFF" ] && \ + [ "${ARROW_USE_ASAN:-OFF}" = "OFF" ]; then + CMAKE_PREFIX_PATH="${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}}" + case "$(uname)" in + MINGW*) + # /lib/cmake/ isn't searched on Windows. + # + # See also: + # https://cmake.org/cmake/help/latest/command/find_package.html#config-mode-search-procedure + CMAKE_PREFIX_PATH+="/lib/cmake/" + ;; + esac + if [ -n "${VCPKG_ROOT}" ] && [ -n "${VCPKG_DEFAULT_TRIPLET}" ]; then + CMAKE_PREFIX_PATH+=";${VCPKG_ROOT}/installed/${VCPKG_DEFAULT_TRIPLET}" + fi + cmake \ + -S "${source_dir}/examples/minimal_build" \ + -B "${build_dir}/examples/minimal_build" \ + -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" + cmake --build "${build_dir}/examples/minimal_build" + pushd "${source_dir}/examples/minimal_build" + # PATH= is for Windows. 
+ PATH="${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}}/bin:${PATH}" \ + "${build_dir}/examples/minimal_build/arrow-example" + popd +fi + if [ "${ARROW_BUILD_EXAMPLES}" == "ON" ]; then examples=$(find "${binary_output_dir}" -executable -name "*example") if [ "${examples}" == "" ]; then diff --git a/ci/vcpkg/vcpkg.json b/ci/vcpkg/vcpkg.json index 0107153f018..5dfe61a0c60 100644 --- a/ci/vcpkg/vcpkg.json +++ b/ci/vcpkg/vcpkg.json @@ -19,6 +19,7 @@ "re2", "snappy", "utf8proc", + "xsimd", "zlib", "zstd", { diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index 0c3f85d0917..e1cad83ae3f 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -583,6 +583,37 @@ "displayName": "Benchmarking build with everything enabled", "cacheVariables": {} }, + { + "name": "ninja-release-jni-linux", + "inherits": [ + "base-release" + ], + "displayName": "Build for JNI on Linux", + "cacheVariables": { + "ARROW_ACERO": "ON", + "ARROW_BUILD_SHARED": "OFF", + "ARROW_BUILD_STATIC": "ON", + "ARROW_CSV": "ON", + "ARROW_DATASET": "ON", + "ARROW_DEPENDENCY_SOURCE": "VCPKG", + "ARROW_DEPENDENCY_USE_SHARED": "OFF", + "ARROW_GANDIVA": "ON", + "ARROW_GANDIVA_STATIC_LIBSTDCPP": "ON", + "ARROW_GCS": "ON", + "ARROW_JSON": "ON", + "ARROW_ORC": "ON", + "ARROW_PARQUET": "ON", + "ARROW_RPATH_ORIGIN": "ON", + "ARROW_S3": "ON", + "ARROW_SUBSTRAIT": "ON", + "PARQUET_BUILD_EXAMPLES": "OFF", + "PARQUET_BUILD_EXECUTABLES": "OFF", + "PARQUET_REQUIRE_ENCRYPTION": "OFF", + "VCPKG_MANIFEST_MODE": "OFF", + "VCPKG_ROOT": "$env{VCPKG_ROOT}", + "VCPKG_TARGET_TRIPLET": "$env{VCPKG_TARGET_TRIPLET}" + } + }, { "name": "ninja-release-jni-macos", "inherits": [ diff --git a/cpp/src/arrow/testing/process.cc b/cpp/src/arrow/testing/process.cc index e5632e47253..40538bd9862 100644 --- a/cpp/src/arrow/testing/process.cc +++ b/cpp/src/arrow/testing/process.cc @@ -176,7 +176,7 @@ class Process::Impl { for (const auto& kv : process::environment::current()) { env[kv.key()] = process::environment::value(kv.value()); } - 
env["PATH"] = process::environment::value(current_exe.parent_path()); + env["PATH"] = process::environment::value(current_exe.parent_path().string()); executable_ = process::environment::find_executable(name, env); # else executable_ = process::search_path(name, {current_exe.parent_path()}); diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc index 3fbe80d4cc3..dc1ac9dfd26 100644 --- a/cpp/src/gandiva/tests/projector_test.cc +++ b/cpp/src/gandiva/tests/projector_test.cc @@ -389,6 +389,10 @@ TEST_F(TestProjector, TestAllIntTypes) { } TEST_F(TestProjector, TestExtendedMath) { +#ifdef __aarch64__ + GTEST_SKIP() << "Failed on aarch64 with 'JIT session error: Symbols not found: [ " + "__multf3, __subtf3, __trunctfdf2, __extenddftf2, __divtf3 ]'"; +#endif // schema for input fields auto field0 = arrow::field("f0", arrow::float64()); auto field1 = arrow::field("f1", arrow::float64()); diff --git a/docker-compose.yml b/docker-compose.yml index 6d3babd4311..7cb7e03b2a9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -130,6 +130,7 @@ x-hierarchy: - conda-python-spark - conda-verify-rc - conan + - cpp-jni - debian-cpp: - debian-c-glib: - debian-ruby @@ -187,6 +188,8 @@ volumes: name: ${ARCH}-alpine-linux-ccache conda-ccache: name: ${ARCH}-conda-ccache + cpp-jni-ccache: + name: ${ARCH}-cpp-jni-ccache debian-ccache: name: ${ARCH}-debian-${DEBIAN}-ccache fedora-ccache: @@ -765,6 +768,37 @@ services: /arrow/ci/scripts/conan_setup.sh && /arrow/ci/scripts/conan_build.sh /arrow /build" + cpp-jni: + # Test for the build configuration for JNI. 
+ # + # Usage: + # docker compose run --rm cpp-jni + # Parameters: + # ARCH: amd64, arm64v8 + # ARCH_ALIAS: x86_64, aarch64 + # ARCH_SHORT: amd64, arm64 + image: ${REPO}:${ARCH}-cpp-jni-${VCPKG} + build: + args: + arch: ${ARCH} + arch_short: ${ARCH_SHORT} + # See available versions at: + # https://quay.io/repository/pypa/manylinux_2_28_x86_64?tab=tags + # https://quay.io/repository/pypa/manylinux_2_28_aarch64?tab=tags + base: quay.io/pypa/manylinux_2_28_${ARCH_ALIAS}:2025.10.09-1 + vcpkg: ${VCPKG} + context: . + dockerfile: ci/docker/cpp-jni.dockerfile + cache_from: + - ${REPO}:${ARCH}-cpp-jni-${VCPKG} + secrets: *vcpkg-build-secrets + environment: + <<: [*common, *ccache] + volumes: + - .:/arrow:delegated + - ${DOCKER_VOLUME_PREFIX}cpp-jni-ccache:/ccache:delegated + command: *cpp-command + ############################### C GLib ###################################### debian-c-glib: From 618f6090118606f83a93cdd88e47868db9c61b8f Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 13 Oct 2025 17:54:33 +0900 Subject: [PATCH 09/18] GH-47795: [Archery] Add support for custom Docker registry (#47796) ### Rationale for this change `archery docker push` doesn't support custom Docker registry such as ghcr.io. ### What changes are included in this PR? Parse Docker image tag and specify Docker registry name to `docker push` if it's specified in the tag. Docker image tag format: `[HOST[:PORT]/]NAMESPACE/REPOSITORY[:TAG]` See also: https://docs.docker.com/reference/cli/docker/image/tag/#description ### Are these changes tested? Yes. ### Are there any user-facing changes? No. 
* GitHub Issue: #47795 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- dev/archery/archery/docker/core.py | 14 ++++-- .../archery/docker/tests/test_docker.py | 49 +++++++++++++------ 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/dev/archery/archery/docker/core.py b/dev/archery/archery/docker/core.py index 7bdc53a3df6..34ea42d6d2e 100644 --- a/dev/archery/archery/docker/core.py +++ b/dev/archery/archery/docker/core.py @@ -433,16 +433,24 @@ def _push(service): else: return self._execute_compose(*args, service['name']) + service = self.config.get(service_name) + if user is not None: + login_args = ['--username', user, '--password-stdin'] + login_kwargs = {'input': password.encode()} + image = service['image'] + # [[HOST[:PORT]/]NAMESPACE/]REPOSITORY[:TAG] + components = image.split('/', 3) + if len(components) == 3: + server = components[0] + login_args.append(server) try: - # TODO(kszucs): have an option for a prompt - self._execute_docker('login', '-u', user, '-p', password) + self._execute_docker('login', *login_args, **login_kwargs) except subprocess.CalledProcessError: # hide credentials msg = f'Failed to push `{service_name}`, check the passed credentials' raise RuntimeError(msg) from None - service = self.config.get(service_name) for ancestor in service['ancestors']: _push(self.config.get(ancestor)) _push(service) diff --git a/dev/archery/archery/docker/tests/test_docker.py b/dev/archery/archery/docker/tests/test_docker.py index e4a278712ea..631932d0fa2 100644 --- a/dev/archery/archery/docker/tests/test_docker.py +++ b/dev/archery/archery/docker/tests/test_docker.py @@ -140,39 +140,39 @@ services: conda-cpp: - image: org/conda-cpp + image: ${REPO}:conda-cpp build: context: . dockerfile: ci/docker/conda-cpp.dockerfile conda-python: - image: org/conda-python + image: ${REPO}:conda-python build: context: . 
dockerfile: ci/docker/conda-cpp.dockerfile args: python: 3.8 conda-python-pandas: - image: org/conda-python-pandas + image: ${REPO}:conda-python-pandas build: context: . dockerfile: ci/docker/conda-python-pandas.dockerfile conda-python-dask: - image: org/conda-python-dask + image: ${REPO}:conda-python-dask ubuntu-cpp: - image: org/ubuntu-cpp + image: ${REPO}:ubuntu-cpp build: context: . dockerfile: ci/docker/ubuntu-${UBUNTU}-cpp.dockerfile ubuntu-cpp-cmake32: - image: org/ubuntu-cpp-cmake32 + image: ${REPO}:ubuntu-cpp-cmake32 ubuntu-c-glib: - image: org/ubuntu-c-glib + image: ${REPO}:ubuntu-c-glib environment: <<: [*sccache] ubuntu-ruby: - image: org/ubuntu-ruby + image: ${REPO}:ubuntu-ruby ubuntu-cuda: - image: org/ubuntu-cuda + image: ${REPO}:ubuntu-cuda environment: CUDA_ENV: 1 OTHER_ENV: 2 @@ -182,6 +182,7 @@ """ arrow_compose_env = { + 'REPO': 'apache/arrow', 'UBUNTU': '20.04', # overridden below 'PYTHON': '3.8', 'PANDAS': 'latest', @@ -484,7 +485,7 @@ def test_compose_run_with_resource_limits(arrow_compose_path): "--cpuset-cpus=0,1", "--memory=7g", "--memory-swap=7g", - "org/conda-cpp" + "apache/arrow:conda-cpp" ]), ] compose = DockerCompose(arrow_compose_path) @@ -493,10 +494,28 @@ def test_compose_run_with_resource_limits(arrow_compose_path): def test_compose_push(arrow_compose_path): - compose = DockerCompose(arrow_compose_path, params=dict(PYTHON='3.9')) + compose = DockerCompose(arrow_compose_path, params=dict(PYTHON="3.9")) + expected_env = PartialEnv(PYTHON="3.9") + expected_calls = [ + mock.call(["docker", "login", "--username", "user", + "--password-stdin"], input=b"pass", check=True), + ] + for image in ["conda-cpp", "conda-python", "conda-python-pandas"]: + expected_calls.append( + mock.call(["docker", "compose", f"--file={compose.config.path}", + "push", image], check=True, env=expected_env) + ) + with assert_subprocess_calls(expected_calls): + compose.push("conda-python-pandas", user="user", password="pass") + + +def 
test_compose_push_custom_server(arrow_compose_path): + compose = DockerCompose(arrow_compose_path, params=dict( + PYTHON="3.9", REPO="ghcr.io/apache/arrow-dev")) expected_env = PartialEnv(PYTHON="3.9") expected_calls = [ - mock.call(["docker", "login", "-u", "user", "-p", "pass"], check=True), + mock.call(["docker", "login", "--username", "user", "--password-stdin", + "ghcr.io"], input=b"pass", check=True), ] for image in ["conda-cpp", "conda-python", "conda-python-pandas"]: expected_calls.append( @@ -504,7 +523,7 @@ def test_compose_push(arrow_compose_path): "push", image], check=True, env=expected_env) ) with assert_subprocess_calls(expected_calls): - compose.push('conda-python-pandas', user='user', password='pass') + compose.push("conda-python-pandas", user="user", password="pass") def test_compose_error(arrow_compose_path): @@ -533,7 +552,7 @@ def test_image_with_gpu(arrow_compose_path): "-e", "CUDA_ENV=1", "-e", "OTHER_ENV=2", "-v", "/host:/container", - "org/ubuntu-cuda", + "apache/arrow:ubuntu-cuda", "/bin/bash", "-c", "echo 1 > /tmp/dummy && cat /tmp/dummy", ] ] @@ -560,7 +579,7 @@ def test_service_info(arrow_compose_path): compose = DockerCompose(arrow_compose_path) service = compose.config.raw_config["services"]["conda-cpp"] assert compose.info(service) == [ - " image: org/conda-cpp", + " image: ${REPO}:conda-cpp", " build", " context: .", " dockerfile: ci/docker/conda-cpp.dockerfile" From d5b8e2b151730312ef80a975627ef7ff2423a232 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Tue, 14 Oct 2025 06:17:08 +0200 Subject: [PATCH 10/18] GH-47438: [Python][Packaging] Set up wheel building for Python 3.14 (#47616) ### Rationale for this change Python 3.14 is currently in a prerelease status and is expected to have a final release in October this year (https://peps.python.org/pep-0745/). We should ensure we are fully ready to support Python 3.14 for the PyArrow 22 release. ### What changes are included in this PR? This PR updates wheels for Python 3.14. 
### Are these changes tested? Tested in the CI and with extended builds. ### Are there any user-facing changes? No, but users will be able to use PyArrow with Python 3.14. * GitHub Issue: #47438 --- Todo: - Update the image revision name in `.env` - Add 3.14 conda build ([arrow/dev/tasks/tasks.yml](https://github.com/apache/arrow/blob/d803afcc43f5d132506318fd9e162d33b2c3d4cd/dev/tasks/tasks.yml#L809)) when https://github.com/conda-forge/pyarrow-feedstock/pull/156 is merged Follow-ups: - https://github.com/apache/arrow/issues/47437 Authored-by: AlenkaF Signed-off-by: AlenkaF --- .env | 4 +-- ...ed-wheel-manylinux-test-imports.dockerfile | 6 ++-- ...-wheel-manylinux-test-unittests.dockerfile | 10 +++--- ...ed-wheel-musllinux-test-imports.dockerfile | 17 +++++----- ...-wheel-musllinux-test-unittests.dockerfile | 17 +++++----- ...eaded-wheel-windows-test-vs2022.dockerfile | 24 +++++++++----- ...e-threaded-wheel-windows-vs2022.dockerfile | 8 +++-- ci/docker/python-wheel-musllinux.dockerfile | 6 +--- ...ython-wheel-windows-test-vs2022.dockerfile | 5 +-- .../python-wheel-windows-vs2022.dockerfile | 5 +-- ci/scripts/install_python.sh | 19 +++++++----- ci/scripts/python_wheel_unix_test.sh | 8 ++--- dev/release/verify-release-candidate.sh | 6 ++-- dev/tasks/python-wheels/github.linux.yml | 11 ++++++- dev/tasks/python-wheels/github.osx.yml | 6 ++-- dev/tasks/tasks.yml | 6 ++-- docker-compose.yml | 31 +++++++++++++------ python/pyarrow/tests/test_array.py | 6 ++-- python/pyproject.toml | 2 ++ python/requirements-wheel-test.txt | 3 +- 20 files changed, 127 insertions(+), 73 deletions(-) diff --git a/.env b/.env index e9c92c1a357..5951b3c95f6 100644 --- a/.env +++ b/.env @@ -102,8 +102,8 @@ VCPKG="4334d8b4c8916018600212ab4dd4bbdc343065d1" # 2025.09.17 Release # ci/docker/python-*-windows-*.dockerfile or the vcpkg config. # This is a workaround for our CI problem that "archery docker build" doesn't # use pulled built images in dev/tasks/python-wheels/github.windows.yml. 
-PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2025-09-04 -PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2025-09-04 +PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2025-10-13 +PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2025-10-13 # Use conanio/${CONAN_BASE}:{CONAN_VERSION} for "docker compose run --rm conan". # See https://github.com/conan-io/conan-docker-tools#readme and diff --git a/ci/docker/python-free-threaded-wheel-manylinux-test-imports.dockerfile b/ci/docker/python-free-threaded-wheel-manylinux-test-imports.dockerfile index c884611ca39..e4149821de3 100644 --- a/ci/docker/python-free-threaded-wheel-manylinux-test-imports.dockerfile +++ b/ci/docker/python-free-threaded-wheel-manylinux-test-imports.dockerfile @@ -18,18 +18,20 @@ ARG base FROM ${base} +ARG python_version=3.13 + ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update -y -q && \ apt install -y -q --no-install-recommends software-properties-common gpg-agent && \ add-apt-repository -y ppa:deadsnakes/ppa && \ apt-get update -y -q && \ - apt install -y -q --no-install-recommends python3.13-dev python3.13-nogil python3.13-venv && \ + apt install -y -q --no-install-recommends python${python_version}-dev python${python_version}-nogil python${python_version}-venv && \ apt-get clean && \ rm -rf /var/lib/apt/lists* ENV ARROW_PYTHON_VENV /arrow-dev -RUN python3.13t -m venv ${ARROW_PYTHON_VENV} +RUN python${python_version}t -m venv ${ARROW_PYTHON_VENV} ENV PYTHON_GIL 0 ENV PATH "${ARROW_PYTHON_VENV}/bin:${PATH}" diff --git a/ci/docker/python-free-threaded-wheel-manylinux-test-unittests.dockerfile b/ci/docker/python-free-threaded-wheel-manylinux-test-unittests.dockerfile index 3c4c53f805e..566f0c0402a 100644 --- a/ci/docker/python-free-threaded-wheel-manylinux-test-unittests.dockerfile +++ b/ci/docker/python-free-threaded-wheel-manylinux-test-unittests.dockerfile @@ -18,6 +18,8 @@ ARG base FROM ${base} +ARG python_version=3.13 + ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update -y -q && \ @@ -27,14 +29,14 @@ RUN apt-get update -y -q 
&& \ apt install -y -q --no-install-recommends \ build-essential \ libffi-dev \ - python3.13-dev \ - python3.13-nogil \ - python3.13-venv && \ + python${python_version}-dev \ + python${python_version}-nogil \ + python${python_version}-venv && \ apt-get clean && \ rm -rf /var/lib/apt/lists* ENV ARROW_PYTHON_VENV /arrow-dev -RUN python3.13t -m venv ${ARROW_PYTHON_VENV} +RUN python${python_version}t -m venv ${ARROW_PYTHON_VENV} ENV PYTHON_GIL 0 ENV PATH "${ARROW_PYTHON_VENV}/bin:${PATH}" diff --git a/ci/docker/python-free-threaded-wheel-musllinux-test-imports.dockerfile b/ci/docker/python-free-threaded-wheel-musllinux-test-imports.dockerfile index 526f345416b..e79facb4904 100644 --- a/ci/docker/python-free-threaded-wheel-musllinux-test-imports.dockerfile +++ b/ci/docker/python-free-threaded-wheel-musllinux-test-imports.dockerfile @@ -18,6 +18,9 @@ ARG base FROM ${base} +ARG python_version=3.13 +ARG python_patch_version=3.13.7 + RUN apk add --no-cache \ bash \ build-base \ @@ -34,19 +37,19 @@ RUN apk add --no-cache \ tzdata \ zlib-dev -# Install Python3.13.2 without GIL -RUN wget https://github.com/python/cpython/archive/refs/tags/v3.13.2.tar.gz && \ - tar -xzf v3.13.2.tar.gz && \ - rm v3.13.2.tar.gz && \ - cd cpython-3.13.2/ && \ +# Install Python without GIL +RUN wget https://github.com/python/cpython/archive/refs/tags/v${python_patch_version}.tar.gz && \ + tar -xzf v${python_patch_version}.tar.gz && \ + rm v${python_patch_version}.tar.gz && \ + cd cpython-${python_patch_version}/ && \ ./configure --disable-gil --with-ensurepip && \ make -j && \ make install && \ cd ../ && \ - rm -rf cpython-3.13.2/ + rm -rf cpython-${python_patch_version}/ ENV ARROW_PYTHON_VENV /arrow-dev -RUN python3.13t -m venv ${ARROW_PYTHON_VENV} +RUN python${python_version}t -m venv ${ARROW_PYTHON_VENV} ENV PYTHON_GIL 0 ENV PATH "${ARROW_PYTHON_VENV}/bin:${PATH}" diff --git a/ci/docker/python-free-threaded-wheel-musllinux-test-unittests.dockerfile 
b/ci/docker/python-free-threaded-wheel-musllinux-test-unittests.dockerfile index fa2139b6ba5..3b170087ba8 100644 --- a/ci/docker/python-free-threaded-wheel-musllinux-test-unittests.dockerfile +++ b/ci/docker/python-free-threaded-wheel-musllinux-test-unittests.dockerfile @@ -18,6 +18,9 @@ ARG base FROM ${base} +ARG python_version=3.13 +ARG python_patch_version=3.13.7 + RUN apk add --no-cache \ bash \ build-base \ @@ -34,19 +37,19 @@ RUN apk add --no-cache \ tzdata \ zlib-dev -# Install Python3.13.2 without GIL -RUN wget https://github.com/python/cpython/archive/refs/tags/v3.13.2.tar.gz && \ - tar -xzf v3.13.2.tar.gz && \ - rm v3.13.2.tar.gz && \ - cd cpython-3.13.2/ && \ +# Install Python without GIL +RUN wget https://github.com/python/cpython/archive/refs/tags/v${python_patch_version}.tar.gz && \ + tar -xzf v${python_patch_version}.tar.gz && \ + rm v${python_patch_version}.tar.gz && \ + cd cpython-${python_patch_version}/ && \ ./configure --disable-gil --with-ensurepip && \ make -j && \ make install && \ cd ../ && \ - rm -rf cpython-3.13.2/ + rm -rf cpython-${python_patch_version}/ ENV ARROW_PYTHON_VENV /arrow-dev -RUN python3.13t -m venv ${ARROW_PYTHON_VENV} +RUN python${python_version}t -m venv ${ARROW_PYTHON_VENV} ENV PYTHON_GIL 0 ENV PATH "${ARROW_PYTHON_VENV}/bin:${PATH}" diff --git a/ci/docker/python-free-threaded-wheel-windows-test-vs2022.dockerfile b/ci/docker/python-free-threaded-wheel-windows-test-vs2022.dockerfile index 5b27c786ff4..ab257b271e5 100644 --- a/ci/docker/python-free-threaded-wheel-windows-test-vs2022.dockerfile +++ b/ci/docker/python-free-threaded-wheel-windows-test-vs2022.dockerfile @@ -26,24 +26,34 @@ FROM ${base} ARG python=3.13 +# hadolint ignore=SC1072 +RUN (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.1") & \ + (if "%python%"=="3.14" setx PYTHON_VERSION "3.14.0") + SHELL ["powershell", "-NoProfile", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"] -RUN $filename = 'python-3.13.1-amd64.exe'; \ 
- $url = 'https://www.python.org/ftp/python/3.13.1/' + $filename; \ +RUN $version = $env:PYTHON_VERSION; \ + $filename = 'python-' + $version + '-amd64.exe'; \ + $url = 'https://www.python.org/ftp/python/' + $version + '/' + $filename; \ Invoke-WebRequest -Uri $url -OutFile $filename; \ Start-Process -FilePath $filename -ArgumentList '/quiet', 'Include_freethreaded=1' -Wait ENV PYTHON_CMD="py -${python}t" SHELL ["cmd", "/S", "/C"] -RUN %PYTHON_CMD% -m pip install -U pip setuptools - -COPY python/requirements-wheel-test-3.13t.txt C:/arrow/python/ -# Cython and Pandas wheels for 3.13 free-threaded are not released yet +RUN %PYTHON_CMD% -m pip install -U pip setuptools & \ + if "%python%"=="3.13" ( \ + setx REQUIREMENTS_FILE "requirements-wheel-test-3.13t.txt" \ + ) else ( \ + setx REQUIREMENTS_FILE "requirements-wheel-test.txt" \ + ) + +COPY python/requirements-wheel-test-3.13t.txt python/requirements-wheel-test.txt C:/arrow/python/ +# Cython and Pandas wheels for free-threaded are not released yet RUN %PYTHON_CMD% -m pip install \ --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \ --pre \ --prefer-binary \ - -r C:/arrow/python/requirements-wheel-test-3.13t.txt + -r C:/arrow/python/%REQUIREMENTS_FILE% ENV PYTHON="${python}t" ENV PYTHON_GIL=0 diff --git a/ci/docker/python-free-threaded-wheel-windows-vs2022.dockerfile b/ci/docker/python-free-threaded-wheel-windows-vs2022.dockerfile index adbdccde71d..77a64fd5c24 100644 --- a/ci/docker/python-free-threaded-wheel-windows-vs2022.dockerfile +++ b/ci/docker/python-free-threaded-wheel-windows-vs2022.dockerfile @@ -26,9 +26,13 @@ FROM ${base} ARG python=3.13 +RUN (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.1") & \ + (if "%python%"=="3.14" setx PYTHON_VERSION "3.14.0") + SHELL ["powershell", "-NoProfile", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"] -RUN $filename = 'python-3.13.1-amd64.exe'; \ - $url = 
'https://www.python.org/ftp/python/3.13.1/' + $filename; \ +RUN $version = $env:PYTHON_VERSION; \ + $filename = 'python-' + $version + '-amd64.exe'; \ + $url = 'https://www.python.org/ftp/python/' + $version + '/' + $filename; \ Invoke-WebRequest -Uri $url -OutFile $filename; \ Start-Process -FilePath $filename -ArgumentList '/quiet', 'Include_freethreaded=1' -Wait diff --git a/ci/docker/python-wheel-musllinux.dockerfile b/ci/docker/python-wheel-musllinux.dockerfile index c1c4fd5bd28..d00d44bd093 100644 --- a/ci/docker/python-wheel-musllinux.dockerfile +++ b/ci/docker/python-wheel-musllinux.dockerfile @@ -82,6 +82,7 @@ RUN --mount=type=secret,id=github_repository_owner \ export GITHUB_REPOSITORY_OWNER=$(cat /run/secrets/github_repository_owner); \ export GITHUB_TOKEN=$(cat /run/secrets/github_token); \ export VCPKG_BINARY_SOURCES=$(cat /run/secrets/vcpkg_binary_sources); \ + export CMAKE_POLICY_VERSION_MINIMUM=3.5; \ arrow/ci/scripts/install_vcpkg.sh ${VCPKG_ROOT} ${vcpkg} && \ vcpkg install \ --clean-after-build \ @@ -110,10 +111,5 @@ RUN PYTHON_ROOT=$(find /opt/python -name cp${PYTHON_VERSION/./}-${PYTHON_ABI_TAG SHELL ["/bin/bash", "-i", "-c", "-l"] ENTRYPOINT ["/bin/bash", "-i", "-c", "-l"] -# Remove once there are released Cython wheels for 3.13 free-threaded available -RUN if [ "${python_abi_tag}" = "cp313t" ]; then \ - pip install cython --pre --extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" --prefer-binary ; \ - fi - COPY python/requirements-wheel-build.txt /arrow/python/ RUN pip install -r /arrow/python/requirements-wheel-build.txt diff --git a/ci/docker/python-wheel-windows-test-vs2022.dockerfile b/ci/docker/python-wheel-windows-test-vs2022.dockerfile index a520fac6215..1bed37eb001 100644 --- a/ci/docker/python-wheel-windows-test-vs2022.dockerfile +++ b/ci/docker/python-wheel-windows-test-vs2022.dockerfile @@ -30,8 +30,9 @@ FROM ${base} ARG python=3.10 RUN (if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11" && setx 
PYTHON_CMD "py -3.10") & \ (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9" && setx PYTHON_CMD "py -3.11") & \ - (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.8" && setx PYTHON_CMD "py -3.12") & \ - (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.1" && setx PYTHON_CMD "py -3.13") + (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.10" && setx PYTHON_CMD "py -3.12") & \ + (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.7" && setx PYTHON_CMD "py -3.13") & \ + (if "%python%"=="3.14" setx PYTHON_VERSION "3.14.0" && setx PYTHON_CMD "py -3.14") # hadolint ignore=DL3059 RUN choco install -r -y --pre --no-progress --force python --version=%PYTHON_VERSION% diff --git a/ci/docker/python-wheel-windows-vs2022.dockerfile b/ci/docker/python-wheel-windows-vs2022.dockerfile index c15970ca9b4..04750ff44c4 100644 --- a/ci/docker/python-wheel-windows-vs2022.dockerfile +++ b/ci/docker/python-wheel-windows-vs2022.dockerfile @@ -25,8 +25,9 @@ FROM ${base} ARG python=3.10 RUN (if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11" && setx PYTHON_CMD "py -3.10") & \ (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9" && setx PYTHON_CMD "py -3.11") & \ - (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.8" && setx PYTHON_CMD "py -3.12") & \ - (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.1" && setx PYTHON_CMD "py -3.13") + (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.10" && setx PYTHON_CMD "py -3.12") & \ + (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.7" && setx PYTHON_CMD "py -3.13") & \ + (if "%python%"=="3.14" setx PYTHON_VERSION "3.14.0" && setx PYTHON_CMD "py -3.14") RUN choco install -r -y --pre --no-progress python --version=%PYTHON_VERSION% RUN %PYTHON_CMD% -m pip install -U pip setuptools diff --git a/ci/scripts/install_python.sh b/ci/scripts/install_python.sh index 915c58bbb47..fe0c4bcb931 100755 --- a/ci/scripts/install_python.sh +++ b/ci/scripts/install_python.sh @@ -27,9 +27,11 @@ platforms=([windows]=Windows declare -A versions versions=([3.10]=3.10.11 
[3.11]=3.11.9 - [3.12]=3.12.9 - [3.13]=3.13.2 - [3.13t]=3.13.2) + [3.12]=3.12.10 + [3.13]=3.13.7 + [3.13t]=3.13.7 + [3.14]=3.14.0 + [3.14t]=3.14.0) if [ "$#" -ne 2 ]; then echo "Usage: $0 " @@ -50,7 +52,9 @@ if [ "$platform" = "macOS" ]; then wget "https://www.python.org/ftp/python/${full_version}/${fname}" echo "Installing Python..." - if [[ $2 == "3.13t" ]]; then + if [[ $2 == "3.13t" ]] || [[ $2 == "3.14t" ]]; then + # Extract the base version without 't' suffix + base_version="${version%t}" # See https://github.com/python/cpython/issues/120098#issuecomment-2151122033 for more info on this. cat > ./choicechanges.plist < @@ -63,7 +67,7 @@ if [ "$platform" = "macOS" ]; then choiceAttribute selected choiceIdentifier - org.python.Python.PythonTFramework-3.13 + org.python.Python.PythonTFramework-${base_version} @@ -76,8 +80,9 @@ EOF rm "$fname" python="/Library/Frameworks/Python.framework/Versions/${version}/bin/python${version}" - if [[ $2 == "3.13t" ]]; then - python="/Library/Frameworks/PythonT.framework/Versions/3.13/bin/python3.13t" + if [[ $2 == "3.13t" ]] || [[ $2 == "3.14t" ]]; then + base_version="${version%t}" + python="/Library/Frameworks/PythonT.framework/Versions/${base_version}/bin/python${base_version}t" fi echo "Installing Pip..." 
diff --git a/ci/scripts/python_wheel_unix_test.sh b/ci/scripts/python_wheel_unix_test.sh index e18c0c740ca..2b8ee7be745 100755 --- a/ci/scripts/python_wheel_unix_test.sh +++ b/ci/scripts/python_wheel_unix_test.sh @@ -31,9 +31,9 @@ source_dir=${1} : "${ARROW_AZURE:=ON}" : "${ARROW_FLIGHT:=ON}" : "${ARROW_GCS:=ON}" +: "${CHECK_IMPORTS:=ON}" : "${ARROW_S3:=ON}" : "${ARROW_SUBSTRAIT:=ON}" -: "${CHECK_IMPORTS:=ON}" : "${CHECK_WHEEL_CONTENT:=ON}" : "${CHECK_UNITTESTS:=ON}" : "${INSTALL_PYARROW:=ON}" @@ -106,10 +106,10 @@ is_free_threaded() { if [ "${CHECK_UNITTESTS}" == "ON" ]; then # Install testing dependencies - if [ "$(is_free_threaded)" = "ON" ]; then - echo "Free-threaded Python build detected" + if [ "$(is_free_threaded)" = "ON" ] && [[ "${PYTHON:-}" == *"3.13"* ]]; then + echo "Free-threaded Python 3.13 build detected" python -m pip install -U -r "${source_dir}/python/requirements-wheel-test-3.13t.txt" - elif [ "$(is_free_threaded)" = "OFF" ]; then + else echo "Regular Python build detected" python -m pip install -U -r "${source_dir}/python/requirements-wheel-test.txt" fi diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index e202c7b1e1e..56c671910f0 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -852,7 +852,7 @@ test_linux_wheels() { local arch="x86_64" fi - local python_versions="${TEST_PYTHON_VERSIONS:-3.10 3.11 3.12 3.13}" + local python_versions="${TEST_PYTHON_VERSIONS:-3.10 3.11 3.12 3.13 3.14}" local platform_tags="${TEST_WHEEL_PLATFORM_TAGS:-manylinux_2_28_${arch}}" if [ "${SOURCE_KIND}" != "local" ]; then @@ -891,11 +891,11 @@ test_macos_wheels() { # apple silicon processor if [ "$(uname -m)" = "arm64" ]; then - local python_versions="3.10 3.11 3.12 3.13" + local python_versions="3.10 3.11 3.12 3.13 3.14" local platform_tags="macosx_12_0_arm64" local check_flight=OFF else - local python_versions="3.10 3.11 3.12 3.13" + local python_versions="3.10 3.11 
3.12 3.13 3.14" local platform_tags="macosx_12_0_x86_64" fi diff --git a/dev/tasks/python-wheels/github.linux.yml b/dev/tasks/python-wheels/github.linux.yml index 09822ed2ffc..871e044fa21 100644 --- a/dev/tasks/python-wheels/github.linux.yml +++ b/dev/tasks/python-wheels/github.linux.yml @@ -42,16 +42,25 @@ jobs: {% endif %} PYTHON: "{{ python_version }}" PYTHON_ABI_TAG: "{{ python_abi_tag }}" + {% if python_version == "3.14" %} + PYTHON_IMAGE_TAG: "3.14" + PYTHON_PATCH_VERSION: "3.14.0" + {% elif python_version == "3.13" %} PYTHON_IMAGE_TAG: "{{ python_version }}" + PYTHON_PATCH_VERSION: "3.13.7" + {% else %} + PYTHON_IMAGE_TAG: "{{ python_version }}" + {% endif %} steps: {{ macros.github_checkout_arrow()|indent }} {{ macros.github_install_archery()|indent }} {{ macros.github_login_dockerhub()|indent }} + # Detect free-threaded Python builds - name: Prepare run: | - if [ "${PYTHON_ABI_TAG}" = "cp313t" ]; then + if [[ "${PYTHON_ABI_TAG}" == *t ]]; then test_image_prefix=python-free-threaded else test_image_prefix=python diff --git a/dev/tasks/python-wheels/github.osx.yml b/dev/tasks/python-wheels/github.osx.yml index 140971626bf..ef8e90f4129 100644 --- a/dev/tasks/python-wheels/github.osx.yml +++ b/dev/tasks/python-wheels/github.osx.yml @@ -27,7 +27,7 @@ PYARROW_VERSION: "{{ arrow.no_rc_version }}" PYTHON_VERSION: "{{ python_version }}" PYTHON_ABI_TAG: "{{ python_abi_tag }}" - {% if python_abi_tag == "cp313t" %} + {% if python_abi_tag.endswith('t') %} PYTHON: "/Library/Frameworks/PythonT.framework/Versions/{{ python_version }}/bin/python{{ python_version }}t" {% else %} PYTHON: "/Library/Frameworks/Python.framework/Versions/{{ python_version }}/bin/python{{ python_version }}" @@ -94,7 +94,7 @@ jobs: --x-feature=s3 - name: Install Python {{ python_version }} - run: sudo arrow/ci/scripts/install_python.sh macos {{ "3.13t" if python_abi_tag == "cp313t" else python_version }} + run: sudo arrow/ci/scripts/install_python.sh macos {{ python_version + "t" if 
python_abi_tag.endswith('t') else python_version }} - name: Build Wheel run: | @@ -131,7 +131,7 @@ jobs: - name: Test Wheel env: PYTEST_ADDOPTS: "-k 'not test_cancellation'" - PYTHON_GIL: {{ 0 if python_abi_tag == "cp313t" else 1 }} + PYTHON_GIL: {{ 0 if python_abi_tag.endswith('t') else 1 }} run: | $PYTHON -m venv test-env source test-env/bin/activate diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 9d225e3b702..749042779ed 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -184,7 +184,9 @@ tasks: ("3.11", "cp311", "cp311"), ("3.12", "cp312", "cp312"), ("3.13", "cp313", "cp313"), - ("3.13", "cp313", "cp313t")] %} + ("3.13", "cp313", "cp313t"), + ("3.14", "cp314", "cp314"), + ("3.14", "cp314", "cp314t")] %} {############################## Wheel Linux ##################################} @@ -555,7 +557,7 @@ tasks: UBUNTU: 22.04 image: ubuntu-cpp-emscripten -{% for python_version in ["3.10", "3.11", "3.12", "3.13"] %} +{% for python_version in ["3.10", "3.11", "3.12", "3.13", "3.14"] %} test-conda-python-{{ python_version }}: ci: github template: docker-tests/github.linux.yml diff --git a/docker-compose.yml b/docker-compose.yml index 7cb7e03b2a9..937620f82ec 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1178,7 +1178,7 @@ services: args: arch: ${ARCH} arch_short: ${ARCH_SHORT} - base: quay.io/pypa/manylinux_2_28_${ARCH_ALIAS}:2024-08-03-32dfa47 + base: quay.io/pypa/manylinux_2_28_${ARCH_ALIAS}:2025-06-04-496f7e1 manylinux: 2_28 python: ${PYTHON} python_abi_tag: ${PYTHON_ABI_TAG} @@ -1203,7 +1203,7 @@ services: args: arch: ${ARCH} arch_short: ${ARCH_SHORT} - base: quay.io/pypa/musllinux_1_2_${ARCH_ALIAS}:2025-01-18-a325f1d + base: quay.io/pypa/musllinux_1_2_${ARCH_ALIAS}:2025-06-04-496f7e1 musllinux: 1_2 python: ${PYTHON} python_abi_tag: ${PYTHON_ABI_TAG} @@ -1239,14 +1239,16 @@ services: # TODO: Remove this when the official Docker Python image supports the free-threaded build. 
# See https://github.com/docker-library/python/issues/947 for more info. python-free-threaded-wheel-musllinux-test-imports: - image: ${REPO}:${ARCH}-python-3.13-free-threaded-wheel-musllinux-test-imports + image: ${REPO}:${ARCH}-python-${PYTHON_IMAGE_TAG}-free-threaded-wheel-musllinux-test-imports build: args: base: "${ARCH}/alpine:${ALPINE_LINUX}" + python_version: ${PYTHON} + python_patch_version: ${PYTHON_PATCH_VERSION} context: . dockerfile: ci/docker/python-free-threaded-wheel-musllinux-test-imports.dockerfile cache_from: - - ${REPO}:${ARCH}-python-3.13-free-threaded-wheel-musllinux-test-imports + - ${REPO}:${ARCH}-python-${PYTHON_IMAGE_TAG}-free-threaded-wheel-musllinux-test-imports shm_size: 2G volumes: - .:/arrow:delegated @@ -1254,6 +1256,7 @@ services: <<: *common CHECK_IMPORTS: "ON" CHECK_UNITTESTS: "OFF" + PYTHON: ${PYTHON} command: /arrow/ci/scripts/python_wheel_unix_test.sh /arrow python-wheel-musllinux-test-unittests: @@ -1279,14 +1282,16 @@ services: # TODO: Remove this when the official Docker Python image supports the free-threaded build. # See https://github.com/docker-library/python/issues/947 for more info. python-free-threaded-wheel-musllinux-test-unittests: - image: ${REPO}:${ARCH}-python-3.13-free-threaded-wheel-musllinux-test-unittests + image: ${REPO}:${ARCH}-python-${PYTHON_IMAGE_TAG}-free-threaded-wheel-musllinux-test-unittests build: args: base: "${ARCH}/alpine:${ALPINE_LINUX}" + python_version: ${PYTHON} + python_patch_version: ${PYTHON_PATCH_VERSION} context: . 
dockerfile: ci/docker/python-free-threaded-wheel-musllinux-test-unittests.dockerfile cache_from: - - ${REPO}:${ARCH}-python-3.13-free-threaded-wheel-musllinux-test-unittests + - ${REPO}:${ARCH}-python-${PYTHON_IMAGE_TAG}-free-threaded-wheel-musllinux-test-unittests shm_size: 2G volumes: - .:/arrow:delegated @@ -1294,6 +1299,7 @@ services: <<: *common CHECK_IMPORTS: "OFF" CHECK_UNITTESTS: "ON" + PYTHON: ${PYTHON} command: /arrow/ci/scripts/python_wheel_unix_test.sh /arrow python-wheel-manylinux-test-imports: @@ -1311,14 +1317,15 @@ services: # TODO: Remove this when the official Docker Python image supports the free-threaded build. # See https://github.com/docker-library/python/issues/947 for more info. python-free-threaded-wheel-manylinux-test-imports: - image: ${REPO}:${ARCH}-python-3.13-free-threaded-wheel-manylinux-test-imports + image: ${REPO}:${ARCH}-python-${PYTHON_IMAGE_TAG}-free-threaded-wheel-manylinux-test-imports build: args: base: "${ARCH}/ubuntu:${UBUNTU}" + python_version: ${PYTHON} context: . dockerfile: ci/docker/python-free-threaded-wheel-manylinux-test-imports.dockerfile cache_from: - - ${REPO}:${ARCH}-python-3.13-free-threaded-wheel-manylinux-test-imports + - ${REPO}:${ARCH}-python-${PYTHON_IMAGE_TAG}-free-threaded-wheel-manylinux-test-imports shm_size: 2G volumes: - .:/arrow:delegated @@ -1326,6 +1333,7 @@ services: <<: *common CHECK_IMPORTS: "ON" CHECK_UNITTESTS: "OFF" + PYTHON: ${PYTHON} command: /arrow/ci/scripts/python_wheel_unix_test.sh /arrow python-wheel-manylinux-test-unittests: @@ -1352,14 +1360,15 @@ services: # TODO: Remove this when the official Docker Python image supports the free-threaded build. # See https://github.com/docker-library/python/issues/947 for more info. 
python-free-threaded-wheel-manylinux-test-unittests: - image: ${REPO}:${ARCH}-python-3.13-free-threaded-wheel-manylinux-test-unittests + image: ${REPO}:${ARCH}-python-${PYTHON_IMAGE_TAG}-free-threaded-wheel-manylinux-test-unittests build: args: base: "${ARCH}/ubuntu:${UBUNTU}" + python_version: ${PYTHON} context: . dockerfile: ci/docker/python-free-threaded-wheel-manylinux-test-unittests.dockerfile cache_from: - - ${REPO}:${ARCH}-python-3.13-free-threaded-wheel-manylinux-test-unittests + - ${REPO}:${ARCH}-python-${PYTHON_IMAGE_TAG}-free-threaded-wheel-manylinux-test-unittests shm_size: 2G volumes: - .:/arrow:delegated @@ -1367,6 +1376,7 @@ services: <<: *common CHECK_IMPORTS: "OFF" CHECK_UNITTESTS: "ON" + PYTHON: ${PYTHON} command: /arrow/ci/scripts/python_wheel_unix_test.sh /arrow python-wheel-windows-vs2022-base: @@ -1441,6 +1451,7 @@ services: build: args: base: ${REPO}:python-wheel-windows-test-vs2022-base-${PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION} + python: ${PYTHON} context: . dockerfile: ci/docker/python-free-threaded-wheel-windows-test-vs2022.dockerfile volumes: *python-wheel-windows-vs2022-volumes diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 009ab1e849b..ec361159c5f 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -292,10 +292,12 @@ def test_to_pandas_zero_copy(): for i in range(10): series = arr.to_pandas() - assert sys.getrefcount(series) == 2 + # In Python 3.14 interpreter might avoid some + # reference count modifications + assert sys.getrefcount(series) in (1, 2) series = None # noqa - assert sys.getrefcount(arr) == 2 + assert sys.getrefcount(arr) in (1, 2) for i in range(10): arr = pa.array(range(10)) diff --git a/python/pyproject.toml b/python/pyproject.toml index fac3b25c554..240db14dc72 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -39,6 +39,8 @@ classifiers = [ 'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 
3.12', 'Programming Language :: Python :: 3.13', + 'Programming Language :: Python :: 3.14', + 'Programming Language :: Python :: Free Threading :: 2 - Beta', ] maintainers = [ {name = "Apache Arrow Developers", email = "dev@arrow.apache.org"} diff --git a/python/requirements-wheel-test.txt b/python/requirements-wheel-test.txt index 06c059f591f..b2ff4553b45 100644 --- a/python/requirements-wheel-test.txt +++ b/python/requirements-wheel-test.txt @@ -15,6 +15,7 @@ tzdata; sys_platform == 'win32' numpy~=1.21.3; python_version < "3.11" numpy~=1.23.2; python_version == "3.11" numpy~=1.26.0; python_version == "3.12" -numpy~=2.1.0; python_version >= "3.13" +numpy~=2.1.0; python_version == "3.13" +numpy~=2.3.3; python_version >= "3.14" pandas From 08f5304074ce550c342f97e107081ea61a51e3da Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 14 Oct 2025 08:44:19 +0200 Subject: [PATCH 11/18] GH-47803: [C++][Parquet] Fix read out of bounds on invalid RLE data (#47804) Found by OSS-Fuzz, should fix https://issues.oss-fuzz.com/issues/451150486. Ensure RLE run is within bounds before reading it. Yes, by fuzz regression test in ASAN/UBSAN build. No. **This PR contains a "Critical Fix".** (If the changes fix either (a) a security vulnerability, (b) a bug that caused incorrect or invalid data to be produced, or (c) a bug that causes a crash (even when the API contract is upheld), please provide explanation. If not, you can remove this.) 
* GitHub Issue: #47803 Authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/arrow/util/rle_encoding_internal.h | 5 +++++ testing | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/rle_encoding_internal.h b/cpp/src/arrow/util/rle_encoding_internal.h index a7917483bbc..2420270f3ab 100644 --- a/cpp/src/arrow/util/rle_encoding_internal.h +++ b/cpp/src/arrow/util/rle_encoding_internal.h @@ -699,6 +699,11 @@ auto RleBitPackedParser::PeekImpl(Handler&& handler) const ARROW_DCHECK_LT(value_bytes, internal::max_size_for_v); const auto bytes_read = header_bytes + static_cast(value_bytes); + if (ARROW_PREDICT_FALSE(bytes_read > data_size_)) { + // RLE run would overflow data buffer + return {0, ControlFlow::Break}; + } + auto control = handler.OnRleRun(RleRun(data_ + header_bytes, values_count, value_bit_width_)); diff --git a/testing b/testing index abf6d7ebde7..9a02925d1ba 160000 --- a/testing +++ b/testing @@ -1 +1 @@ -Subproject commit abf6d7ebde7ab70b541c51859dad2bef71a0151e +Subproject commit 9a02925d1ba80bd493b6d4da6e8a777588d57ac4 From 2140934e4d79587ffa91e5d1462ac7ee673d2df0 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Tue, 14 Oct 2025 08:17:18 +0100 Subject: [PATCH 12/18] GH-47738: [R] Update NEWS.md for 22.0.0 (#47739) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Summarise changes for release ### What changes are included in this PR? Update NEWS file ### Are these changes tested? No ### Are there any user-facing changes? No * GitHub Issue: #47738 Authored-by: Nic Crane Signed-off-by: Raúl Cumplido --- r/NEWS.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/r/NEWS.md b/r/NEWS.md index 3be4bd951b1..b5d894215ca 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -19,6 +19,14 @@ # arrow 21.0.0.9000 +## New features + +- `stringr::str_replace_na()` binding implemented (#47521). 
+ +## Minor improvements and fixes + +- Subsecond time variables no longer truncated in `hms::hms()` bindings (#47278) + # arrow 21.0.0.1 ## Minor improvements and fixes From e2fa94cf96c5b21924451ca52a6b522618c28a9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 14 Oct 2025 14:43:32 +0200 Subject: [PATCH 13/18] GH-47809: [CI][Release] Fix Windows verification job trying to install patch from conda (#47810) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Our verify-rc-source Windows job is failing due to patch not being available for Windows. ### What changes are included in this PR? Move patch requirement from `conda_env_cpp.txt` to `conda_env_unix.txt` ### Are these changes tested? Yes via CI and archery. ### Are there any user-facing changes? No * GitHub Issue: #47809 Authored-by: Raúl Cumplido Signed-off-by: Raúl Cumplido --- ci/conda_env_cpp.txt | 1 - ci/conda_env_unix.txt | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index 52b4cdbba81..6e23e920a40 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -41,7 +41,6 @@ meson ninja nodejs orc<2.1.0 -patch pkg-config python rapidjson diff --git a/ci/conda_env_unix.txt b/ci/conda_env_unix.txt index 1973238adff..4728068c4e8 100644 --- a/ci/conda_env_unix.txt +++ b/ci/conda_env_unix.txt @@ -20,4 +20,5 @@ autoconf ccache orc +patch pkg-config From 07e4d5aeadf0c24542309edc3056ed26dfc5f49c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 15 Oct 2025 11:41:23 +0200 Subject: [PATCH 14/18] GH-47819: [CI][Packaging][Release] Avoid triggering Linux packages on release branch push (#47826) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change We require the Linux package jobs to be triggered on RC tag creation. 
For example for 22.0.0, we currently push the tag `apache-arrow-22.0.0-rc0` and the release branch `release-22.0.0-rc0`. Those events are triggering builds over the same commit and the tag event gets cancelled due to a "high priority task" triggering the same jobs. This causes jobs to fail on the branch because the ARROW_VERSION is not generated. If we manually re-trigger the jobs on the tag they are successful. ### What changes are included in this PR? Remove the `release-*` branches from triggering the event to allow only the tag to run the jobs so they don't get cancelled. ### Are these changes tested? No ### Are there any user-facing changes? No * GitHub Issue: #47819 Authored-by: Raúl Cumplido Signed-off-by: Raúl Cumplido --- .github/workflows/package_linux.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index f39ca02bf52..75969615861 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -22,6 +22,7 @@ on: branches: - '**' - '!dependabot/**' + - '!release-*' paths: - '.github/workflows/check_labels.yml' - '.github/workflows/package_linux.yml' From e04fafb1242ce428b5068f075de58579992f8c5c Mon Sep 17 00:00:00 2001 From: Aihua Xu Date: Mon, 20 Oct 2025 04:55:30 -0700 Subject: [PATCH 15/18] GH-47838: [C++][Parquet] Set Variant specification version to 1 to align with the variant spec (#47835) ### Rationale for this change According to the [Variant specification](https://github.com/apache/parquet-format/blob/master/VariantEncoding.md), the specification_version field must be set to 1 to indicate Variant encoding version 1. Currently, this field defaults to 0, which violates the specification. Parquet readers that strictly enforce specification version validation will fail to read files containing Variant types. image ### What changes are included in this PR? The change includes defaulting the specification version to 1. 
### Are these changes tested? The change is covered by unit test. ### Are there any user-facing changes? The Parquet files produced the variant logical type annotation `VARIANT(1)`. ``` Schema: message schema { optional group V (VARIANT(1)) = 1 { required binary metadata; required binary value; } } ``` * GitHub Issue: #47838 Lead-authored-by: Aihua Co-authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/parquet/schema_test.cc | 34 +++++++++++++++++++- cpp/src/parquet/types.cc | 58 +++++++++++++++++++++++++++++----- cpp/src/parquet/types.h | 11 +++++-- 3 files changed, 92 insertions(+), 11 deletions(-) diff --git a/cpp/src/parquet/schema_test.cc b/cpp/src/parquet/schema_test.cc index c33e5ccf4a5..2950a7df70f 100644 --- a/cpp/src/parquet/schema_test.cc +++ b/cpp/src/parquet/schema_test.cc @@ -1580,7 +1580,8 @@ TEST(TestLogicalTypeOperation, LogicalTypeRepresentation) { LogicalType::EdgeInterpolationAlgorithm::KARNEY), "Geography(crs=srid:1234, algorithm=karney)", R"({"Type": "Geography", "crs": "srid:1234", "algorithm": "karney"})"}, - {LogicalType::Variant(), "Variant", R"({"Type": "Variant"})"}, + {LogicalType::Variant(), "Variant(1)", R"({"Type": "Variant", "SpecVersion": 1})"}, + {LogicalType::Variant(2), "Variant(2)", R"({"Type": "Variant", "SpecVersion": 2})"}, {LogicalType::None(), "None", R"({"Type": "None"})"}, }; @@ -2353,6 +2354,37 @@ TEST(TestLogicalTypeSerialization, Roundtrips) { // Group nodes ... 
ConfirmGroupNodeRoundtrip("map", LogicalType::Map()); ConfirmGroupNodeRoundtrip("list", LogicalType::List()); + ConfirmGroupNodeRoundtrip("variant", LogicalType::Variant()); +} + +TEST(TestLogicalTypeSerialization, VariantSpecificationVersion) { + // Confirm that Variant logical type sets specification_version to expected value in + // thrift serialization + constexpr int8_t spec_version = 2; + auto metadata = PrimitiveNode::Make("metadata", Repetition::REQUIRED, Type::BYTE_ARRAY); + auto value = PrimitiveNode::Make("value", Repetition::REQUIRED, Type::BYTE_ARRAY); + NodePtr variant_node = + GroupNode::Make("variant", Repetition::REQUIRED, {metadata, value}, + LogicalType::Variant(spec_version)); + + // Verify variant logical type + auto logical_type = variant_node->logical_type(); + ASSERT_TRUE(logical_type->is_variant()); + const auto& variant_type = checked_cast(*logical_type); + ASSERT_EQ(variant_type.spec_version(), spec_version); + + // Verify thrift serialization + std::vector elements; + ToParquet(reinterpret_cast(variant_node.get()), &elements); + + // Verify that logicalType is set and is VARIANT + ASSERT_EQ(elements[0].name, "variant"); + ASSERT_TRUE(elements[0].__isset.logicalType); + ASSERT_TRUE(elements[0].logicalType.__isset.VARIANT); + + // Verify that specification_version is set properly + ASSERT_TRUE(elements[0].logicalType.VARIANT.__isset.specification_version); + ASSERT_EQ(elements[0].logicalType.VARIANT.specification_version, spec_version); } } // namespace schema diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index 9f5003b79e5..f545f512201 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -591,7 +591,12 @@ std::shared_ptr LogicalType::FromThrift( return GeographyLogicalType::Make(std::move(crs), algorithm); } else if (type.__isset.VARIANT) { - return VariantLogicalType::Make(); + int8_t spec_version = kVariantSpecVersion; + if (type.VARIANT.__isset.specification_version) { + spec_version = 
type.VARIANT.specification_version; + } + + return VariantLogicalType::Make(spec_version); } else { // Sentinel type for one we do not recognize return UndefinedLogicalType::Make(); @@ -659,8 +664,8 @@ std::shared_ptr LogicalType::Geography( return GeographyLogicalType::Make(std::move(crs), algorithm); } -std::shared_ptr LogicalType::Variant() { - return VariantLogicalType::Make(); +std::shared_ptr LogicalType::Variant(int8_t spec_version) { + return VariantLogicalType::Make(spec_version); } std::shared_ptr LogicalType::None() { return NoLogicalType::Make(); } @@ -1958,16 +1963,53 @@ class LogicalType::Impl::Variant final : public LogicalType::Impl::Incompatible, public: friend class VariantLogicalType; - OVERRIDE_TOSTRING(Variant) - OVERRIDE_TOTHRIFT(VariantType, VARIANT) + std::string ToString() const override; + std::string ToJSON() const override; + format::LogicalType ToThrift() const override; + + int8_t spec_version() const { return spec_version_; } private: - Variant() + explicit Variant(const int8_t spec_version) : LogicalType::Impl(LogicalType::Type::VARIANT, SortOrder::UNKNOWN), - LogicalType::Impl::Inapplicable() {} + LogicalType::Impl::Inapplicable() { + this->spec_version_ = spec_version; + } + + int8_t spec_version_; }; -GENERATE_MAKE(Variant) +int8_t VariantLogicalType::spec_version() const { + return (dynamic_cast(*impl_)).spec_version(); +} + +std::string LogicalType::Impl::Variant::ToString() const { + std::stringstream type; + type << "Variant(" << static_cast(spec_version_) << ")"; + return type.str(); +} + +std::string LogicalType::Impl::Variant::ToJSON() const { + std::stringstream json; + json << R"({"Type": "Variant", "SpecVersion": )" << static_cast(spec_version_) + << "}"; + + return json.str(); +} + +format::LogicalType LogicalType::Impl::Variant::ToThrift() const { + format::LogicalType type; + format::VariantType variant_type; + variant_type.__set_specification_version(spec_version_); + type.__set_VARIANT(variant_type); + return type; 
+} + +std::shared_ptr VariantLogicalType::Make(const int8_t spec_version) { + auto logical_type = std::shared_ptr(new VariantLogicalType()); + logical_type->impl_.reset(new LogicalType::Impl::Variant(spec_version)); + return logical_type; +} class LogicalType::Impl::No final : public LogicalType::Impl::SimpleCompatible, public LogicalType::Impl::UniversalApplicable { diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h index c2040e555fd..7e8a18fc94d 100644 --- a/cpp/src/parquet/types.h +++ b/cpp/src/parquet/types.h @@ -178,6 +178,9 @@ class PARQUET_EXPORT LogicalType { KARNEY = 5 }; + /// \brief The latest supported Variant specification version by this library + static constexpr int8_t kVariantSpecVersion = 1; + /// \brief If possible, return a logical type equivalent to the given legacy /// converted type (and decimal metadata if applicable). static std::shared_ptr FromConvertedType( @@ -224,7 +227,8 @@ class PARQUET_EXPORT LogicalType { static std::shared_ptr BSON(); static std::shared_ptr UUID(); static std::shared_ptr Float16(); - static std::shared_ptr Variant(); + static std::shared_ptr Variant( + int8_t specVersion = kVariantSpecVersion); static std::shared_ptr Geometry(std::string crs = ""); @@ -495,7 +499,10 @@ class PARQUET_EXPORT GeographyLogicalType : public LogicalType { /// \brief Allowed for group nodes only. 
class PARQUET_EXPORT VariantLogicalType : public LogicalType { public: - static std::shared_ptr Make(); + static std::shared_ptr Make( + int8_t specVersion = kVariantSpecVersion); + + int8_t spec_version() const; private: VariantLogicalType() = default; From c5055366fb5d6ae6a7c51ae50135ebf3203b48af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 20 Oct 2025 16:48:31 +0200 Subject: [PATCH 16/18] MINOR: [Release] Update CHANGELOG.md for 22.0.0 --- CHANGELOG.md | 223 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 223 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6101f5d3cac..3fb888dee23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,227 @@ +# Apache Arrow 22.0.0 (2025-10-20) + +## Bug Fixes + +* [GH-26727](https://github.com/apache/arrow/issues/26727) - [C++][Flight] Use ipc::RecordBatchWriter with custom IpcPayloadWriter for TransportMessageWriter (DoExchange) (#47410) +* [GH-31603](https://github.com/apache/arrow/issues/31603) - [C++] Wrap Parquet encryption keys in SecureString (#46017) +* [GH-40911](https://github.com/apache/arrow/issues/40911) - [C++][Compute] Fix the decimal division kernel dispatching (#47445) +* [GH-41011](https://github.com/apache/arrow/issues/41011) - [C++][Compute] Fix the issue that comparison function could not handle decimal arguments with different scales (#47459) +* [GH-41110](https://github.com/apache/arrow/issues/41110) - [C#] Handle empty stream in ArrowStreamReaderImplementation (#47098) +* [GH-41336](https://github.com/apache/arrow/issues/41336) - [C++][Compute] Fix case_when kernel dispatch for decimals with different precisions and scales (#47479) +* [GH-42971](https://github.com/apache/arrow/issues/42971) - [C++] Parquet stream writer: Allow writing BYTE_ARRAY with converted type NONE (#44739) +* [GH-43355](https://github.com/apache/arrow/issues/43355) - [C++] Don't require `__once_proxy` in `symbols.map` (#47354) +* 
[GH-46629](https://github.com/apache/arrow/issues/46629) - [Python] Add options to DatasetFactory.inspect (#46961) +* [GH-46690](https://github.com/apache/arrow/issues/46690) - [GLib][CI] Use Meson 1.8.4 or later (#47425) +* [GH-46739](https://github.com/apache/arrow/issues/46739) - [C++] Fix Float16 signed zero/NaN equality comparisons (#46973) +* [GH-46897](https://github.com/apache/arrow/issues/46897) - [Docs][C++][Python] Fix asof join documentation (#46898) +* [GH-46928](https://github.com/apache/arrow/issues/46928) - [C++] Retry on EINTR while opening file in FileOpenReadable (#47629) +* [GH-46942](https://github.com/apache/arrow/issues/46942) - [Docs] Replace the directive versionadded with note (#46997) +* [GH-46946](https://github.com/apache/arrow/issues/46946) - [Python] PyArrow fails compiling without CSV enabled +* [GH-47009](https://github.com/apache/arrow/issues/47009) - [C#] ExportedAllocationOwner should use 64-bit integer to track total allocated memory. (#47011) +* [GH-47016](https://github.com/apache/arrow/issues/47016) - [C++][FlightSQL] Fix negative timestamps to date types (#47017) +* [GH-47027](https://github.com/apache/arrow/issues/47027) - [C++][Parquet] Fix repeated column pages not being written when reaching page size limit (#47032) +* [GH-47029](https://github.com/apache/arrow/issues/47029) - [Archery][Integration] Fix generation of run-end-encoded data (#47653) +* [GH-47039](https://github.com/apache/arrow/issues/47039) - [C++] Bump RapidJSON dependency in Meson configuration (#47041) +* [GH-47051](https://github.com/apache/arrow/issues/47051) - [Python][Release] verify-rc-source-windows Python tests are failing due to MSVC compiler bug +* [GH-47052](https://github.com/apache/arrow/issues/47052) - [CI][C++] Use Alpine Linux 3.22 instead of 3.18 (#47148) +* [GH-47096](https://github.com/apache/arrow/issues/47096) - [CI][R] Drop support for R 4.0 (#47285) +* [GH-47101](https://github.com/apache/arrow/issues/47101) - [Statistics][C++] 
Implement Statistics specification attribute ARROW:distinct_count:approximate (#47183) +* [GH-47124](https://github.com/apache/arrow/issues/47124) - [C++][Dataset] Fix DatasetWriter deadlock on concurrent WriteRecordBatch (#47129) +* [GH-47128](https://github.com/apache/arrow/issues/47128) - [Python] Numba-CUDA interop with NVIDIA bindings (#47150) +* [GH-47130](https://github.com/apache/arrow/issues/47130) - [Packaging][deb] Fix upgrade from 20.0.0-1 (#47343) +* [GH-47131](https://github.com/apache/arrow/issues/47131) - [C#] Fix day off by 1 in Date64Array (#47132) +* [GH-47143](https://github.com/apache/arrow/issues/47143) - [Dev] Ignore `apache-arrow.tar.gz` (#47145) +* [GH-47162](https://github.com/apache/arrow/issues/47162) - [Dev][Release][GLib] Fix indent in generate-version-header.py (#47163) +* [GH-47165](https://github.com/apache/arrow/issues/47165) - [Python] Update s3 test with new non-existent bucket (#47166) +* [GH-47175](https://github.com/apache/arrow/issues/47175) - [C++] Require xsimd 13.0.0 or later (#47221) +* [GH-47179](https://github.com/apache/arrow/issues/47179) - [Python] Revert FileSystem.from_uri to be a staticmethod again (#47178) +* [GH-47203](https://github.com/apache/arrow/issues/47203) - [C++] Restore CMAKE_DEBUG_POSTFIX in building bundled Apache Thrift (#47209) +* [GH-47213](https://github.com/apache/arrow/issues/47213) - [R] Require CMake 3.26 or later (#47217) +* [GH-47229](https://github.com/apache/arrow/issues/47229) - [C++][Arm] Force mimalloc to generate armv8.0 binary (#47766) +* [GH-47234](https://github.com/apache/arrow/issues/47234) - [C++][Python] Add test for fill_null regression on Windows (#47249) +* [GH-47241](https://github.com/apache/arrow/issues/47241) - [C++][Parquet] Fix VariantExtensionType conversion (#47242) +* [GH-47243](https://github.com/apache/arrow/issues/47243) - [C++] Initialize arrow::compute in execution_plan_documentation_examples (#47227) +* [GH-47256](https://github.com/apache/arrow/issues/47256) 
- [Python] Do not use cffi in free-threaded 3.13 builds (#47313) +* [GH-47257](https://github.com/apache/arrow/issues/47257) - [R] Fix truncation of time variables to work with numeric subseconds time with hms bindings (#47278) +* [GH-47265](https://github.com/apache/arrow/issues/47265) - [Ruby] Fix wrong `Time` object detection (#47267) +* [GH-47268](https://github.com/apache/arrow/issues/47268) - [C++][Compute] Fix discarded bad status for call binding (#47284) +* [GH-47277](https://github.com/apache/arrow/issues/47277) - [C++] r-binary-packages nightly failures due to incompatibility with old compiler (#47299) +* [GH-47283](https://github.com/apache/arrow/issues/47283) - [C++] Fix flight visibility issue in Meson configuration (#47298) +* [GH-47287](https://github.com/apache/arrow/issues/47287) - [C++][Compute] Add constraint for kernel signature matching and use it for binary decimal arithmetic kernels (#47297) +* [GH-47301](https://github.com/apache/arrow/issues/47301) - [Python] Fix FileFragment.open() seg fault behavior for file-like objects (#47302) +* [GH-47303](https://github.com/apache/arrow/issues/47303) - [C++] Don't install arrow-compute.pc twice (#47304) +* [GH-47323](https://github.com/apache/arrow/issues/47323) - [R][CI] test-r-rhub-debian-gcc-release-custom-ccache nightly job fails due to update in Debian (#47611) +* [GH-47332](https://github.com/apache/arrow/issues/47332) - [C++][Compute] Fix the issue that the arguments of function call become invalid before wrapping results (#47333) +* [GH-47356](https://github.com/apache/arrow/issues/47356) - [R] NEWS file states version 20.0.0.1 but release package number on CRAN is 20.0.0.2 (#47421) +* [GH-47367](https://github.com/apache/arrow/issues/47367) - [Packaging][Python] Patch vcpkg to show logs and install newer Windows SDK for vs_buildtools (#47484) +* [GH-47373](https://github.com/apache/arrow/issues/47373) - [C++] Raise for invalid decimal precision input from the C Data Interface (#47414) +* 
[GH-47380](https://github.com/apache/arrow/issues/47380) - [Python] Apply maps_as_pydicts to Nested MapScalar Values (#47454) +* [GH-47399](https://github.com/apache/arrow/issues/47399) - [C++] Update bundled Apache ORC to 2.2.0 with Protobuf patch (#47408) +* [GH-47431](https://github.com/apache/arrow/issues/47431) - [C++] Improve Meson configuration for WrapDB distribution (#47541) +* [GH-47434](https://github.com/apache/arrow/issues/47434) - [C++] Fix issue preventing running of tests on Windows (#47455) +* [GH-47440](https://github.com/apache/arrow/issues/47440) - [C++] Accept gflags::gflags as system gflags CMake target (#47468) +* [GH-47446](https://github.com/apache/arrow/issues/47446) - [C++] Update Meson configuration with compute swizzle change (#47448) +* [GH-47451](https://github.com/apache/arrow/issues/47451) - [Python][CI] Install tzdata-legacy in newer python-wheel-manylinux-test images (#47452) +* [GH-47453](https://github.com/apache/arrow/issues/47453) - [Packaging][CI] Token expired to upload nightly wheels +* [GH-47485](https://github.com/apache/arrow/issues/47485) - [C++][CI] Work around Valgrind failure on Azure tests (#47496) +* [GH-47486](https://github.com/apache/arrow/issues/47486) - [Dev][R] Define default R_UPDATE_CLANG (#47487) +* [GH-47491](https://github.com/apache/arrow/issues/47491) - [C++] Don't set include directories to found targets (#47492) +* [GH-47506](https://github.com/apache/arrow/issues/47506) - [CI][Packaging] Fix Amazon Linux 2023 packages verification (#47507) +* [GH-47534](https://github.com/apache/arrow/issues/47534) - [C++] Detect conda-installed packages in Meson CI (#47535) +* [GH-47537](https://github.com/apache/arrow/issues/47537) - [C++] Use pkgconfig name for benchmark in Meson (#47538) +* [GH-47539](https://github.com/apache/arrow/issues/47539) - [C++] Detect Snappy and bzip2 in Meson CI (#47540) +* [GH-47554](https://github.com/apache/arrow/issues/47554) - [C++] Fix Meson Parquet symbol visibility issues 
(#47556)
+* [GH-47560](https://github.com/apache/arrow/issues/47560) - [C++] Fix host handling for default HDFS URI (#47458)
+* [GH-47570](https://github.com/apache/arrow/issues/47570) - [CI] Don't notify nightly "CI: Extra" result from forks (#47571)
+* [GH-47590](https://github.com/apache/arrow/issues/47590) - [C++] Use W functions explicitly for Windows UNICODE compatibility (#47593)
+* [GH-47591](https://github.com/apache/arrow/issues/47591) - [C++] Fix passing zlib compression level (#47594)
+* [GH-47596](https://github.com/apache/arrow/issues/47596) - [C++][Parquet] Fix printing of large Decimal statistics (#47619)
+* [GH-47602](https://github.com/apache/arrow/issues/47602) - [Python] Make Schema hashable even when it has metadata (#47601)
+* [GH-47614](https://github.com/apache/arrow/issues/47614) - [CI] Upgrade vcpkg on our CI (#47627)
+* [GH-47620](https://github.com/apache/arrow/issues/47620) - [CI][C++] Use Ubuntu 24.04 for ASAN UBSAN job (#47623)
+* [GH-47625](https://github.com/apache/arrow/issues/47625) - [Python] Free-threaded musllinux and manylinux wheels started failing with cffi 2.0.0 (#47626)
+* [GH-47655](https://github.com/apache/arrow/issues/47655) - [C++][Parquet][CI] Fix failure to generate seed corpus (#47656)
+* [GH-47659](https://github.com/apache/arrow/issues/47659) - [C++] Fix Arrow Flight Testing's unresolved external symbol error (#47660)
+* [GH-47673](https://github.com/apache/arrow/issues/47673) - [CI][Integration] Fix Go build failure (#47674)
+* [GH-47682](https://github.com/apache/arrow/issues/47682) - [R] `install_pyarrow(nightly = TRUE)` installs old pyarrow (#47699)
+* [GH-47695](https://github.com/apache/arrow/issues/47695) - [CI][Release] Link arrow-io hdfs_test to c++fs on compilers where std::filesystem is not default present (#47701)
+* [GH-47740](https://github.com/apache/arrow/issues/47740) - [C++][Parquet] Fix undefined behavior when reading invalid Parquet data (#47741)
+* 
[GH-47742](https://github.com/apache/arrow/issues/47742) - [C++][CI] Silence Valgrind leak on protobuf initialization (#47743) +* [GH-47748](https://github.com/apache/arrow/issues/47748) - [C++][Dataset] Fix link error on macOS (#47749) +* [GH-47795](https://github.com/apache/arrow/issues/47795) - [Archery] Add support for custom Docker registry (#47796) +* [GH-47803](https://github.com/apache/arrow/issues/47803) - [C++][Parquet] Fix read out of bounds on invalid RLE data (#47804) +* [GH-47809](https://github.com/apache/arrow/issues/47809) - [CI][Release] Fix Windows verification job trying to install patch from conda (#47810) +* [GH-47819](https://github.com/apache/arrow/issues/47819) - [CI][Packaging][Release] Avoid triggering Linux packages on release branch push (#47826) +* [GH-47838](https://github.com/apache/arrow/issues/47838) - [C++][Parquet] Set Variant specification version to 1 to align with the variant spec (#47835) + + +## New Features and Improvements + +* [GH-20125](https://github.com/apache/arrow/issues/20125) - [Docs][Python] Restructure developers/python.rst (#47334) +* [GH-30036](https://github.com/apache/arrow/issues/30036) - [C++] Timezone-aware kernels should handle offset strings (e.g. "+04:30") (#12865) +* [GH-38211](https://github.com/apache/arrow/issues/38211) - [MATLAB] Add support for creating an empty `arrow.tabular.RecordBatch` by calling `arrow.recordBatch` with no input arguments (#47060) +* [GH-38213](https://github.com/apache/arrow/issues/38213) - [MATLAB] Create a superclass for tabular type MATLAB tests (i.e. 
for `Table` and `RecordBatch`) (#47107) +* [GH-38422](https://github.com/apache/arrow/issues/38422) - [MATLAB] Add `NumNulls` property to `arrow.array.Array` class (#47116) +* [GH-38532](https://github.com/apache/arrow/issues/38532) - [MATLAB] Add a `validate` method to all `arrow.array.Array` classes (#47059) +* [GH-38572](https://github.com/apache/arrow/issues/38572) - [Docs][MATLAB] Update `arrow/matlab/README.md` with the latest change. (#47109) +* [GH-39875](https://github.com/apache/arrow/issues/39875) - [C++] Why arrow decimal divide precision and scale is not correct? +* [GH-41108](https://github.com/apache/arrow/issues/41108) - [Docs] Remove Sphinx pin (#47326) +* [GH-41239](https://github.com/apache/arrow/issues/41239) - [C++] Support to write csv header without quotes (#47524) +* [GH-41476](https://github.com/apache/arrow/issues/41476) - [Python][C++] Impossible to specify `is_adjusted_to_utc` for `Time` type when writing to Parquet (#47316) +* [GH-42137](https://github.com/apache/arrow/issues/42137) - [CI][Python] Add Python Windows GitHub Action and remove AppVeyor (#47567) +* [GH-43662](https://github.com/apache/arrow/issues/43662) - [R] Add binding to stringr::str_replace_na() (#47521) +* [GH-43694](https://github.com/apache/arrow/issues/43694) - [C++] Add `Executor *` Option to `arrow::dataset::ScanOptions` (#43698) +* [GH-43904](https://github.com/apache/arrow/issues/43904) - [CI][Python] Stop uploading nightly wheels to gemfury (#47470) +* [GH-44345](https://github.com/apache/arrow/issues/44345) - [C++][Parquet] Add Decimal32/64 support to Parquet (#47427) +* [GH-44800](https://github.com/apache/arrow/issues/44800) - [C#] Implement Flight SQL Client (#44783) +* [GH-45055](https://github.com/apache/arrow/issues/45055) - [C++][Flight] Update Flight Server RecordBatchStreamImpl to reuse ipc::RecordBatchWriter with custom IpcPayloadWriter instead of manually generating FlightPayload (#47115) +* [GH-45056](https://github.com/apache/arrow/issues/45056) 
- [C++][Flight] Fully support dictionary replacement in Flight
+* [GH-45382](https://github.com/apache/arrow/issues/45382) - [Python] Add support for pandas DataFrame.attrs (#47147)
+* [GH-45639](https://github.com/apache/arrow/issues/45639) - [C++][Statistics] Add support for ARROW:average_byte_width:{exact,approximate} (#46385)
+* [GH-45860](https://github.com/apache/arrow/issues/45860) - [C++] Respect CPU affinity in cpu_count and ThreadPool default capacity (#47152)
+* [GH-45921](https://github.com/apache/arrow/issues/45921) - [Release][R] Use GitHub Release not apache.jfrog.io (#45964)
+* [GH-46137](https://github.com/apache/arrow/issues/46137) - [C++] Replace grpc-cpp conda package with libgrpc (#47606)
+* [GH-46272](https://github.com/apache/arrow/issues/46272) - [C++] Build Arrow libraries with `-Wmissing-definitions` on gcc (#47042)
+* [GH-46374](https://github.com/apache/arrow/issues/46374) - [Python][Doc] Improve docs to specify that source argument on parquet.read_table can also be a list of strings (#47142)
+* [GH-46410](https://github.com/apache/arrow/issues/46410) - [C++] Add parquet options to Meson configuration (#46647)
+* [GH-46669](https://github.com/apache/arrow/issues/46669) - [CI][Archery] Automate Zulip and email notifications for Extra CI (#47546)
+* [GH-46728](https://github.com/apache/arrow/issues/46728) - [Python] Skip test_gdb.py tests if PyArrow wasn't built debug (#46755)
+* [GH-46835](https://github.com/apache/arrow/issues/46835) - [C++] Add more configuration options to arrow::EqualOptions (#47204)
+* [GH-46860](https://github.com/apache/arrow/issues/46860) - [C++] Making HalfFloatBuilder accept Float16 as well as uint16_t (#46981)
+* [GH-46905](https://github.com/apache/arrow/issues/46905) - [C++][Parquet] Expose Statistics.is_{min/max}_value_exact and default set to true if min/max are set (#46992)
+* [GH-46908](https://github.com/apache/arrow/issues/46908) - [Docs][Format] Add variant extension type docs (#47456)
+* 
[GH-46937](https://github.com/apache/arrow/issues/46937) - [C++] Enable arrow::EqualOptions for arrow::Table (#47164) +* [GH-46938](https://github.com/apache/arrow/issues/46938) - [C++] Enhance arrow::ChunkedArray::Equals to support floating-point comparison when values share the same memory (#47044) +* [GH-46939](https://github.com/apache/arrow/issues/46939) - [C++] Add support for shared memory comparison in arrow::RecordBatch (#47149) +* [GH-46962](https://github.com/apache/arrow/issues/46962) - [C++][Parquet] Generic xsimd function and dynamic dispatch for Byte Stream Split (#46963) +* [GH-46971](https://github.com/apache/arrow/issues/46971) - [C++][Parquet] Use temporary buffers when decrypting Parquet data pages (#46972) +* [GH-46982](https://github.com/apache/arrow/issues/46982) - [C++] Remove Boost dependency from hdfs_test (#47200) +* [GH-47005](https://github.com/apache/arrow/issues/47005) - [C++] Disable exporting CMake packages (#47006) +* [GH-47012](https://github.com/apache/arrow/issues/47012) - [C++][Parquet] Reserve values correctly when reading BYTE_ARRAY and FLBA (#47013) +* [GH-47040](https://github.com/apache/arrow/issues/47040) - [C++] Refine reset of Span to be reusable (#47004) +* [GH-47045](https://github.com/apache/arrow/issues/47045) - [CI][C++] Use Fedora 42 instead of 39 (#47046) +* [GH-47047](https://github.com/apache/arrow/issues/47047) - [CI][C++] Use Google Cloud Storage Testbench v0.55.0 (#47048) +* [GH-47058](https://github.com/apache/arrow/issues/47058) - [Release] Update Release Management Guide to reflect status in preparation for Arrow 22 (#47474) +* [GH-47075](https://github.com/apache/arrow/issues/47075) - [Release][Dev] Use GH_TOKEN as GitHub token environment variable (#47181) +* [GH-47084](https://github.com/apache/arrow/issues/47084) - [Release] Stop using https://dist.apache.org/repos/dist/dev/arrow/KEYS (#47182) +* [GH-47088](https://github.com/apache/arrow/issues/47088) - [CI][Dev] Fix shellcheck errors in the 
ci/scripts/integration_arrow.sh (#47089) +* [GH-47102](https://github.com/apache/arrow/issues/47102) - [Statistics][C++] Implement Statistics specification attribute ARROW:max_byte_width:{exact,approximate} Component: C++ (#47463) +* [GH-47106](https://github.com/apache/arrow/issues/47106) - [R] Update R package to use R 4.1+ native forward pipe syntax (#47622) +* [GH-47112](https://github.com/apache/arrow/issues/47112) - [Parquet][C++] Rle BitPacked parser (#47294) +* [GH-47120](https://github.com/apache/arrow/issues/47120) - [R] Update NEWS for 21.0.0 (#47121) +* [GH-47123](https://github.com/apache/arrow/issues/47123) - [Python] Add Enums to PyArrow Types (#47139) +* [GH-47125](https://github.com/apache/arrow/issues/47125) - [CI][Dev] Fix shellcheck errors in the ci/scripts/integration_hdfs.sh (#47126) +* [GH-47137](https://github.com/apache/arrow/issues/47137) - [Python][dependency-groups] ` (#47176) +* [GH-47153](https://github.com/apache/arrow/issues/47153) - [Docs][C++] Update cmake target table in build_system.rst with newly added targets (#47154) +* [GH-47157](https://github.com/apache/arrow/issues/47157) - [Docs] Improve presentation of Other available packages section in build_system.rst (#47411) +* [GH-47172](https://github.com/apache/arrow/issues/47172) - [Python] Add a utility function to create Arrow table instead of pandas df (#47199) +* [GH-47184](https://github.com/apache/arrow/issues/47184) - [Parquet][C++] Avoid multiplication overflow in FixedSizeBinaryBuilder::Reserve (#47185) +* [GH-47191](https://github.com/apache/arrow/issues/47191) - [R] Turn GCS back on by default on MacOS source builds (#47192) +* [GH-47193](https://github.com/apache/arrow/issues/47193) - [R] Update R Makefile to exclude flight odbc from cpp sync (#47194) +* [GH-47205](https://github.com/apache/arrow/issues/47205) - [C++] Suppress GNU variadic macro warnings (#47286) +* [GH-47208](https://github.com/apache/arrow/issues/47208) - [C++][CI] Add a CI job for C++23 (#47261) 
+* [GH-47208](https://github.com/apache/arrow/issues/47208) - [C++] Update bundled s2n-tls to 1.5.23 (#47220) +* [GH-47211](https://github.com/apache/arrow/issues/47211) - [CI][R] Disable non-system memory allocators when on linux-devel (#47212) +* [GH-47218](https://github.com/apache/arrow/issues/47218) - [C++] Update bundled s2n-tls +* [GH-47222](https://github.com/apache/arrow/issues/47222) - [CI][C++] Add a CI job that uses the same build options for JNI on macOS (#47305) +* [GH-47223](https://github.com/apache/arrow/issues/47223) - [Release] Use "upstream" as apache/arrow{,-site} remote name (#47224) +* [GH-47225](https://github.com/apache/arrow/issues/47225) - [C++] Remove Skyhook (#47262) +* [GH-47232](https://github.com/apache/arrow/issues/47232) - [Ruby] Suppress warnings in test with Ruby 3.5 (#47233) +* [GH-47244](https://github.com/apache/arrow/issues/47244) - [CI][Dev] Fix shellcheck errors in the ci/scripts/msys2_setup.sh (#47245) +* [GH-47258](https://github.com/apache/arrow/issues/47258) - [Release] Set `date:` for apache/arrow-site's `_release/${VERSION}.md` (#47260) +* [GH-47263](https://github.com/apache/arrow/issues/47263) - [MATLAB] Add `NumNulls` property to `arrow.array.ChunkedArray` class (#47264) +* [GH-47289](https://github.com/apache/arrow/issues/47289) - [CI][Dev] Fix shellcheck errors in the ci/scripts/python_build_emscripten.sh (#47290) +* [GH-47291](https://github.com/apache/arrow/issues/47291) - [C++] Update bundled aws-c-common to 0.12.4 (#47292) +* [GH-47306](https://github.com/apache/arrow/issues/47306) - [CI][Dev] Fix shellcheck errors in the ci/scripts/python_build.sh (#47307) +* [GH-47312](https://github.com/apache/arrow/issues/47312) - [Packaging] Add support for Debian forky (#47342) +* [GH-47317](https://github.com/apache/arrow/issues/47317) - [C++][C++23][Gandiva] Use pointer for Cache test (#47318) +* [GH-47319](https://github.com/apache/arrow/issues/47319) - [CI] Fix actions/checkout hash version comments (#47320) +* 
[GH-47321](https://github.com/apache/arrow/issues/47321) - [CI][Dev] Fix shellcheck errors in the ci/scripts/python_sdist_test.sh (#47322) +* [GH-47338](https://github.com/apache/arrow/issues/47338) - [C++][Python] Remove deprecated string-based Parquet encryption methods (#47339) +* [GH-47349](https://github.com/apache/arrow/issues/47349) - [C++] Include request ID in AWS S3 Error (#47351) +* [GH-47358](https://github.com/apache/arrow/issues/47358) - [Python] IPC and Flight options representation (#47461) +* [GH-47370](https://github.com/apache/arrow/issues/47370) - [Python] Require Cython 3.1 (#47396) +* [GH-47375](https://github.com/apache/arrow/issues/47375) - [C++][Compute] Move scatter function into compute core (#47378) +* [GH-47384](https://github.com/apache/arrow/issues/47384) - [C++][Acero] Isolate BackpressureHandler from ExecNode (#47386) +* [GH-47395](https://github.com/apache/arrow/issues/47395) - [R] Update fedora-clang to install latest clang version to match CRAN setup (#47206) +* [GH-47401](https://github.com/apache/arrow/issues/47401) - [C++] Remove needless Snappy patch (#47407) +* [GH-47404](https://github.com/apache/arrow/issues/47404) - [Ruby] Remove needless `require "extpp/setup"` (#47405) +* [GH-47412](https://github.com/apache/arrow/issues/47412) - [C++] Use inlineshidden visibility in Meson configuration (#47413) +* [GH-47422](https://github.com/apache/arrow/issues/47422) - [Python][C++][Flight] Expose ipc::ReadStats in Flight MetadataRecordBatchReader (#47432) +* [GH-47438](https://github.com/apache/arrow/issues/47438) - [Python][Packaging] Set up wheel building for Python 3.14 (#47616) +* [GH-47443](https://github.com/apache/arrow/issues/47443) - [Python][Packaging] Drop Python 3.9 support (#47478) +* [GH-47449](https://github.com/apache/arrow/issues/47449) - [C++][Parquet] Do not drop all Statistics if SortOrder is UNKNOWN (#47466) +* [GH-47469](https://github.com/apache/arrow/issues/47469) - [C++][Gandiva] Add support for LLVM 21.1.0 
(#47473) +* [GH-47483](https://github.com/apache/arrow/issues/47483) - [C++] Bump vendored xxhash to 0.8.3 (#47476) +* [GH-47500](https://github.com/apache/arrow/issues/47500) - [C++] Add QualifierAlignment to clang-format options (#47501) +* [GH-47505](https://github.com/apache/arrow/issues/47505) - [CI][C#][Integration] Use apache/arrow-dotnet (#47508) +* [GH-47509](https://github.com/apache/arrow/issues/47509) - [CI][Packaging][Linux] Enable Docker build cache (#47510) +* [GH-47512](https://github.com/apache/arrow/issues/47512) - [C++] Bump meson-fmt in pre-commit to 1.9.0 (#47513) +* [GH-47514](https://github.com/apache/arrow/issues/47514) - [C++][Parquet] Add unpack tests and benchmarks (#47515) +* [GH-47516](https://github.com/apache/arrow/issues/47516) - [C++][FlightRPC] Initial ODBC driver framework (#47517) +* [GH-47518](https://github.com/apache/arrow/issues/47518) - [C++][FlightRPC] Replace `spdlogs` with Arrow's Internal Logging (#47645) +* [GH-47523](https://github.com/apache/arrow/issues/47523) - [C#] Remove csharp/ (#47547) +* [GH-47543](https://github.com/apache/arrow/issues/47543) - [C++] Search for system install of Azure libraries with Meson (#47544) +* [GH-47552](https://github.com/apache/arrow/issues/47552) - [C++] Fix creating wrong object by `FixedShapeTensorType::MakeArray()` (#47533) +* [GH-47575](https://github.com/apache/arrow/issues/47575) - [Python] add quoting_header option to pyarrow WriterOptions (#47610) +* [GH-47582](https://github.com/apache/arrow/issues/47582) - [CI][Packaging] Move linux-packaging tasks to apache/arrow repository (#47600) +* [GH-47584](https://github.com/apache/arrow/issues/47584) - [C++][CI] Remove "large memory" mark from TestListArray::TestOverflowCheck (#47585) +* [GH-47588](https://github.com/apache/arrow/issues/47588) - [C++] Bump mimalloc version to 3.1.5 (#47589) +* [GH-47597](https://github.com/apache/arrow/issues/47597) - [C++][Parquet] Fuzz more data types (#47621) +* 
[GH-47632](https://github.com/apache/arrow/issues/47632) - [CI][C++] Add a CI job for JNI on Linux (#47746) +* [GH-47633](https://github.com/apache/arrow/issues/47633) - [Dev][Integration] Write all files with `--write_generated_json` (#47634) +* [GH-47639](https://github.com/apache/arrow/issues/47639) - [Benchmarking] Clean up conbench config (#47638) +* [GH-47646](https://github.com/apache/arrow/issues/47646) - [C++][FlightRPC] Follow Naming Convention (#47658) +* [GH-47648](https://github.com/apache/arrow/issues/47648) - [Archery][Integration] More granularity in JSON test cases (#47649) +* [GH-47650](https://github.com/apache/arrow/issues/47650) - [Archery][Integration] Add option to generate gold files (#47651) +* [GH-47679](https://github.com/apache/arrow/issues/47679) - [C++] Register arrow compute calls in ODBC (#47680) +* [GH-47704](https://github.com/apache/arrow/issues/47704) - [R] Update paths in nightly libarrow upload job (#47727) +* [GH-47705](https://github.com/apache/arrow/issues/47705) - [R][CI] Migrate rhub debian-gcc-release to equivalent supported image (#47730) +* [GH-47738](https://github.com/apache/arrow/issues/47738) - [R] Update NEWS.md for 22.0.0 (#47739) + + + # Apache Arrow 6.0.1 (2021-11-18) ## Bug Fixes From 81563ad2c53f886a4901191e0f2d05238398e177 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 20 Oct 2025 16:48:35 +0200 Subject: [PATCH 17/18] MINOR: [Release] Update .deb/.rpm changelogs for 22.0.0 --- .../linux-packages/apache-arrow-apt-source/debian/changelog | 6 ++++++ .../apache-arrow-release/yum/apache-arrow-release.spec.in | 3 +++ dev/tasks/linux-packages/apache-arrow/debian/changelog | 6 ++++++ dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in | 3 +++ 4 files changed, 18 insertions(+) diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog index 86cacd308e5..8d0ec6a6a83 100644 --- 
a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow-apt-source (22.0.0-1) unstable; urgency=low + + * New upstream release. + + -- Raúl Cumplido Mon, 20 Oct 2025 14:48:34 -0000 + apache-arrow-apt-source (21.0.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in index 8e75611889c..2053b950622 100644 --- a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in +++ b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in @@ -102,6 +102,9 @@ else fi %changelog +* Mon Oct 20 2025 Raúl Cumplido - 22.0.0-1 +- New upstream release. + * Fri Jul 11 2025 Bryce Mecum - 21.0.0-1 - New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/debian/changelog b/dev/tasks/linux-packages/apache-arrow/debian/changelog index 01b71214f07..8394a2c05d1 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow (22.0.0-1) unstable; urgency=low + + * New upstream release. + + -- Raúl Cumplido Mon, 20 Oct 2025 14:48:34 -0000 + apache-arrow (21.0.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index 6afa6d25421..32bd076e821 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -931,6 +931,9 @@ Documentation for Apache Parquet GLib. %endif %changelog +* Mon Oct 20 2025 Raúl Cumplido - 22.0.0-1 +- New upstream release. + * Fri Jul 11 2025 Bryce Mecum - 21.0.0-1 - New upstream release. 
From 5aeb5f217f1ba39c428c9b481f393bd37097f69e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 20 Oct 2025 16:48:39 +0200 Subject: [PATCH 18/18] MINOR: [Release] Update versions for 22.0.0 --- c_glib/meson.build | 2 +- c_glib/vcpkg.json | 2 +- ci/scripts/PKGBUILD | 2 +- cpp/CMakeLists.txt | 2 +- cpp/meson.build | 2 +- cpp/vcpkg.json | 2 +- dev/tasks/homebrew-formulae/apache-arrow-glib.rb | 2 +- dev/tasks/homebrew-formulae/apache-arrow.rb | 2 +- docs/source/_static/versions.json | 9 +++++++-- matlab/CMakeLists.txt | 2 +- python/CMakeLists.txt | 2 +- python/pyproject.toml | 2 +- r/DESCRIPTION | 2 +- r/NEWS.md | 2 +- r/pkgdown/assets/versions.html | 5 +++-- r/pkgdown/assets/versions.json | 8 ++++++-- ruby/red-arrow-cuda/lib/arrow-cuda/version.rb | 2 +- ruby/red-arrow-dataset/lib/arrow-dataset/version.rb | 2 +- .../red-arrow-flight-sql/lib/arrow-flight-sql/version.rb | 2 +- ruby/red-arrow-flight/lib/arrow-flight/version.rb | 2 +- ruby/red-arrow/lib/arrow/version.rb | 2 +- ruby/red-gandiva/lib/gandiva/version.rb | 2 +- ruby/red-parquet/lib/parquet/version.rb | 2 +- 23 files changed, 36 insertions(+), 26 deletions(-) diff --git a/c_glib/meson.build b/c_glib/meson.build index b56157e66c3..f10a8042545 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -34,7 +34,7 @@ project( # * 22.04: 0.61.2 # * 24.04: 1.3.2 meson_version: '>=0.61.2', - version: '22.0.0-SNAPSHOT', + version: '22.0.0', ) version = meson.project_version() diff --git a/c_glib/vcpkg.json b/c_glib/vcpkg.json index 004f2e12ffe..150f54a1d41 100644 --- a/c_glib/vcpkg.json +++ b/c_glib/vcpkg.json @@ -1,6 +1,6 @@ { "name": "arrow-glib", - "version-string": "22.0.0-SNAPSHOT", + "version-string": "22.0.0", "$comment:dependencies": "We can enable gobject-introspection again once it's updated", "dependencies": [ "glib", diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD index bf902c1cb64..9b53ba99704 100644 --- a/ci/scripts/PKGBUILD +++ b/ci/scripts/PKGBUILD @@ -18,7 +18,7 @@ 
_realname=arrow pkgbase=mingw-w64-${_realname} pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" -pkgver=21.0.0.9000 +pkgver=22.0.0 pkgrel=8000 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" arch=("any") diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e805694f522..14cf1b91411 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -96,7 +96,7 @@ if(POLICY CMP0170) cmake_policy(SET CMP0170 NEW) endif() -set(ARROW_VERSION "22.0.0-SNAPSHOT") +set(ARROW_VERSION "22.0.0") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") diff --git a/cpp/meson.build b/cpp/meson.build index 81143ed1e28..194da0ccef2 100644 --- a/cpp/meson.build +++ b/cpp/meson.build @@ -19,7 +19,7 @@ project( 'arrow', 'cpp', 'c', - version: '22.0.0-SNAPSHOT', + version: '22.0.0', license: 'Apache-2.0', meson_version: '>=1.3.0', default_options: ['c_std=gnu11,c11', 'warning_level=2', 'cpp_std=c++17'], diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json index 68f20663b59..c1d28a2a559 100644 --- a/cpp/vcpkg.json +++ b/cpp/vcpkg.json @@ -1,6 +1,6 @@ { "name": "arrow", - "version-string": "22.0.0-SNAPSHOT", + "version-string": "22.0.0", "dependencies": [ "abseil", { diff --git a/dev/tasks/homebrew-formulae/apache-arrow-glib.rb b/dev/tasks/homebrew-formulae/apache-arrow-glib.rb index dd6f25283a5..1c87c353de8 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow-glib.rb +++ b/dev/tasks/homebrew-formulae/apache-arrow-glib.rb @@ -29,7 +29,7 @@ class ApacheArrowGlib < Formula desc "GLib bindings for Apache Arrow" homepage "https://arrow.apache.org/" - url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-22.0.0-SNAPSHOT/apache-arrow-22.0.0-SNAPSHOT.tar.gz" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-22.0.0/apache-arrow-22.0.0.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" license "Apache-2.0" head "https://github.com/apache/arrow.git", branch: "main" diff --git 
a/dev/tasks/homebrew-formulae/apache-arrow.rb b/dev/tasks/homebrew-formulae/apache-arrow.rb index 3cbd808357c..06949bd10f3 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow.rb +++ b/dev/tasks/homebrew-formulae/apache-arrow.rb @@ -29,7 +29,7 @@ class ApacheArrow < Formula desc "Columnar in-memory analytics layer designed to accelerate big data" homepage "https://arrow.apache.org/" - url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-22.0.0-SNAPSHOT/apache-arrow-22.0.0-SNAPSHOT.tar.gz" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-22.0.0/apache-arrow-22.0.0.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" license "Apache-2.0" head "https://github.com/apache/arrow.git", branch: "main" diff --git a/docs/source/_static/versions.json b/docs/source/_static/versions.json index 26d1834bb0f..6feaa86e1a7 100644 --- a/docs/source/_static/versions.json +++ b/docs/source/_static/versions.json @@ -1,15 +1,20 @@ [ { - "name": "22.0 (dev)", + "name": "23.0 (dev)", "version": "dev/", "url": "https://arrow.apache.org/docs/dev/" }, { - "name": "21.0 (stable)", + "name": "22.0 (stable)", "version": "", "url": "https://arrow.apache.org/docs/", "preferred": true }, + { + "name": "21.0", + "version": "21.0/", + "url": "https://arrow.apache.org/docs/21.0/" + }, { "name": "20.0", "version": "20.0/", diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt index ca5a323b15f..9479015b1e4 100644 --- a/matlab/CMakeLists.txt +++ b/matlab/CMakeLists.txt @@ -100,7 +100,7 @@ endfunction() set(CMAKE_CXX_STANDARD 17) -set(MLARROW_VERSION "22.0.0-SNAPSHOT") +set(MLARROW_VERSION "22.0.0") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" MLARROW_BASE_VERSION "${MLARROW_VERSION}") project(mlarrow VERSION "${MLARROW_BASE_VERSION}") diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index fc26ea71bde..0587720f409 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -28,7 +28,7 @@ project(pyarrow) # which in turn meant that 
Py_GIL_DISABLED was not set. set(CMAKE_NO_SYSTEM_FROM_IMPORTED ON) -set(PYARROW_VERSION "22.0.0-SNAPSHOT") +set(PYARROW_VERSION "22.0.0") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" PYARROW_BASE_VERSION "${PYARROW_VERSION}") # Generate SO version and full SO version diff --git a/python/pyproject.toml b/python/pyproject.toml index 240db14dc72..a1cab40173a 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -92,4 +92,4 @@ root = '..' version_file = 'pyarrow/_generated_version.py' version_scheme = 'guess-next-dev' git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' -fallback_version = '22.0.0a0' +fallback_version = '22.0.0' diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 7888f72ee9f..d12b8a4ba03 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -1,6 +1,6 @@ Package: arrow Title: Integration to 'Apache' 'Arrow' -Version: 21.0.0.9000 +Version: 22.0.0 Authors@R: c( person("Neal", "Richardson", email = "neal.p.richardson@gmail.com", role = c("aut")), person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")), diff --git a/r/NEWS.md b/r/NEWS.md index b5d894215ca..fb85b96655d 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -17,7 +17,7 @@ under the License. --> -# arrow 21.0.0.9000 +# arrow 22.0.0 ## New features diff --git a/r/pkgdown/assets/versions.html b/r/pkgdown/assets/versions.html index caac5839059..c90d4ae2164 100644 --- a/r/pkgdown/assets/versions.html +++ b/r/pkgdown/assets/versions.html @@ -1,7 +1,8 @@ -

21.0.0.9000 (dev)

-

21.0.0 (release)

+

22.0.0.9000 (dev)

+

22.0.0 (release)

+

21.0.0

20.0.0

19.0.1

18.1.0

diff --git a/r/pkgdown/assets/versions.json b/r/pkgdown/assets/versions.json index 927b7407c9b..0d783995062 100644 --- a/r/pkgdown/assets/versions.json +++ b/r/pkgdown/assets/versions.json @@ -1,12 +1,16 @@ [ { - "name": "21.0.0.9000 (dev)", + "name": "22.0.0.9000 (dev)", "version": "dev/" }, { - "name": "21.0.0 (release)", + "name": "22.0.0 (release)", "version": "" }, + { + "name": "21.0.0", + "version": "21.0/" + }, { "name": "20.0.0", "version": "20.0/" diff --git a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb index de9372bd49e..0004568f245 100644 --- a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb +++ b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowCUDA - VERSION = "22.0.0-SNAPSHOT" + VERSION = "22.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb index b31eb98b5f5..d27cfb7ffa9 100644 --- a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb +++ b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowDataset - VERSION = "22.0.0-SNAPSHOT" + VERSION = "22.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb b/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb index 751e7c16245..e319cdd4a2f 100644 --- a/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb +++ b/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb @@ -16,7 +16,7 @@ # under the License. 
module ArrowFlightSQL - VERSION = "22.0.0-SNAPSHOT" + VERSION = "22.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-flight/lib/arrow-flight/version.rb b/ruby/red-arrow-flight/lib/arrow-flight/version.rb index 89e783944d2..95a837d27f3 100644 --- a/ruby/red-arrow-flight/lib/arrow-flight/version.rb +++ b/ruby/red-arrow-flight/lib/arrow-flight/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowFlight - VERSION = "22.0.0-SNAPSHOT" + VERSION = "22.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow/lib/arrow/version.rb b/ruby/red-arrow/lib/arrow/version.rb index f19fcfdd09e..ee7606bd0bd 100644 --- a/ruby/red-arrow/lib/arrow/version.rb +++ b/ruby/red-arrow/lib/arrow/version.rb @@ -16,7 +16,7 @@ # under the License. module Arrow - VERSION = "22.0.0-SNAPSHOT" + VERSION = "22.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-gandiva/lib/gandiva/version.rb b/ruby/red-gandiva/lib/gandiva/version.rb index e75303aa970..a1f57e7d47a 100644 --- a/ruby/red-gandiva/lib/gandiva/version.rb +++ b/ruby/red-gandiva/lib/gandiva/version.rb @@ -16,7 +16,7 @@ # under the License. module Gandiva - VERSION = "22.0.0-SNAPSHOT" + VERSION = "22.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-parquet/lib/parquet/version.rb b/ruby/red-parquet/lib/parquet/version.rb index 0f5273bbed2..696db57c683 100644 --- a/ruby/red-parquet/lib/parquet/version.rb +++ b/ruby/red-parquet/lib/parquet/version.rb @@ -16,7 +16,7 @@ # under the License. module Parquet - VERSION = "22.0.0-SNAPSHOT" + VERSION = "22.0.0" module Version numbers, TAG = VERSION.split("-")