diff --git a/.env b/.env index 1735b7163b4..5951b3c95f6 100644 --- a/.env +++ b/.env @@ -37,6 +37,10 @@ DOCKER_BUILDKIT=1 ARCH=amd64 ARCH_ALIAS=x86_64 ARCH_SHORT=amd64 +# For aarch64 +# ARCH=arm64v8 +# ARCH_ALIAS=aarch64 +# ARCH_SHORT=arm64 # Default repository to pull and push images from REPO=apache/arrow-dev @@ -98,8 +102,8 @@ VCPKG="4334d8b4c8916018600212ab4dd4bbdc343065d1" # 2025.09.17 Release # ci/docker/python-*-windows-*.dockerfile or the vcpkg config. # This is a workaround for our CI problem that "archery docker build" doesn't # use pulled built images in dev/tasks/python-wheels/github.windows.yml. -PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2025-09-04 -PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2025-09-04 +PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2025-10-13 +PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2025-10-13 # Use conanio/${CONAN_BASE}:{CONAN_VERSION} for "docker compose run --rm conan". # See https://github.com/conan-io/conan-docker-tools#readme and diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index b92c6fe8437..0122f01e757 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -321,7 +321,6 @@ jobs: BOOST_SOURCE: BUNDLED CMAKE_CXX_STANDARD: "17" CMAKE_GENERATOR: Ninja - CMAKE_INSTALL_LIBDIR: bin CMAKE_INSTALL_PREFIX: /usr CMAKE_UNITY_BUILD: ON steps: @@ -369,11 +368,12 @@ jobs: call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 bash -c "ci/scripts/cpp_build.sh $(pwd) $(pwd)/build" - name: Test - shell: bash + shell: cmd run: | + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 - # For ORC + rem For ORC: time zone database directory (cmd has no "#" comments, so use rem) - export TZDIR=/c/msys64/usr/share/zoneinfo - ci/scripts/cpp_test.sh $(pwd) $(pwd)/build + set TZDIR=C:\msys64\usr\share\zoneinfo + bash -c "ci/scripts/cpp_test.sh $(pwd) $(pwd)/build" windows-mingw: name: AMD64 Windows MinGW ${{ matrix.msystem_upper }} C++ diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index 
97dbadd25d5..5b054ddfb58 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -164,6 +164,67 @@ jobs: continue-on-error: true run: archery docker push ${{ matrix.image }} + jni-linux: + needs: check-labels + name: JNI ${{ matrix.platform.runs-on }} ${{ matrix.platform.arch }} + runs-on: ${{ matrix.platform.runs-on }} + if: >- + needs.check-labels.outputs.force == 'true' || + contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra') || + contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra: C++') + timeout-minutes: 240 + permissions: + # This is for using GitHub Packages for vcpkg cache + packages: write + strategy: + fail-fast: false + matrix: + platform: + - arch: "amd64" + runs-on: ubuntu-latest + - arch: "arm64v8" + runs-on: ubuntu-24.04-arm + env: + ARCH: ${{ matrix.platform.arch }} + REPO: ghcr.io/${{ github.repository }}-dev + steps: + - name: Checkout Arrow + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + fetch-depth: 0 + submodules: recursive + - name: Cache Docker Volumes + uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: .docker + key: jni-${{ matrix.platform.runs-on }}-${{ hashFiles('cpp/**') }} + restore-keys: jni-${{ matrix.platform.runs-on }}- + - name: Setup Python + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + with: + python-version: 3 + - name: Setup Archery + run: python3 -m pip install -e dev/archery[docker] + - name: Execute Docker Build + env: + ARCHERY_DOCKER_USER: ${{ github.actor }} + ARCHERY_DOCKER_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + VCPKG_BINARY_SOURCES: "clear;nuget,GitHub,readwrite" + run: | + source ci/scripts/util_enable_core_dumps.sh + archery docker run cpp-jni + - name: Docker Push + if: >- + success() && + github.event_name == 'push' && + github.ref_name == 'main' + env: + ARCHERY_DOCKER_USER: 
${{ github.actor }} + ARCHERY_DOCKER_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + continue-on-error: true + run: archery docker push cpp-jni + jni-macos: needs: check-labels name: JNI macOS @@ -177,7 +238,7 @@ jobs: MACOSX_DEPLOYMENT_TARGET: "14.0" steps: - name: Checkout Arrow - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: fetch-depth: 0 submodules: recursive diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index f39ca02bf52..75969615861 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -22,6 +22,7 @@ on: branches: - '**' - '!dependabot/**' + - '!release-*' paths: - '.github/workflows/check_labels.yml' - '.github/workflows/package_linux.yml' diff --git a/.github/workflows/r_nightly.yml b/.github/workflows/r_nightly.yml index 7c52672c1cd..4e12fce9545 100644 --- a/.github/workflows/r_nightly.yml +++ b/.github/workflows/r_nightly.yml @@ -1,193 +1,217 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Upload R Nightly builds -# This workflow downloads the (nightly) binaries created in crossbow and uploads them -# to nightlies.apache.org. 
Due to authorization requirements, this upload can't be done -# from the crossbow repository. - -on: - workflow_dispatch: - inputs: - prefix: - description: Job prefix to use. - required: false - default: '' - keep: - description: Number of versions to keep. - required: false - default: 14 - - schedule: - #Crossbow packaging runs at 0 8 * * * - - cron: '0 14 * * *' - -permissions: - contents: read - -jobs: - upload: - if: github.repository == 'apache/arrow' - runs-on: ubuntu-latest - steps: - - name: Checkout Arrow - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - with: - fetch-depth: 1 - path: arrow - repository: apache/arrow - ref: main - submodules: recursive - - name: Checkout Crossbow - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - with: - fetch-depth: 0 - path: crossbow - repository: ursacomputing/crossbow - ref: main - - name: Set up Python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Upload R Nightly builds +# This workflow downloads the (nightly) binaries created in crossbow and uploads them +# to nightlies.apache.org. Due to authorization requirements, this upload can't be done +# from the crossbow repository. 
+ +on: + workflow_dispatch: + inputs: + prefix: + description: Job prefix to use. + required: false + default: '' + keep: + description: Number of versions to keep. + required: false + default: 14 + + schedule: + #Crossbow packaging runs at 0 8 * * * + - cron: '0 14 * * *' + +permissions: + contents: read + +jobs: + upload: + if: github.repository == 'apache/arrow' + runs-on: ubuntu-latest + steps: + - name: Checkout Arrow + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + fetch-depth: 1 + path: arrow + repository: apache/arrow + ref: main + submodules: recursive + - name: Checkout Crossbow + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + with: + fetch-depth: 0 + path: crossbow + repository: ursacomputing/crossbow + ref: main + - name: Set up Python uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 - with: - cache: 'pip' - python-version: 3.12 - - name: Install Archery - shell: bash - run: pip install -e arrow/dev/archery[all] - - run: mkdir -p binaries - - name: Download Artifacts - env: - PREFIX: ${{ github.event.inputs.prefix || ''}} - run: | - if [ -z $PREFIX ]; then - PREFIX=nightly-packaging-$(date +%Y-%m-%d)-0 - fi - echo $PREFIX - - archery crossbow download-artifacts -f r-binary-packages -t binaries $PREFIX - - if [ -n "$(ls -A binaries/*/*/)" ]; then - echo "Found files!" - else - echo "No files found. Stopping upload." 
- exit 1 - fi - - name: Cache Repo - uses: actions/cache@v4 - with: - path: repo - key: r-nightly-${{ github.run_id }} - restore-keys: r-nightly- - - name: Sync from Remote - uses: ./arrow/.github/actions/sync-nightlies - with: - switches: -avzh --update --delete --progress - local_path: repo - remote_path: ${{ secrets.NIGHTLIES_RSYNC_PATH }}/arrow/r - remote_host: ${{ secrets.NIGHTLIES_RSYNC_HOST }} - remote_port: ${{ secrets.NIGHTLIES_RSYNC_PORT }} - remote_user: ${{ secrets.NIGHTLIES_RSYNC_USER }} - remote_key: ${{ secrets.NIGHTLIES_RSYNC_KEY }} - remote_host_key: ${{ secrets.NIGHTLIES_RSYNC_HOST_KEY }} - - run: tree repo - - uses: r-lib/actions/setup-r@v2 - - name: Build Repository - shell: Rscript {0} - run: | - # folder that we sync to nightlies.apache.org - repo_root <- "repo" - # The binaries are in a nested dir - # so we need to find the correct path. - art_path <- list.files("binaries", - recursive = TRUE, - include.dirs = TRUE, - pattern = "r-binary-packages$", - full.names = TRUE - ) - - current_path <- list.files(art_path, full.names = TRUE, recursive = TRUE) - files <- sub("r-(pkg|lib)", repo_root, current_path) - - # decode contrib.url from artifact name: - # bin__windows__contrib__4.1 -> bin/windows/contrib/4.1 - new_paths <- gsub("__", "/", files) - # strip superfluous nested dirs - new_paths <- sub(art_path, ".", new_paths) - dirs <- dirname(new_paths) - sapply(dirs, dir.create, recursive = TRUE, showWarnings = FALSE) - - # overwrite allows us to "force push" a new version with the same name - copy_result <- file.copy(current_path, new_paths, overwrite = TRUE) - - if (!all(copy_result)) { - stop("There was an issue while copying the files!") - } - - name: Prune Repository - shell: bash - env: - KEEP: ${{ github.event.inputs.keep || 14 }} - run: | - prune() { - # list files | retain $KEEP newest files | delete everything else - ls -t $1/arrow* | tail -n +$((KEEP + 1)) | xargs --no-run-if-empty rm - } - - # find leaf sub dirs - repo_dirs=$(find repo 
-type d -links 2) - - # We want to retain $keep (14) versions of each pkg/lib so we call - # prune on each leaf dir and not on repo/. - for dir in ${repo_dirs[@]}; do - prune $dir - done - - name: Update Repository Index - shell: Rscript {0} - run: | - # folder that we sync to nightlies.apache.org - repo_root <- "repo" - tools::write_PACKAGES(file.path(repo_root, "src/contrib"), - type = "source", - verbose = TRUE, - latestOnly = FALSE - ) - - repo_dirs <- list.dirs(repo_root) - # find dirs with binary R packages: e.g. */contrib/4.1 - pkg_dirs <- grep(".+contrib\\/\\d.+", repo_dirs, value = TRUE) - - - for (dir in pkg_dirs) { - on_win <- grepl("windows", dir) - tools::write_PACKAGES(dir, - type = ifelse(on_win, "win.binary", "mac.binary"), - verbose = TRUE, - latestOnly = FALSE - ) - } - - name: Show repo contents - run: tree repo - - name: Sync to Remote - uses: ./arrow/.github/actions/sync-nightlies - with: - upload: true - switches: -avzh --update --delete --progress - local_path: repo - remote_path: ${{ secrets.NIGHTLIES_RSYNC_PATH }}/arrow/r - remote_host: ${{ secrets.NIGHTLIES_RSYNC_HOST }} - remote_port: ${{ secrets.NIGHTLIES_RSYNC_PORT }} - remote_user: ${{ secrets.NIGHTLIES_RSYNC_USER }} - remote_key: ${{ secrets.NIGHTLIES_RSYNC_KEY }} - remote_host_key: ${{ secrets.NIGHTLIES_RSYNC_HOST_KEY }} + with: + cache: 'pip' + python-version: 3.12 + - name: Install Archery + shell: bash + run: pip install -e arrow/dev/archery[all] + - run: mkdir -p binaries + - name: Download Artifacts + env: + PREFIX: ${{ github.event.inputs.prefix || ''}} + run: | + if [ -z $PREFIX ]; then + PREFIX=nightly-packaging-$(date +%Y-%m-%d)-0 + fi + echo $PREFIX + + archery crossbow download-artifacts -f r-binary-packages -t binaries $PREFIX + + if [ -n "$(ls -A binaries/*/*/)" ]; then + echo "Found files!" + else + echo "No files found. Stopping upload." 
+ exit 1 + fi + - name: Cache Repo + uses: actions/cache@v4 + with: + path: repo + key: r-nightly-${{ github.run_id }} + restore-keys: r-nightly- + - name: Sync from Remote + uses: ./arrow/.github/actions/sync-nightlies + with: + switches: -avzh --update --delete --progress + local_path: repo + remote_path: ${{ secrets.NIGHTLIES_RSYNC_PATH }}/arrow/r + remote_host: ${{ secrets.NIGHTLIES_RSYNC_HOST }} + remote_port: ${{ secrets.NIGHTLIES_RSYNC_PORT }} + remote_user: ${{ secrets.NIGHTLIES_RSYNC_USER }} + remote_key: ${{ secrets.NIGHTLIES_RSYNC_KEY }} + remote_host_key: ${{ secrets.NIGHTLIES_RSYNC_HOST_KEY }} + - run: tree repo + - uses: r-lib/actions/setup-r@v2 + - name: Build Repository + shell: Rscript {0} + run: | + # folder that we sync to nightlies.apache.org + repo_root <- "repo" + # The binaries are in a nested dir + # so we need to find the correct path. + art_path <- list.files("binaries", + recursive = TRUE, + include.dirs = TRUE, + pattern = "r-binary-packages$", + full.names = TRUE + ) + + current_pkg_path <- list.files(art_path, + full.names = TRUE, + pattern = "r-pkg", + recursive = TRUE + ) + current_lib_path <- list.files(art_path, + full.names = TRUE, + pattern = "r-lib", + recursive = TRUE + ) + files <- c( + sub("r-pkg", repo_root, current_pkg_path), + sub("r-lib", paste0(repo_root, "__r-lib"), current_lib_path) + ) + + # decode contrib.url from artifact name: + # bin__windows__contrib__4.1 -> bin/windows/contrib/4.1 + new_paths <- gsub("__", "/", files) + # strip superfluous nested dirs + new_paths <- sub(art_path, ".", new_paths) + dirs <- dirname(new_paths) + sapply(dirs, dir.create, recursive = TRUE, showWarnings = FALSE) + + # sources are listed pkg first, then lib, matching the order of files/new_paths; overwrite allows us to "force push" a new version with the same name + copy_result <- file.copy(c(current_pkg_path, current_lib_path), new_paths, overwrite = TRUE) + + if (!all(copy_result)) { + stop("There was an issue while copying the files!") + } + - name: Prune Repository + shell: bash + env: + KEEP: ${{ github.event.inputs.keep || 14 }} + 
run: | + prune() { + # list files | retain $KEEP newest files | delete everything else + ls -t "$@" | tail -n +$((KEEP + 1)) | xargs --no-run-if-empty rm + } + + # find leaf sub dirs, one array element per directory + mapfile -t repo_dirs < <(find repo -type d -links 2) + + # Old packages: repo/libarrow/bin/${TARGET}/arrow-${VERSION}.zip + # + # We want to retain $keep (14) versions of each pkg/lib so we call + # prune on each leaf dir and not on repo/. + for dir in "${repo_dirs[@]}"; do + prune "$dir"/arrow* + done + + # New packages: repo/libarrow/${TARGET}-arrow-${VERSION}.zip + prune repo/libarrow/r-libarrow-darwin-arm64-openssl-1.1-* || : + prune repo/libarrow/r-libarrow-darwin-arm64-openssl-3.0-* || : + prune repo/libarrow/r-libarrow-darwin-x86_64-openssl-1.1-* || : + prune repo/libarrow/r-libarrow-darwin-x86_64-openssl-3.0-* || : + prune repo/libarrow/r-libarrow-linux-x86_64-openssl-1.0-* || : + prune repo/libarrow/r-libarrow-linux-x86_64-openssl-1.1-* || : + prune repo/libarrow/r-libarrow-linux-x86_64-openssl-3.0-* || : + prune repo/libarrow/r-libarrow-windows-x86_64-* || : + - name: Update Repository Index + shell: Rscript {0} + run: | + # folder that we sync to nightlies.apache.org + repo_root <- "repo" + tools::write_PACKAGES(file.path(repo_root, "src/contrib"), + type = "source", + verbose = TRUE, + latestOnly = FALSE + ) + + repo_dirs <- list.dirs(repo_root) + # find dirs with binary R packages: e.g. 
*/contrib/4.1 + pkg_dirs <- grep(".+contrib\\/\\d.+", repo_dirs, value = TRUE) + + + for (dir in pkg_dirs) { + on_win <- grepl("windows", dir) + tools::write_PACKAGES(dir, + type = ifelse(on_win, "win.binary", "mac.binary"), + verbose = TRUE, + latestOnly = FALSE + ) + } + - name: Show repo contents + run: tree repo + - name: Sync to Remote + uses: ./arrow/.github/actions/sync-nightlies + with: + upload: true + switches: -avzh --update --delete --progress + local_path: repo + remote_path: ${{ secrets.NIGHTLIES_RSYNC_PATH }}/arrow/r + remote_host: ${{ secrets.NIGHTLIES_RSYNC_HOST }} + remote_port: ${{ secrets.NIGHTLIES_RSYNC_PORT }} + remote_user: ${{ secrets.NIGHTLIES_RSYNC_USER }} + remote_key: ${{ secrets.NIGHTLIES_RSYNC_KEY }} + remote_host_key: ${{ secrets.NIGHTLIES_RSYNC_HOST_KEY }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 6101f5d3cac..3fb888dee23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,227 @@ +# Apache Arrow 22.0.0 (2025-10-20) + +## Bug Fixes + +* [GH-26727](https://github.com/apache/arrow/issues/26727) - [C++][Flight] Use ipc::RecordBatchWriter with custom IpcPayloadWriter for TransportMessageWriter (DoExchange) (#47410) +* [GH-31603](https://github.com/apache/arrow/issues/31603) - [C++] Wrap Parquet encryption keys in SecureString (#46017) +* [GH-40911](https://github.com/apache/arrow/issues/40911) - [C++][Compute] Fix the decimal division kernel dispatching (#47445) +* [GH-41011](https://github.com/apache/arrow/issues/41011) - [C++][Compute] Fix the issue that comparison function could not handle decimal arguments with different scales (#47459) +* [GH-41110](https://github.com/apache/arrow/issues/41110) - [C#] Handle empty stream in ArrowStreamReaderImplementation (#47098) +* [GH-41336](https://github.com/apache/arrow/issues/41336) - [C++][Compute] Fix case_when kernel dispatch for decimals with different precisions and scales (#47479) +* [GH-42971](https://github.com/apache/arrow/issues/42971) - [C++] Parquet stream writer: Allow 
writing BYTE_ARRAY with converted type NONE (#44739) +* [GH-43355](https://github.com/apache/arrow/issues/43355) - [C++] Don't require `__once_proxy` in `symbols.map` (#47354) +* [GH-46629](https://github.com/apache/arrow/issues/46629) - [Python] Add options to DatasetFactory.inspect (#46961) +* [GH-46690](https://github.com/apache/arrow/issues/46690) - [GLib][CI] Use Meson 1.8.4 or later (#47425) +* [GH-46739](https://github.com/apache/arrow/issues/46739) - [C++] Fix Float16 signed zero/NaN equality comparisons (#46973) +* [GH-46897](https://github.com/apache/arrow/issues/46897) - [Docs][C++][Python] Fix asof join documentation (#46898) +* [GH-46928](https://github.com/apache/arrow/issues/46928) - [C++] Retry on EINTR while opening file in FileOpenReadable (#47629) +* [GH-46942](https://github.com/apache/arrow/issues/46942) - [Docs] Replace the directive versionadded with note (#46997) +* [GH-46946](https://github.com/apache/arrow/issues/46946) - [Python] PyArrow fails compiling without CSV enabled +* [GH-47009](https://github.com/apache/arrow/issues/47009) - [C#] ExportedAllocationOwner should use 64-bit integer to track total allocated memory. 
(#47011) +* [GH-47016](https://github.com/apache/arrow/issues/47016) - [C++][FlightSQL] Fix negative timestamps to date types (#47017) +* [GH-47027](https://github.com/apache/arrow/issues/47027) - [C++][Parquet] Fix repeated column pages not being written when reaching page size limit (#47032) +* [GH-47029](https://github.com/apache/arrow/issues/47029) - [Archery][Integration] Fix generation of run-end-encoded data (#47653) +* [GH-47039](https://github.com/apache/arrow/issues/47039) - [C++] Bump RapidJSON dependency in Meson configuration (#47041) +* [GH-47051](https://github.com/apache/arrow/issues/47051) - [Python][Release] verify-rc-source-windows Python tests are failing due to MSVC compiler bug +* [GH-47052](https://github.com/apache/arrow/issues/47052) - [CI][C++] Use Alpine Linux 3.22 instead of 3.18 (#47148) +* [GH-47096](https://github.com/apache/arrow/issues/47096) - [CI][R] Drop support for R 4.0 (#47285) +* [GH-47101](https://github.com/apache/arrow/issues/47101) - [Statistics][C++] Implement Statistics specification attribute ARROW:distinct_count:approximate (#47183) +* [GH-47124](https://github.com/apache/arrow/issues/47124) - [C++][Dataset] Fix DatasetWriter deadlock on concurrent WriteRecordBatch (#47129) +* [GH-47128](https://github.com/apache/arrow/issues/47128) - [Python] Numba-CUDA interop with NVIDIA bindings (#47150) +* [GH-47130](https://github.com/apache/arrow/issues/47130) - [Packaging][deb] Fix upgrade from 20.0.0-1 (#47343) +* [GH-47131](https://github.com/apache/arrow/issues/47131) - [C#] Fix day off by 1 in Date64Array (#47132) +* [GH-47143](https://github.com/apache/arrow/issues/47143) - [Dev] Ignore `apache-arrow.tar.gz` (#47145) +* [GH-47162](https://github.com/apache/arrow/issues/47162) - [Dev][Release][GLib] Fix indent in generate-version-header.py (#47163) +* [GH-47165](https://github.com/apache/arrow/issues/47165) - [Python] Update s3 test with new non-existent bucket (#47166) +* 
[GH-47175](https://github.com/apache/arrow/issues/47175) - [C++] Require xsimd 13.0.0 or later (#47221) +* [GH-47179](https://github.com/apache/arrow/issues/47179) - [Python] Revert FileSystem.from_uri to be a staticmethod again (#47178) +* [GH-47203](https://github.com/apache/arrow/issues/47203) - [C++] Restore CMAKE_DEBUG_POSTFIX in building bundled Apache Thrift (#47209) +* [GH-47213](https://github.com/apache/arrow/issues/47213) - [R] Require CMake 3.26 or later (#47217) +* [GH-47229](https://github.com/apache/arrow/issues/47229) - [C++][Arm] Force mimalloc to generate armv8.0 binary (#47766) +* [GH-47234](https://github.com/apache/arrow/issues/47234) - [C++][Python] Add test for fill_null regression on Windows (#47249) +* [GH-47241](https://github.com/apache/arrow/issues/47241) - [C++][Parquet] Fix VariantExtensionType conversion (#47242) +* [GH-47243](https://github.com/apache/arrow/issues/47243) - [C++] Initialize arrow::compute in execution_plan_documentation_examples (#47227) +* [GH-47256](https://github.com/apache/arrow/issues/47256) - [Python] Do not use cffi in free-threaded 3.13 builds (#47313) +* [GH-47257](https://github.com/apache/arrow/issues/47257) - [R] Fix truncation of time variables to work with numeric subseconds time with hms bindings (#47278) +* [GH-47265](https://github.com/apache/arrow/issues/47265) - [Ruby] Fix wrong `Time` object detection (#47267) +* [GH-47268](https://github.com/apache/arrow/issues/47268) - [C++][Compute] Fix discarded bad status for call binding (#47284) +* [GH-47277](https://github.com/apache/arrow/issues/47277) - [C++] r-binary-packages nightly failures due to incompatibility with old compiler (#47299) +* [GH-47283](https://github.com/apache/arrow/issues/47283) - [C++] Fix flight visibility issue in Meson configuration (#47298) +* [GH-47287](https://github.com/apache/arrow/issues/47287) - [C++][Compute] Add constraint for kernel signature matching and use it for binary decimal arithmetic kernels (#47297) +* 
[GH-47301](https://github.com/apache/arrow/issues/47301) - [Python] Fix FileFragment.open() seg fault behavior for file-like objects (#47302) +* [GH-47303](https://github.com/apache/arrow/issues/47303) - [C++] Don't install arrow-compute.pc twice (#47304) +* [GH-47323](https://github.com/apache/arrow/issues/47323) - [R][CI] test-r-rhub-debian-gcc-release-custom-ccache nightly job fails due to update in Debian (#47611) +* [GH-47332](https://github.com/apache/arrow/issues/47332) - [C++][Compute] Fix the issue that the arguments of function call become invalid before wrapping results (#47333) +* [GH-47356](https://github.com/apache/arrow/issues/47356) - [R] NEWS file states version 20.0.0.1 but release package number on CRAN is 20.0.0.2 (#47421) +* [GH-47367](https://github.com/apache/arrow/issues/47367) - [Packaging][Python] Patch vcpkg to show logs and install newer Windows SDK for vs_buildtools (#47484) +* [GH-47373](https://github.com/apache/arrow/issues/47373) - [C++] Raise for invalid decimal precision input from the C Data Interface (#47414) +* [GH-47380](https://github.com/apache/arrow/issues/47380) - [Python] Apply maps_as_pydicts to Nested MapScalar Values (#47454) +* [GH-47399](https://github.com/apache/arrow/issues/47399) - [C++] Update bundled Apache ORC to 2.2.0 with Protobuf patch (#47408) +* [GH-47431](https://github.com/apache/arrow/issues/47431) - [C++] Improve Meson configuration for WrapDB distribution (#47541) +* [GH-47434](https://github.com/apache/arrow/issues/47434) - [C++] Fix issue preventing running of tests on Windows (#47455) +* [GH-47440](https://github.com/apache/arrow/issues/47440) - [C++] Accept gflags::gflags as system gflags CMake target (#47468) +* [GH-47446](https://github.com/apache/arrow/issues/47446) - [C++] Update Meson configuration with compute swizzle change (#47448) +* [GH-47451](https://github.com/apache/arrow/issues/47451) - [Python][CI] Install tzdata-legacy in newer python-wheel-manylinux-test images (#47452) +* 
[GH-47453](https://github.com/apache/arrow/issues/47453) - [Packaging][CI] Token expired to upload nightly wheels +* [GH-47485](https://github.com/apache/arrow/issues/47485) - [C++][CI] Work around Valgrind failure on Azure tests (#47496) +* [GH-47486](https://github.com/apache/arrow/issues/47486) - [Dev][R] Define default R_UPDATE_CLANG (#47487) +* [GH-47491](https://github.com/apache/arrow/issues/47491) - [C++] Don't set include directories to found targets (#47492) +* [GH-47506](https://github.com/apache/arrow/issues/47506) - [CI][Packaging] Fix Amazon Linux 2023 packages verification (#47507) +* [GH-47534](https://github.com/apache/arrow/issues/47534) - [C++] Detect conda-installed packages in Meson CI (#47535) +* [GH-47537](https://github.com/apache/arrow/issues/47537) - [C++] Use pkgconfig name for benchmark in Meson (#47538) +* [GH-47539](https://github.com/apache/arrow/issues/47539) - [C++] Detect Snappy and bzip2 in Meson CI (#47540) +* [GH-47554](https://github.com/apache/arrow/issues/47554) - [C++] Fix Meson Parquet symbol visibility issues (#47556) +* [GH-47560](https://github.com/apache/arrow/issues/47560) - [C++] Fix host handling for default HDFS URI (#47458) +* [GH-47570](https://github.com/apache/arrow/issues/47570) - [CI] Don't notify nightly "CI: Extra" result from forks (#47571) +* [GH-47590](https://github.com/apache/arrow/issues/47590) - [C++] Use W functions explicitly for Windows UNICODE compatibility (#47593) +* [GH-47591](https://github.com/apache/arrow/issues/47591) - [C++] Fix passing zlib compression level (#47594) +* [GH-47596](https://github.com/apache/arrow/issues/47596) - [C++][Parquet] Fix printing of large Decimal statistics (#47619) +* [GH-47602](https://github.com/apache/arrow/issues/47602) - [Python] Make Schema hashable even when it has metadata (#47601) +* [GH-47614](https://github.com/apache/arrow/issues/47614) - [CI] Upgrade vcpkg on our CI (#47627) +* [GH-47620](https://github.com/apache/arrow/issues/47620) - [CI][C++] Use 
Ubuntu 24.04 for ASAN UBSAN job (#47623) +* [GH-47625](https://github.com/apache/arrow/issues/47625) - [Python] Free-threaded musllinux and manylinux wheels started failing with cffi 2.0.0 (#47626) +* [GH-47655](https://github.com/apache/arrow/issues/47655) - [C++][Parquet][CI] Fix failure to generate seed corpus (#47656) +* [GH-47659](https://github.com/apache/arrow/issues/47659) - [C++] Fix Arrow Flight Testing's unresolved external symbol error (#47660) +* [GH-47673](https://github.com/apache/arrow/issues/47673) - [CI][Integration] Fix Go build failure (#47674) +* [GH-47682](https://github.com/apache/arrow/issues/47682) - [R] `install_pyarrow(nightly = TRUE)` installs old pyarrow (#47699) +* [GH-47695](https://github.com/apache/arrow/issues/47695) - [CI][Release] Link arrow-io hdfs_test to c++fs on compilers where std::filesystem is not default present (#47701) +* [GH-47740](https://github.com/apache/arrow/issues/47740) - [C++][Parquet] Fix undefined behavior when reading invalid Parquet data (#47741) +* [GH-47742](https://github.com/apache/arrow/issues/47742) - [C++][CI] Silence Valgrind leak on protobuf initialization (#47743) +* [GH-47748](https://github.com/apache/arrow/issues/47748) - [C++][Dataset] Fix link error on macOS (#47749) +* [GH-47795](https://github.com/apache/arrow/issues/47795) - [Archery] Add support for custom Docker registry (#47796) +* [GH-47803](https://github.com/apache/arrow/issues/47803) - [C++][Parquet] Fix read out of bounds on invalid RLE data (#47804) +* [GH-47809](https://github.com/apache/arrow/issues/47809) - [CI][Release] Fix Windows verification job trying to install patch from conda (#47810) +* [GH-47819](https://github.com/apache/arrow/issues/47819) - [CI][Packaging][Release] Avoid triggering Linux packages on release branch push (#47826) +* [GH-47838](https://github.com/apache/arrow/issues/47838) - [C++][Parquet] Set Variant specification version to 1 to align with the variant spec (#47835) + + +## New Features and 
Improvements + +* [GH-20125](https://github.com/apache/arrow/issues/20125) - [Docs][Python] Restructure developers/python.rst (#47334) +* [GH-30036](https://github.com/apache/arrow/issues/30036) - [C++] Timezone-aware kernels should handle offset strings (e.g. "+04:30") (#12865) +* [GH-38211](https://github.com/apache/arrow/issues/38211) - [MATLAB] Add support for creating an empty `arrow.tabular.RecordBatch` by calling `arrow.recordBatch` with no input arguments (#47060) +* [GH-38213](https://github.com/apache/arrow/issues/38213) - [MATLAB] Create a superclass for tabular type MATLAB tests (i.e. for `Table` and `RecordBatch`) (#47107) +* [GH-38422](https://github.com/apache/arrow/issues/38422) - [MATLAB] Add `NumNulls` property to `arrow.array.Array` class (#47116) +* [GH-38532](https://github.com/apache/arrow/issues/38532) - [MATLAB] Add a `validate` method to all `arrow.array.Array` classes (#47059) +* [GH-38572](https://github.com/apache/arrow/issues/38572) - [Docs][MATLAB] Update `arrow/matlab/README.md` with the latest change. (#47109) +* [GH-39875](https://github.com/apache/arrow/issues/39875) - [C++] Why arrow decimal divide precision and scale is not correct? 
+* [GH-41108](https://github.com/apache/arrow/issues/41108) - [Docs] Remove Sphinx pin (#47326) +* [GH-41239](https://github.com/apache/arrow/issues/41239) - [C++] Support to write csv header without quotes (#47524) +* [GH-41476](https://github.com/apache/arrow/issues/41476) - [Python][C++] Impossible to specify `is_adjusted_to_utc` for `Time` type when writing to Parquet (#47316) +* [GH-42137](https://github.com/apache/arrow/issues/42137) - [CI][Python] Add Python Windows GitHub Action and remove AppVeyor (#47567) +* [GH-43662](https://github.com/apache/arrow/issues/43662) - [R] Add binding to stringr::str_replace_na() (#47521) +* [GH-43694](https://github.com/apache/arrow/issues/43694) - [C++] Add `Executor *` Option to `arrow::dataset::ScanOptions` (#43698) +* [GH-43904](https://github.com/apache/arrow/issues/43904) - [CI][Python] Stop uploading nightly wheels to gemfury (#47470) +* [GH-44345](https://github.com/apache/arrow/issues/44345) - [C++][Parquet] Add Decimal32/64 support to Parquet (#47427) +* [GH-44800](https://github.com/apache/arrow/issues/44800) - [C#] Implement Flight SQL Client (#44783) +* [GH-45055](https://github.com/apache/arrow/issues/45055) - [C++][Flight] Update Flight Server RecordBatchStreamImpl to reuse ipc::RecordBatchWriter with custom IpcPayloadWriter instead of manually generating FlightPayload (#47115) +* [GH-45056](https://github.com/apache/arrow/issues/45056) - [C++][Flight] Fully support dictionary replacement in Flight +* [GH-45382](https://github.com/apache/arrow/issues/45382) - [Python] Add support for pandas DataFrame.attrs (#47147) +* [GH-45639](https://github.com/apache/arrow/issues/45639) - [C++][Statistics] Add support for ARROW:average_byte_width:{exact,approximate} (#46385) +* [GH-45860](https://github.com/apache/arrow/issues/45860) - [C++] Respect CPU affinity in cpu_count and ThreadPool default capacity (#47152) +* [GH-45921](https://github.com/apache/arrow/issues/45921) - [Release][R] Use GitHub Release not 
apache.jfrog.io (#45964) +* [GH-46137](https://github.com/apache/arrow/issues/46137) - [C++] Replace grpc-cpp conda package with libgrpc (#47606) +* [GH-46272](https://github.com/apache/arrow/issues/46272) - [C++] Build Arrow libraries with `-Wmissing-definitions` on gcc (#47042) +* [GH-46374](https://github.com/apache/arrow/issues/46374) - [Python][Doc] Improve docs to specify that source argument on parquet.read_table can also be a list of strings (#47142) +* [GH-46410](https://github.com/apache/arrow/issues/46410) - [C++] Add parquet options to Meson configuration (#46647) +* [GH-46669](https://github.com/apache/arrow/issues/46669) - [CI][Archery] Automate Zulip and email notifications for Extra CI (#47546) +* [GH-46728](https://github.com/apache/arrow/issues/46728) - [Python] Skip test_gdb.py tests if PyArrow wasn't built debug (#46755) +* [GH-46835](https://github.com/apache/arrow/issues/46835) - [C++] Add more configuration options to arrow::EqualOptions (#47204) +* [GH-46860](https://github.com/apache/arrow/issues/46860) - [C++] Making HalfFloatBuilder accept Float16 as well as uint16_t (#46981) +* [GH-46905](https://github.com/apache/arrow/issues/46905) - [C++][Parquet] Expose Statistics.is_{min/max}_value_exact and default set to true if min/max are set (#46992) +* [GH-46908](https://github.com/apache/arrow/issues/46908) - [Docs][Format] Add variant extension type docs (#47456) +* [GH-46937](https://github.com/apache/arrow/issues/46937) - [C++] Enable arrow::EqualOptions for arrow::Table (#47164) +* [GH-46938](https://github.com/apache/arrow/issues/46938) - [C++] Enhance arrow::ChunkedArray::Equals to support floating-point comparison when values share the same memory (#47044) +* [GH-46939](https://github.com/apache/arrow/issues/46939) - [C++] Add support for shared memory comparison in arrow::RecordBatch (#47149) +* [GH-46962](https://github.com/apache/arrow/issues/46962) - [C++][Parquet] Generic xsimd function and dynamic dispatch for Byte Stream Split 
(#46963) +* [GH-46971](https://github.com/apache/arrow/issues/46971) - [C++][Parquet] Use temporary buffers when decrypting Parquet data pages (#46972) +* [GH-46982](https://github.com/apache/arrow/issues/46982) - [C++] Remove Boost dependency from hdfs_test (#47200) +* [GH-47005](https://github.com/apache/arrow/issues/47005) - [C++] Disable exporting CMake packages (#47006) +* [GH-47012](https://github.com/apache/arrow/issues/47012) - [C++][Parquet] Reserve values correctly when reading BYTE_ARRAY and FLBA (#47013) +* [GH-47040](https://github.com/apache/arrow/issues/47040) - [C++] Refine reset of Span to be reusable (#47004) +* [GH-47045](https://github.com/apache/arrow/issues/47045) - [CI][C++] Use Fedora 42 instead of 39 (#47046) +* [GH-47047](https://github.com/apache/arrow/issues/47047) - [CI][C++] Use Google Cloud Storage Testbench v0.55.0 (#47048) +* [GH-47058](https://github.com/apache/arrow/issues/47058) - [Release] Update Release Management Guide to reflect status in preparation for Arrow 22 (#47474) +* [GH-47075](https://github.com/apache/arrow/issues/47075) - [Release][Dev] Use GH_TOKEN as GitHub token environment variable (#47181) +* [GH-47084](https://github.com/apache/arrow/issues/47084) - [Release] Stop using https://dist.apache.org/repos/dist/dev/arrow/KEYS (#47182) +* [GH-47088](https://github.com/apache/arrow/issues/47088) - [CI][Dev] Fix shellcheck errors in the ci/scripts/integration_arrow.sh (#47089) +* [GH-47102](https://github.com/apache/arrow/issues/47102) - [Statistics][C++] Implement Statistics specification attribute ARROW:max_byte_width:{exact,approximate} Component: C++ (#47463) +* [GH-47106](https://github.com/apache/arrow/issues/47106) - [R] Update R package to use R 4.1+ native forward pipe syntax (#47622) +* [GH-47112](https://github.com/apache/arrow/issues/47112) - [Parquet][C++] Rle BitPacked parser (#47294) +* [GH-47120](https://github.com/apache/arrow/issues/47120) - [R] Update NEWS for 21.0.0 (#47121) +* 
[GH-47123](https://github.com/apache/arrow/issues/47123) - [Python] Add Enums to PyArrow Types (#47139) +* [GH-47125](https://github.com/apache/arrow/issues/47125) - [CI][Dev] Fix shellcheck errors in the ci/scripts/integration_hdfs.sh (#47126) +* [GH-47137](https://github.com/apache/arrow/issues/47137) - [Python][dependency-groups] ` (#47176) +* [GH-47153](https://github.com/apache/arrow/issues/47153) - [Docs][C++] Update cmake target table in build_system.rst with newly added targets (#47154) +* [GH-47157](https://github.com/apache/arrow/issues/47157) - [Docs] Improve presentation of Other available packages section in build_system.rst (#47411) +* [GH-47172](https://github.com/apache/arrow/issues/47172) - [Python] Add a utility function to create Arrow table instead of pandas df (#47199) +* [GH-47184](https://github.com/apache/arrow/issues/47184) - [Parquet][C++] Avoid multiplication overflow in FixedSizeBinaryBuilder::Reserve (#47185) +* [GH-47191](https://github.com/apache/arrow/issues/47191) - [R] Turn GCS back on by default on MacOS source builds (#47192) +* [GH-47193](https://github.com/apache/arrow/issues/47193) - [R] Update R Makefile to exclude flight odbc from cpp sync (#47194) +* [GH-47205](https://github.com/apache/arrow/issues/47205) - [C++] Suppress GNU variadic macro warnings (#47286) +* [GH-47208](https://github.com/apache/arrow/issues/47208) - [C++][CI] Add a CI job for C++23 (#47261) +* [GH-47208](https://github.com/apache/arrow/issues/47208) - [C++] Update bundled s2n-tls to 1.5.23 (#47220) +* [GH-47211](https://github.com/apache/arrow/issues/47211) - [CI][R] Disable non-system memory allocators when on linux-devel (#47212) +* [GH-47218](https://github.com/apache/arrow/issues/47218) - [C++] Update bundled s2n-tls +* [GH-47222](https://github.com/apache/arrow/issues/47222) - [CI][C++] Add a CI job that uses the same build options for JNI on macOS (#47305) +* [GH-47223](https://github.com/apache/arrow/issues/47223) - [Release] Use "upstream" as 
apache/arrow{,-site} remote name (#47224) +* [GH-47225](https://github.com/apache/arrow/issues/47225) - [C++] Remove Skyhook (#47262) +* [GH-47232](https://github.com/apache/arrow/issues/47232) - [Ruby] Suppress warnings in test with Ruby 3.5 (#47233) +* [GH-47244](https://github.com/apache/arrow/issues/47244) - [CI][Dev] Fix shellcheck errors in the ci/scripts/msys2_setup.sh (#47245) +* [GH-47258](https://github.com/apache/arrow/issues/47258) - [Release] Set `date:` for apache/arrow-site's `_release/${VERSION}.md` (#47260) +* [GH-47263](https://github.com/apache/arrow/issues/47263) - [MATLAB] Add `NumNulls` property to `arrow.array.ChunkedArray` class (#47264) +* [GH-47289](https://github.com/apache/arrow/issues/47289) - [CI][Dev] Fix shellcheck errors in the ci/scripts/python_build_emscripten.sh (#47290) +* [GH-47291](https://github.com/apache/arrow/issues/47291) - [C++] Update bundled aws-c-common to 0.12.4 (#47292) +* [GH-47306](https://github.com/apache/arrow/issues/47306) - [CI][Dev] Fix shellcheck errors in the ci/scripts/python_build.sh (#47307) +* [GH-47312](https://github.com/apache/arrow/issues/47312) - [Packaging] Add support for Debian forky (#47342) +* [GH-47317](https://github.com/apache/arrow/issues/47317) - [C++][C++23][Gandiva] Use pointer for Cache test (#47318) +* [GH-47319](https://github.com/apache/arrow/issues/47319) - [CI] Fix actions/checkout hash version comments (#47320) +* [GH-47321](https://github.com/apache/arrow/issues/47321) - [CI][Dev] Fix shellcheck errors in the ci/scripts/python_sdist_test.sh (#47322) +* [GH-47338](https://github.com/apache/arrow/issues/47338) - [C++][Python] Remove deprecated string-based Parquet encryption methods (#47339) +* [GH-47349](https://github.com/apache/arrow/issues/47349) - [C++] Include request ID in AWS S3 Error (#47351) +* [GH-47358](https://github.com/apache/arrow/issues/47358) - [Python] IPC and Flight options representation (#47461) +* [GH-47370](https://github.com/apache/arrow/issues/47370) - 
[Python] Require Cython 3.1 (#47396) +* [GH-47375](https://github.com/apache/arrow/issues/47375) - [C++][Compute] Move scatter function into compute core (#47378) +* [GH-47384](https://github.com/apache/arrow/issues/47384) - [C++][Acero] Isolate BackpressureHandler from ExecNode (#47386) +* [GH-47395](https://github.com/apache/arrow/issues/47395) - [R] Update fedora-clang to install latest clang version to match CRAN setup (#47206) +* [GH-47401](https://github.com/apache/arrow/issues/47401) - [C++] Remove needless Snappy patch (#47407) +* [GH-47404](https://github.com/apache/arrow/issues/47404) - [Ruby] Remove needless `require "extpp/setup"` (#47405) +* [GH-47412](https://github.com/apache/arrow/issues/47412) - [C++] Use inlineshidden visibility in Meson configuration (#47413) +* [GH-47422](https://github.com/apache/arrow/issues/47422) - [Python][C++][Flight] Expose ipc::ReadStats in Flight MetadataRecordBatchReader (#47432) +* [GH-47438](https://github.com/apache/arrow/issues/47438) - [Python][Packaging] Set up wheel building for Python 3.14 (#47616) +* [GH-47443](https://github.com/apache/arrow/issues/47443) - [Python][Packaging] Drop Python 3.9 support (#47478) +* [GH-47449](https://github.com/apache/arrow/issues/47449) - [C++][Parquet] Do not drop all Statistics if SortOrder is UNKNOWN (#47466) +* [GH-47469](https://github.com/apache/arrow/issues/47469) - [C++][Gandiva] Add support for LLVM 21.1.0 (#47473) +* [GH-47483](https://github.com/apache/arrow/issues/47483) - [C++] Bump vendored xxhash to 0.8.3 (#47476) +* [GH-47500](https://github.com/apache/arrow/issues/47500) - [C++] Add QualifierAlignment to clang-format options (#47501) +* [GH-47505](https://github.com/apache/arrow/issues/47505) - [CI][C#][Integration] Use apache/arrow-dotnet (#47508) +* [GH-47509](https://github.com/apache/arrow/issues/47509) - [CI][Packaging][Linux] Enable Docker build cache (#47510) +* [GH-47512](https://github.com/apache/arrow/issues/47512) - [C++] Bump meson-fmt in pre-commit 
to 1.9.0 (#47513) +* [GH-47514](https://github.com/apache/arrow/issues/47514) - [C++][Parquet] Add unpack tests and benchmarks (#47515) +* [GH-47516](https://github.com/apache/arrow/issues/47516) - [C++][FlightRPC] Initial ODBC driver framework (#47517) +* [GH-47518](https://github.com/apache/arrow/issues/47518) - [C++][FlightRPC] Replace `spdlogs` with Arrow's Internal Logging (#47645) +* [GH-47523](https://github.com/apache/arrow/issues/47523) - [C#] Remove csharp/ (#47547) +* [GH-47543](https://github.com/apache/arrow/issues/47543) - [C++] Search for system install of Azure libraries with Meson (#47544) +* [GH-47552](https://github.com/apache/arrow/issues/47552) - [C++] Fix creating wrong object by `FixedShapeTensorType::MakeArray()` (#47533) +* [GH-47575](https://github.com/apache/arrow/issues/47575) - [Python] add quoting_header option to pyarrow WriterOptions (#47610) +* [GH-47582](https://github.com/apache/arrow/issues/47582) - [CI][Packaging] Move linux-packaging tasks to apache/arrow repository (#47600) +* [GH-47584](https://github.com/apache/arrow/issues/47584) - [C++][CI] Remove "large memory" mark from TestListArray::TestOverflowCheck (#47585) +* [GH-47588](https://github.com/apache/arrow/issues/47588) - [C++] Bump mimalloc version to 3.1.5 (#47589) +* [GH-47597](https://github.com/apache/arrow/issues/47597) - [C++][Parquet] Fuzz more data types (#47621) +* [GH-47632](https://github.com/apache/arrow/issues/47632) - [CI][C++] Add a CI job for JNI on Linux (#47746) +* [GH-47633](https://github.com/apache/arrow/issues/47633) - [Dev][Integration] Write all files with `--write_generated_json` (#47634) +* [GH-47639](https://github.com/apache/arrow/issues/47639) - [Benchmarking] Clean up conbench config (#47638) +* [GH-47646](https://github.com/apache/arrow/issues/47646) - [C++][FlightRPC] Follow Naming Convention (#47658) +* [GH-47648](https://github.com/apache/arrow/issues/47648) - [Archery][Integration] More granularity in JSON test cases (#47649) +* 
[GH-47650](https://github.com/apache/arrow/issues/47650) - [Archery][Integration] Add option to generate gold files (#47651) +* [GH-47679](https://github.com/apache/arrow/issues/47679) - [C++] Register arrow compute calls in ODBC (#47680) +* [GH-47704](https://github.com/apache/arrow/issues/47704) - [R] Update paths in nightly libarrow upload job (#47727) +* [GH-47705](https://github.com/apache/arrow/issues/47705) - [R][CI] Migrate rhub debian-gcc-release to equivalent supported image (#47730) +* [GH-47738](https://github.com/apache/arrow/issues/47738) - [R] Update NEWS.md for 22.0.0 (#47739) + + + # Apache Arrow 6.0.1 (2021-11-18) ## Bug Fixes diff --git a/c_glib/meson.build b/c_glib/meson.build index b56157e66c3..f10a8042545 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -34,7 +34,7 @@ project( # * 22.04: 0.61.2 # * 24.04: 1.3.2 meson_version: '>=0.61.2', - version: '22.0.0-SNAPSHOT', + version: '22.0.0', ) version = meson.project_version() diff --git a/c_glib/vcpkg.json b/c_glib/vcpkg.json index 004f2e12ffe..150f54a1d41 100644 --- a/c_glib/vcpkg.json +++ b/c_glib/vcpkg.json @@ -1,6 +1,6 @@ { "name": "arrow-glib", - "version-string": "22.0.0-SNAPSHOT", + "version-string": "22.0.0", "$comment:dependencies": "We can enable gobject-introspection again once it's updated", "dependencies": [ "glib", diff --git a/ci/conda_env_unix.txt b/ci/conda_env_unix.txt index 1973238adff..4728068c4e8 100644 --- a/ci/conda_env_unix.txt +++ b/ci/conda_env_unix.txt @@ -20,4 +20,5 @@ autoconf ccache orc +patch pkg-config diff --git a/ci/docker/alpine-linux-3.22-cpp.dockerfile b/ci/docker/alpine-linux-3.22-cpp.dockerfile index f03e1913e27..48907e61a4a 100644 --- a/ci/docker/alpine-linux-3.22-cpp.dockerfile +++ b/ci/docker/alpine-linux-3.22-cpp.dockerfile @@ -48,6 +48,7 @@ RUN apk add \ musl-locales \ nlohmann-json \ openssl-dev \ + patch \ perl \ pkgconfig \ protobuf-dev \ diff --git a/ci/docker/cpp-jni.dockerfile b/ci/docker/cpp-jni.dockerfile new file mode 100644 index 
00000000000..b21ec762d67 --- /dev/null +++ b/ci/docker/cpp-jni.dockerfile @@ -0,0 +1,111 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base +FROM ${base} + +ARG arch +ARG arch_short + +SHELL ["/bin/bash", "-i", "-c"] +ENTRYPOINT ["/bin/bash", "-i", "-c"] + +# Install basic dependencies +RUN dnf install -y \ + autoconf \ + curl \ + flex \ + gdb \ + git \ + perl-IPC-Cmd \ + wget \ + zip + +# A system Python is required for Ninja and vcpkg in this Dockerfile. +# On manylinux_2_28 base images, no system Python is installed. +# We therefore override the PATH with Python 3.10 in /opt/python +# so that we have a consistent Python version across base images. 
+ENV CPYTHON_VERSION=cp310 +ENV PATH=/opt/python/${CPYTHON_VERSION}-${CPYTHON_VERSION}/bin:${PATH} + +# Install CMake +ARG cmake=3.29.2 +COPY ci/scripts/install_cmake.sh arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_cmake.sh ${cmake} /usr/local + +# Install Ninja +ARG ninja=1.10.2 +COPY ci/scripts/install_ninja.sh arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_ninja.sh ${ninja} /usr/local + +# Install ccache +ARG ccache=4.1 +COPY ci/scripts/install_ccache.sh arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_ccache.sh ${ccache} /usr/local + +# Install vcpkg +ARG vcpkg +COPY ci/vcpkg/*.patch \ + ci/vcpkg/*linux*.cmake \ + ci/vcpkg/vcpkg.json \ + arrow/ci/vcpkg/ +COPY ci/scripts/install_vcpkg.sh \ + arrow/ci/scripts/ +ENV VCPKG_ROOT=/opt/vcpkg +ARG build_type=release +ENV CMAKE_BUILD_TYPE=${build_type} \ + PATH="${PATH}:${VCPKG_ROOT}" \ + VCPKG_DEFAULT_TRIPLET=${arch_short}-linux-static-${build_type} \ + VCPKG_FEATURE_FLAGS="manifests" \ + VCPKG_FORCE_SYSTEM_BINARIES=1 \ + VCPKG_OVERLAY_TRIPLETS=/arrow/ci/vcpkg +# For --mount=type=secret: The GITHUB_TOKEN is the only real secret but we use +# --mount=type=secret for GITHUB_REPOSITORY_OWNER and +# VCPKG_BINARY_SOURCES too because we don't want to store them +# into the built image in order to easily reuse the built image cache. 
+# +# For vcpkg install: cannot use the S3 feature here because while +# aws-sdk-cpp=1.9.160 contains ssl related fixes as well as we can +# patch the vcpkg portfile to support arm machines it hits ARROW-15141 +# where we would need to fall back to 1.8.186 but we cannot patch +# those portfiles since vcpkg-tool handles the checkout of previous +# versions => use bundled S3 build +RUN --mount=type=secret,id=github_repository_owner \ + --mount=type=secret,id=github_token \ + --mount=type=secret,id=vcpkg_binary_sources \ + export GITHUB_REPOSITORY_OWNER=$(cat /run/secrets/github_repository_owner); \ + export GITHUB_TOKEN=$(cat /run/secrets/github_token); \ + export VCPKG_BINARY_SOURCES=$(cat /run/secrets/vcpkg_binary_sources); \ + arrow/ci/scripts/install_vcpkg.sh ${VCPKG_ROOT} ${vcpkg} && \ + vcpkg install \ + --clean-after-build \ + --x-install-root=${VCPKG_ROOT}/installed \ + --x-manifest-root=/arrow/ci/vcpkg \ + --x-feature=azure \ + --x-feature=dev \ + --x-feature=flight \ + --x-feature=gandiva \ + --x-feature=gcs \ + --x-feature=json \ + --x-feature=orc \ + --x-feature=parquet \ + --x-feature=s3 && \ + rm -rf ~/.config/NuGet/ + +ENV ARROW_BUILD_TESTS=ON \ + ARROW_CMAKE_ARGS="-DARROW_BUILD_TESTS=ON" \ + CMAKE_PRESET=ninja-${CMAKE_BUILD_TYPE}-jni-linux diff --git a/ci/docker/debian-12-cpp.dockerfile b/ci/docker/debian-12-cpp.dockerfile index 15716151fce..44c845bb17e 100644 --- a/ci/docker/debian-12-cpp.dockerfile +++ b/ci/docker/debian-12-cpp.dockerfile @@ -85,6 +85,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch \ pkg-config \ protobuf-compiler-grpc \ python3-dev \ diff --git a/ci/docker/debian-13-cpp.dockerfile b/ci/docker/debian-13-cpp.dockerfile index 3e5c645c81a..ca96b4177ff 100644 --- a/ci/docker/debian-13-cpp.dockerfile +++ b/ci/docker/debian-13-cpp.dockerfile @@ -87,6 +87,7 @@ RUN apt-get update -y -q && \ nlohmann-json3-dev \ npm \ opentelemetry-cpp-dev \ + patch \ pkg-config \ protobuf-compiler-grpc \ python3-dev \ 
diff --git a/ci/docker/debian-experimental-cpp.dockerfile b/ci/docker/debian-experimental-cpp.dockerfile index 2721b1d5f20..743f5ddd3be 100644 --- a/ci/docker/debian-experimental-cpp.dockerfile +++ b/ci/docker/debian-experimental-cpp.dockerfile @@ -79,6 +79,7 @@ RUN if [ -n "${gcc}" ]; then \ nlohmann-json3-dev \ npm \ opentelemetry-cpp-dev \ + patch \ pkg-config \ protobuf-compiler-grpc \ python3-dev \ diff --git a/ci/docker/fedora-42-cpp.dockerfile b/ci/docker/fedora-42-cpp.dockerfile index 82e3fa9f7aa..cabb066fec3 100644 --- a/ci/docker/fedora-42-cpp.dockerfile +++ b/ci/docker/fedora-42-cpp.dockerfile @@ -53,6 +53,7 @@ RUN dnf update -y && \ make \ ninja-build \ openssl-devel \ + patch \ protobuf-devel \ python \ python-devel \ diff --git a/ci/docker/python-free-threaded-wheel-manylinux-test-imports.dockerfile b/ci/docker/python-free-threaded-wheel-manylinux-test-imports.dockerfile index c884611ca39..e4149821de3 100644 --- a/ci/docker/python-free-threaded-wheel-manylinux-test-imports.dockerfile +++ b/ci/docker/python-free-threaded-wheel-manylinux-test-imports.dockerfile @@ -18,18 +18,20 @@ ARG base FROM ${base} +ARG python_version=3.13 + ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update -y -q && \ apt install -y -q --no-install-recommends software-properties-common gpg-agent && \ add-apt-repository -y ppa:deadsnakes/ppa && \ apt-get update -y -q && \ - apt install -y -q --no-install-recommends python3.13-dev python3.13-nogil python3.13-venv && \ + apt install -y -q --no-install-recommends python${python_version}-dev python${python_version}-nogil python${python_version}-venv && \ apt-get clean && \ rm -rf /var/lib/apt/lists* ENV ARROW_PYTHON_VENV /arrow-dev -RUN python3.13t -m venv ${ARROW_PYTHON_VENV} +RUN python${python_version}t -m venv ${ARROW_PYTHON_VENV} ENV PYTHON_GIL 0 ENV PATH "${ARROW_PYTHON_VENV}/bin:${PATH}" diff --git a/ci/docker/python-free-threaded-wheel-manylinux-test-unittests.dockerfile 
b/ci/docker/python-free-threaded-wheel-manylinux-test-unittests.dockerfile index 3c4c53f805e..566f0c0402a 100644 --- a/ci/docker/python-free-threaded-wheel-manylinux-test-unittests.dockerfile +++ b/ci/docker/python-free-threaded-wheel-manylinux-test-unittests.dockerfile @@ -18,6 +18,8 @@ ARG base FROM ${base} +ARG python_version=3.13 + ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update -y -q && \ @@ -27,14 +29,14 @@ RUN apt-get update -y -q && \ apt install -y -q --no-install-recommends \ build-essential \ libffi-dev \ - python3.13-dev \ - python3.13-nogil \ - python3.13-venv && \ + python${python_version}-dev \ + python${python_version}-nogil \ + python${python_version}-venv && \ apt-get clean && \ rm -rf /var/lib/apt/lists* ENV ARROW_PYTHON_VENV /arrow-dev -RUN python3.13t -m venv ${ARROW_PYTHON_VENV} +RUN python${python_version}t -m venv ${ARROW_PYTHON_VENV} ENV PYTHON_GIL 0 ENV PATH "${ARROW_PYTHON_VENV}/bin:${PATH}" diff --git a/ci/docker/python-free-threaded-wheel-musllinux-test-imports.dockerfile b/ci/docker/python-free-threaded-wheel-musllinux-test-imports.dockerfile index 526f345416b..e79facb4904 100644 --- a/ci/docker/python-free-threaded-wheel-musllinux-test-imports.dockerfile +++ b/ci/docker/python-free-threaded-wheel-musllinux-test-imports.dockerfile @@ -18,6 +18,9 @@ ARG base FROM ${base} +ARG python_version=3.13 +ARG python_patch_version=3.13.7 + RUN apk add --no-cache \ bash \ build-base \ @@ -34,19 +37,19 @@ RUN apk add --no-cache \ tzdata \ zlib-dev -# Install Python3.13.2 without GIL -RUN wget https://github.com/python/cpython/archive/refs/tags/v3.13.2.tar.gz && \ - tar -xzf v3.13.2.tar.gz && \ - rm v3.13.2.tar.gz && \ - cd cpython-3.13.2/ && \ +# Install Python without GIL +RUN wget https://github.com/python/cpython/archive/refs/tags/v${python_patch_version}.tar.gz && \ + tar -xzf v${python_patch_version}.tar.gz && \ + rm v${python_patch_version}.tar.gz && \ + cd cpython-${python_patch_version}/ && \ ./configure --disable-gil --with-ensurepip 
&& \ make -j && \ make install && \ cd ../ && \ - rm -rf cpython-3.13.2/ + rm -rf cpython-${python_patch_version}/ ENV ARROW_PYTHON_VENV /arrow-dev -RUN python3.13t -m venv ${ARROW_PYTHON_VENV} +RUN python${python_version}t -m venv ${ARROW_PYTHON_VENV} ENV PYTHON_GIL 0 ENV PATH "${ARROW_PYTHON_VENV}/bin:${PATH}" diff --git a/ci/docker/python-free-threaded-wheel-musllinux-test-unittests.dockerfile b/ci/docker/python-free-threaded-wheel-musllinux-test-unittests.dockerfile index fa2139b6ba5..3b170087ba8 100644 --- a/ci/docker/python-free-threaded-wheel-musllinux-test-unittests.dockerfile +++ b/ci/docker/python-free-threaded-wheel-musllinux-test-unittests.dockerfile @@ -18,6 +18,9 @@ ARG base FROM ${base} +ARG python_version=3.13 +ARG python_patch_version=3.13.7 + RUN apk add --no-cache \ bash \ build-base \ @@ -34,19 +37,19 @@ RUN apk add --no-cache \ tzdata \ zlib-dev -# Install Python3.13.2 without GIL -RUN wget https://github.com/python/cpython/archive/refs/tags/v3.13.2.tar.gz && \ - tar -xzf v3.13.2.tar.gz && \ - rm v3.13.2.tar.gz && \ - cd cpython-3.13.2/ && \ +# Install Python without GIL +RUN wget https://github.com/python/cpython/archive/refs/tags/v${python_patch_version}.tar.gz && \ + tar -xzf v${python_patch_version}.tar.gz && \ + rm v${python_patch_version}.tar.gz && \ + cd cpython-${python_patch_version}/ && \ ./configure --disable-gil --with-ensurepip && \ make -j && \ make install && \ cd ../ && \ - rm -rf cpython-3.13.2/ + rm -rf cpython-${python_patch_version}/ ENV ARROW_PYTHON_VENV /arrow-dev -RUN python3.13t -m venv ${ARROW_PYTHON_VENV} +RUN python${python_version}t -m venv ${ARROW_PYTHON_VENV} ENV PYTHON_GIL 0 ENV PATH "${ARROW_PYTHON_VENV}/bin:${PATH}" diff --git a/ci/docker/python-free-threaded-wheel-windows-test-vs2022.dockerfile b/ci/docker/python-free-threaded-wheel-windows-test-vs2022.dockerfile index 5b27c786ff4..ab257b271e5 100644 --- a/ci/docker/python-free-threaded-wheel-windows-test-vs2022.dockerfile +++ 
b/ci/docker/python-free-threaded-wheel-windows-test-vs2022.dockerfile @@ -26,24 +26,34 @@ FROM ${base} ARG python=3.13 +# hadolint ignore=SC1072 +RUN (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.1") & \ + (if "%python%"=="3.14" setx PYTHON_VERSION "3.14.0") + SHELL ["powershell", "-NoProfile", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"] -RUN $filename = 'python-3.13.1-amd64.exe'; \ - $url = 'https://www.python.org/ftp/python/3.13.1/' + $filename; \ +RUN $version = $env:PYTHON_VERSION; \ + $filename = 'python-' + $version + '-amd64.exe'; \ + $url = 'https://www.python.org/ftp/python/' + $version + '/' + $filename; \ Invoke-WebRequest -Uri $url -OutFile $filename; \ Start-Process -FilePath $filename -ArgumentList '/quiet', 'Include_freethreaded=1' -Wait ENV PYTHON_CMD="py -${python}t" SHELL ["cmd", "/S", "/C"] -RUN %PYTHON_CMD% -m pip install -U pip setuptools - -COPY python/requirements-wheel-test-3.13t.txt C:/arrow/python/ -# Cython and Pandas wheels for 3.13 free-threaded are not released yet +RUN %PYTHON_CMD% -m pip install -U pip setuptools & \ + if "%python%"=="3.13" ( \ + setx REQUIREMENTS_FILE "requirements-wheel-test-3.13t.txt" \ + ) else ( \ + setx REQUIREMENTS_FILE "requirements-wheel-test.txt" \ + ) + +COPY python/requirements-wheel-test-3.13t.txt python/requirements-wheel-test.txt C:/arrow/python/ +# Cython and Pandas wheels for free-threaded are not released yet RUN %PYTHON_CMD% -m pip install \ --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \ --pre \ --prefer-binary \ - -r C:/arrow/python/requirements-wheel-test-3.13t.txt + -r C:/arrow/python/%REQUIREMENTS_FILE% ENV PYTHON="${python}t" ENV PYTHON_GIL=0 diff --git a/ci/docker/python-free-threaded-wheel-windows-vs2022.dockerfile b/ci/docker/python-free-threaded-wheel-windows-vs2022.dockerfile index adbdccde71d..77a64fd5c24 100644 --- a/ci/docker/python-free-threaded-wheel-windows-vs2022.dockerfile +++ 
b/ci/docker/python-free-threaded-wheel-windows-vs2022.dockerfile @@ -26,9 +26,13 @@ FROM ${base} ARG python=3.13 +RUN (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.1") & \ + (if "%python%"=="3.14" setx PYTHON_VERSION "3.14.0") + SHELL ["powershell", "-NoProfile", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"] -RUN $filename = 'python-3.13.1-amd64.exe'; \ - $url = 'https://www.python.org/ftp/python/3.13.1/' + $filename; \ +RUN $version = $env:PYTHON_VERSION; \ + $filename = 'python-' + $version + '-amd64.exe'; \ + $url = 'https://www.python.org/ftp/python/' + $version + '/' + $filename; \ Invoke-WebRequest -Uri $url -OutFile $filename; \ Start-Process -FilePath $filename -ArgumentList '/quiet', 'Include_freethreaded=1' -Wait diff --git a/ci/docker/python-wheel-musllinux.dockerfile b/ci/docker/python-wheel-musllinux.dockerfile index c1c4fd5bd28..d00d44bd093 100644 --- a/ci/docker/python-wheel-musllinux.dockerfile +++ b/ci/docker/python-wheel-musllinux.dockerfile @@ -82,6 +82,7 @@ RUN --mount=type=secret,id=github_repository_owner \ export GITHUB_REPOSITORY_OWNER=$(cat /run/secrets/github_repository_owner); \ export GITHUB_TOKEN=$(cat /run/secrets/github_token); \ export VCPKG_BINARY_SOURCES=$(cat /run/secrets/vcpkg_binary_sources); \ + export CMAKE_POLICY_VERSION_MINIMUM=3.5; \ arrow/ci/scripts/install_vcpkg.sh ${VCPKG_ROOT} ${vcpkg} && \ vcpkg install \ --clean-after-build \ @@ -110,10 +111,5 @@ RUN PYTHON_ROOT=$(find /opt/python -name cp${PYTHON_VERSION/./}-${PYTHON_ABI_TAG SHELL ["/bin/bash", "-i", "-c", "-l"] ENTRYPOINT ["/bin/bash", "-i", "-c", "-l"] -# Remove once there are released Cython wheels for 3.13 free-threaded available -RUN if [ "${python_abi_tag}" = "cp313t" ]; then \ - pip install cython --pre --extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" --prefer-binary ; \ - fi - COPY python/requirements-wheel-build.txt /arrow/python/ RUN pip install -r 
/arrow/python/requirements-wheel-build.txt diff --git a/ci/docker/python-wheel-windows-test-vs2022.dockerfile b/ci/docker/python-wheel-windows-test-vs2022.dockerfile index a520fac6215..1bed37eb001 100644 --- a/ci/docker/python-wheel-windows-test-vs2022.dockerfile +++ b/ci/docker/python-wheel-windows-test-vs2022.dockerfile @@ -30,8 +30,9 @@ FROM ${base} ARG python=3.10 RUN (if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11" && setx PYTHON_CMD "py -3.10") & \ (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9" && setx PYTHON_CMD "py -3.11") & \ - (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.8" && setx PYTHON_CMD "py -3.12") & \ - (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.1" && setx PYTHON_CMD "py -3.13") + (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.10" && setx PYTHON_CMD "py -3.12") & \ + (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.7" && setx PYTHON_CMD "py -3.13") & \ + (if "%python%"=="3.14" setx PYTHON_VERSION "3.14.0" && setx PYTHON_CMD "py -3.14") # hadolint ignore=DL3059 RUN choco install -r -y --pre --no-progress --force python --version=%PYTHON_VERSION% diff --git a/ci/docker/python-wheel-windows-vs2022.dockerfile b/ci/docker/python-wheel-windows-vs2022.dockerfile index c15970ca9b4..04750ff44c4 100644 --- a/ci/docker/python-wheel-windows-vs2022.dockerfile +++ b/ci/docker/python-wheel-windows-vs2022.dockerfile @@ -25,8 +25,9 @@ FROM ${base} ARG python=3.10 RUN (if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11" && setx PYTHON_CMD "py -3.10") & \ (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9" && setx PYTHON_CMD "py -3.11") & \ - (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.8" && setx PYTHON_CMD "py -3.12") & \ - (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.1" && setx PYTHON_CMD "py -3.13") + (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.10" && setx PYTHON_CMD "py -3.12") & \ + (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.7" && setx PYTHON_CMD "py -3.13") & \ + (if "%python%"=="3.14" setx PYTHON_VERSION "3.14.0" && 
setx PYTHON_CMD "py -3.14") RUN choco install -r -y --pre --no-progress python --version=%PYTHON_VERSION% RUN %PYTHON_CMD% -m pip install -U pip setuptools diff --git a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile index 2a90a5637d4..d38dd418e29 100644 --- a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile @@ -33,6 +33,7 @@ RUN apt-get update -y -q && \ git \ libssl-dev \ libcurl4-openssl-dev \ + patch \ python3-pip \ python3-venv \ tzdata \ diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index 44c1a452c17..88a27efe335 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -107,6 +107,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch \ pkg-config \ protobuf-compiler \ protobuf-compiler-grpc \ diff --git a/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile index a1fd178a2c7..5e114d5dcd9 100644 --- a/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile @@ -33,6 +33,7 @@ RUN apt-get update -y -q && \ git \ libssl-dev \ libcurl4-openssl-dev \ + patch \ python3-pip \ python3-venv \ tzdata \ diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile index 6bc49a4c842..0347d452d7b 100644 --- a/ci/docker/ubuntu-24.04-cpp.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp.dockerfile @@ -108,6 +108,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch \ pkg-config \ protobuf-compiler \ protobuf-compiler-grpc \ diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD index bf902c1cb64..9b53ba99704 100644 --- a/ci/scripts/PKGBUILD +++ b/ci/scripts/PKGBUILD @@ -18,7 +18,7 @@ _realname=arrow pkgbase=mingw-w64-${_realname} pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" -pkgver=21.0.0.9000 +pkgver=22.0.0 pkgrel=8000 
pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" arch=("any") diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 3a4431239f1..fd550d8fb08 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -174,6 +174,11 @@ elif [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ ${ARROW_CMAKE_ARGS} \ ${source_dir} +elif [ -n "${CMAKE_PRESET}" ]; then + cmake \ + --preset="${CMAKE_PRESET}" \ + ${ARROW_CMAKE_ARGS} \ + ${source_dir} else cmake \ -Dabsl_SOURCE=${absl_SOURCE:-} \ @@ -308,10 +313,14 @@ fi popd if [ -x "$(command -v ldconfig)" ]; then - if [ -x "$(command -v sudo)" ]; then - SUDO=sudo - else + if [ "$(id --user)" -eq 0 ]; then SUDO= + else + if [ -x "$(command -v sudo)" ]; then + SUDO=sudo + else + SUDO= + fi fi ${SUDO} ldconfig ${ARROW_HOME}/${CMAKE_INSTALL_LIBDIR:-lib} fi diff --git a/ci/scripts/cpp_test.sh b/ci/scripts/cpp_test.sh index 3d88b6f1cd5..4243e78bca7 100755 --- a/ci/scripts/cpp_test.sh +++ b/ci/scripts/cpp_test.sh @@ -47,6 +47,12 @@ ctest_options=() if ! type azurite >/dev/null 2>&1; then exclude_tests+=("arrow-azurefs-test") fi +if ! type storage-testbench >/dev/null 2>&1; then + exclude_tests+=("arrow-gcsfs-test") +fi +if ! type minio >/dev/null 2>&1; then + exclude_tests+=("arrow-s3fs-test") +fi case "$(uname)" in Linux) n_jobs=$(nproc) @@ -114,6 +120,41 @@ else "$@" fi +# This is for testing find_package(Arrow). +# +# Note that this is not a perfect solution. We should improve this +# later. +# +# * This is ad-hoc +# * This doesn't test other CMake packages such as ArrowDataset +if [ "${ARROW_USE_MESON:-OFF}" = "OFF" ] && \ + [ "${ARROW_EMSCRIPTEN:-OFF}" = "OFF" ] && \ + [ "${ARROW_USE_ASAN:-OFF}" = "OFF" ]; then + CMAKE_PREFIX_PATH="${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}}" + case "$(uname)" in + MINGW*) + # /lib/cmake/ isn't searched on Windows. 
+ # + # See also: + # https://cmake.org/cmake/help/latest/command/find_package.html#config-mode-search-procedure + CMAKE_PREFIX_PATH+="/lib/cmake/" + ;; + esac + if [ -n "${VCPKG_ROOT}" ] && [ -n "${VCPKG_DEFAULT_TRIPLET}" ]; then + CMAKE_PREFIX_PATH+=";${VCPKG_ROOT}/installed/${VCPKG_DEFAULT_TRIPLET}" + fi + cmake \ + -S "${source_dir}/examples/minimal_build" \ + -B "${build_dir}/examples/minimal_build" \ + -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" + cmake --build "${build_dir}/examples/minimal_build" + pushd "${source_dir}/examples/minimal_build" + # PATH= is for Windows. + PATH="${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}}/bin:${PATH}" \ + "${build_dir}/examples/minimal_build/arrow-example" + popd +fi + if [ "${ARROW_BUILD_EXAMPLES}" == "ON" ]; then examples=$(find "${binary_output_dir}" -executable -name "*example") if [ "${examples}" == "" ]; then diff --git a/ci/scripts/install_python.sh b/ci/scripts/install_python.sh index 915c58bbb47..fe0c4bcb931 100755 --- a/ci/scripts/install_python.sh +++ b/ci/scripts/install_python.sh @@ -27,9 +27,11 @@ platforms=([windows]=Windows declare -A versions versions=([3.10]=3.10.11 [3.11]=3.11.9 - [3.12]=3.12.9 - [3.13]=3.13.2 - [3.13t]=3.13.2) + [3.12]=3.12.10 + [3.13]=3.13.7 + [3.13t]=3.13.7 + [3.14]=3.14.0 + [3.14t]=3.14.0) if [ "$#" -ne 2 ]; then echo "Usage: $0 " @@ -50,7 +52,9 @@ if [ "$platform" = "macOS" ]; then wget "https://www.python.org/ftp/python/${full_version}/${fname}" echo "Installing Python..." - if [[ $2 == "3.13t" ]]; then + if [[ $2 == "3.13t" ]] || [[ $2 == "3.14t" ]]; then + # Extract the base version without 't' suffix + base_version="${version%t}" # See https://github.com/python/cpython/issues/120098#issuecomment-2151122033 for more info on this. 
cat > ./choicechanges.plist < @@ -63,7 +67,7 @@ if [ "$platform" = "macOS" ]; then choiceAttribute selected choiceIdentifier - org.python.Python.PythonTFramework-3.13 + org.python.Python.PythonTFramework-${base_version} @@ -76,8 +80,9 @@ EOF rm "$fname" python="/Library/Frameworks/Python.framework/Versions/${version}/bin/python${version}" - if [[ $2 == "3.13t" ]]; then - python="/Library/Frameworks/PythonT.framework/Versions/3.13/bin/python3.13t" + if [[ $2 == "3.13t" ]] || [[ $2 == "3.14t" ]]; then + base_version="${version%t}" + python="/Library/Frameworks/PythonT.framework/Versions/${base_version}/bin/python${base_version}t" fi echo "Installing Pip..." diff --git a/ci/scripts/msys2_setup.sh b/ci/scripts/msys2_setup.sh index 0ce9343a7f7..b4634070a87 100755 --- a/ci/scripts/msys2_setup.sh +++ b/ci/scripts/msys2_setup.sh @@ -51,6 +51,7 @@ case "${target}" in packages+=("${MINGW_PACKAGE_PREFIX}-xsimd") packages+=("${MINGW_PACKAGE_PREFIX}-uriparser") packages+=("${MINGW_PACKAGE_PREFIX}-zstd") + packages+=("patch") if [ "${target}" != "ruby" ]; then # We don't update the exiting packages for Ruby because diff --git a/ci/scripts/python_wheel_unix_test.sh b/ci/scripts/python_wheel_unix_test.sh index e18c0c740ca..2b8ee7be745 100755 --- a/ci/scripts/python_wheel_unix_test.sh +++ b/ci/scripts/python_wheel_unix_test.sh @@ -31,9 +31,9 @@ source_dir=${1} : "${ARROW_AZURE:=ON}" : "${ARROW_FLIGHT:=ON}" : "${ARROW_GCS:=ON}" +: "${CHECK_IMPORTS:=ON}" : "${ARROW_S3:=ON}" : "${ARROW_SUBSTRAIT:=ON}" -: "${CHECK_IMPORTS:=ON}" : "${CHECK_WHEEL_CONTENT:=ON}" : "${CHECK_UNITTESTS:=ON}" : "${INSTALL_PYARROW:=ON}" @@ -106,10 +106,10 @@ is_free_threaded() { if [ "${CHECK_UNITTESTS}" == "ON" ]; then # Install testing dependencies - if [ "$(is_free_threaded)" = "ON" ]; then - echo "Free-threaded Python build detected" + if [ "$(is_free_threaded)" = "ON" ] && [[ "${PYTHON:-}" == *"3.13"* ]]; then + echo "Free-threaded Python 3.13 build detected" python -m pip install -U -r 
"${source_dir}/python/requirements-wheel-test-3.13t.txt" - elif [ "$(is_free_threaded)" = "OFF" ]; then + else echo "Regular Python build detected" python -m pip install -U -r "${source_dir}/python/requirements-wheel-test.txt" fi diff --git a/ci/vcpkg/vcpkg.json b/ci/vcpkg/vcpkg.json index 0107153f018..5dfe61a0c60 100644 --- a/ci/vcpkg/vcpkg.json +++ b/ci/vcpkg/vcpkg.json @@ -19,6 +19,7 @@ "re2", "snappy", "utf8proc", + "xsimd", "zlib", "zstd", { diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e805694f522..14cf1b91411 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -96,7 +96,7 @@ if(POLICY CMP0170) cmake_policy(SET CMP0170 NEW) endif() -set(ARROW_VERSION "22.0.0-SNAPSHOT") +set(ARROW_VERSION "22.0.0") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index c9e2444389f..e1cad83ae3f 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -444,7 +444,8 @@ "CMAKE_CXX_COMPILER": "clang++", "ARROW_IPC": "ON", "ARROW_PARQUET": "ON", - "ARROW_FUZZING": "ON" + "ARROW_FUZZING": "ON", + "ARROW_WITH_SNAPPY": "ON" } }, { @@ -582,6 +583,37 @@ "displayName": "Benchmarking build with everything enabled", "cacheVariables": {} }, + { + "name": "ninja-release-jni-linux", + "inherits": [ + "base-release" + ], + "displayName": "Build for JNI on Linux", + "cacheVariables": { + "ARROW_ACERO": "ON", + "ARROW_BUILD_SHARED": "OFF", + "ARROW_BUILD_STATIC": "ON", + "ARROW_CSV": "ON", + "ARROW_DATASET": "ON", + "ARROW_DEPENDENCY_SOURCE": "VCPKG", + "ARROW_DEPENDENCY_USE_SHARED": "OFF", + "ARROW_GANDIVA": "ON", + "ARROW_GANDIVA_STATIC_LIBSTDCPP": "ON", + "ARROW_GCS": "ON", + "ARROW_JSON": "ON", + "ARROW_ORC": "ON", + "ARROW_PARQUET": "ON", + "ARROW_RPATH_ORIGIN": "ON", + "ARROW_S3": "ON", + "ARROW_SUBSTRAIT": "ON", + "PARQUET_BUILD_EXAMPLES": "OFF", + "PARQUET_BUILD_EXECUTABLES": "OFF", + "PARQUET_REQUIRE_ENCRYPTION": "OFF", + "VCPKG_MANIFEST_MODE": "OFF", + 
"VCPKG_ROOT": "$env{VCPKG_ROOT}", + "VCPKG_TARGET_TRIPLET": "$env{VCPKG_TARGET_TRIPLET}" + } + }, { "name": "ninja-release-jni-macos", "inherits": [ diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 5b8b039357e..7b8cef5fb5e 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2275,9 +2275,9 @@ if(ARROW_MIMALLOC) endif() set(MIMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/mimalloc_ep/src/mimalloc_ep") - set(MIMALLOC_INCLUDE_DIR "${MIMALLOC_PREFIX}/include/mimalloc-2.2") + set(MIMALLOC_INCLUDE_DIR "${MIMALLOC_PREFIX}/include") set(MIMALLOC_STATIC_LIB - "${MIMALLOC_PREFIX}/lib/mimalloc-2.2/${CMAKE_STATIC_LIBRARY_PREFIX}${MIMALLOC_LIB_BASE_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" + "${MIMALLOC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${MIMALLOC_LIB_BASE_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" ) set(MIMALLOC_C_FLAGS ${EP_C_FLAGS}) @@ -2286,20 +2286,31 @@ if(ARROW_MIMALLOC) set(MIMALLOC_C_FLAGS "${MIMALLOC_C_FLAGS} -DERROR_COMMITMENT_MINIMUM=635") endif() + set(MIMALLOC_PATCH_COMMAND "") + if(${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG") + find_program(PATCH patch REQUIRED) + set(MIMALLOC_PATCH_COMMAND ${PATCH} -p1 -i + ${CMAKE_CURRENT_LIST_DIR}/mimalloc-1138.patch) + endif() + set(MIMALLOC_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_C_FLAGS=${MIMALLOC_C_FLAGS}" "-DCMAKE_INSTALL_PREFIX=${MIMALLOC_PREFIX}" + -DMI_INSTALL_TOPLEVEL=ON -DMI_OVERRIDE=OFF -DMI_LOCAL_DYNAMIC_TLS=ON -DMI_BUILD_OBJECT=OFF -DMI_BUILD_SHARED=OFF - -DMI_BUILD_TESTS=OFF) + -DMI_BUILD_TESTS=OFF + # GH-47229: Force mimalloc to generate armv8.0 binary + -DMI_NO_OPT_ARCH=ON) externalproject_add(mimalloc_ep ${EP_COMMON_OPTIONS} URL ${MIMALLOC_SOURCE_URL} URL_HASH "SHA256=${ARROW_MIMALLOC_BUILD_SHA256_CHECKSUM}" + PATCH_COMMAND ${MIMALLOC_PATCH_COMMAND} CMAKE_ARGS ${MIMALLOC_CMAKE_ARGS} BUILD_BYPRODUCTS "${MIMALLOC_STATIC_LIB}") diff --git a/cpp/cmake_modules/mimalloc-1138.patch 
b/cpp/cmake_modules/mimalloc-1138.patch new file mode 100644 index 00000000000..1ffa4bffbba --- /dev/null +++ b/cpp/cmake_modules/mimalloc-1138.patch @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +Fix for https://github.com/microsoft/mimalloc/issues/1138 + +diff --git a/src/arena.c b/src/arena.c +index b26f4442..d7e99b55 100644 +--- a/src/arena.c ++++ b/src/arena.c +@@ -797,6 +797,9 @@ mi_page_t* _mi_arenas_page_alloc(mi_heap_t* heap, size_t block_size, size_t bloc + else { + page = mi_arenas_page_singleton_alloc(heap, block_size, block_alignment); + } ++ if mi_unlikely(page == NULL) { ++ return NULL; ++ } + // mi_assert_internal(page == NULL || _mi_page_segment(page)->subproc == tld->subproc); + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + mi_assert_internal(_mi_ptr_page(page)==page); diff --git a/cpp/meson.build b/cpp/meson.build index 81143ed1e28..194da0ccef2 100644 --- a/cpp/meson.build +++ b/cpp/meson.build @@ -19,7 +19,7 @@ project( 'arrow', 'cpp', 'c', - version: '22.0.0-SNAPSHOT', + version: '22.0.0', license: 'Apache-2.0', meson_version: '>=1.3.0', default_options: ['c_std=gnu11,c11', 'warning_level=2', 'cpp_std=c++17'], diff --git a/cpp/src/arrow/ArrowConfig.cmake.in 
b/cpp/src/arrow/ArrowConfig.cmake.in index ee462663d6b..cbadad4d742 100644 --- a/cpp/src/arrow/ArrowConfig.cmake.in +++ b/cpp/src/arrow/ArrowConfig.cmake.in @@ -124,11 +124,13 @@ if(TARGET Arrow::arrow_static AND NOT TARGET Arrow::arrow_bundled_dependencies) # https://cmake.org/cmake/help/latest/policy/CMP0057.html cmake_policy(PUSH) cmake_policy(SET CMP0057 NEW) - if("AWS::aws-c-common" IN_LIST ARROW_BUNDLED_STATIC_LIBS) + if("aws-c-common" IN_LIST ARROW_BUNDLED_STATIC_LIBS) if(APPLE) find_library(CORE_FOUNDATION CoreFoundation) target_link_libraries(Arrow::arrow_bundled_dependencies INTERFACE ${CORE_FOUNDATION}) + find_library(NETWORK Network) + target_link_libraries(Arrow::arrow_bundled_dependencies INTERFACE ${NETWORK}) find_library(SECURITY Security) target_link_libraries(Arrow::arrow_bundled_dependencies INTERFACE ${SECURITY}) elseif(WIN32) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 771505f8c90..e299e8f6167 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -780,7 +780,6 @@ if(ARROW_COMPUTE) compute/kernels/scalar_temporal_binary.cc compute/kernels/scalar_temporal_unary.cc compute/kernels/scalar_validity.cc - compute/kernels/temporal_internal.cc compute/kernels/util_internal.cc compute/kernels/vector_array_sort.cc compute/kernels/vector_cumulative_ops.cc diff --git a/cpp/src/arrow/compute/kernels/temporal_internal.h b/cpp/src/arrow/compute/kernels/temporal_internal.h index 68e09e91d72..3674c233dc9 100644 --- a/cpp/src/arrow/compute/kernels/temporal_internal.h +++ b/cpp/src/arrow/compute/kernels/temporal_internal.h @@ -80,7 +80,7 @@ inline int64_t GetQuarter(const year_month_day& ymd) { return static_cast((static_cast(ymd.month()) - 1) / 3); } -Result LocateZone(const std::string_view timezone); +ARROW_EXPORT Result LocateZone(const std::string_view timezone); static inline const std::string& GetInputTimezone(const DataType& type) { static const std::string no_timezone = ""; diff --git 
a/cpp/src/arrow/meson.build b/cpp/src/arrow/meson.build index 1872f96df6a..5590ba41c91 100644 --- a/cpp/src/arrow/meson.build +++ b/cpp/src/arrow/meson.build @@ -525,7 +525,6 @@ if needs_compute 'compute/kernels/scalar_temporal_binary.cc', 'compute/kernels/scalar_temporal_unary.cc', 'compute/kernels/scalar_validity.cc', - 'compute/kernels/temporal_internal.cc', 'compute/kernels/util_internal.cc', 'compute/kernels/vector_array_sort.cc', 'compute/kernels/vector_cumulative_ops.cc', diff --git a/cpp/src/arrow/testing/process.cc b/cpp/src/arrow/testing/process.cc index e5632e47253..40538bd9862 100644 --- a/cpp/src/arrow/testing/process.cc +++ b/cpp/src/arrow/testing/process.cc @@ -176,7 +176,7 @@ class Process::Impl { for (const auto& kv : process::environment::current()) { env[kv.key()] = process::environment::value(kv.value()); } - env["PATH"] = process::environment::value(current_exe.parent_path()); + env["PATH"] = process::environment::value(current_exe.parent_path().string()); executable_ = process::environment::find_executable(name, env); # else executable_ = process::search_path(name, {current_exe.parent_path()}); diff --git a/cpp/src/arrow/util/rle_encoding_internal.h b/cpp/src/arrow/util/rle_encoding_internal.h index c231c9a63eb..2420270f3ab 100644 --- a/cpp/src/arrow/util/rle_encoding_internal.h +++ b/cpp/src/arrow/util/rle_encoding_internal.h @@ -657,13 +657,14 @@ auto RleBitPackedParser::PeekImpl(Handler&& handler) const const auto header_bytes = bit_util::ParseLeadingLEB128(data_, kMaxSize, &run_len_type); if (ARROW_PREDICT_FALSE(header_bytes == 0)) { - // Malfomrmed LEB128 data + // Malformed LEB128 data return {0, ControlFlow::Break}; } const bool is_bit_packed = run_len_type & 1; const uint32_t count = run_len_type >> 1; if (is_bit_packed) { + // Bit-packed run constexpr auto kMaxCount = bit_util::CeilDiv(internal::max_size_for_v, 8); if (ARROW_PREDICT_FALSE(count == 0 || count > kMaxCount)) { // Illegal number of encoded values @@ -672,17 +673,21 @@ 
auto RleBitPackedParser::PeekImpl(Handler&& handler) const ARROW_DCHECK_LT(static_cast(count) * 8, internal::max_size_for_v); + // Count Already divided by 8 for byte size calculations + const auto bytes_read = header_bytes + static_cast(count) * value_bit_width_; + if (ARROW_PREDICT_FALSE(bytes_read > data_size_)) { + // Bit-packed run would overflow data buffer + return {0, ControlFlow::Break}; + } const auto values_count = static_cast(count * 8); - // Count Already divided by 8 - const auto bytes_read = - header_bytes + static_cast(count) * value_bit_width_; auto control = handler.OnBitPackedRun( BitPackedRun(data_ + header_bytes, values_count, value_bit_width_)); - return {bytes_read, control}; + return {static_cast(bytes_read), control}; } + // RLE run if (ARROW_PREDICT_FALSE(count == 0)) { // Illegal number of encoded values return {0, ControlFlow::Break}; @@ -694,6 +699,11 @@ auto RleBitPackedParser::PeekImpl(Handler&& handler) const ARROW_DCHECK_LT(value_bytes, internal::max_size_for_v); const auto bytes_read = header_bytes + static_cast(value_bytes); + if (ARROW_PREDICT_FALSE(bytes_read > data_size_)) { + // RLE run would overflow data buffer + return {0, ControlFlow::Break}; + } + auto control = handler.OnRleRun(RleRun(data_ + header_bytes, values_count, value_bit_width_)); @@ -1079,7 +1089,6 @@ auto RleBitPackedDecoder::GetSpaced(Converter converter, // There may be remaining null if they are not greedily filled by either decoder calls check_and_handle_fully_null_remaining(); - ARROW_DCHECK(batch.is_done() || exhausted()); return batch.total_read(); } diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc index 3fbe80d4cc3..dc1ac9dfd26 100644 --- a/cpp/src/gandiva/tests/projector_test.cc +++ b/cpp/src/gandiva/tests/projector_test.cc @@ -389,6 +389,10 @@ TEST_F(TestProjector, TestAllIntTypes) { } TEST_F(TestProjector, TestExtendedMath) { +#ifdef __aarch64__ + GTEST_SKIP() << "Failed on aarch64 with 'JIT session 
error: Symbols not found: [ " + "__multf3, __subtf3, __trunctfdf2, __extenddftf2, __divtf3 ]'"; +#endif // schema for input fields auto field0 = arrow::field("f0", arrow::float64()); auto field1 = arrow::field("f1", arrow::float64()); diff --git a/cpp/src/parquet/decoder.cc b/cpp/src/parquet/decoder.cc index 46d1c201e92..b6d79665621 100644 --- a/cpp/src/parquet/decoder.cc +++ b/cpp/src/parquet/decoder.cc @@ -2082,9 +2082,12 @@ class DeltaByteArrayDecoderImpl : public TypedDecoderImpl { int64_t valid_bits_offset, typename EncodingTraits::Accumulator* out, int* out_num_values) { - std::vector values(num_values); + std::vector values(num_values - null_count); const int num_valid_values = GetInternal(values.data(), num_values - null_count); - DCHECK_EQ(num_values - null_count, num_valid_values); + if (ARROW_PREDICT_FALSE(num_values - null_count != num_valid_values)) { + throw ParquetException("Expected to decode ", num_values - null_count, + " values, but decoded ", num_valid_values, " values."); + } auto visit_binary_helper = [&](auto* helper) { auto values_ptr = reinterpret_cast(values.data()); diff --git a/cpp/src/parquet/schema_test.cc b/cpp/src/parquet/schema_test.cc index c33e5ccf4a5..2950a7df70f 100644 --- a/cpp/src/parquet/schema_test.cc +++ b/cpp/src/parquet/schema_test.cc @@ -1580,7 +1580,8 @@ TEST(TestLogicalTypeOperation, LogicalTypeRepresentation) { LogicalType::EdgeInterpolationAlgorithm::KARNEY), "Geography(crs=srid:1234, algorithm=karney)", R"({"Type": "Geography", "crs": "srid:1234", "algorithm": "karney"})"}, - {LogicalType::Variant(), "Variant", R"({"Type": "Variant"})"}, + {LogicalType::Variant(), "Variant(1)", R"({"Type": "Variant", "SpecVersion": 1})"}, + {LogicalType::Variant(2), "Variant(2)", R"({"Type": "Variant", "SpecVersion": 2})"}, {LogicalType::None(), "None", R"({"Type": "None"})"}, }; @@ -2353,6 +2354,37 @@ TEST(TestLogicalTypeSerialization, Roundtrips) { // Group nodes ... 
ConfirmGroupNodeRoundtrip("map", LogicalType::Map()); ConfirmGroupNodeRoundtrip("list", LogicalType::List()); + ConfirmGroupNodeRoundtrip("variant", LogicalType::Variant()); +} + +TEST(TestLogicalTypeSerialization, VariantSpecificationVersion) { + // Confirm that Variant logical type sets specification_version to expected value in + // thrift serialization + constexpr int8_t spec_version = 2; + auto metadata = PrimitiveNode::Make("metadata", Repetition::REQUIRED, Type::BYTE_ARRAY); + auto value = PrimitiveNode::Make("value", Repetition::REQUIRED, Type::BYTE_ARRAY); + NodePtr variant_node = + GroupNode::Make("variant", Repetition::REQUIRED, {metadata, value}, + LogicalType::Variant(spec_version)); + + // Verify variant logical type + auto logical_type = variant_node->logical_type(); + ASSERT_TRUE(logical_type->is_variant()); + const auto& variant_type = checked_cast(*logical_type); + ASSERT_EQ(variant_type.spec_version(), spec_version); + + // Verify thrift serialization + std::vector elements; + ToParquet(reinterpret_cast(variant_node.get()), &elements); + + // Verify that logicalType is set and is VARIANT + ASSERT_EQ(elements[0].name, "variant"); + ASSERT_TRUE(elements[0].__isset.logicalType); + ASSERT_TRUE(elements[0].logicalType.__isset.VARIANT); + + // Verify that specification_version is set properly + ASSERT_TRUE(elements[0].logicalType.VARIANT.__isset.specification_version); + ASSERT_EQ(elements[0].logicalType.VARIANT.specification_version, spec_version); } } // namespace schema diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index 9f5003b79e5..f545f512201 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -591,7 +591,12 @@ std::shared_ptr LogicalType::FromThrift( return GeographyLogicalType::Make(std::move(crs), algorithm); } else if (type.__isset.VARIANT) { - return VariantLogicalType::Make(); + int8_t spec_version = kVariantSpecVersion; + if (type.VARIANT.__isset.specification_version) { + spec_version = 
type.VARIANT.specification_version; + } + + return VariantLogicalType::Make(spec_version); } else { // Sentinel type for one we do not recognize return UndefinedLogicalType::Make(); @@ -659,8 +664,8 @@ std::shared_ptr LogicalType::Geography( return GeographyLogicalType::Make(std::move(crs), algorithm); } -std::shared_ptr LogicalType::Variant() { - return VariantLogicalType::Make(); +std::shared_ptr LogicalType::Variant(int8_t spec_version) { + return VariantLogicalType::Make(spec_version); } std::shared_ptr LogicalType::None() { return NoLogicalType::Make(); } @@ -1958,16 +1963,53 @@ class LogicalType::Impl::Variant final : public LogicalType::Impl::Incompatible, public: friend class VariantLogicalType; - OVERRIDE_TOSTRING(Variant) - OVERRIDE_TOTHRIFT(VariantType, VARIANT) + std::string ToString() const override; + std::string ToJSON() const override; + format::LogicalType ToThrift() const override; + + int8_t spec_version() const { return spec_version_; } private: - Variant() + explicit Variant(const int8_t spec_version) : LogicalType::Impl(LogicalType::Type::VARIANT, SortOrder::UNKNOWN), - LogicalType::Impl::Inapplicable() {} + LogicalType::Impl::Inapplicable() { + this->spec_version_ = spec_version; + } + + int8_t spec_version_; }; -GENERATE_MAKE(Variant) +int8_t VariantLogicalType::spec_version() const { + return (dynamic_cast(*impl_)).spec_version(); +} + +std::string LogicalType::Impl::Variant::ToString() const { + std::stringstream type; + type << "Variant(" << static_cast(spec_version_) << ")"; + return type.str(); +} + +std::string LogicalType::Impl::Variant::ToJSON() const { + std::stringstream json; + json << R"({"Type": "Variant", "SpecVersion": )" << static_cast(spec_version_) + << "}"; + + return json.str(); +} + +format::LogicalType LogicalType::Impl::Variant::ToThrift() const { + format::LogicalType type; + format::VariantType variant_type; + variant_type.__set_specification_version(spec_version_); + type.__set_VARIANT(variant_type); + return type; 
+} + +std::shared_ptr VariantLogicalType::Make(const int8_t spec_version) { + auto logical_type = std::shared_ptr(new VariantLogicalType()); + logical_type->impl_.reset(new LogicalType::Impl::Variant(spec_version)); + return logical_type; +} class LogicalType::Impl::No final : public LogicalType::Impl::SimpleCompatible, public LogicalType::Impl::UniversalApplicable { diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h index c2040e555fd..7e8a18fc94d 100644 --- a/cpp/src/parquet/types.h +++ b/cpp/src/parquet/types.h @@ -178,6 +178,9 @@ class PARQUET_EXPORT LogicalType { KARNEY = 5 }; + /// \brief The latest supported Variant specification version by this library + static constexpr int8_t kVariantSpecVersion = 1; + /// \brief If possible, return a logical type equivalent to the given legacy /// converted type (and decimal metadata if applicable). static std::shared_ptr FromConvertedType( @@ -224,7 +227,8 @@ class PARQUET_EXPORT LogicalType { static std::shared_ptr BSON(); static std::shared_ptr UUID(); static std::shared_ptr Float16(); - static std::shared_ptr Variant(); + static std::shared_ptr Variant( + int8_t specVersion = kVariantSpecVersion); static std::shared_ptr Geometry(std::string crs = ""); @@ -495,7 +499,10 @@ class PARQUET_EXPORT GeographyLogicalType : public LogicalType { /// \brief Allowed for group nodes only. 
class PARQUET_EXPORT VariantLogicalType : public LogicalType { public: - static std::shared_ptr Make(); + static std::shared_ptr Make( + int8_t specVersion = kVariantSpecVersion); + + int8_t spec_version() const; private: VariantLogicalType() = default; diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 9f16db79f12..10aad92785a 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -82,8 +82,8 @@ ARROW_JEMALLOC_BUILD_VERSION=5.3.0 ARROW_JEMALLOC_BUILD_SHA256_CHECKSUM=2db82d1e7119df3e71b7640219b6dfe84789bc0537983c3b7ac4f7189aecfeaa ARROW_LZ4_BUILD_VERSION=v1.10.0 ARROW_LZ4_BUILD_SHA256_CHECKSUM=537512904744b35e232912055ccf8ec66d768639ff3abe5788d90d792ec5f48b -ARROW_MIMALLOC_BUILD_VERSION=v2.2.4 -ARROW_MIMALLOC_BUILD_SHA256_CHECKSUM=754a98de5e2912fddbeaf24830f982b4540992f1bab4a0a8796ee118e0752bda +ARROW_MIMALLOC_BUILD_VERSION=v3.1.5 +ARROW_MIMALLOC_BUILD_SHA256_CHECKSUM=1c6949032069d5ebea438ec5cedd602d06f40a92ddf0f0d9dcff0993e5f6635c ARROW_NLOHMANN_JSON_BUILD_VERSION=v3.12.0 ARROW_NLOHMANN_JSON_BUILD_SHA256_CHECKSUM=4b92eb0c06d10683f7447ce9406cb97cd4b453be18d7279320f7b2f025c10187 ARROW_OPENTELEMETRY_BUILD_VERSION=v1.21.0 diff --git a/cpp/valgrind.supp b/cpp/valgrind.supp index 2289e819e3d..6a2ad3f6b3b 100644 --- a/cpp/valgrind.supp +++ b/cpp/valgrind.supp @@ -82,3 +82,15 @@ fun:curl_share_init fun:*Azure*CurlConnection* } +{ + :leak on library initialization + Memcheck:Leak + ... + fun:*google*protobuf*AddDescriptorsRunner* +} +{ + :leak on library initialization + Memcheck:Leak + ... 
+ fun:*google*protobuf*InternalRegisterGeneratedFile* +} diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json index 68f20663b59..c1d28a2a559 100644 --- a/cpp/vcpkg.json +++ b/cpp/vcpkg.json @@ -1,6 +1,6 @@ { "name": "arrow", - "version-string": "22.0.0-SNAPSHOT", + "version-string": "22.0.0", "dependencies": [ "abseil", { diff --git a/dev/archery/archery/docker/core.py b/dev/archery/archery/docker/core.py index 7bdc53a3df6..34ea42d6d2e 100644 --- a/dev/archery/archery/docker/core.py +++ b/dev/archery/archery/docker/core.py @@ -433,16 +433,24 @@ def _push(service): else: return self._execute_compose(*args, service['name']) + service = self.config.get(service_name) + if user is not None: + login_args = ['--username', user, '--password-stdin'] + login_kwargs = {'input': password.encode()} + image = service['image'] + # [[HOST[:PORT]/]NAMESPACE/]REPOSITORY[:TAG] + components = image.split('/', 3) + if len(components) == 3: + server = components[0] + login_args.append(server) try: - # TODO(kszucs): have an option for a prompt - self._execute_docker('login', '-u', user, '-p', password) + self._execute_docker('login', *login_args, **login_kwargs) except subprocess.CalledProcessError: # hide credentials msg = f'Failed to push `{service_name}`, check the passed credentials' raise RuntimeError(msg) from None - service = self.config.get(service_name) for ancestor in service['ancestors']: _push(self.config.get(ancestor)) _push(service) diff --git a/dev/archery/archery/docker/tests/test_docker.py b/dev/archery/archery/docker/tests/test_docker.py index e4a278712ea..631932d0fa2 100644 --- a/dev/archery/archery/docker/tests/test_docker.py +++ b/dev/archery/archery/docker/tests/test_docker.py @@ -140,39 +140,39 @@ services: conda-cpp: - image: org/conda-cpp + image: ${REPO}:conda-cpp build: context: . dockerfile: ci/docker/conda-cpp.dockerfile conda-python: - image: org/conda-python + image: ${REPO}:conda-python build: context: . 
dockerfile: ci/docker/conda-cpp.dockerfile args: python: 3.8 conda-python-pandas: - image: org/conda-python-pandas + image: ${REPO}:conda-python-pandas build: context: . dockerfile: ci/docker/conda-python-pandas.dockerfile conda-python-dask: - image: org/conda-python-dask + image: ${REPO}:conda-python-dask ubuntu-cpp: - image: org/ubuntu-cpp + image: ${REPO}:ubuntu-cpp build: context: . dockerfile: ci/docker/ubuntu-${UBUNTU}-cpp.dockerfile ubuntu-cpp-cmake32: - image: org/ubuntu-cpp-cmake32 + image: ${REPO}:ubuntu-cpp-cmake32 ubuntu-c-glib: - image: org/ubuntu-c-glib + image: ${REPO}:ubuntu-c-glib environment: <<: [*sccache] ubuntu-ruby: - image: org/ubuntu-ruby + image: ${REPO}:ubuntu-ruby ubuntu-cuda: - image: org/ubuntu-cuda + image: ${REPO}:ubuntu-cuda environment: CUDA_ENV: 1 OTHER_ENV: 2 @@ -182,6 +182,7 @@ """ arrow_compose_env = { + 'REPO': 'apache/arrow', 'UBUNTU': '20.04', # overridden below 'PYTHON': '3.8', 'PANDAS': 'latest', @@ -484,7 +485,7 @@ def test_compose_run_with_resource_limits(arrow_compose_path): "--cpuset-cpus=0,1", "--memory=7g", "--memory-swap=7g", - "org/conda-cpp" + "apache/arrow:conda-cpp" ]), ] compose = DockerCompose(arrow_compose_path) @@ -493,10 +494,28 @@ def test_compose_run_with_resource_limits(arrow_compose_path): def test_compose_push(arrow_compose_path): - compose = DockerCompose(arrow_compose_path, params=dict(PYTHON='3.9')) + compose = DockerCompose(arrow_compose_path, params=dict(PYTHON="3.9")) + expected_env = PartialEnv(PYTHON="3.9") + expected_calls = [ + mock.call(["docker", "login", "--username", "user", + "--password-stdin"], input=b"pass", check=True), + ] + for image in ["conda-cpp", "conda-python", "conda-python-pandas"]: + expected_calls.append( + mock.call(["docker", "compose", f"--file={compose.config.path}", + "push", image], check=True, env=expected_env) + ) + with assert_subprocess_calls(expected_calls): + compose.push("conda-python-pandas", user="user", password="pass") + + +def 
test_compose_push_custom_server(arrow_compose_path): + compose = DockerCompose(arrow_compose_path, params=dict( + PYTHON="3.9", REPO="ghcr.io/apache/arrow-dev")) expected_env = PartialEnv(PYTHON="3.9") expected_calls = [ - mock.call(["docker", "login", "-u", "user", "-p", "pass"], check=True), + mock.call(["docker", "login", "--username", "user", "--password-stdin", + "ghcr.io"], input=b"pass", check=True), ] for image in ["conda-cpp", "conda-python", "conda-python-pandas"]: expected_calls.append( @@ -504,7 +523,7 @@ def test_compose_push(arrow_compose_path): "push", image], check=True, env=expected_env) ) with assert_subprocess_calls(expected_calls): - compose.push('conda-python-pandas', user='user', password='pass') + compose.push("conda-python-pandas", user="user", password="pass") def test_compose_error(arrow_compose_path): @@ -533,7 +552,7 @@ def test_image_with_gpu(arrow_compose_path): "-e", "CUDA_ENV=1", "-e", "OTHER_ENV=2", "-v", "/host:/container", - "org/ubuntu-cuda", + "apache/arrow:ubuntu-cuda", "/bin/bash", "-c", "echo 1 > /tmp/dummy && cat /tmp/dummy", ] ] @@ -560,7 +579,7 @@ def test_service_info(arrow_compose_path): compose = DockerCompose(arrow_compose_path) service = compose.config.raw_config["services"]["conda-cpp"] assert compose.info(service) == [ - " image: org/conda-cpp", + " image: ${REPO}:conda-cpp", " build", " context: .", " dockerfile: ci/docker/conda-cpp.dockerfile" diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index e202c7b1e1e..56c671910f0 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -852,7 +852,7 @@ test_linux_wheels() { local arch="x86_64" fi - local python_versions="${TEST_PYTHON_VERSIONS:-3.10 3.11 3.12 3.13}" + local python_versions="${TEST_PYTHON_VERSIONS:-3.10 3.11 3.12 3.13 3.14}" local platform_tags="${TEST_WHEEL_PLATFORM_TAGS:-manylinux_2_28_${arch}}" if [ "${SOURCE_KIND}" != "local" ]; then @@ -891,11 +891,11 @@ 
test_macos_wheels() { # apple silicon processor if [ "$(uname -m)" = "arm64" ]; then - local python_versions="3.10 3.11 3.12 3.13" + local python_versions="3.10 3.11 3.12 3.13 3.14" local platform_tags="macosx_12_0_arm64" local check_flight=OFF else - local python_versions="3.10 3.11 3.12 3.13" + local python_versions="3.10 3.11 3.12 3.13 3.14" local platform_tags="macosx_12_0_x86_64" fi diff --git a/dev/tasks/homebrew-formulae/apache-arrow-glib.rb b/dev/tasks/homebrew-formulae/apache-arrow-glib.rb index dd6f25283a5..1c87c353de8 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow-glib.rb +++ b/dev/tasks/homebrew-formulae/apache-arrow-glib.rb @@ -29,7 +29,7 @@ class ApacheArrowGlib < Formula desc "GLib bindings for Apache Arrow" homepage "https://arrow.apache.org/" - url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-22.0.0-SNAPSHOT/apache-arrow-22.0.0-SNAPSHOT.tar.gz" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-22.0.0/apache-arrow-22.0.0.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" license "Apache-2.0" head "https://github.com/apache/arrow.git", branch: "main" diff --git a/dev/tasks/homebrew-formulae/apache-arrow.rb b/dev/tasks/homebrew-formulae/apache-arrow.rb index 3cbd808357c..06949bd10f3 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow.rb +++ b/dev/tasks/homebrew-formulae/apache-arrow.rb @@ -29,7 +29,7 @@ class ApacheArrow < Formula desc "Columnar in-memory analytics layer designed to accelerate big data" homepage "https://arrow.apache.org/" - url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-22.0.0-SNAPSHOT/apache-arrow-22.0.0-SNAPSHOT.tar.gz" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-22.0.0/apache-arrow-22.0.0.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" license "Apache-2.0" head "https://github.com/apache/arrow.git", branch: "main" diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog 
b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog index 86cacd308e5..8d0ec6a6a83 100644 --- a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow-apt-source (22.0.0-1) unstable; urgency=low + + * New upstream release. + + -- Raúl Cumplido Mon, 20 Oct 2025 14:48:34 -0000 + apache-arrow-apt-source (21.0.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in index 8e75611889c..2053b950622 100644 --- a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in +++ b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in @@ -102,6 +102,9 @@ else fi %changelog +* Mon Oct 20 2025 Raúl Cumplido - 22.0.0-1 +- New upstream release. + * Fri Jul 11 2025 Bryce Mecum - 21.0.0-1 - New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/debian/changelog b/dev/tasks/linux-packages/apache-arrow/debian/changelog index 01b71214f07..8394a2c05d1 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow (22.0.0-1) unstable; urgency=low + + * New upstream release. + + -- Raúl Cumplido Mon, 20 Oct 2025 14:48:34 -0000 + apache-arrow (21.0.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index 6afa6d25421..32bd076e821 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -931,6 +931,9 @@ Documentation for Apache Parquet GLib. %endif %changelog +* Mon Oct 20 2025 Raúl Cumplido - 22.0.0-1 +- New upstream release. 
+ * Fri Jul 11 2025 Bryce Mecum - 21.0.0-1 - New upstream release. diff --git a/dev/tasks/python-wheels/github.linux.yml b/dev/tasks/python-wheels/github.linux.yml index 09822ed2ffc..871e044fa21 100644 --- a/dev/tasks/python-wheels/github.linux.yml +++ b/dev/tasks/python-wheels/github.linux.yml @@ -42,16 +42,25 @@ jobs: {% endif %} PYTHON: "{{ python_version }}" PYTHON_ABI_TAG: "{{ python_abi_tag }}" + {% if python_version == "3.14" %} + PYTHON_IMAGE_TAG: "3.14" + PYTHON_PATCH_VERSION: "3.14.0" + {% elif python_version == "3.13" %} PYTHON_IMAGE_TAG: "{{ python_version }}" + PYTHON_PATCH_VERSION: "3.13.7" + {% else %} + PYTHON_IMAGE_TAG: "{{ python_version }}" + {% endif %} steps: {{ macros.github_checkout_arrow()|indent }} {{ macros.github_install_archery()|indent }} {{ macros.github_login_dockerhub()|indent }} + # Detect free-threaded Python builds - name: Prepare run: | - if [ "${PYTHON_ABI_TAG}" = "cp313t" ]; then + if [[ "${PYTHON_ABI_TAG}" == *t ]]; then test_image_prefix=python-free-threaded else test_image_prefix=python diff --git a/dev/tasks/python-wheels/github.osx.yml b/dev/tasks/python-wheels/github.osx.yml index 140971626bf..ef8e90f4129 100644 --- a/dev/tasks/python-wheels/github.osx.yml +++ b/dev/tasks/python-wheels/github.osx.yml @@ -27,7 +27,7 @@ PYARROW_VERSION: "{{ arrow.no_rc_version }}" PYTHON_VERSION: "{{ python_version }}" PYTHON_ABI_TAG: "{{ python_abi_tag }}" - {% if python_abi_tag == "cp313t" %} + {% if python_abi_tag.endswith('t') %} PYTHON: "/Library/Frameworks/PythonT.framework/Versions/{{ python_version }}/bin/python{{ python_version }}t" {% else %} PYTHON: "/Library/Frameworks/Python.framework/Versions/{{ python_version }}/bin/python{{ python_version }}" @@ -94,7 +94,7 @@ jobs: --x-feature=s3 - name: Install Python {{ python_version }} - run: sudo arrow/ci/scripts/install_python.sh macos {{ "3.13t" if python_abi_tag == "cp313t" else python_version }} + run: sudo arrow/ci/scripts/install_python.sh macos {{ python_version + "t" if 
python_abi_tag.endswith('t') else python_version }} - name: Build Wheel run: | @@ -131,7 +131,7 @@ jobs: - name: Test Wheel env: PYTEST_ADDOPTS: "-k 'not test_cancellation'" - PYTHON_GIL: {{ 0 if python_abi_tag == "cp313t" else 1 }} + PYTHON_GIL: {{ 0 if python_abi_tag.endswith('t') else 1 }} run: | $PYTHON -m venv test-env source test-env/bin/activate diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 11831e646c3..749042779ed 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -184,7 +184,9 @@ tasks: ("3.11", "cp311", "cp311"), ("3.12", "cp312", "cp312"), ("3.13", "cp313", "cp313"), - ("3.13", "cp313", "cp313t")] %} + ("3.13", "cp313", "cp313t"), + ("3.14", "cp314", "cp314"), + ("3.14", "cp314", "cp314t")] %} {############################## Wheel Linux ##################################} @@ -555,7 +557,7 @@ tasks: UBUNTU: 22.04 image: ubuntu-cpp-emscripten -{% for python_version in ["3.10", "3.11", "3.12", "3.13"] %} +{% for python_version in ["3.10", "3.11", "3.12", "3.13", "3.14"] %} test-conda-python-{{ python_version }}: ci: github template: docker-tests/github.linux.yml @@ -733,12 +735,12 @@ tasks: ci: github template: r/github.linux.offline.build.yml - test-r-rhub-debian-gcc-release-custom-ccache: + test-r-rhub-ubuntu-gcc12-custom-ccache: ci: azure template: r/azure.linux.yml params: r_org: rhub - r_image: debian-gcc-release + r_image: ubuntu-gcc12 r_tag: latest r_custom_ccache: true diff --git a/docker-compose.yml b/docker-compose.yml index 6d3babd4311..937620f82ec 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -130,6 +130,7 @@ x-hierarchy: - conda-python-spark - conda-verify-rc - conan + - cpp-jni - debian-cpp: - debian-c-glib: - debian-ruby @@ -187,6 +188,8 @@ volumes: name: ${ARCH}-alpine-linux-ccache conda-ccache: name: ${ARCH}-conda-ccache + cpp-jni-ccache: + name: ${ARCH}-cpp-jni-ccache debian-ccache: name: ${ARCH}-debian-${DEBIAN}-ccache fedora-ccache: @@ -765,6 +768,37 @@ services: /arrow/ci/scripts/conan_setup.sh && 
/arrow/ci/scripts/conan_build.sh /arrow /build" + cpp-jni: + # Test for the build configuration for JNI. + # + # Usage: + # docker compose run --rm cpp-jni + # Parameters: + # ARCH: amd64, arm64v8 + # ARCH_ALIAS: x86_64, aarch64 + # ARCH_SHORT: amd64, arm64 + image: ${REPO}:${ARCH}-cpp-jni-${VCPKG} + build: + args: + arch: ${ARCH} + arch_short: ${ARCH_SHORT} + # See available versions at: + # https://quay.io/repository/pypa/manylinux_2_28_x86_64?tab=tags + # https://quay.io/repository/pypa/manylinux_2_28_aarch64?tab=tags + base: quay.io/pypa/manylinux_2_28_${ARCH_ALIAS}:2025.10.09-1 + vcpkg: ${VCPKG} + context: . + dockerfile: ci/docker/cpp-jni.dockerfile + cache_from: + - ${REPO}:${ARCH}-cpp-jni-${VCPKG} + secrets: *vcpkg-build-secrets + environment: + <<: [*common, *ccache] + volumes: + - .:/arrow:delegated + - ${DOCKER_VOLUME_PREFIX}cpp-jni-ccache:/ccache:delegated + command: *cpp-command + ############################### C GLib ###################################### debian-c-glib: @@ -1144,7 +1178,7 @@ services: args: arch: ${ARCH} arch_short: ${ARCH_SHORT} - base: quay.io/pypa/manylinux_2_28_${ARCH_ALIAS}:2024-08-03-32dfa47 + base: quay.io/pypa/manylinux_2_28_${ARCH_ALIAS}:2025-06-04-496f7e1 manylinux: 2_28 python: ${PYTHON} python_abi_tag: ${PYTHON_ABI_TAG} @@ -1169,7 +1203,7 @@ services: args: arch: ${ARCH} arch_short: ${ARCH_SHORT} - base: quay.io/pypa/musllinux_1_2_${ARCH_ALIAS}:2025-01-18-a325f1d + base: quay.io/pypa/musllinux_1_2_${ARCH_ALIAS}:2025-06-04-496f7e1 musllinux: 1_2 python: ${PYTHON} python_abi_tag: ${PYTHON_ABI_TAG} @@ -1205,14 +1239,16 @@ services: # TODO: Remove this when the official Docker Python image supports the free-threaded build. # See https://github.com/docker-library/python/issues/947 for more info. 
python-free-threaded-wheel-musllinux-test-imports: - image: ${REPO}:${ARCH}-python-3.13-free-threaded-wheel-musllinux-test-imports + image: ${REPO}:${ARCH}-python-${PYTHON_IMAGE_TAG}-free-threaded-wheel-musllinux-test-imports build: args: base: "${ARCH}/alpine:${ALPINE_LINUX}" + python_version: ${PYTHON} + python_patch_version: ${PYTHON_PATCH_VERSION} context: . dockerfile: ci/docker/python-free-threaded-wheel-musllinux-test-imports.dockerfile cache_from: - - ${REPO}:${ARCH}-python-3.13-free-threaded-wheel-musllinux-test-imports + - ${REPO}:${ARCH}-python-${PYTHON_IMAGE_TAG}-free-threaded-wheel-musllinux-test-imports shm_size: 2G volumes: - .:/arrow:delegated @@ -1220,6 +1256,7 @@ services: <<: *common CHECK_IMPORTS: "ON" CHECK_UNITTESTS: "OFF" + PYTHON: ${PYTHON} command: /arrow/ci/scripts/python_wheel_unix_test.sh /arrow python-wheel-musllinux-test-unittests: @@ -1245,14 +1282,16 @@ services: # TODO: Remove this when the official Docker Python image supports the free-threaded build. # See https://github.com/docker-library/python/issues/947 for more info. python-free-threaded-wheel-musllinux-test-unittests: - image: ${REPO}:${ARCH}-python-3.13-free-threaded-wheel-musllinux-test-unittests + image: ${REPO}:${ARCH}-python-${PYTHON_IMAGE_TAG}-free-threaded-wheel-musllinux-test-unittests build: args: base: "${ARCH}/alpine:${ALPINE_LINUX}" + python_version: ${PYTHON} + python_patch_version: ${PYTHON_PATCH_VERSION} context: . 
dockerfile: ci/docker/python-free-threaded-wheel-musllinux-test-unittests.dockerfile cache_from: - - ${REPO}:${ARCH}-python-3.13-free-threaded-wheel-musllinux-test-unittests + - ${REPO}:${ARCH}-python-${PYTHON_IMAGE_TAG}-free-threaded-wheel-musllinux-test-unittests shm_size: 2G volumes: - .:/arrow:delegated @@ -1260,6 +1299,7 @@ services: <<: *common CHECK_IMPORTS: "OFF" CHECK_UNITTESTS: "ON" + PYTHON: ${PYTHON} command: /arrow/ci/scripts/python_wheel_unix_test.sh /arrow python-wheel-manylinux-test-imports: @@ -1277,14 +1317,15 @@ services: # TODO: Remove this when the official Docker Python image supports the free-threaded build. # See https://github.com/docker-library/python/issues/947 for more info. python-free-threaded-wheel-manylinux-test-imports: - image: ${REPO}:${ARCH}-python-3.13-free-threaded-wheel-manylinux-test-imports + image: ${REPO}:${ARCH}-python-${PYTHON_IMAGE_TAG}-free-threaded-wheel-manylinux-test-imports build: args: base: "${ARCH}/ubuntu:${UBUNTU}" + python_version: ${PYTHON} context: . dockerfile: ci/docker/python-free-threaded-wheel-manylinux-test-imports.dockerfile cache_from: - - ${REPO}:${ARCH}-python-3.13-free-threaded-wheel-manylinux-test-imports + - ${REPO}:${ARCH}-python-${PYTHON_IMAGE_TAG}-free-threaded-wheel-manylinux-test-imports shm_size: 2G volumes: - .:/arrow:delegated @@ -1292,6 +1333,7 @@ services: <<: *common CHECK_IMPORTS: "ON" CHECK_UNITTESTS: "OFF" + PYTHON: ${PYTHON} command: /arrow/ci/scripts/python_wheel_unix_test.sh /arrow python-wheel-manylinux-test-unittests: @@ -1318,14 +1360,15 @@ services: # TODO: Remove this when the official Docker Python image supports the free-threaded build. # See https://github.com/docker-library/python/issues/947 for more info. 
python-free-threaded-wheel-manylinux-test-unittests: - image: ${REPO}:${ARCH}-python-3.13-free-threaded-wheel-manylinux-test-unittests + image: ${REPO}:${ARCH}-python-${PYTHON_IMAGE_TAG}-free-threaded-wheel-manylinux-test-unittests build: args: base: "${ARCH}/ubuntu:${UBUNTU}" + python_version: ${PYTHON} context: . dockerfile: ci/docker/python-free-threaded-wheel-manylinux-test-unittests.dockerfile cache_from: - - ${REPO}:${ARCH}-python-3.13-free-threaded-wheel-manylinux-test-unittests + - ${REPO}:${ARCH}-python-${PYTHON_IMAGE_TAG}-free-threaded-wheel-manylinux-test-unittests shm_size: 2G volumes: - .:/arrow:delegated @@ -1333,6 +1376,7 @@ services: <<: *common CHECK_IMPORTS: "OFF" CHECK_UNITTESTS: "ON" + PYTHON: ${PYTHON} command: /arrow/ci/scripts/python_wheel_unix_test.sh /arrow python-wheel-windows-vs2022-base: @@ -1407,6 +1451,7 @@ services: build: args: base: ${REPO}:python-wheel-windows-test-vs2022-base-${PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION} + python: ${PYTHON} context: . dockerfile: ci/docker/python-free-threaded-wheel-windows-test-vs2022.dockerfile volumes: *python-wheel-windows-vs2022-volumes diff --git a/docs/source/_static/versions.json b/docs/source/_static/versions.json index 26d1834bb0f..6feaa86e1a7 100644 --- a/docs/source/_static/versions.json +++ b/docs/source/_static/versions.json @@ -1,15 +1,20 @@ [ { - "name": "22.0 (dev)", + "name": "23.0 (dev)", "version": "dev/", "url": "https://arrow.apache.org/docs/dev/" }, { - "name": "21.0 (stable)", + "name": "22.0 (stable)", "version": "", "url": "https://arrow.apache.org/docs/", "preferred": true }, + { + "name": "21.0", + "version": "21.0/", + "url": "https://arrow.apache.org/docs/21.0/" + }, { "name": "20.0", "version": "20.0/", diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt index ca5a323b15f..9479015b1e4 100644 --- a/matlab/CMakeLists.txt +++ b/matlab/CMakeLists.txt @@ -100,7 +100,7 @@ endfunction() set(CMAKE_CXX_STANDARD 17) -set(MLARROW_VERSION "22.0.0-SNAPSHOT") 
+set(MLARROW_VERSION "22.0.0") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" MLARROW_BASE_VERSION "${MLARROW_VERSION}") project(mlarrow VERSION "${MLARROW_BASE_VERSION}") diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index fc26ea71bde..0587720f409 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -28,7 +28,7 @@ project(pyarrow) # which in turn meant that Py_GIL_DISABLED was not set. set(CMAKE_NO_SYSTEM_FROM_IMPORTED ON) -set(PYARROW_VERSION "22.0.0-SNAPSHOT") +set(PYARROW_VERSION "22.0.0") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" PYARROW_BASE_VERSION "${PYARROW_VERSION}") # Generate SO version and full SO version diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 009ab1e849b..ec361159c5f 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -292,10 +292,12 @@ def test_to_pandas_zero_copy(): for i in range(10): series = arr.to_pandas() - assert sys.getrefcount(series) == 2 + # In Python 3.14 interpreter might avoid some + # reference count modifications + assert sys.getrefcount(series) in (1, 2) series = None # noqa - assert sys.getrefcount(arr) == 2 + assert sys.getrefcount(arr) in (1, 2) for i in range(10): arr = pa.array(range(10)) diff --git a/python/pyproject.toml b/python/pyproject.toml index fac3b25c554..a1cab40173a 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -39,6 +39,8 @@ classifiers = [ 'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 3.12', 'Programming Language :: Python :: 3.13', + 'Programming Language :: Python :: 3.14', + 'Programming Language :: Python :: Free Threading :: 2 - Beta', ] maintainers = [ {name = "Apache Arrow Developers", email = "dev@arrow.apache.org"} @@ -90,4 +92,4 @@ root = '..' 
version_file = 'pyarrow/_generated_version.py' version_scheme = 'guess-next-dev' git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' -fallback_version = '22.0.0a0' +fallback_version = '22.0.0' diff --git a/python/requirements-wheel-test.txt b/python/requirements-wheel-test.txt index 06c059f591f..b2ff4553b45 100644 --- a/python/requirements-wheel-test.txt +++ b/python/requirements-wheel-test.txt @@ -15,6 +15,7 @@ tzdata; sys_platform == 'win32' numpy~=1.21.3; python_version < "3.11" numpy~=1.23.2; python_version == "3.11" numpy~=1.26.0; python_version == "3.12" -numpy~=2.1.0; python_version >= "3.13" +numpy~=2.1.0; python_version == "3.13" +numpy~=2.3.3; python_version >= "3.14" pandas diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 7888f72ee9f..d12b8a4ba03 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -1,6 +1,6 @@ Package: arrow Title: Integration to 'Apache' 'Arrow' -Version: 21.0.0.9000 +Version: 22.0.0 Authors@R: c( person("Neal", "Richardson", email = "neal.p.richardson@gmail.com", role = c("aut")), person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")), diff --git a/r/NEWS.md b/r/NEWS.md index 3be4bd951b1..fb85b96655d 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -17,7 +17,15 @@ under the License. --> -# arrow 21.0.0.9000 +# arrow 22.0.0 + +## New features + +- `stringr::str_replace_na()` binding implemented (#47521). + +## Minor improvements and fixes + +- Subsecond time variables no longer truncated in `hms::hms()` bindings (#47278) # arrow 21.0.0.1 diff --git a/r/pkgdown/assets/versions.html b/r/pkgdown/assets/versions.html index caac5839059..c90d4ae2164 100644 --- a/r/pkgdown/assets/versions.html +++ b/r/pkgdown/assets/versions.html @@ -1,7 +1,8 @@ -

21.0.0.9000 (dev)

-

21.0.0 (release)

+

22.0.0.9000 (dev)

+

22.0.0 (release)

+

21.0.0

20.0.0

19.0.1

18.1.0

diff --git a/r/pkgdown/assets/versions.json b/r/pkgdown/assets/versions.json index 927b7407c9b..0d783995062 100644 --- a/r/pkgdown/assets/versions.json +++ b/r/pkgdown/assets/versions.json @@ -1,12 +1,16 @@ [ { - "name": "21.0.0.9000 (dev)", + "name": "22.0.0.9000 (dev)", "version": "dev/" }, { - "name": "21.0.0 (release)", + "name": "22.0.0 (release)", "version": "" }, + { + "name": "21.0.0", + "version": "21.0/" + }, { "name": "20.0.0", "version": "20.0/" diff --git a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb index de9372bd49e..0004568f245 100644 --- a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb +++ b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowCUDA - VERSION = "22.0.0-SNAPSHOT" + VERSION = "22.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb index b31eb98b5f5..d27cfb7ffa9 100644 --- a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb +++ b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowDataset - VERSION = "22.0.0-SNAPSHOT" + VERSION = "22.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb b/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb index 751e7c16245..e319cdd4a2f 100644 --- a/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb +++ b/ruby/red-arrow-flight-sql/lib/arrow-flight-sql/version.rb @@ -16,7 +16,7 @@ # under the License. 
module ArrowFlightSQL - VERSION = "22.0.0-SNAPSHOT" + VERSION = "22.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow-flight/lib/arrow-flight/version.rb b/ruby/red-arrow-flight/lib/arrow-flight/version.rb index 89e783944d2..95a837d27f3 100644 --- a/ruby/red-arrow-flight/lib/arrow-flight/version.rb +++ b/ruby/red-arrow-flight/lib/arrow-flight/version.rb @@ -16,7 +16,7 @@ # under the License. module ArrowFlight - VERSION = "22.0.0-SNAPSHOT" + VERSION = "22.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow/lib/arrow/version.rb b/ruby/red-arrow/lib/arrow/version.rb index f19fcfdd09e..ee7606bd0bd 100644 --- a/ruby/red-arrow/lib/arrow/version.rb +++ b/ruby/red-arrow/lib/arrow/version.rb @@ -16,7 +16,7 @@ # under the License. module Arrow - VERSION = "22.0.0-SNAPSHOT" + VERSION = "22.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-gandiva/lib/gandiva/version.rb b/ruby/red-gandiva/lib/gandiva/version.rb index e75303aa970..a1f57e7d47a 100644 --- a/ruby/red-gandiva/lib/gandiva/version.rb +++ b/ruby/red-gandiva/lib/gandiva/version.rb @@ -16,7 +16,7 @@ # under the License. module Gandiva - VERSION = "22.0.0-SNAPSHOT" + VERSION = "22.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-parquet/lib/parquet/version.rb b/ruby/red-parquet/lib/parquet/version.rb index 0f5273bbed2..696db57c683 100644 --- a/ruby/red-parquet/lib/parquet/version.rb +++ b/ruby/red-parquet/lib/parquet/version.rb @@ -16,7 +16,7 @@ # under the License. module Parquet - VERSION = "22.0.0-SNAPSHOT" + VERSION = "22.0.0" module Version numbers, TAG = VERSION.split("-") diff --git a/testing b/testing index 6a7b02fac93..9a02925d1ba 160000 --- a/testing +++ b/testing @@ -1 +1 @@ -Subproject commit 6a7b02fac93d8addbcdbb213264e58bfdc3068e4 +Subproject commit 9a02925d1ba80bd493b6d4da6e8a777588d57ac4