diff --git a/.env b/.env index 456c45b06589..d6d5f9658aae 100644 --- a/.env +++ b/.env @@ -92,7 +92,7 @@ TZ=UTC # Used through compose.yaml and serves as the default version for the # ci/scripts/install_vcpkg.sh script. Prefer to use short SHAs to keep the # docker tags more readable. -VCPKG="66c0373dc7fca549e5803087b9487edfe3aca0a1" # 2026.01.16 Release +VCPKG="77826283b4676c526da3a5c59e6717e3775ba722" # This must be updated when we update # ci/docker/python-*-windows-*.dockerfile or the vcpkg config. diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index 470db4f8b9da..e8a7d5523972 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -47,6 +47,6 @@ rapidjson re2 snappy thrift-cpp>=0.11.0 -xsimd>=14.0 +xsimd>=14.2 zlib zstd diff --git a/ci/vcpkg/vcpkg.json b/ci/vcpkg/vcpkg.json index 75f2b25cc0bf..88a7fd37a042 100644 --- a/ci/vcpkg/vcpkg.json +++ b/ci/vcpkg/vcpkg.json @@ -1,6 +1,7 @@ { "name": "arrow", "version-string": "", + "builtin-baseline": "77826283b4676c526da3a5c59e6717e3775ba722", "description": "Cross-language development platform for in-memory analytics", "homepage": "https://arrow.apache.org", "supports": "x64 | (arm64 & !windows)", @@ -19,7 +20,10 @@ "re2", "snappy", "utf8proc", - "xsimd", + { + "name": "xsimd", + "version>=": "14.2" + }, "zlib", "zstd", { diff --git a/cpp/src/arrow/util/bpacking_simd_kernel_internal.h b/cpp/src/arrow/util/bpacking_simd_kernel_internal.h index fe879bb5b0f2..95ece2675210 100644 --- a/cpp/src/arrow/util/bpacking_simd_kernel_internal.h +++ b/cpp/src/arrow/util/bpacking_simd_kernel_internal.h @@ -151,73 +151,6 @@ constexpr bool IsSse2 = std::is_base_of_v; template constexpr bool IsAvx2 = std::is_base_of_v; -/// Whether we are compiling for the Neon or above in the arm64 family. -template -constexpr bool IsNeon = std::is_base_of_v; - -/// Wrapper around ``xsimd::bitwise_lshift`` with optimizations for non implemented sizes. 
-/// -/// We replace the variable left shift by a variable multiply with a power of two. -/// -/// This trick is borrowed from Daniel Lemire and Leonid Boytsov, Decoding billions of -/// integers per second through vectorization, Software Practice & Experience 45 (1), -/// 2015. http://arxiv.org/abs/1209.2137 -/// -/// TODO(xsimd) Tracking in https://github.com/xtensor-stack/xsimd/pull/1220 -/// When migrating, be sure to use batch_constant overload, and not the batch one. -template -ARROW_FORCE_INLINE auto left_shift(const xsimd::batch& batch, - xsimd::batch_constant shifts) - -> xsimd::batch { - constexpr bool kIsSse2 = IsSse2; - constexpr bool kIsAvx2 = IsAvx2; - static_assert( - !(kIsSse2 && kIsAvx2), - "In xsimd, an x86 arch is either part of the SSE family or of the AVX family," - "not both. If this check fails, it means the assumptions made here to detect SSE " - "and AVX are out of date."); - - constexpr auto kMults = xsimd::make_batch_constant() << shifts; - - constexpr auto IntSize = sizeof(Int); - - // Sizes and architecture for which there is no variable left shift and there is a - // multiplication - if constexpr ( // - (kIsSse2 && (IntSize == sizeof(uint16_t) || IntSize == sizeof(uint32_t))) // - || (kIsAvx2 && (IntSize == sizeof(uint16_t))) // - ) { - return batch * kMults; - } - - // Architecture for which there is no variable left shift on uint8_t but a fallback - // exists for uint16_t. 
- if constexpr ((kIsSse2 || kIsAvx2) && (IntSize == sizeof(uint8_t))) { - const auto batch16 = xsimd::bitwise_cast(batch); - - constexpr auto kShifts0 = select_stride(shifts); - const auto shifted0 = left_shift(batch16, kShifts0) & 0x00FF; - - constexpr auto kShifts1 = select_stride(shifts); - const auto shifted1 = left_shift(batch16 & 0xFF00, kShifts1); - - return xsimd::bitwise_cast(shifted0 | shifted1); - } - - // TODO(xsimd) bug fixed in xsimd 14.1.0 - // https://github.com/xtensor-stack/xsimd/pull/1266 -#if XSIMD_VERSION_MAJOR < 14 || ((XSIMD_VERSION_MAJOR == 14) && XSIMD_VERSION_MINOR == 0) - if constexpr (IsNeon) { - using SInt = std::make_signed_t; - constexpr auto signed_shifts = - xsimd::batch_constant(kShifts)...>(); - return xsimd::kernel::bitwise_lshift(batch, signed_shifts.as_batch(), Arch{}); - } -#endif - - return batch << shifts; -} - /// Fallback for variable shift right. /// /// When we know that the relevant bits will not overflow, we can instead shift left all @@ -243,9 +176,8 @@ ARROW_FORCE_INLINE auto right_shift_by_excess( constexpr auto IntSize = sizeof(Int); - // Architecture for which there is no variable right shift but a larger fallback exists. - // TODO(xsimd) Tracking for Avx2 in https://github.com/xtensor-stack/xsimd/pull/1220 - // When migrating, be sure to use batch_constant overload, and not the batch one. + // Architectures for which there is no variable right shift but a larger fallback + // exists. if constexpr (kIsAvx2 && (IntSize == sizeof(uint8_t) || IntSize == sizeof(uint16_t))) { using twice_uint = SizedUint<2 * IntSize>; @@ -262,27 +194,17 @@ ARROW_FORCE_INLINE auto right_shift_by_excess( return xsimd::bitwise_cast(shifted0 | shifted1); } - // These conditions are the ones matched in `left_shift`, i.e. the ones where variable - // shift right will not be available but a left shift (fallback) exists. 
+ // Architectures for which there is no variable right shift but a left shift exists + // (possibly using the multiply trick). + // We use a variable left shift and a fixed right shift. if constexpr (kIsSse2 && (IntSize != sizeof(uint64_t))) { constexpr Int kMaxRShift = max_value(std::array{kShifts...}); constexpr auto kLShifts = xsimd::make_batch_constant() - shifts; - return xsimd::bitwise_rshift(left_shift(batch, kLShifts)); - } - - // TODO(xsimd) bug fixed in xsimd 14.1.0 - // https://github.com/xtensor-stack/xsimd/pull/1266 -#if XSIMD_VERSION_MAJOR < 14 || ((XSIMD_VERSION_MAJOR == 14) && XSIMD_VERSION_MINOR == 0) - if constexpr (IsNeon) { - using SInt = std::make_signed_t; - constexpr auto signed_shifts = - xsimd::batch_constant(kShifts)...>(); - return xsimd::kernel::bitwise_rshift(batch, signed_shifts.as_batch(), Arch{}); + return xsimd::bitwise_rshift(batch << kLShifts); } -#endif return batch >> shifts; } @@ -1040,7 +962,7 @@ struct LargeKernel { const auto high_swizzled = xsimd::swizzle(bytes, kHighSwizzles); const auto high_words = xsimd::bitwise_cast(high_swizzled); - const auto high_shifted = left_shift(high_words, kHighLShifts); + const auto high_shifted = high_words << kHighLShifts; // We can have a single mask and apply it after OR because the shifts will ensure that // there are zeros where the high/low values are incomplete. 
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 3af4a10a50a3..7e7212898bd9 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -115,8 +115,8 @@ ARROW_UTF8PROC_BUILD_SHA256_CHECKSUM=6f4f1b639daa6dca9f80bc5db1233e9cbaa31a67790 # WIL (Windows Implementation Libraries) is required by Azure SDK on Windows for WinHTTP transport ARROW_WIL_BUILD_VERSION=v1.0.250325.1 ARROW_WIL_BUILD_SHA256_CHECKSUM=c9e667d5f86ded43d17b5669d243e95ca7b437e3a167c170805ffd4aa8a9a786 -ARROW_XSIMD_BUILD_VERSION=14.0.0 -ARROW_XSIMD_BUILD_SHA256_CHECKSUM=17de0236954955c10c09d6938d4c5f3a3b92d31be5dadd1d5d09fc1b15490dce +ARROW_XSIMD_BUILD_VERSION=14.2.0 +ARROW_XSIMD_BUILD_SHA256_CHECKSUM=21e841ab684b05331e81e7f782431753a029ef7b7d9d6d3ddab837e7782a40ee ARROW_ZLIB_BUILD_VERSION=1.3.1 ARROW_ZLIB_BUILD_SHA256_CHECKSUM=9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23 ARROW_ZSTD_BUILD_VERSION=1.5.7