Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .env
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ TZ=UTC
# Used through compose.yaml and serves as the default version for the
# ci/scripts/install_vcpkg.sh script. Prefer to use short SHAs to keep the
# docker tags more readable.
VCPKG="66c0373dc7fca549e5803087b9487edfe3aca0a1" # 2026.01.16 Release
VCPKG="77826283b4676c526da3a5c59e6717e3775ba722"

# This must be updated when we update
# ci/docker/python-*-windows-*.dockerfile or the vcpkg config.
Expand Down
2 changes: 1 addition & 1 deletion ci/conda_env_cpp.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,6 @@ rapidjson
re2
snappy
thrift-cpp>=0.11.0
xsimd>=14.0
xsimd>=14.2
zlib
zstd
6 changes: 5 additions & 1 deletion ci/vcpkg/vcpkg.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"name": "arrow",
"version-string": "",
"builtin-baseline": "77826283b4676c526da3a5c59e6717e3775ba722",
"description": "Cross-language development platform for in-memory analytics",
"homepage": "https://arrow.apache.org",
"supports": "x64 | (arm64 & !windows)",
Expand All @@ -19,7 +20,10 @@
"re2",
"snappy",
"utf8proc",
"xsimd",
{
"name": "xsimd",
"version>=": "14.2"
},
"zlib",
"zstd",
{
Expand Down
92 changes: 7 additions & 85 deletions cpp/src/arrow/util/bpacking_simd_kernel_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,73 +151,6 @@ constexpr bool IsSse2 = std::is_base_of_v<xsimd::sse2, Arch>;
template <typename Arch>
constexpr bool IsAvx2 = std::is_base_of_v<xsimd::avx2, Arch>;

/// Whether we are compiling for the Neon or above in the arm64 family.
template <typename Arch>
constexpr bool IsNeon = std::is_base_of_v<xsimd::neon, Arch>;

/// Shift every lane of ``batch`` left by the matching compile-time amount in ``shifts``.
///
/// Wrapper around ``xsimd::bitwise_lshift`` with optimizations for sizes where no
/// variable left shift is implemented.
///
/// For those sizes, we replace the variable left shift by a variable multiply with a
/// power of two.
///
/// This trick is borrowed from Daniel Lemire and Leonid Boytsov, Decoding billions of
/// integers per second through vectorization, Software Practice & Experience 45 (1),
/// 2015. http://arxiv.org/abs/1209.2137
///
/// TODO(xsimd) Tracking in https://github.com/xtensor-stack/xsimd/pull/1220
/// When migrating, be sure to use batch_constant overload, and not the batch one.
///
/// \param batch the values to shift
/// \param shifts the per-lane shift amounts, known at compile time
/// \return a batch where each lane of ``batch`` is shifted left by its entry in
/// ``shifts``
template <typename Arch, typename Int, Int... kShifts>
ARROW_FORCE_INLINE auto left_shift(const xsimd::batch<Int, Arch>& batch,
                                   xsimd::batch_constant<Int, Arch, kShifts...> shifts)
    -> xsimd::batch<Int, Arch> {
  constexpr bool kIsSse2 = IsSse2<Arch>;
  constexpr bool kIsAvx2 = IsAvx2<Arch>;
  static_assert(
      !(kIsSse2 && kIsAvx2),
      "In xsimd, an x86 arch is either part of the SSE family or of the AVX family, "
      "not both. If this check fails, it means the assumptions made here to detect SSE "
      "and AVX are out of date.");

  // Per-lane multipliers ``1 << shift``, computed at compile time.
  constexpr auto kMults = xsimd::make_batch_constant<Int, 1, Arch>() << shifts;

  constexpr auto IntSize = sizeof(Int);

  // Sizes and architectures for which there is no variable left shift but there is a
  // multiplication: multiply by the matching power of two instead.
  if constexpr (                                                                  //
      (kIsSse2 && (IntSize == sizeof(uint16_t) || IntSize == sizeof(uint32_t)))  //
      || (kIsAvx2 && (IntSize == sizeof(uint16_t)))                              //
  ) {
    return batch * kMults;
  }

  // Architectures for which there is no variable left shift on uint8_t but a fallback
  // exists for uint16_t: view the bytes as uint16_t lanes and shift the two bytes of
  // each lane separately.
  if constexpr ((kIsSse2 || kIsAvx2) && (IntSize == sizeof(uint8_t))) {
    const auto batch16 = xsimd::bitwise_cast<uint16_t>(batch);

    // NOTE(review): ``select_stride`` is defined elsewhere; presumably it extracts every
    // other shift, starting at the given offset, as uint16_t constants -- confirm.
    // Shift the whole lane, then keep only the low byte so that bits shifted out of it
    // do not leak into the high byte.
    constexpr auto kShifts0 = select_stride<uint16_t, 0>(shifts);
    const auto shifted0 = left_shift(batch16, kShifts0) & 0x00FF;

    // Clear the low byte first so that none of its bits are shifted into the high byte.
    constexpr auto kShifts1 = select_stride<uint16_t, 1>(shifts);
    const auto shifted1 = left_shift(batch16 & 0xFF00, kShifts1);

    return xsimd::bitwise_cast<Int>(shifted0 | shifted1);
  }

  // TODO(xsimd) bug fixed in xsimd 14.1.0
  // https://github.com/xtensor-stack/xsimd/pull/1266
#if XSIMD_VERSION_MAJOR < 14 || ((XSIMD_VERSION_MAJOR == 14) && XSIMD_VERSION_MINOR == 0)
  if constexpr (IsNeon<Arch>) {
    // Workaround for affected xsimd versions: call the kernel directly with the shifts
    // converted to a runtime batch of the signed counterpart of ``Int``.
    using SInt = std::make_signed_t<Int>;
    constexpr auto signed_shifts =
        xsimd::batch_constant<SInt, Arch, static_cast<SInt>(kShifts)...>();
    return xsimd::kernel::bitwise_lshift(batch, signed_shifts.as_batch(), Arch{});
  }
#endif

  // Everything else: rely on the shift provided by xsimd.
  return batch << shifts;
}

/// Fallback for variable shift right.
///
/// When we know that the relevant bits will not overflow, we can instead shift left all
Expand All @@ -243,9 +176,8 @@ ARROW_FORCE_INLINE auto right_shift_by_excess(

constexpr auto IntSize = sizeof(Int);

// Architecture for which there is no variable right shift but a larger fallback exists.
// TODO(xsimd) Tracking for Avx2 in https://github.com/xtensor-stack/xsimd/pull/1220
// When migrating, be sure to use batch_constant overload, and not the batch one.
// Architectures for which there is no variable right shift but a larger fallback
// exists.
if constexpr (kIsAvx2 && (IntSize == sizeof(uint8_t) || IntSize == sizeof(uint16_t))) {
using twice_uint = SizedUint<2 * IntSize>;

Expand All @@ -262,27 +194,17 @@ ARROW_FORCE_INLINE auto right_shift_by_excess(
return xsimd::bitwise_cast<Int>(shifted0 | shifted1);
}

// These conditions are the ones matched in `left_shift`, i.e. the ones where variable
// shift right will not be available but a left shift (fallback) exists.
// Architectures for which there is no variable right shift but a left shift exists
// (possibly using the multiply trick).
// We use a variable left shift followed by a fixed right shift.
if constexpr (kIsSse2 && (IntSize != sizeof(uint64_t))) {
constexpr Int kMaxRShift = max_value(std::array{kShifts...});

constexpr auto kLShifts =
xsimd::make_batch_constant<Int, kMaxRShift, Arch>() - shifts;

return xsimd::bitwise_rshift<kMaxRShift>(left_shift(batch, kLShifts));
}

// TODO(xsimd) bug fixed in xsimd 14.1.0
// https://github.com/xtensor-stack/xsimd/pull/1266
#if XSIMD_VERSION_MAJOR < 14 || ((XSIMD_VERSION_MAJOR == 14) && XSIMD_VERSION_MINOR == 0)
if constexpr (IsNeon<Arch>) {
using SInt = std::make_signed_t<Int>;
constexpr auto signed_shifts =
xsimd::batch_constant<SInt, Arch, static_cast<SInt>(kShifts)...>();
return xsimd::kernel::bitwise_rshift(batch, signed_shifts.as_batch(), Arch{});
return xsimd::bitwise_rshift<kMaxRShift>(batch << kLShifts);
}
#endif

return batch >> shifts;
}
Expand Down Expand Up @@ -1040,7 +962,7 @@ struct LargeKernel {

const auto high_swizzled = xsimd::swizzle(bytes, kHighSwizzles);
const auto high_words = xsimd::bitwise_cast<unpacked_type>(high_swizzled);
const auto high_shifted = left_shift(high_words, kHighLShifts);
const auto high_shifted = high_words << kHighLShifts;

// We can have a single mask and apply it after OR because the shifts will ensure that
// there are zeros where the high/low values are incomplete.
Expand Down
4 changes: 2 additions & 2 deletions cpp/thirdparty/versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ ARROW_UTF8PROC_BUILD_SHA256_CHECKSUM=6f4f1b639daa6dca9f80bc5db1233e9cbaa31a67790
# WIL (Windows Implementation Libraries) is required by Azure SDK on Windows for WinHTTP transport
ARROW_WIL_BUILD_VERSION=v1.0.250325.1
ARROW_WIL_BUILD_SHA256_CHECKSUM=c9e667d5f86ded43d17b5669d243e95ca7b437e3a167c170805ffd4aa8a9a786
ARROW_XSIMD_BUILD_VERSION=14.0.0
ARROW_XSIMD_BUILD_SHA256_CHECKSUM=17de0236954955c10c09d6938d4c5f3a3b92d31be5dadd1d5d09fc1b15490dce
ARROW_XSIMD_BUILD_VERSION=14.2.0
ARROW_XSIMD_BUILD_SHA256_CHECKSUM=21e841ab684b05331e81e7f782431753a029ef7b7d9d6d3ddab837e7782a40ee
ARROW_ZLIB_BUILD_VERSION=1.3.1
ARROW_ZLIB_BUILD_SHA256_CHECKSUM=9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23
ARROW_ZSTD_BUILD_VERSION=1.5.7
Expand Down
Loading