diff --git a/cpp/src/arrow/buffer_builder.h b/cpp/src/arrow/buffer_builder.h index e9177c656c0..10d236bf23e 100644 --- a/cpp/src/arrow/buffer_builder.h +++ b/cpp/src/arrow/buffer_builder.h @@ -27,6 +27,7 @@ #include "arrow/buffer.h" #include "arrow/status.h" #include "arrow/util/bit_util.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/bitmap_generate.h" #include "arrow/util/bitmap_ops.h" #include "arrow/util/macros.h" @@ -91,11 +92,19 @@ class ARROW_EXPORT BufferBuilder { /// \param[in] additional_bytes number of additional bytes to make space for /// \return Status Status Reserve(const int64_t additional_bytes) { - auto min_capacity = size_ + additional_bytes; + if (ARROW_PREDICT_FALSE(additional_bytes < 0)) { + return Status::Invalid("Reserve: negative additional_bytes"); + } + int64_t min_capacity; + if (ARROW_PREDICT_FALSE( + internal::AddWithOverflow(size_, additional_bytes, &min_capacity))) { + return Status::CapacityError("Reserve: capacity overflow"); + } if (min_capacity <= capacity_) { return Status::OK(); } - return Resize(GrowByFactor(capacity_, min_capacity), false); + int64_t requested_capacity = GrowByFactor(capacity_, min_capacity); + return Resize(requested_capacity, false); } /// \brief Return a capacity expanded by the desired growth factor @@ -104,15 +113,27 @@ class ARROW_EXPORT BufferBuilder { // (versus 1.5x) seems to have slightly better performance when using // jemalloc, but significantly better performance when using the system // allocator. See ARROW-6450 for further discussion - return std::max(new_capacity, current_capacity * 2); + int64_t doubled; + if (ARROW_PREDICT_FALSE(internal::AddWithOverflow(current_capacity, current_capacity, + &doubled))) { + return new_capacity; + } + return std::max(new_capacity, doubled); } /// \brief Append the given data to the buffer /// /// The buffer is automatically expanded if necessary. Status Append(const void* data, const int64_t length) { - if (ARROW_PREDICT_FALSE(size_ + length > capacity_)) { - ARROW_RETURN_NOT_OK(Resize(GrowByFactor(capacity_, size_ + length), false)); + if (ARROW_PREDICT_FALSE(length < 0)) { + return Status::Invalid("Append: negative length"); + } + int64_t new_size; + if (ARROW_PREDICT_FALSE(internal::AddWithOverflow(size_, length, &new_size))) { + return Status::CapacityError("Append: size overflow"); + } + if (ARROW_PREDICT_FALSE(new_size > capacity_)) { + ARROW_RETURN_NOT_OK(Resize(GrowByFactor(capacity_, new_size), false)); } UnsafeAppend(data, length); return Status::OK(); @@ -186,6 +207,9 @@ class ARROW_EXPORT BufferBuilder { /// mostly for memory allocation). Result> FinishWithLength(int64_t final_length, bool shrink_to_fit = true) { + if (ARROW_PREDICT_FALSE(final_length < 0)) { + return Status::Invalid("FinishWithLength: negative final length"); + } size_ = final_length; return Finish(shrink_to_fit); } @@ -250,12 +274,22 @@ class TypedBufferBuilder< } Status Append(const T* values, int64_t num_elements) { - return bytes_builder_.Append(reinterpret_cast(values), - num_elements * sizeof(T)); + if (ARROW_PREDICT_FALSE(num_elements < 0)) { + return Status::Invalid("Append: negative number of elements"); + } + int64_t num_bytes; + if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow( + num_elements, static_cast(sizeof(T)), &num_bytes))) { + return Status::CapacityError("Append: size overflow"); + } + return bytes_builder_.Append(reinterpret_cast(values), num_bytes); } Status Append(const int64_t num_copies, T value) { - ARROW_RETURN_NOT_OK(Reserve(num_copies + length())); + if (ARROW_PREDICT_FALSE(num_copies < 0)) { + return Status::Invalid("Append: negative number of copies"); + } + ARROW_RETURN_NOT_OK(Reserve(num_copies)); UnsafeAppend(num_copies, value); return Status::OK(); } @@ -284,15 +318,39 @@ class TypedBufferBuilder< } Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) { - return bytes_builder_.Resize(new_capacity * sizeof(T), shrink_to_fit); + if (ARROW_PREDICT_FALSE(new_capacity < 0)) { + return Status::Invalid("Resize: negative capacity"); + } + int64_t num_bytes; + if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow( + new_capacity, static_cast(sizeof(T)), &num_bytes))) { + return Status::CapacityError("Resize: capacity overflow"); + } + return bytes_builder_.Resize(num_bytes, shrink_to_fit); } Status Reserve(const int64_t additional_elements) { - return bytes_builder_.Reserve(additional_elements * sizeof(T)); + if (ARROW_PREDICT_FALSE(additional_elements < 0)) { + return Status::Invalid("Reserve: negative additional_elements"); + } + int64_t num_bytes; + if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow( + additional_elements, static_cast(sizeof(T)), &num_bytes))) { + return Status::CapacityError("Reserve: size overflow"); + } + return bytes_builder_.Reserve(num_bytes); } Status Advance(const int64_t length) { - return bytes_builder_.Advance(length * sizeof(T)); + if (ARROW_PREDICT_FALSE(length < 0)) { + return Status::Invalid("Advance: negative length"); + } + int64_t num_bytes; + if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow( + length, static_cast(sizeof(T)), &num_bytes))) { + return Status::CapacityError("Advance: size overflow"); + } + return bytes_builder_.Advance(num_bytes); } void UnsafeAdvance(const int64_t length) { @@ -316,7 +374,15 @@ class TypedBufferBuilder< /// only for memory allocation). Result> FinishWithLength(int64_t final_length, bool shrink_to_fit = true) { - return bytes_builder_.FinishWithLength(final_length * sizeof(T), shrink_to_fit); + if (ARROW_PREDICT_FALSE(final_length < 0)) { + return Status::Invalid("FinishWithLength: negative final length"); + } + int64_t num_bytes; + if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow( + final_length, static_cast(sizeof(T)), &num_bytes))) { + return Status::CapacityError("FinishWithLength: final length overflow"); + } + return bytes_builder_.FinishWithLength(num_bytes, shrink_to_fit); } void Reset() { bytes_builder_.Reset(); } @@ -429,9 +495,15 @@ class TypedBufferBuilder { } Status Reserve(const int64_t additional_elements) { - return Resize( - BufferBuilder::GrowByFactor(bit_length_, bit_length_ + additional_elements), - false); + if (ARROW_PREDICT_FALSE(additional_elements < 0)) { + return Status::Invalid("Reserve: negative additional_elements"); + } + int64_t min_length; + if (ARROW_PREDICT_FALSE( + internal::AddWithOverflow(bit_length_, additional_elements, &min_length))) { + return Status::CapacityError("Reserve: capacity overflow"); + } + return Resize(min_length, false); } Status Advance(const int64_t length) { diff --git a/cpp/src/arrow/buffer_test.cc b/cpp/src/arrow/buffer_test.cc index 4dd210076ed..56ecc57d94b 100644 --- a/cpp/src/arrow/buffer_test.cc +++ b/cpp/src/arrow/buffer_test.cc @@ -726,6 +726,19 @@ TEST(TestBufferBuilder, ResizeReserve) { ASSERT_EQ(9, builder.length()); } +TEST(TestBufferBuilder, InvalidReserveAndAppendLengths) { + const std::string data = "x"; + auto data_ptr = data.c_str(); + BufferBuilder builder; + + ASSERT_RAISES(Invalid, builder.Append(data_ptr, -1)); + ASSERT_RAISES(Invalid, builder.Reserve(-1)); + ASSERT_RAISES(Invalid, builder.Advance(-1)); + + const int64_t overflow_add = std::numeric_limits::max() - 8; + ASSERT_RAISES(CapacityError, builder.Reserve(overflow_add)); +} + TEST(TestBufferBuilder, Alignment) { const std::string data = "some data"; auto data_ptr = data.c_str(); @@ -862,6 +875,16 @@ TYPED_TEST(TypedTestBufferBuilder, AppendCopies) { } } +TYPED_TEST(TypedTestBufferBuilder, NegativeAndOverflowAppend) { + TypedBufferBuilder builder; + + ASSERT_RAISES(Invalid, builder.Append(-1, static_cast(0))); + + const int64_t max_num_elements = std::numeric_limits::max() / sizeof(TypeParam) + 1; + ASSERT_RAISES(CapacityError, + builder.Append(max_num_elements, static_cast(0))); +} + TEST(TestBoolBufferBuilder, Basics) { TypedBufferBuilder builder; diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index 1c77a60ba0e..ccee349ee13 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -69,6 +69,17 @@ namespace { constexpr char kDefaultBackendEnvVar[] = "ARROW_DEFAULT_MEMORY_POOL"; constexpr char kDebugMemoryEnvVar[] = "ARROW_DEBUG_MEMORY_POOL"; +constexpr int64_t kMaxBufferCapacity = std::numeric_limits::max() - 63; + +Status ValidateBufferCapacity(int64_t capacity, const char* operation) { + if (ARROW_PREDICT_FALSE(capacity < 0)) { + return Status::Invalid(operation, ": negative capacity"); + } + if (ARROW_PREDICT_FALSE(capacity > kMaxBufferCapacity)) { + return Status::CapacityError(operation, ": capacity overflow"); + } + return Status::OK(); +} enum class MemoryPoolBackend : uint8_t { System, Jemalloc, Mimalloc }; @@ -920,9 +931,7 @@ class PoolBuffer final : public ResizableBuffer { } Status Reserve(const int64_t capacity) override { - if (capacity < 0) { - return Status::Invalid("Negative buffer capacity: ", capacity); - } + RETURN_NOT_OK(ValidateBufferCapacity(capacity, "Reserve")); uint8_t* ptr = mutable_data(); if (!ptr || capacity > capacity_) { ARROW_ASSIGN_OR_RAISE(int64_t new_capacity, RoundCapacity(capacity)); @@ -938,9 +947,7 @@ class PoolBuffer final : public ResizableBuffer { } Status Resize(const int64_t new_size, bool shrink_to_fit = true) override { - if (ARROW_PREDICT_FALSE(new_size < 0)) { - return Status::Invalid("Negative buffer resize: ", new_size); - } + RETURN_NOT_OK(ValidateBufferCapacity(new_size, "Resize")); uint8_t* ptr = mutable_data(); if (ptr && shrink_to_fit && new_size <= size_) { // Buffer is non-null and is not growing, so shrink to the requested size without @@ -984,9 +991,7 @@ class PoolBuffer final : public ResizableBuffer { private: static Result RoundCapacity(int64_t capacity) { - if (capacity > std::numeric_limits::max() - 63) { - return Status::OutOfMemory("capacity too large"); - } + DCHECK_LE(capacity, kMaxBufferCapacity); return bit_util::RoundUpToMultipleOf64(capacity); } @@ -1026,6 +1031,7 @@ Result> AllocateResizableBuffer(const int64_t s Result> AllocateResizableBuffer(const int64_t size, const int64_t alignment, MemoryPool* pool) { + RETURN_NOT_OK(ValidateBufferCapacity(size, "AllocateResizableBuffer")); return ResizePoolBuffer>( PoolBuffer::MakeUnique(pool, alignment), size); }