Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 87 additions & 15 deletions cpp/src/arrow/buffer_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "arrow/buffer.h"
#include "arrow/status.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/int_util_overflow.h"
#include "arrow/util/bitmap_generate.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/macros.h"
Expand Down Expand Up @@ -91,11 +92,19 @@ class ARROW_EXPORT BufferBuilder {
/// \param[in] additional_bytes number of additional bytes to make space for
/// \return Status
Status Reserve(const int64_t additional_bytes) {
auto min_capacity = size_ + additional_bytes;
if (ARROW_PREDICT_FALSE(additional_bytes < 0)) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a lot of similar checks that we're adding in many methods. Can we find a way of factoring these out, and hopefully avoid forgetting any call sites where such checks must be done?

return Status::Invalid("Reserve: negative additional_bytes");
}
int64_t min_capacity;
if (ARROW_PREDICT_FALSE(
internal::AddWithOverflow(size_, additional_bytes, &min_capacity))) {
return Status::CapacityError("Reserve: capacity overflow");
}
if (min_capacity <= capacity_) {
return Status::OK();
}
return Resize(GrowByFactor(capacity_, min_capacity), false);
int64_t requested_capacity = GrowByFactor(capacity_, min_capacity);
return Resize(requested_capacity, false);
}

/// \brief Return a capacity expanded by the desired growth factor
Expand All @@ -104,15 +113,27 @@ class ARROW_EXPORT BufferBuilder {
// (versus 1.5x) seems to have slightly better performance when using
// jemalloc, but significantly better performance when using the system
// allocator. See ARROW-6450 for further discussion
return std::max(new_capacity, current_capacity * 2);
int64_t doubled;
if (ARROW_PREDICT_FALSE(internal::AddWithOverflow(current_capacity, current_capacity,
&doubled))) {
return new_capacity;
}
return std::max(new_capacity, doubled);
}

/// \brief Append the given data to the buffer
///
/// The buffer is automatically expanded if necessary.
Status Append(const void* data, const int64_t length) {
if (ARROW_PREDICT_FALSE(size_ + length > capacity_)) {
ARROW_RETURN_NOT_OK(Resize(GrowByFactor(capacity_, size_ + length), false));
if (ARROW_PREDICT_FALSE(length < 0)) {
return Status::Invalid("Append: negative length");
}
int64_t new_size;
if (ARROW_PREDICT_FALSE(internal::AddWithOverflow(size_, length, &new_size))) {
return Status::CapacityError("Append: size overflow");
}
if (ARROW_PREDICT_FALSE(new_size > capacity_)) {
ARROW_RETURN_NOT_OK(Resize(GrowByFactor(capacity_, new_size), false));
}
UnsafeAppend(data, length);
return Status::OK();
Expand Down Expand Up @@ -186,6 +207,9 @@ class ARROW_EXPORT BufferBuilder {
/// mostly for memory allocation).
Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
bool shrink_to_fit = true) {
if (ARROW_PREDICT_FALSE(final_length < 0)) {
return Status::Invalid("FinishWithLength: negative final length");
}
size_ = final_length;
return Finish(shrink_to_fit);
}
Expand Down Expand Up @@ -250,12 +274,22 @@ class TypedBufferBuilder<
}

Status Append(const T* values, int64_t num_elements) {
return bytes_builder_.Append(reinterpret_cast<const uint8_t*>(values),
num_elements * sizeof(T));
if (ARROW_PREDICT_FALSE(num_elements < 0)) {
return Status::Invalid("Append: negative number of elements");
}
int64_t num_bytes;
if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow(
num_elements, static_cast<int64_t>(sizeof(T)), &num_bytes))) {
return Status::CapacityError("Append: size overflow");
}
return bytes_builder_.Append(reinterpret_cast<const uint8_t*>(values), num_bytes);
}

Status Append(const int64_t num_copies, T value) {
ARROW_RETURN_NOT_OK(Reserve(num_copies + length()));
if (ARROW_PREDICT_FALSE(num_copies < 0)) {
return Status::Invalid("Append: negative number of copies");
}
ARROW_RETURN_NOT_OK(Reserve(num_copies));
UnsafeAppend(num_copies, value);
return Status::OK();
}
Expand Down Expand Up @@ -284,15 +318,39 @@ class TypedBufferBuilder<
}

Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
return bytes_builder_.Resize(new_capacity * sizeof(T), shrink_to_fit);
if (ARROW_PREDICT_FALSE(new_capacity < 0)) {
return Status::Invalid("Resize: negative capacity");
}
int64_t num_bytes;
if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow(
new_capacity, static_cast<int64_t>(sizeof(T)), &num_bytes))) {
return Status::CapacityError("Resize: capacity overflow");
}
return bytes_builder_.Resize(num_bytes, shrink_to_fit);
}

Status Reserve(const int64_t additional_elements) {
return bytes_builder_.Reserve(additional_elements * sizeof(T));
if (ARROW_PREDICT_FALSE(additional_elements < 0)) {
return Status::Invalid("Reserve: negative additional_elements");
}
int64_t num_bytes;
if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow(
additional_elements, static_cast<int64_t>(sizeof(T)), &num_bytes))) {
return Status::CapacityError("Reserve: size overflow");
}
return bytes_builder_.Reserve(num_bytes);
}

Status Advance(const int64_t length) {
return bytes_builder_.Advance(length * sizeof(T));
if (ARROW_PREDICT_FALSE(length < 0)) {
return Status::Invalid("Advance: negative length");
}
int64_t num_bytes;
if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow(
length, static_cast<int64_t>(sizeof(T)), &num_bytes))) {
return Status::CapacityError("Advance: size overflow");
}
return bytes_builder_.Advance(num_bytes);
}

void UnsafeAdvance(const int64_t length) {
Expand All @@ -316,7 +374,15 @@ class TypedBufferBuilder<
/// only for memory allocation).
Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
bool shrink_to_fit = true) {
return bytes_builder_.FinishWithLength(final_length * sizeof(T), shrink_to_fit);
if (ARROW_PREDICT_FALSE(final_length < 0)) {
return Status::Invalid("FinishWithLength: negative final length");
}
int64_t num_bytes;
if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow(
final_length, static_cast<int64_t>(sizeof(T)), &num_bytes))) {
return Status::CapacityError("FinishWithLength: final length overflow");
}
return bytes_builder_.FinishWithLength(num_bytes, shrink_to_fit);
}

void Reset() { bytes_builder_.Reset(); }
Expand Down Expand Up @@ -429,9 +495,15 @@ class TypedBufferBuilder<bool> {
}

Status Reserve(const int64_t additional_elements) {
return Resize(
BufferBuilder::GrowByFactor(bit_length_, bit_length_ + additional_elements),
false);
if (ARROW_PREDICT_FALSE(additional_elements < 0)) {
return Status::Invalid("Reserve: negative additional_elements");
}
int64_t min_length;
if (ARROW_PREDICT_FALSE(
internal::AddWithOverflow(bit_length_, additional_elements, &min_length))) {
return Status::CapacityError("Reserve: capacity overflow");
}
return Resize(min_length, false);
}

Status Advance(const int64_t length) {
Expand Down
23 changes: 23 additions & 0 deletions cpp/src/arrow/buffer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,19 @@ TEST(TestBufferBuilder, ResizeReserve) {
ASSERT_EQ(9, builder.length());
}

TEST(TestBufferBuilder, InvalidReserveAndAppendLengths) {
const std::string data = "x";
auto data_ptr = data.c_str();
BufferBuilder builder;

ASSERT_RAISES(Invalid, builder.Append(data_ptr, -1));
ASSERT_RAISES(Invalid, builder.Reserve(-1));
ASSERT_RAISES(Invalid, builder.Advance(-1));

const int64_t overflow_add = std::numeric_limits<int64_t>::max() - 8;
ASSERT_RAISES(CapacityError, builder.Reserve(overflow_add));
}

TEST(TestBufferBuilder, Alignment) {
const std::string data = "some data";
auto data_ptr = data.c_str();
Expand Down Expand Up @@ -862,6 +875,16 @@ TYPED_TEST(TypedTestBufferBuilder, AppendCopies) {
}
}

TYPED_TEST(TypedTestBufferBuilder, NegativeAndOverflowAppend) {
TypedBufferBuilder<TypeParam> builder;

ASSERT_RAISES(Invalid, builder.Append(-1, static_cast<TypeParam>(0)));

const int64_t max_num_elements = std::numeric_limits<int64_t>::max() / sizeof(TypeParam) + 1;
ASSERT_RAISES(CapacityError,
builder.Append(max_num_elements, static_cast<TypeParam>(0)));
}

TEST(TestBoolBufferBuilder, Basics) {
TypedBufferBuilder<bool> builder;

Expand Down
24 changes: 15 additions & 9 deletions cpp/src/arrow/memory_pool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,17 @@ namespace {

constexpr char kDefaultBackendEnvVar[] = "ARROW_DEFAULT_MEMORY_POOL";
constexpr char kDebugMemoryEnvVar[] = "ARROW_DEBUG_MEMORY_POOL";
constexpr int64_t kMaxBufferCapacity = std::numeric_limits<int64_t>::max() - 63;

Status ValidateBufferCapacity(int64_t capacity, const char* operation) {
if (ARROW_PREDICT_FALSE(capacity < 0)) {
return Status::Invalid(operation, ": negative capacity");
}
if (ARROW_PREDICT_FALSE(capacity > kMaxBufferCapacity)) {
return Status::CapacityError(operation, ": capacity overflow");
}
return Status::OK();
}

enum class MemoryPoolBackend : uint8_t { System, Jemalloc, Mimalloc };

Expand Down Expand Up @@ -920,9 +931,7 @@ class PoolBuffer final : public ResizableBuffer {
}

Status Reserve(const int64_t capacity) override {
if (capacity < 0) {
return Status::Invalid("Negative buffer capacity: ", capacity);
}
RETURN_NOT_OK(ValidateBufferCapacity(capacity, "Reserve"));
uint8_t* ptr = mutable_data();
if (!ptr || capacity > capacity_) {
ARROW_ASSIGN_OR_RAISE(int64_t new_capacity, RoundCapacity(capacity));
Expand All @@ -938,9 +947,7 @@ class PoolBuffer final : public ResizableBuffer {
}

Status Resize(const int64_t new_size, bool shrink_to_fit = true) override {
if (ARROW_PREDICT_FALSE(new_size < 0)) {
return Status::Invalid("Negative buffer resize: ", new_size);
}
RETURN_NOT_OK(ValidateBufferCapacity(new_size, "Resize"));
uint8_t* ptr = mutable_data();
if (ptr && shrink_to_fit && new_size <= size_) {
// Buffer is non-null and is not growing, so shrink to the requested size without
Expand Down Expand Up @@ -984,9 +991,7 @@ class PoolBuffer final : public ResizableBuffer {

private:
static Result<int64_t> RoundCapacity(int64_t capacity) {
if (capacity > std::numeric_limits<int64_t>::max() - 63) {
return Status::OutOfMemory("capacity too large");
}
DCHECK_LE(capacity, kMaxBufferCapacity);
return bit_util::RoundUpToMultipleOf64(capacity);
}

Expand Down Expand Up @@ -1026,6 +1031,7 @@ Result<std::unique_ptr<ResizableBuffer>> AllocateResizableBuffer(const int64_t s
Result<std::unique_ptr<ResizableBuffer>> AllocateResizableBuffer(const int64_t size,
const int64_t alignment,
MemoryPool* pool) {
RETURN_NOT_OK(ValidateBufferCapacity(size, "AllocateResizableBuffer"));
return ResizePoolBuffer<std::unique_ptr<ResizableBuffer>>(
PoolBuffer::MakeUnique(pool, alignment), size);
}
Expand Down
Loading