Skip to content

Commit b3c6635

Browse files
committed
MINOR: [C++][CSV] reject blocks whose value count overflows int32
1 parent e89f61d commit b3c6635

2 files changed

Lines changed: 22 additions & 0 deletions

File tree

cpp/src/arrow/csv/parser.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,17 @@ class BlockParserImpl {
605605
rows_in_chunk = std::min(kTargetChunkSize, max_num_rows_ - batch_.num_rows_);
606606
}
607607

608+
// The values array holds one ParsedValueDesc per cell and those offsets
609+
// are 31-bit, so the number of values in a chunk must fit in an int32.
610+
// A first line with millions of fields can drive `num_cols_` high enough
611+
// to overflow that, so error out rather than presize past the limit.
612+
if (static_cast<int64_t>(rows_in_chunk) * batch_.num_cols_ >
613+
std::numeric_limits<int32_t>::max()) {
614+
return Status::Invalid("CSV parser: row group of ", rows_in_chunk, " rows x ",
615+
batch_.num_cols_,
616+
" columns exceeds the maximum number of values");
617+
}
618+
608619
ARROW_ASSIGN_OR_RAISE(
609620
auto values_writer,
610621
PresizedValueDescWriter::Make(pool_, rows_in_chunk, batch_.num_cols_));

cpp/src/arrow/csv/parser_test.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,17 @@ TEST(BlockParser, MismatchingNumColumns) {
666666
}
667667
}
668668

669+
TEST(BlockParser, TooManyValues) {
670+
// Checks that the parser rejects a block whose per-chunk value count
671+
// (rows x columns) would not fit in an int32. The column count is forced by
672+
// constructing the parser with num_cols=5000000 rather than by feeding a line
// that actually has millions of fields; the input below has only two fields.
// NOTE(review): this relies on the parser's chunk row count (computed outside
// this test) being large enough that rows x 5000000 exceeds int32 max, and on
// the size check firing before the line is parsed -- confirm in parser.cc.
673+
uint32_t out_size;
674+
BlockParser parser(ParseOptions::Defaults(), /*num_cols=*/5000000);
675+
Status st = Parse(parser, MakeCSVData({"a,b\n"}), &out_size);
676+
// Expect an Invalid status whose message contains the value-count error text.
EXPECT_RAISES_WITH_MESSAGE_THAT(
677+
Invalid, testing::HasSubstr("exceeds the maximum number of values"), st);
678+
}
679+
669680
TEST(BlockParser, MismatchingNumColumnsHandler) {
670681
struct CustomHandler {
671682
operator InvalidRowHandler() {

0 commit comments

Comments
 (0)