Skip to content

Commit 9a0afd9

Browse files
authored
GH-40742: [R] fix max_rows_per_group must be a positive number (#49709)
### Rationale for this change Error message due to faulty checking ### What changes are included in this PR? Change checking circumstances ### Are these changes tested? Yes ### Are there any user-facing changes? No ### AI usage Used Claude to generate this - will double check before marking ready for review * GitHub Issue: #40742 Authored-by: Nic Crane <thisisnic@gmail.com> Signed-off-by: Nic Crane <thisisnic@gmail.com>
1 parent a2d8e9c commit 9a0afd9

2 files changed

Lines changed: 34 additions & 4 deletions

File tree

r/R/dataset-write.R

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,12 @@ write_dataset <- function(
218218
existing_data_behavior_opts <- c("delete_matching", "overwrite", "error")
219219
existing_data_behavior <- match(match.arg(existing_data_behavior), existing_data_behavior_opts) - 1L
220220

221-
if (!missing(max_rows_per_file) && missing(max_rows_per_group) && max_rows_per_group > max_rows_per_file) {
221+
if (
222+
!missing(max_rows_per_file) &&
223+
missing(max_rows_per_group) &&
224+
max_rows_per_file > 0 &&
225+
max_rows_per_group > max_rows_per_file
226+
) {
222227
max_rows_per_group <- max_rows_per_file
223228
}
224229

@@ -290,7 +295,12 @@ write_delim_dataset <- function(
290295
quote = c("needed", "all", "none"),
291296
preserve_order = FALSE
292297
) {
293-
if (!missing(max_rows_per_file) && missing(max_rows_per_group) && max_rows_per_group > max_rows_per_file) {
298+
if (
299+
!missing(max_rows_per_file) &&
300+
missing(max_rows_per_group) &&
301+
max_rows_per_file > 0 &&
302+
max_rows_per_group > max_rows_per_file
303+
) {
294304
max_rows_per_group <- max_rows_per_file
295305
}
296306

@@ -343,7 +353,12 @@ write_csv_dataset <- function(
343353
quote = c("needed", "all", "none"),
344354
preserve_order = FALSE
345355
) {
346-
if (!missing(max_rows_per_file) && missing(max_rows_per_group) && max_rows_per_group > max_rows_per_file) {
356+
if (
357+
!missing(max_rows_per_file) &&
358+
missing(max_rows_per_group) &&
359+
max_rows_per_file > 0 &&
360+
max_rows_per_group > max_rows_per_file
361+
) {
347362
max_rows_per_group <- max_rows_per_file
348363
}
349364

@@ -395,7 +410,12 @@ write_tsv_dataset <- function(
395410
quote = c("needed", "all", "none"),
396411
preserve_order = FALSE
397412
) {
398-
if (!missing(max_rows_per_file) && missing(max_rows_per_group) && max_rows_per_group > max_rows_per_file) {
413+
if (
414+
!missing(max_rows_per_file) &&
415+
missing(max_rows_per_group) &&
416+
max_rows_per_file > 0 &&
417+
max_rows_per_group > max_rows_per_file
418+
) {
399419
max_rows_per_group <- max_rows_per_file
400420
}
401421

r/tests/testthat/test-dataset-write.R

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,16 @@ test_that("max_rows_per_group is adjusted if at odds with max_rows_per_file", {
576576
)
577577
})
578578

579+
test_that("max_rows_per_file = 0 does not trigger max_rows_per_group adjustment (ARROW-40742)", {
580+
skip_if_not_available("parquet")
581+
582+
# max_rows_per_file = 0 means "no limit" and should not error
583+
dst_dir <- make_temp_dir()
584+
expect_no_error(
585+
write_dataset(df1, dst_dir, max_rows_per_file = 0L)
586+
)
587+
})
588+
579589

580590
test_that("write_dataset checks for format-specific arguments", {
581591
df <- tibble::tibble(

0 commit comments

Comments
 (0)