Skip to content

Commit 30d1e8e

Browse files
Add a second fuzzer for pipe equivalency (#2819)
* use maybe_write_content for easier 'mocking' * initial progress * getting very close i think... * skip Rmd files * caught a live one! * need to match original file extension? * caught another one! * simpler approach, avoid rex() due to bug * also ignore warnings * finally getting somewhere... * progressively more complicated :( * round of fixes & first working nofuzz * looks like we got another live one... break time * another true positive * more ignores, need '.' in file extension, restore test * wrapping up * Write up the GHA config * annotation * comment for future work * vestigial * skips on old R * expect_no_lint * new tests * NEWS * bad copy-paste * need stop_on_failure for batch? * delint, fix last skip for R<4.1.0 * more extensible structure * expect_no_lint * progress, incl. many 'nofuzz' & 'no_lint' * another round of nofuzz * another batch * tweak * another nofuzz case, attempting to reduce nofuzz requirements * fix; scale back nofuzz for an attempt * reinstate more legit nofuzz * failed merge * shrink diff * shrink diff * shrink diff * also check if lines are not edited at all Co-authored-by: AshesITR <[email protected]> * restore * revert * spurious * skip, not return --------- Co-authored-by: AshesITR <[email protected]>
1 parent c09fe73 commit 30d1e8e

13 files changed

+255
-270
lines changed

.dev/ast_fuzz_test.R

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,9 @@ pkgload::load_all()
5454
# of getting top-level exclusions done for 'nofuzz start|end' ranges, except
5555
# maybe if it enabled us to reuse lintr's own exclude() system.
5656
# therefore we take this approach: pass over the test suite first and comment out
57-
# any tests/units that have been marked 'nofuzz'. restore later.
57+
# any tests/units that have been marked 'nofuzz'. restore later. one consequence
58+
# is there's no support for fuzzer-specific exclusion, e.g. we fully disable
59+
# the unnecessary_placeholder_linter() tests because the '%>%' placeholder ('.') and the '|>' placeholder ('_') differ.
5860
test_restorations <- list()
5961
for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = TRUE)) {
6062
xml <- read_xml(xmlparsedata::xml_parse_data(parse(test_file, keep.source = TRUE)))
@@ -114,15 +116,15 @@ failures <- reporter$failures$as_list()
114116
# Flag each recorded failure whose message reports a mismatch in
# column_number, ranges, or line; every other failure is flagged FALSE.
valid_failure <- vapply(
  failures,
  function(fail) grepl("(column_number|ranges|line) .* did not match", fail$message),
  FUN.VALUE = logical(1L)
)
124-
if (!all(valid_failure)) {
125-
failures <- failures[!valid_failure]
126+
failures <- failures[!valid_failure]
127+
if (length(failures) > 0L) {
126128
names(failures) <- vapply(failures, `[[`, "test", FUN.VALUE = character(1L))
127129
cat("Some fuzzed tests failed unexpectedly!\n")
128130
print(failures)

.dev/maybe_fuzz_content.R

Lines changed: 45 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -14,48 +14,66 @@ maybe_fuzz_content <- function(file, lines) {
1414
new_file
1515
}
1616

17-
function_lambda_fuzzer <- function(pd, lines) {
18-
fun_tokens <- c(`'\\\\'` = "\\", `FUNCTION` = "function")
19-
fun_idx <- which(pd$token %in% names(fun_tokens))
20-
n_fun <- length(fun_idx)
17+
# skip errors for e.g. Rmd files, and ignore warnings.
18+
# We could use get_source_expressions(), but with little benefit & much slower.
19+
# also avoid over-use of 'nofuzz' induced by some incompatible swaps, e.g. not all '%>%' can be
20+
# swapped to '|>' (if '.' is used, or if RHS is not an allowed simple call)
21+
# Parse file `f` and return its parse data, or the error condition object if
# parsing (or extracting the parse data) fails. Warnings raised while parsing
# are suppressed. Callers distinguish the outcomes with inherits(x, "error").
error_or_parse_data <- function(f) {
  tryCatch(
    {
      parsed <- suppressWarnings(parse(f, keep.source = TRUE))
      getParseData(parsed)
    },
    error = function(cond) cond
  )
}
2124

22-
if (n_fun == 0L) {
23-
return(invisible())
24-
}
25+
# Build a fuzzer that randomly swaps interchangeable tokens in source lines.
#
# @param pd_filter Function taking a parse data frame and returning a logical
#   vector marking the rows (tokens) that are candidates for swapping.
# @param replacements Character vector of token texts to sample from (with
#   replacement) for each candidate token.
# @return A function(pd, lines). It returns invisible NULL when no token
#   matches `pd_filter`; otherwise it returns `lines` with each candidate token
#   replaced by its sampled text (tokens that drew their own text are left
#   untouched, so the result may equal the input).
simple_swap_fuzzer <- function(pd_filter, replacements) {
  function(pd, lines) {
    idx <- which(pd_filter(pd))
    n <- length(idx)

    if (n == 0L) {
      return(invisible())
    }

    pd$new_text <- NA_character_
    pd$new_text[idx] <- sample(replacements, n, replace = TRUE)

    # Candidates are visited in reverse row order; presumably so that a
    # replacement of different width does not invalidate the col1 offsets of
    # tokens still to be processed on the same line -- TODO confirm.
    for (ii in rev(idx)) {
      if (pd$text[ii] == pd$new_text[ii]) next
      # An all-rex() pattern was tried first but hit a bug:
      # https://github.com/r-lib/rex/issues/96
      # Instead, anchor on the token's column by skipping col1 - 1 characters.
      ptn <- paste0("^(.{", pd$col1[ii] - 1L, "})", rex::rex(pd$text[ii]))
      lines[pd$line1[ii]] <- rex::re_substitutes(
        lines[pd$line1[ii]],
        ptn,
        paste0("\\1", rex::rex(pd$new_text[ii]))
      )
    }
    lines
  }
}
3746

47+
# Fuzzer that swaps between the two spellings of a function definition:
# the `function` keyword and the `\` shorthand.
function_lambda_fuzzer <- simple_swap_fuzzer(
  pd_filter = function(pd) is.element(pd$token, c("'\\\\'", "FUNCTION")),
  replacements = c("\\", "function")
)
51+
52+
# Fuzzer that swaps between the '%>%' pipe and the native '|>' pipe.
pipe_fuzzer <- simple_swap_fuzzer(
  pd_filter = function(pd) {
    is_special_pipe <- pd$token == "SPECIAL" & pd$text == "%>%"
    is_native_pipe <- pd$token == "PIPE"
    is_special_pipe | is_native_pipe
  },
  replacements = c("%>%", "|>")
)
56+
3857
# we could also consider just passing any test where no fuzzing takes place,
3958
# i.e. letting the other GHA handle whether unfuzzed tests pass as expected.
4059
apply_fuzzers <- function(f) {
41-
# skip errors for e.g. Rmd files, and ignore warnings.
42-
# We could use get_source_expressions(), but with little benefit & much slower.
43-
pd <- tryCatch(getParseData(suppressWarnings(parse(f, keep.source = TRUE, encoding = "UTF-8"))), error = identity)
60+
pd <- error_or_parse_data(f)
4461
if (inherits(pd, "error")) {
4562
return(invisible())
4663
}
4764

48-
reparse <- FALSE
49-
lines <- readLines(f)
50-
for (fuzzer in list(function_lambda_fuzzer)) {
51-
if (reparse) {
52-
pd <- getParseData(parse(f, keep.source = TRUE))
53-
lines <- readLines(f)
54-
}
65+
unedited <- lines <- readLines(f)
66+
for (fuzzer in list(function_lambda_fuzzer, pipe_fuzzer)) {
5567
updated_lines <- fuzzer(pd, lines)
56-
reparse <- !is.null(updated_lines)
57-
if (!reparse) next # skip some I/O if we can
68+
if (is.null(updated_lines) || identical(unedited, updated_lines)) next # skip some I/O if we can
5869
writeLines(updated_lines, f)
70+
# check if our attempted edit introduced some error; skip applying this fuzzer only if so
71+
pd <- error_or_parse_data(f)
72+
if (inherits(pd, "error")) {
73+
writeLines(lines, f)
74+
next
75+
}
76+
lines <- readLines(f)
5977
}
6078

6179
invisible()

0 commit comments

Comments
 (0)