r-lib · MichaelChirico · Mar 11, 2025 · Mar 11, 2025 · Mar 11, 2025 · Mar 11, 2025
diff --git a/.dev/ast_fuzz_test.R b/.dev/ast_fuzz_test.R
@@ -53,7 +53,7 @@ if (
 }
 
 contents[wrong_number_def_idx] <-
-  'wrong_number_fmt <- "got %d lints instead of %d%s\\nFile contents:\\n%s"'
+  '  wrong_number_fmt <- "got %d lints instead of %d%s\\nFile contents:\\n%s"'
 contents[wrong_number_use_idx] <-
   gsub("\\)$", ", readChar(file, file.size(file)))", contents[wrong_number_use_idx])
 writeLines(contents, expect_lint_file)
@@ -66,61 +66,81 @@ withr::defer({
 
 suppressMessages(pkgload::load_all())
 
+can_parse <- \(lines) !inherits(tryCatch(parse(text = lines), error = identity), "error")
+get_str <- \(x) tail(unlist(strsplit(x, ": ", fixed = TRUE)), 1L)
+
 # beware lazy eval: originally tried adding a withr::defer() in each iteration, but
 #   this effectively only runs the last 'defer' expression as the names are only
 #   evaluated at run-time. So instead keep track of all edits in this object.
-# this approach to implementing 'nofuzz' feels painfully manual, but I couldn't
-#   figure out how else to get 'testthat' to give us what we need -- the failures
-#   object in the reporter is frustratingly inconsistent in whether the trace
-#   exists, and even if it does, we'd have to text-mangle to get the corresponding
-#   file names out. Also, the trace 'srcref' happens under keep.source=FALSE,
-#   so we lose any associated comments anyway. even that would not solve the issue
-#   of getting top-level exclusions done for 'nofuzz start|end' ranges, except
-#   maybe if it enabled us to reuse lintr's own exclude() system.
-# therefore we take this approach: pass over the test suite first and comment out
-#   any tests/units that have been marked 'nofuzz'. restore later. one consequence
-#   is there's no support for fuzzer-specific exclusion, e.g. we fully disable
-#   the unnecessary_placeholder_linter() tests because |> and _ placeholders differ.
+# these have to be enabled/disabled at runtime as it's not possible to disentagle which
+#   fuzzer caused the error ex-post (and it might be the interaction of >1 at issue).
+#   an earlier approach was like the current 'nofuzz' -- just comment out the troublesome
+#   tests from being run at all. But that led to a very quickly growing set of tests being
+#   skipped totally, which also hid some issues that are surfaced by the current approach.
+#   Another idea would be to just leave the enable/disable calls as code in the test suite,
+#   but I prefer the current approach of leaving them as comments: (1) it's more consistent
+#   with the 'nolint' exclusion system and (2) it doesn't distract the casual reader as much.
 test_restorations <- list()
 for (test_file in list.files("tests/testthat", pattern = "^test-", full.names = TRUE)) {
-  xml <- read_xml(xmlparsedata::xml_parse_data(parse(test_file, keep.source = TRUE)))
-  # parent::* to catch top-level comments (exprlist). matches one-line nofuzz and start/end ranges.
-  nofuzz_lines <- xml_find_all(xml, "//COMMENT[contains(text(), 'nofuzz')]/parent::*")
-  if (length(nofuzz_lines) == 0L) next
-
-  test_original <- test_lines <- readLines(test_file)
-
-  for (nofuzz_line in nofuzz_lines) {
-    comments <- xml_find_all(nofuzz_line, "COMMENT[contains(text(), 'nofuzz')]")
-    comment_text <- xml_text(comments)
-    # handle start/end ranges first.
-    start_idx <- grep("nofuzz start", comment_text, fixed = TRUE)
-    end_idx <- grep("nofuzz end", comment_text, fixed = TRUE)
-    if (length(start_idx) != length(end_idx) || any(end_idx < start_idx)) {
-      stop(sprintf(
-        "Mismatched '# nofuzz start' (%s), '# nofuzz end' (%s) in %s",
-        toString(start_idx), toString(end_idx), test_file
-      ))
+  test_lines <- readLines(test_file)
+  one_expr_idx <- grep("# nofuzz", test_lines, fixed = TRUE)
+  range_start_idx <- grep("^\\s*# fuzzer disable:", test_lines)
+  if (length(one_expr_idx) == 0L && length(range_start_idx) == 0L) next
+
+  test_original <- test_lines
+  pd <- getParseData(parse(test_file))
+
+  for (start_line in rev(one_expr_idx)) {
+    end_line <- start_line
+    while (end_line <= length(test_lines) && !can_parse(test_lines[start_line:end_line])) {
+      end_line <- end_line + 1L
     }
-
-    comment_ranges <- Map(`:`,
-      as.integer(xml_attr(comments[start_idx], "line1")),
-      as.integer(xml_attr(comments[end_idx], "line1"))
-    )
-    for (comment_range in comment_ranges) {
-      test_lines[comment_range] <- paste("#", test_lines[comment_range])
+    if (end_line > length(test_lines)) {
+      stop("Unable to parse any expression starting from line ", start_line)
+    }
+    comment_txt <- subset(pd, line1 == start_line & token == "COMMENT", select = "text", drop = TRUE)
+    # blanket disable means the test cannot be run. this happens e.g. for tests of encoding
+    #   that are too complicated to deal with in this GHA.
+    if (comment_txt == "# nofuzz") {
+      test_lines[start_line:end_line] <- ""
+    } else {
+      deactivated <- get_str(comment_txt)
+      test_lines <- c(
+        head(test_lines, start_line - 1L),
+        sprintf("deactivate_fuzzers('%s')", deactivated),
+        test_lines[start_line:end_line],
+        sprintf("activate_fuzzers('%s')", deactivated),
+        tail(test_lines, -end_line)
+      )
     }
+  }
 
-    if (length(start_idx) > 0L && !any(!start_idx & !end_idx)) next
+  if (length(one_expr_idx)) {
+    writeLines(test_lines, test_file)
+    pd <- getParseData(parse(test_file))
+    range_start_idx <- grep("^\\s*# fuzzer disable:", test_lines)
+  }
 
-    # NB: one-line tests line expect_lint(...) # nofuzz are not supported,
-    #   since the comment will attach to the parent test_that() & thus comment
-    #   out the whole unit. Easiest solution is just to spread out those few tests for now.
-    comment_range <- as.integer(xml_attr(nofuzz_line, "line1")):as.integer(xml_attr(nofuzz_line, "line2"))
-    test_lines[comment_range] <- paste("#", test_lines[comment_range])
+  range_end_idx <- grep("^\\s*# fuzzer enable:", test_lines)
+
+  if (length(range_start_idx) != length(range_end_idx) || any(range_end_idx < range_start_idx)) {
+    stop(sprintf(
+      "Mismatched '# fuzzer disable' (%s), '# fuzzer enable' (%s) in %s",
+      toString(range_start_idx), toString(range_end_idx), test_file
+    ))
+  }
+
+  for (ii in seq_along(range_start_idx)) {
+    start_line <- test_lines[range_start_idx[ii]]
+    test_lines[range_start_idx[ii]] <-
+      gsub("#.*", sprintf("deactivate_fuzzers('%s')", get_str(start_line)), start_line)
+    end_line <- test_lines[range_end_idx[ii]]
+    test_lines[range_end_idx[ii]] <-
+      gsub("#.*", sprintf("activate_fuzzers('%s')", get_str(end_line)), end_line)
   }
 
-  writeLines(test_lines, test_file)
+  if (length(range_start_idx)) writeLines(test_lines, test_file)
+
   test_restorations <- c(test_restorations, list(list(file = test_file, lines = test_original)))
 }
 withr::defer(for (restoration in test_restorations) writeLines(restoration$lines, restoration$file))
@@ -134,8 +154,7 @@ all_classes <- unlist(lapply(
   reporter$get_results(),
   \(test) lapply(test$results, \(x) class(x)[1L])
 ))
-cat("Summary of test statuses:\n")
-print(table(all_classes))
+print(table(`Summary of test statuses:` = all_classes))
 
 # ignore any test that failed for expected reasons, e.g. some known lint metadata changes
 #   about line numbers or the contents of the line. this saves us having to pepper tons of
@@ -160,7 +179,7 @@ if (length(invalid_failures) > 0L) {
     \(x) sprintf("%s:%s", x$file, x$test),
     character(1L)
   )
-  cat("Some fuzzed tests failed unexpectedly!\n")
+  cat(sprintf("%d fuzzed tests failed unexpectedly!\n", length(invalid_failures)))
   print(invalid_failures)
-  stop("Use # nofuzz [start|end] to mark false positives or fix any bugs.")
+  stop("Fix any bugs, or use '# nofuzz'/'# fuzzer [dis|en]able' to mark false positives.")
 }
diff --git a/.dev/maybe_fuzz_content.R b/.dev/maybe_fuzz_content.R
@@ -9,13 +9,7 @@ maybe_fuzz_content <- function(file, lines) {
     file.copy(file, new_file, copy.mode = FALSE)
   }
 
-  apply_fuzzers(new_file, fuzzers = list(
-    function_lambda_fuzzer,
-    pipe_fuzzer,
-    dollar_at_fuzzer,
-    comment_injection_fuzzer,
-    assignment_fuzzer
-  ))
+  apply_fuzzers(new_file, fuzzers = .fuzzers$active)
 
   new_file
 }
@@ -106,7 +100,7 @@ apply_fuzzers <- function(f, fuzzers) {
     return(invisible())
   }
 
-  unedited <- lines <- readLines(f)
+  unedited <- lines <- readLines(f, warn = FALSE)
   for (fuzzer in fuzzers) {
     updated_lines <- fuzzer(pd, lines)
     if (is.null(updated_lines)) next # skip some I/O if we can
@@ -122,3 +116,39 @@ apply_fuzzers <- function(f, fuzzers) {
 
   invisible()
 }
+
+.fuzzers <- new.env()
+.fuzzers$active <- list(
+  assignment = assignment_fuzzer,
+  comment_injection = comment_injection_fuzzer,
+  dollar_at = dollar_at_fuzzer,
+  function_lambda = function_lambda_fuzzer,
+  pipe = pipe_fuzzer
+)
+.fuzzers$inactive <- list()
+
+deactivate_fuzzers <- function(names_str) {
+  req <- unlist(strsplit(names_str, " ", fixed = TRUE))
+  if (!all(req %in% names(.fuzzers$active))) {
+    stop(sprintf(
+      "Invalid attempt to deactivate fuzzers: '%s'\n  Currently active fuzzers: %s\n  Currently inactive fuzzers: %s",
+      names_str, toString(names(.fuzzers$active)), toString(names(.fuzzers$inactive))
+    ))
+  }
+  .fuzzers$inactive[req] <- .fuzzers$active[req]
+  .fuzzers$active[req] <- NULL
+  invisible()
+}
+
+activate_fuzzers <- function(names_str) {
+  req <- unlist(strsplit(names_str, " ", fixed = TRUE))
+  if (!all(req %in% names(.fuzzers$inactive))) {
+    stop(sprintf(
+      "Invalid attempt to activate fuzzers: '%s'\n  Currently active fuzzers: %s\n  Currently inactive fuzzers: %s",
+      names_str, toString(names(.fuzzers$active)), toString(names(.fuzzers$inactive))
+    ))
+  }
+  .fuzzers$active[req] <- .fuzzers$inactive[req]
+  .fuzzers$inactive[req] <- NULL
+  invisible()
+}
diff --git a/NEWS.md b/NEWS.md
@@ -33,6 +33,7 @@
 * `boolean_arithmetic_linter()` finds many more cases like `sum(x | y) == 0` where the total of a known-logical vector is compared to 0 (#1580, @MichaelChirico).
 * New argument `include_s4_slots` for the `xml_find_function_calls()` entry in the `get_source_expressions()` to govern whether calls of the form `s4Obj@fun()` are included in the result (#2820, @MichaelChirico).
 * General handling of logic around where comments can appear in code has been improved (#2822, @MichaelChirico). In many cases, this is a tiny robustness fix for weird edge cases unlikely to be found in practice, but in others, this improves practical linter precision (reduced false positives and/or false negatives). The affected linters (with annotations for changes noteworthy enough to have gotten a dedicated bug) are:
+   + `any_duplicated_linter()`
    + `brace_linter()`
    + `coalesce_linter()`
    + `comparison_negation_linter()` #2826
@@ -42,6 +43,8 @@
    + `if_switch_linter()`
    + `ifelse_censor_linter()` #2826
    + `implicit_assignment_linter()`
+   + `is_numeric_linter()`
+   + `keyword_quote_linter()`
    + `length_test_linter()`
    + `literal_coercion_linter()` #2824
    + `matrix_apply_linter()` #2825

diff --git a/R/any_duplicated_linter.R b/R/any_duplicated_linter.R
@@ -52,50 +52,53 @@ any_duplicated_linter <- function() {
   #  the final parent::expr/expr gets us to the expr on the other side of EQ;
   #  this lets us match on either side of EQ, where following-sibling
   #  assumes we are before EQ, preceding-sibling assumes we are after EQ.
-  length_unique_xpath_parts <- glue("
-  //{ c('EQ', 'NE', 'GT', 'LT') }
-    /parent::expr
-    /expr[
-      expr[1][SYMBOL_FUNCTION_CALL[text() = 'length']]
-      and expr[expr[1][
-        SYMBOL_FUNCTION_CALL[text() = 'unique']
-        and (
-          following-sibling::expr =
-            parent::expr
-            /parent::expr
-            /parent::expr
-            /expr
-            /expr[1][SYMBOL_FUNCTION_CALL[text()= 'length']]
-            /following-sibling::expr
-          or
-          following-sibling::expr[OP-DOLLAR or LBB]/expr[1] =
-            parent::expr
-            /parent::expr
-            /parent::expr
-            /expr
-            /expr[1][SYMBOL_FUNCTION_CALL[text()= 'nrow']]
-            /following-sibling::expr
-        )
-      ]]
-    ]
-  ")
-  length_unique_xpath <- paste(length_unique_xpath_parts, collapse = " | ")
+  length_comparison_xpath <- "
+  parent::expr
+    /parent::expr[expr/SYMBOL_FUNCTION_CALL[text() = 'length']]
+    /parent::expr[EQ or NE or GT or LT]
+  "
+  length_unique_xpath <- "
+  expr/expr/expr[1][
+    SYMBOL_FUNCTION_CALL[text() = 'unique']
+    and (
+      following-sibling::expr =
+        parent::expr
+        /parent::expr
+        /parent::expr
+        /expr
+        /expr[1][SYMBOL_FUNCTION_CALL[text() = 'length']]
+        /following-sibling::expr
+      or
+      following-sibling::expr[OP-DOLLAR or LBB]/expr[1] =
+        parent::expr
+        /parent::expr
+        /parent::expr
+        /expr
+        /expr[1][SYMBOL_FUNCTION_CALL[text() = 'nrow']]
+        /following-sibling::expr
+    )
+  ]"
 
-  uses_nrow_xpath <- "./parent::expr/expr/expr[1]/SYMBOL_FUNCTION_CALL[text() = 'nrow']"
+  uses_nrow_xpath <- "./expr/expr[1]/SYMBOL_FUNCTION_CALL[text() = 'nrow']"
 
   Linter(linter_level = "expression", function(source_expression) {
-    xml <- source_expression$xml_parsed_content
-    xml_calls <- source_expression$xml_find_function_calls("any")
+    any_calls <- source_expression$xml_find_function_calls("any")
+    unique_calls <- source_expression$xml_find_function_calls("unique")
 
-    any_duplicated_expr <- xml_find_all(xml_calls, any_duplicated_xpath)
+    any_duplicated_expr <- xml_find_all(any_calls, any_duplicated_xpath)
     any_duplicated_lints <- xml_nodes_to_lints(
       any_duplicated_expr,
       source_expression = source_expression,
       lint_message = "anyDuplicated(x, ...) > 0 is better than any(duplicated(x), ...).",
       type = "warning"
     )
 
-    length_unique_expr <- xml_find_all(xml, length_unique_xpath)
+    in_length_comparison <- !is.na(xml_find_first(unique_calls, length_comparison_xpath))
+    unique_calls <- strip_comments_from_subtree(
+      xml_parent(xml_parent(xml_parent(unique_calls[in_length_comparison])))
+    )
+    is_length_unique <- !is.na(xml_find_first(unique_calls, length_unique_xpath))
+    length_unique_expr <- unique_calls[is_length_unique]
     lint_message <- ifelse(
       is.na(xml_find_first(length_unique_expr, uses_nrow_xpath)),
       "anyDuplicated(x) == 0L is better than length(unique(x)) == length(x).",

diff --git a/R/is_numeric_linter.R b/R/is_numeric_linter.R
@@ -42,15 +42,12 @@ is_numeric_linter <- function() {
 
   # testing things like is.numeric(x) || is.integer(x)
   or_xpath <- glue("
-  //OR2
-    /parent::expr[
-      expr/{is_numeric_expr}
-      and expr/{is_integer_expr}
-      and
-        expr/{is_numeric_expr}/following-sibling::expr[1]
-        = expr/{is_integer_expr}/following-sibling::expr[1]
-    ]
+    //OR2/parent::expr[expr/{is_numeric_expr} and expr/{is_integer_expr}]
   ")
+  node_match_xpath <- glue("self::*[
+    expr/{is_numeric_expr}/following-sibling::expr[1]
+    = expr/{is_integer_expr}/following-sibling::expr[1]
+  ]")
 
   # testing class(x) %in% c("numeric", "integer")
   class_xpath <- "
@@ -69,9 +66,10 @@ is_numeric_linter <- function() {
   Linter(linter_level = "expression", function(source_expression) {
     xml <- source_expression$xml_parsed_content
 
-    or_expr <- xml_find_all(xml, or_xpath)
+    or_expr <- strip_comments_from_subtree(xml_find_all(xml, or_xpath))
+    expr_match <- !is.na(xml_find_first(or_expr, node_match_xpath))
     or_lints <- xml_nodes_to_lints(
-      or_expr,
+      or_expr[expr_match],
       source_expression = source_expression,
       lint_message = paste(
         "Use `is.numeric(x)` instead of the equivalent `is.numeric(x) || is.integer(x)`.",

diff --git a/R/keyword_quote_linter.R b/R/keyword_quote_linter.R
@@ -136,7 +136,7 @@ keyword_quote_linter <- function() {
     )
 
     extraction_expr <- extraction_expr[invalid_extraction_quoting]
-    extractor <- xml_find_chr(extraction_expr, "string(preceding-sibling::*[1])")
+    extractor <- xml_find_chr(extraction_expr, "string(preceding-sibling::*[not(self::COMMENT)][1])")
     gen_extractor <- ifelse(extractor == "$", "[[", "slot()")
 
     extraction_lints <- xml_nodes_to_lints(