From 9c3d7c05bab92ef800bea4ad4af6b9e3ae27b202 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 26 Jul 2025 16:35:24 +0000 Subject: [PATCH 1/3] Improve robustness to comments --- R/literal_coercion_linter.R | 3 +- tests/testthat/test-literal_coercion_linter.R | 74 ++++++++++++++----- 2 files changed, 58 insertions(+), 19 deletions(-) diff --git a/R/literal_coercion_linter.R b/R/literal_coercion_linter.R index 12b8cf39c5..b458eaf434 100644 --- a/R/literal_coercion_linter.R +++ b/R/literal_coercion_linter.R @@ -61,7 +61,7 @@ literal_coercion_linter <- function() { not(OP-DOLLAR or OP-AT) and ( NUM_CONST[not(contains(translate(text(), 'E', 'e'), 'e'))] - or STR_CONST[not(following-sibling::*[1][self::EQ_SUB])] + or STR_CONST[not(following-sibling::*[not(self::COMMENT)][1][self::EQ_SUB])] ) " xpath <- glue(" @@ -89,6 +89,7 @@ literal_coercion_linter <- function() { ) # nocov end } else { + bad_expr <- strip_comments_from_subtree(bad_expr) # duplicate, unless we add 'rlang::' and it wasn't there originally coercion_str <- report_str <- xml_text(bad_expr) if (any(is_rlang_coercer) && !("package:rlang" %in% search())) { diff --git a/tests/testthat/test-literal_coercion_linter.R b/tests/testthat/test-literal_coercion_linter.R index aa5f752fbb..823c442fd2 100644 --- a/tests/testthat/test-literal_coercion_linter.R +++ b/tests/testthat/test-literal_coercion_linter.R @@ -2,42 +2,50 @@ test_that("literal_coercion_linter skips allowed usages", { linter <- literal_coercion_linter() # naive xpath includes the "_f0" here as a literal - expect_lint('as.numeric(x$"_f0")', NULL, linter) - expect_lint('as.numeric(x@"_f0")', NULL, linter) + expect_no_lint('as.numeric(x$"_f0")', linter) + expect_no_lint('as.numeric(x@"_f0")', linter) # only examine the first method for as. methods - expect_lint("as.character(as.Date(x), '%Y%m%d')", NULL, linter) + expect_no_lint("as.character(as.Date(x), '%Y%m%d')", linter) # we are as yet agnostic on whether to prefer literals over coerced vectors - expect_lint("as.integer(c(1, 2, 3))", NULL, linter) + expect_no_lint("as.integer(c(1, 2, 3))", linter) # even more ambiguous for character vectors like here, where quotes are much # more awkward to type than a sequence of numbers - expect_lint("as.character(c(1, 2, 3))", NULL, linter) + expect_no_lint("as.character(c(1, 2, 3))", linter) # not possible to declare raw literals - expect_lint("as.raw(c(1, 2, 3))", NULL, linter) + expect_no_lint("as.raw(c(1, 2, 3))", linter) # also not taking a stand on as.complex(0) vs. 0 + 0i - expect_lint("as.complex(0)", NULL, linter) + expect_no_lint("as.complex(0)", linter) # ditto for as.integer(1e6) vs. 1000000L - expect_lint("as.integer(1e6)", NULL, linter) + expect_no_lint("as.integer(1e6)", linter) # ditto for as.numeric(1:3) vs. c(1, 2, 3) - expect_lint("as.numeric(1:3)", NULL, linter) + expect_no_lint("as.numeric(1:3)", linter) }) test_that("literal_coercion_linter skips allowed rlang usages", { linter <- literal_coercion_linter() - expect_lint("int(1, 2.0, 3)", NULL, linter) - expect_lint("chr('e', 'ab', 'xyz')", NULL, linter) - expect_lint("lgl(0, 1)", NULL, linter) - expect_lint("lgl(0L, 1)", NULL, linter) - expect_lint("dbl(1.2, 1e5, 3L, 2E4)", NULL, linter) + expect_no_lint("int(1, 2.0, 3)", linter) + expect_no_lint("chr('e', 'ab', 'xyz')", linter) + expect_no_lint("lgl(0, 1)", linter) + expect_no_lint("lgl(0L, 1)", linter) + expect_no_lint("dbl(1.2, 1e5, 3L, 2E4)", linter) # make sure using namespace (`rlang::`) doesn't create problems - expect_lint("rlang::int(1, 2, 3)", NULL, linter) + expect_no_lint("rlang::int(1, 2, 3)", linter) # even if scalar, carve out exceptions for the following - expect_lint("int(1.0e6)", NULL, linter) + expect_no_lint("int(1.0e6)", linter) }) test_that("literal_coercion_linter skips quoted keyword arguments", { - expect_lint("as.numeric(foo('a' = 1))", NULL, literal_coercion_linter()) + linter <- literal_coercion_linter() + expect_no_lint("as.numeric(foo('a' = 1))", linter) + expect_no_lint( + trim_some(" + as.numeric(foo('a' # comment + = 1)) + "), + linter + ) }) test_that("no warnings surfaced by running coercion", { @@ -50,6 +58,18 @@ test_that("no warnings surfaced by running coercion", { expect_no_warning( expect_lint("as.integer(2147483648)", "Use NA_integer_", linter) ) + + expect_no_warning( + expect_lint( + trim_some(" + as.double( + NA # comment + ) + "), + "Use NA_real_", + linter + ) + ) }) skip_if_not_installed("tibble") @@ -81,6 +101,7 @@ patrick::with_parameters_test_that( skip_if_not_installed("rlang") test_that("multiple lints return custom messages", { + linter <- literal_coercion_linter() expect_lint( trim_some("{ as.integer(1) @@ -90,7 +111,24 @@ test_that("multiple lints return custom messages", { list(rex::rex("Use 1L instead of as.integer(1)"), line_number = 2L), list(rex::rex("Use TRUE instead of lgl(1L)"), line_number = 3L) ), - literal_coercion_linter() + linter + ) + + # also ensure comment remove logic works across several lints + expect_lint( + trim_some("{ + as.integer( # comment + 1 # comment + ) # comment + lgl( # comment + 1L # comment + ) # comment + }"), + list( + list(rex::rex("Use 1L instead of as.integer(1)"), line_number = 2L), + list(rex::rex("Use TRUE instead of lgl(1L)"), line_number = 5L) + ), + linter ) }) From 8e1425afcd9e2a660d1d401bbf9db0c2af15a382 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 26 Jul 2025 16:38:20 +0000 Subject: [PATCH 2/3] need strip_comments_from_subtree --- R/xml_utils.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/R/xml_utils.R b/R/xml_utils.R index 3b0546da62..a4fe98e9be 100644 --- a/R/xml_utils.R +++ b/R/xml_utils.R @@ -12,6 +12,29 @@ xml2lang <- function(x) { str2lang(paste(xml_text(x_strip_comments), collapse = " ")) } +# TODO(r-lib/xml2#341): Use xml_clone() instead. +clone_xml_ <- function(x) { + tmp_doc <- tempfile() + on.exit(unlink(tmp_doc)) + + doc <- xml2::xml_new_root("root") + for (ii in seq_along(x)) { + xml2::write_xml(x[[ii]], tmp_doc) + xml2::xml_add_child(doc, xml2::read_xml(tmp_doc)) + } + xml_find_all(doc, "*") +} + +# caveat: whether this is a copy or not is inconsistent. assume the output is read-only! +strip_comments_from_subtree <- function(expr) { + comments <- xml_find_all(expr, ".//COMMENT") + if (length(comments) == 0L) { + return(expr) + } + expr <- clone_xml_(expr) + for (comment in xml_find_all(expr, ".//COMMENT")) xml2::xml_remove(comment) + expr +} safe_parse_to_xml <- function(parsed_content) { if (is.null(parsed_content)) { From 30ec549ffe37783d7ad6d3c16d1ac2fb70946e16 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 26 Jul 2025 16:41:04 +0000 Subject: [PATCH 3/3] Improve robustness to comments --- R/matrix_apply_linter.R | 1 + tests/testthat/test-matrix_apply_linter.R | 43 ++++++++++------------- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/R/matrix_apply_linter.R b/R/matrix_apply_linter.R index 6691bc00fb..2a99100d43 100644 --- a/R/matrix_apply_linter.R +++ b/R/matrix_apply_linter.R @@ -97,6 +97,7 @@ matrix_apply_linter <- function() { Linter(linter_level = "expression", function(source_expression) { xml_calls <- source_expression$xml_find_function_calls("apply") bad_expr <- xml_find_all(xml_calls, xpath) + bad_expr <- strip_comments_from_subtree(bad_expr) variable <- xml_text(xml_find_all(bad_expr, variable_xpath)) diff --git a/tests/testthat/test-matrix_apply_linter.R b/tests/testthat/test-matrix_apply_linter.R index 0a30b3ce1a..5503fb8b21 100644 --- a/tests/testthat/test-matrix_apply_linter.R +++ b/tests/testthat/test-matrix_apply_linter.R @@ -1,78 +1,74 @@ test_that("matrix_apply_linter skips allowed usages", { linter <- matrix_apply_linter() - expect_lint("apply(x, 1, prod)", NULL, linter) + expect_no_lint("apply(x, 1, prod)", linter) - expect_lint("apply(x, 1, function(i) sum(i[i > 0]))", NULL, linter) + expect_no_lint("apply(x, 1, function(i) sum(i[i > 0]))", linter) # sum as FUN argument - expect_lint("apply(x, 1, f, sum)", NULL, linter) + expect_no_lint("apply(x, 1, f, sum)", linter) # mean() with named arguments other than na.rm is skipped because they are not # implemented in colMeans() or rowMeans() - expect_lint("apply(x, 1, mean, trim = 0.2)", NULL, linter) + expect_no_lint("apply(x, 1, mean, trim = 0.2)", linter) }) test_that("matrix_apply_linter is not implemented for complex MARGIN values", { linter <- matrix_apply_linter() # Could be implemented at some point - expect_lint("apply(x, seq(2, 4), sum)", NULL, linter) + expect_no_lint("apply(x, seq(2, 4), sum)", linter) # No equivalent - expect_lint("apply(x, c(2, 4), sum)", NULL, linter) + expect_no_lint("apply(x, c(2, 4), sum)", linter) # Beyond the scope of static analysis - expect_lint("apply(x, m, sum)", NULL, linter) - - expect_lint("apply(x, 1 + 2:4, sum)", NULL, linter) + expect_no_lint("apply(x, m, sum)", linter) + expect_no_lint("apply(x, 1 + 2:4, sum)", linter) }) test_that("matrix_apply_linter simple disallowed usages", { linter <- matrix_apply_linter() - lint_message <- rex::rex("rowSums(x)") + lint_message <- rex::rex("rowSums(x)") expect_lint("apply(x, 1, sum)", lint_message, linter) - expect_lint("apply(x, MARGIN = 1, FUN = sum)", lint_message, linter) - expect_lint("apply(x, 1L, sum)", lint_message, linter) - expect_lint("apply(x, 1:4, sum)", rex::rex("rowSums(x, dims = 4)"), linter) - expect_lint("apply(x, 2, sum)", rex::rex("rowSums(colSums(x))"), linter) - expect_lint("apply(x, 2:4, sum)", rex::rex("rowSums(colSums(x), dims = 3)"), linter) lint_message <- rex::rex("rowMeans") - expect_lint("apply(x, 1, mean)", lint_message, linter) - expect_lint("apply(x, MARGIN = 1, FUN = mean)", lint_message, linter) # Works with extra args in mean() expect_lint("apply(x, 1, mean, na.rm = TRUE)", lint_message, linter) lint_message <- rex::rex("colMeans") - expect_lint("apply(x, 2, mean)", lint_message, linter) - expect_lint("apply(x, 2:4, mean)", lint_message, linter) + # adversarial comments + expect_lint( + trim_some(" + apply(x, 2, #comment + mean) + "), + lint_message, + linter + ) }) test_that("matrix_apply_linter recommendation includes na.rm if present in original call", { linter <- matrix_apply_linter() - lint_message <- rex::rex("na.rm = TRUE") + lint_message <- rex::rex("na.rm = TRUE") expect_lint("apply(x, 1, sum, na.rm = TRUE)", lint_message, linter) - expect_lint("apply(x, 2, sum, na.rm = TRUE)", lint_message, linter) - expect_lint("apply(x, 1, mean, na.rm = TRUE)", lint_message, linter) - expect_lint("apply(x, 2, mean, na.rm = TRUE)", lint_message, linter) lint_message <- rex::rex("rowSums(x)") @@ -80,7 +76,6 @@ test_that("matrix_apply_linter recommendation includes na.rm if present in origi lint_message <- rex::rex("na.rm = foo") expect_lint("apply(x, 1, sum, na.rm = foo)", lint_message, linter) - }) test_that("matrix_apply_linter works with multiple lints in a single expression", {