Skip to content

Commit

Permalink
catch raw strings in file path inputs (#2116)
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelChirico authored Sep 9, 2023
1 parent c8f0074 commit 33335e8
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 58 deletions.
107 changes: 50 additions & 57 deletions R/paste_linter.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,9 @@ paste_linter <- function(allow_empty_sep = FALSE,
allow_to_string = FALSE,
allow_file_path = c("double_slash", "always", "never")) {
allow_file_path <- match.arg(allow_file_path)
check_file_paths <- allow_file_path %in% c("double_slash", "never")

sep_xpath <- "
paste_sep_xpath <- "
//SYMBOL_FUNCTION_CALL[text() = 'paste']
/parent::expr
/following-sibling::SYMBOL_SUB[text() = 'sep' and following-sibling::expr[1][STR_CONST]]
Expand Down Expand Up @@ -137,32 +138,9 @@ paste_linter <- function(allow_empty_sep = FALSE,
/parent::expr
"

slash_str <- sprintf("STR_CONST[%s]", xp_text_in_table(c("'/'", '"/"')))
str_not_start_with_slash <-
"STR_CONST[not(substring(text(), 2, 1) = '/')]"
str_not_end_with_slash <-
"STR_CONST[not(substring(text(), string-length(text()) - 1, 1) = '/')]"
non_str <- "SYMBOL or expr"

# Type I: paste(..., sep = "/")
paste_file_path_xpath <- glue("
//SYMBOL_FUNCTION_CALL[text() = 'paste']
/parent::expr
/parent::expr[
SYMBOL_SUB[text() = 'sep']/following-sibling::expr[1][{slash_str}]
and not(SYMBOL_SUB[text() = 'collapse'])
]
")

if (allow_file_path %in% c("always", "double_slash")) {
slash_cond <- "contains(text(), '//')"
} else {
# NB: substring() signature is (text, initial_index, n_characters)
slash_cond <- "substring(text(), 2, 2) = '//' or substring(text(), string-length(text()) - 2, 2) = '//'"
}

# Type II: paste0(x, "/", y, "/", z)
paste0_file_path_xpath <- xp_strip_comments(glue("
# NB: some conditions require evaluating the R string, only a few can be done in pure XPath. See below.
paste0_file_path_xpath <- xp_strip_comments("
//SYMBOL_FUNCTION_CALL[text() = 'paste0']
/parent::expr
/parent::expr[
Expand All @@ -174,32 +152,11 @@ paste_linter <- function(allow_empty_sep = FALSE,
expr/NUM_CONST
(: A call using collapse= :)
or SYMBOL_SUB[text() = 'collapse']
(: First input is '/', meaning file.path() would need to start with '' :)
or expr[2][{slash_str}]
(: Last input is '/', meaning file.path() would need to end with '' :)
or expr[last()][{slash_str}]
(: String starting or ending with multiple / :)
(: TODO(#2075): run this logic on the actual R string :)
or expr/STR_CONST[{ slash_cond }]
(: Consecutive non-strings like paste0(x, y) :)
or expr[({non_str}) and following-sibling::expr[1][{non_str}]]
(: A string not ending with /, followed by non-string or string not starting with / :)
or expr[
{str_not_end_with_slash}
and following-sibling::expr[1][
{non_str}
or {str_not_start_with_slash}
]
]
(: A string not starting with /, preceded by a non-string :)
(: NB: consecutive strings is covered by the previous condition :)
or expr[
{str_not_start_with_slash}
and preceding-sibling::expr[1][{non_str}]
]
or expr[(SYMBOL or expr) and following-sibling::expr[1][SYMBOL or expr]]
)
]
"))
")

empty_paste_note <-
'Note that paste() converts empty inputs to "", whereas file.path() leaves it empty.'
Expand All @@ -212,12 +169,16 @@ paste_linter <- function(allow_empty_sep = FALSE,
xml <- source_expression$xml_parsed_content
optional_lints <- list()

if (!allow_empty_sep) {
empty_sep_expr <- xml_find_all(xml, sep_xpath)
sep_value <- get_r_string(empty_sep_expr, xpath = "./SYMBOL_SUB[text() = 'sep']/following-sibling::expr[1]")
# Both of these look for paste(..., sep = "..."), differing in which 'sep' is linted,
# so run the expensive XPath search/R parse only once
if (!allow_empty_sep || check_file_paths) {
paste_sep_expr <- xml_find_all(xml, paste_sep_xpath)
paste_sep_value <- get_r_string(paste_sep_expr, xpath = "./SYMBOL_SUB[text() = 'sep']/following-sibling::expr[1]")
}

if (!allow_empty_sep) {
optional_lints <- c(optional_lints, xml_nodes_to_lints(
empty_sep_expr[!nzchar(sep_value)],
paste_sep_expr[!nzchar(paste_sep_value)],
source_expression = source_expression,
lint_message = 'paste0(...) is better than paste(..., sep = "").',
type = "warning"
Expand Down Expand Up @@ -263,10 +224,11 @@ paste_linter <- function(allow_empty_sep = FALSE,
type = "warning"
)

if (allow_file_path %in% c("double_slash", "never")) {
paste_file_path_expr <- xml_find_all(xml, paste_file_path_xpath)
if (check_file_paths) {
paste_sep_slash_expr <- paste_sep_expr[paste_sep_value == "/"]
optional_lints <- c(optional_lints, xml_nodes_to_lints(
paste_file_path_expr,
# in addition to paste(..., sep = "/") we ensure collapse= is not present
paste_sep_slash_expr[is.na(xml_find_first(paste_sep_slash_expr, "./SYMBOL_SUB[text() = 'collapse']"))],
source_expression = source_expression,
lint_message = paste(
'Construct file paths with file.path(...) instead of paste(..., sep = "/").',
Expand All @@ -278,8 +240,10 @@ paste_linter <- function(allow_empty_sep = FALSE,
))

paste0_file_path_expr <- xml_find_all(xml, paste0_file_path_xpath)
is_file_path <-
!vapply(paste0_file_path_expr, check_is_not_file_path, logical(1L), allow_file_path = allow_file_path)
optional_lints <- c(optional_lints, xml_nodes_to_lints(
paste0_file_path_expr,
paste0_file_path_expr[is_file_path],
source_expression = source_expression,
lint_message = paste(
'Construct file paths with file.path(...) instead of paste0(x, "/", y, "/", z).',
Expand All @@ -292,3 +256,32 @@ paste_linter <- function(allow_empty_sep = FALSE,
c(optional_lints, paste0_sep_lints, paste_strrep_lints)
})
}

check_is_not_file_path <- function(expr, allow_file_path) {
args <- xml_find_all(expr, "expr[position() > 1]")

is_string <- !is.na(xml_find_first(args, "STR_CONST"))
string_values <- character(length(args))
string_values[is_string] <- get_r_string(args[is_string])
not_start_slash <- which(!startsWith(string_values, "/"))
not_end_slash <- which(!endsWith(string_values, "/"))

if (allow_file_path == "double_slash") {
check_double_slash <- function(str) any(grepl("//", str, fixed = TRUE))
} else {
# always skip on strings starting/ending with //, since switching to
# file.path() would require file.path("", ...) or file.path(..., "")
check_double_slash <- function(str) any(grepl("^//|//$", str))
}

# First input is '/', meaning file.path() would need to start with ''
string_values[1L] == "/" ||
# Last input is '/', meaning file.path() would need to end with ''
string_values[length(string_values)] == "/" ||
check_double_slash(string_values) ||
# A string not ending with /, followed by non-string,
# or a string not starting with /, preceded by a non-string
any(!is_string[c(not_end_slash + 1L, not_start_slash - 1L)], na.rm = TRUE) ||
# A string not starting with / preceded by a string not ending with /
any(not_start_slash %in% (not_end_slash + 1L))
}
34 changes: 33 additions & 1 deletion tests/testthat/test-paste_linter.R
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ test_that("paste_linter doesn't skip all initial/terminal '/' for paths", {
})

test_that("multiple path lints are generated correctly", {
linter <- paste_linter()

expect_lint(
trim_some("{
paste(x, y, sep = '/')
Expand All @@ -198,7 +200,25 @@ test_that("multiple path lints are generated correctly", {
rex::rex('paste(..., sep = "/")'),
rex::rex('paste0(x, "/", y, "/", z)')
),
paste_linter()
linter
)

# check vectorization of multiple paste0 file paths
expect_lint(
trim_some('{
paste0(x, y)
paste0("a/", x, y, "/b")
paste0("a", "b")
paste0("a", x)
paste0(a, "x")
paste0("a/", NA_character_, "/b")
paste0("a/", "b")
paste0("a/", "bcd//efg", "/h")
paste0("/", a)
paste0(a, "/")
}'),
list(message = rex::rex("Construct file paths with file.path(...)"), line_number = 8L),
linter
)
})

Expand All @@ -213,3 +233,15 @@ test_that("URLs are ignored by default, linted optionally", {
expect_lint("paste0('http://site.com/', x)", NULL, linter)
expect_lint("paste0('http://site.com/', x)", rex::rex("Construct file paths with file.path(...)"), linter_url)
})

test_that("raw strings are detected in file path logic", {
skip_if_not_r_version("4.0.0")

linter <- paste_linter()
lint_msg <- rex::rex("Construct file paths with file.path(...) instead of ")

expect_lint("paste0(x, R'{abc}', y)", NULL, linter)
expect_lint("paste0(x, R'{/abc/}', y)", lint_msg, linter)
expect_lint("paste(x, y, sep = R'{//}')", NULL, linter)
expect_lint("paste(x, y, sep = R'{/}')", lint_msg, linter)
})

0 comments on commit 33335e8

Please sign in to comment.