r-lib · MichaelChirico · Feb 27, 2025 · Feb 27, 2025 · Feb 27, 2025 · Mar 3, 2025
diff --git a/NEWS.md b/NEWS.md
@@ -49,6 +49,7 @@
 * New argument `include_s4_slots` for the `xml_find_function_calls()` entry in the `get_source_expressions()` to govern whether calls of the form `s4Obj@fun()` are included in the result (#2820, @MichaelChirico).
 * `sprintf_linter()` lints `sprintf()` and `gettextf()` calls when a constant string is passed to `fmt` (#2894, @Bisaloo).
 * `use_lintr()` adds the created `.lintr` file to the `.Rbuildignore` if run in a package (#2926, initial work by @MEO265, finalized by @Bisaloo).
+* `line_length_linter()` has a new argument `ignore_string_bodies` (defaulting to `FALSE`) which governs whether the contents of multi-line string bodies should be linted; when `TRUE`, lines lying inside a multi-line string body are not linted (#856, @MichaelChirico). We think the biggest use case for this is writing SQL in R strings, especially in cases where the recommended line widths for SQL and R differ.
 
 ### New linters
 

diff --git a/R/line_length_linter.R b/R/line_length_linter.R
@@ -2,7 +2,10 @@
 #'
 #' Check that the line length of both comments and code is less than `length`.
 #'
-#' @param length maximum line length allowed. Default is 80L (Hollerith limit).
+#' @param length Maximum line length allowed. Default is `80L` (Hollerith limit).
+#' @param ignore_string_bodies Logical, default `FALSE`. If `TRUE`, the contents
+#'   of string literals are ignored. The quotes themselves are included, so this
+#'   mainly affects wide multiline strings, e.g. SQL queries.
-#'   mainly affects wide multiline strings, e.g. SQL queries.
+#'   only affects wide multiline strings, e.g. SQL queries.
-#'   mainly affects wide multiline strings, e.g. SQL queries.
+#'   only affects wide multiline strings, e.g. SQL queries.
 #'
 #' @examples
 #' # will produce lints
@@ -22,14 +25,20 @@
 #' - [linters] for a complete list of linters available in lintr.
 #' - <https://style.tidyverse.org/syntax.html#long-lines>
 #' @export
-line_length_linter <- function(length = 80L) {
+line_length_linter <- function(length = 80L, ignore_string_bodies = FALSE) {
   general_msg <- paste("Lines should not be more than", length, "characters.")
 
   Linter(linter_level = "file", function(source_expression) {
     # Only go over complete file
     line_lengths <- nchar(source_expression$file_lines)
     long_lines <- which(line_lengths > length)
 
+    if (ignore_string_bodies) {
+      in_string_body_idx <-
+        is_in_string_body(source_expression$full_parsed_content, length, long_lines)
+      long_lines <- long_lines[!in_string_body_idx]
+    }
+
     Map(
       function(long_line, line_length) {
         Lint(
@@ -47,3 +56,39 @@ line_length_linter <- function(length = 80L) {
     )
   })
 }
+
+is_in_string_body <- function(parse_data, max_length, long_idx) {  # one logical per 'long_idx' entry
+  str_idx <- parse_data$token == "STR_CONST"  # rows of the parse data that are string literals
+  if (!any(str_idx)) {  # no string literals at all
+    return(rep(FALSE, length(long_idx)))
+  }
+  str_data <- parse_data[str_idx, ]
+  if (all(str_data$line1 == str_data$line2)) {  # every string starts and ends on the same line
+    return(rep(FALSE, length(long_idx)))
+  }
+  # a string's last line ends at 'col2'; the end of its first line is derived from 'text' instead
+  str_data$line1_width <- nchar(vapply(
+    strsplit(str_data$text, "\n", fixed = TRUE),
+    function(x) x[1L],
+    FUN.VALUE = character(1L),
+    USE.NAMES = FALSE
+  ))
+  str_data$col1_end <- str_data$col1 + str_data$line1_width  # NOTE(review): one past the end if col1 is 1-based?
+  vapply(
+    long_idx,
+    function(line) {
+      # strictly inside a multi-line string body: after a string's first line and before its last
+      if (any(str_data$line1 < line & str_data$line2 > line)) {
+        return(TRUE)
+      }
+      on_line1_idx <- str_data$line1 == line  # strings that start on this line
+      if (any(on_line1_idx)) {
+        return(max(str_data$col1_end[on_line1_idx]) <= max_length)
+      }
+      # use parse data (all tokens, not only strings) to capture possible trailing expressions on this line
+      on_line2_idx <- parse_data$line2 == line
+      any(on_line2_idx) && max(parse_data$col2[on_line2_idx]) <= max_length
+    },
+    logical(1L)
+  )
+}
-is_in_string_body <- function(parse_data, max_length, long_idx) {
-  str_idx <- parse_data$token == "STR_CONST"
-  if (!any(str_idx)) {
-    return(rep(FALSE, length(long_idx)))
-  }
-  str_data <- parse_data[str_idx, ]
-  if (all(str_data$line1 == str_data$line2)) {
-    return(rep(FALSE, length(long_idx)))
-  }
-  # right delimiter just ends at 'col2', but 'col1' takes some sleuthing
-  str_data$line1_width <- nchar(vapply(
-    strsplit(str_data$text, "\n", fixed = TRUE),
-    function(x) x[1L],
-    FUN.VALUE = character(1L),
-    USE.NAMES = FALSE
-  ))
-  str_data$col1_end <- str_data$col1 + str_data$line1_width
-  vapply(
-    long_idx,
-    function(line) {
-      # strictly inside a multi-line string body
-      if (any(str_data$line1 < line & str_data$line2 > line)) {
-        return(TRUE)
-      }
-      on_line1_idx <- str_data$line1 == line
-      if (any(on_line1_idx)) {
-        return(max(str_data$col1_end[on_line1_idx]) <= max_length)
-      }
-      # use parse data to capture possible trailing expressions on this line
-      on_line2_idx <- parse_data$line2 == line
-      any(on_line2_idx) && max(parse_data$col2[on_line2_idx]) <= max_length
-    },
-    logical(1L)
-  )
-}
+is_in_string_body <- function(parse_data, max_length, long_idx) {  # NOTE: 'max_length' is unused in this body
+  str_idx <- parse_data$token == "STR_CONST"  # rows of the parse data that are string literals
+  if (!any(str_idx)) {  # no string literals at all
+    return(rep(FALSE, length(long_idx)))
+  }
+  str_data <- parse_data[str_idx, ]
+  if (all(str_data$line1 == str_data$line2)) {  # every string starts and ends on the same line
+    return(rep(FALSE, length(long_idx)))
+  }
+  vapply(  # one logical per entry of 'long_idx'
+    long_idx,
+    function(line) {
+      # strictly inside a multi-line string body: the string's first and last lines do not count
+      any(str_data$line1 < line & str_data$line2 > line)
+    },
+    logical(1L)
+  )
+}
-is_in_string_body <- function(parse_data, max_length, long_idx) {
-  str_idx <- parse_data$token == "STR_CONST"
-  if (!any(str_idx)) {
-    return(rep(FALSE, length(long_idx)))
-  }
-  str_data <- parse_data[str_idx, ]
-  if (all(str_data$line1 == str_data$line2)) {
-    return(rep(FALSE, length(long_idx)))
-  }
-  # right delimiter just ends at 'col2', but 'col1' takes some sleuthing
-  str_data$line1_width <- nchar(vapply(
-    strsplit(str_data$text, "\n", fixed = TRUE),
-    function(x) x[1L],
-    FUN.VALUE = character(1L),
-    USE.NAMES = FALSE
-  ))
-  str_data$col1_end <- str_data$col1 + str_data$line1_width
-  vapply(
-    long_idx,
-    function(line) {
-      # strictly inside a multi-line string body
-      if (any(str_data$line1 < line & str_data$line2 > line)) {
-        return(TRUE)
-      }
-      on_line1_idx <- str_data$line1 == line
-      if (any(on_line1_idx)) {
-        return(max(str_data$col1_end[on_line1_idx]) <= max_length)
-      }
-      # use parse data to capture possible trailing expressions on this line
-      on_line2_idx <- parse_data$line2 == line
-      any(on_line2_idx) && max(parse_data$col2[on_line2_idx]) <= max_length
-    },
-    logical(1L)
-  )
-}
+is_in_string_body <- function(parse_data, max_length, long_idx) {  # NOTE: 'max_length' is unused in this body
+  str_idx <- parse_data$token == "STR_CONST"  # rows of the parse data that are string literals
+  if (!any(str_idx)) {  # no string literals at all
+    return(rep(FALSE, length(long_idx)))
+  }
+  str_data <- parse_data[str_idx, ]
+  if (all(str_data$line1 == str_data$line2)) {  # every string starts and ends on the same line
+    return(rep(FALSE, length(long_idx)))
+  }
+  vapply(  # one logical per entry of 'long_idx'
+    long_idx,
+    function(line) {
+      # strictly inside a multi-line string body: the string's first and last lines do not count
+      any(str_data$line1 < line & str_data$line2 > line)
+    },
+    logical(1L)
+  )
+}
diff --git a/R/utils.R b/R/utils.R
@@ -228,8 +228,12 @@ get_r_string <- function(s, xpath = NULL) {
       s <- xml_find_chr(s, sprintf("string(%s)", xpath))
     }
   }
-  # parse() skips "" elements --> offsets the length of the output,
-  #   but NA in --> NA out
+  r_string_from_parse_text(s)
+}
+
+# parse() skips "" elements --> offsets the length of the output,
+#   but NA in --> NA out
+r_string_from_parse_text <- function(s) {
   is.na(s) <- !nzchar(s)
   out <- as.character(parse(text = s, keep.source = FALSE))
   is.na(out) <- is.na(s)

diff --git a/man/line_length_linter.Rd b/man/line_length_linter.Rd
diff --git a/tests/testthat/test-line_length_linter.R b/tests/testthat/test-line_length_linter.R
@@ -1,8 +1,8 @@
 test_that("line_length_linter skips allowed usages", {
   linter <- line_length_linter(80L)
 
-  expect_lint("blah", NULL, linter)
-  expect_lint(strrep("x", 80L), NULL, linter)
+  expect_no_lint("blah", linter)
+  expect_no_lint(strrep("x", 80L), linter)
 })
 
 test_that("line_length_linter blocks disallowed usages", {
@@ -37,7 +37,7 @@ test_that("line_length_linter blocks disallowed usages", {
 
   linter <- line_length_linter(20L)
   lint_msg <- rex::rex("Lines should not be more than 20 characters. This line is 22 characters.")
-  expect_lint(strrep("a", 20L), NULL, linter)
+  expect_no_lint(strrep("a", 20L), linter)
   expect_lint(
     strrep("a", 22L),
     list(
@@ -71,3 +71,86 @@ test_that("Multiple lints give custom messages", {
     line_length_linter(5L)
   )
 })
+
+test_that("string bodies can be ignored", {
+  linter <- line_length_linter(10L, ignore_string_bodies = TRUE)  # limit of 10 characters
+  lint_msg <- rex::rex("Lines should not be more than 10 characters. This line is 15 characters.")
+
+  expect_no_lint(  # the only over-long line lies between the opening and closing quote lines
+    trim_some("
+      1234567890
+      str <- '
+      123456789012345
+      '
+    "),
+    linter
+  )
+
+  expect_no_lint(  # as above, with an opening line of exactly 10 characters
+    trim_some("
+      1234567890
+      str45 <- '
+      123456789012345
+               '
+    "),
+    linter
+  )
+
+  expect_no_lint(  # string text on the opening and closing lines too; only the middle line is over-long
+    trim_some("
+      1234567890
+      str <- '90
+      123456789012345
+      123456789'
+    "),
+    linter
+  )
+
+  expect_lint(  # lints are expected on the opening and closing lines only, not on the body line
+    trim_some("
+      1234567890
+      str456 <- '
+      123456789012345
+                '
+    "),
+    list(
+      list("11 characters", line_number = 2L),
+      list("11 characters", line_number = 4L)
+    ),
+    linter
+  )
+
+  expect_lint(  # an over-long opening line of a multi-line string still lints
+    trim_some("
+      1234567890
+      str <- '9012345
+      1234567890
+      123456789'
+    "),
+    lint_msg,
+    linter
+  )
+
+  expect_lint(  # an over-long closing line (string end plus a trailing expression) still lints
+    trim_some("
+      1234567890
+      str <- '90
+      1234567890
+      12345678'; 2345
+    "),
+    lint_msg,
+    linter
+  )
+
+  expect_lint(  # an over-long line holding only single-line strings still lints
+    "'1'; '2'; '345'",
+    lint_msg,
+    linter
+  )
+
+  expect_lint(  # an over-long line with no strings still lints
+    "123456789012345",
+    lint_msg,
+    linter
+  )
+})