diff --git a/DESCRIPTION b/DESCRIPTION index 956ff3c..e9923fa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: pensar Type: Package Title: LLM Wiki Engine -Version: 0.6.3.3 -Date: 2026-06-03 +Version: 0.6.3.4 +Date: 2026-06-06 Authors@R: c( person("Troy", "Hernandez", role = c("aut", "cre"), email = "troy@cornball.ai", diff --git a/NEWS.md b/NEWS.md index 2932898..aafdc52 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,13 @@ +# pensar 0.6.3.4 (dev) + +## Changes + +* Wikilink parsing now ignores Markdown code. Fenced blocks (` ``` ` or + `~~~`) and inline spans (`` `...` ``) are stripped before extracting + `[[...]]`, so R's `[[ ]]` list indexing in a code sample no longer + registers as a broken wikilink. Affects `lint()`, `outlinks()`, + `backlinks()`, and `vault_graph()`. + # pensar 0.6.3.3 (dev) ## Bug fixes diff --git a/R/parse.R b/R/parse.R index ea70f0d..6322074 100644 --- a/R/parse.R +++ b/R/parse.R @@ -20,13 +20,33 @@ parse_frontmatter <- function(filepath) { tryCatch(yaml::yaml.load(yaml_text), error = function(e) list()) } +#' Blank out code regions so they are not scanned for wikilinks +#' +#' Markdown code is literal text, not wiki markup. R's \code{[[ ]]} list +#' indexing inside a code block would otherwise be misread as a wikilink. +#' This drops fenced code blocks (\code{```} or \code{~~~}) entirely and +#' strips inline code spans (\code{`...`}) from the remaining lines. +#' +#' An unterminated fence swallows everything to end of file, which is the +#' safe choice: better to miss a link in malformed markup than to invent one. +#' @noRd +strip_code <- function(lines) { + fence <- grepl("^\\s*(`{3,}|~{3,})", lines) + # cumsum is odd on the opening fence and its contents; the closing + # fence lands on an even count, so include it via `| fence`. + inside <- (cumsum(fence) %% 2L == 1L) | fence + gsub("`+[^`]*`+", " ", lines[!inside]) +} + #' Parse all wikilinks from a markdown file #' +#' Code regions are ignored via \code{strip_code()}. +#' #' @param filepath Path to a markdown file. #' @return Character vector of link targets. #' @noRd parse_wikilinks <- function(filepath) { - lines <- readLines(filepath, warn = FALSE) + lines <- strip_code(readLines(filepath, warn = FALSE)) all_links <- regmatches(lines, gregexpr("\\[\\[([^]]+)\\]\\]", lines)) all_links <- unlist(all_links) if (length(all_links) == 0L) { diff --git a/inst/tinytest/test_parse.R b/inst/tinytest/test_parse.R index 1ba3253..f32c080 100644 --- a/inst/tinytest/test_parse.R +++ b/inst/tinytest/test_parse.R @@ -52,6 +52,26 @@ parsed <- pensar:::parse_wikilink("[[Delta|display text]]") expect_equal(parsed$target, "Delta") expect_equal(parsed$label, "display text") +# Code regions are not scanned: R's [[ ]] indexing is not a wikilink. +tmp5 <- tempfile(fileext = ".md") +writeLines(c( + "Real link [[Alpha]] in prose.", + "Inline code `merged[[name]] <- project[[name]]` is not a link.", + "```r", + "x <- registry[[uuid]]", + "y <- [[Beta]]", + "```", + "After the fence [[Gamma]] counts again." +), tmp5) + +wl2 <- pensar:::parse_wikilinks(tmp5) +expect_true("Alpha" %in% wl2) +expect_true("Gamma" %in% wl2) +expect_false("name" %in% wl2) +expect_false("uuid" %in% wl2) +expect_false("Beta" %in% wl2) +unlink(tmp5) + # --- name_from_path --- expect_equal(pensar:::name_from_path("/vault/Neural Networks.md"), "Neural Networks")