cornball-ai · TroyHernandez · May 19, 2026 · May 19, 2026
diff --git a/R/audits.R b/R/audits.R
@@ -34,13 +34,12 @@
 #' @export
 dedup <- function(vault = default_vault(), threshold = 0.7) {
     if (!requireNamespace("stringdist", quietly = TRUE)) {
-        stop("dedup() requires the 'stringdist' package.",
-             call. = FALSE)
+        stop("dedup() requires the 'stringdist' package.", call. = FALSE)
     }
     vault <- normalizePath(vault, mustWork = TRUE)
 
     reg <- vault_registry(vault)
-    pages <- reg[!reg$system_file, , drop = FALSE]
+    pages <- reg[!reg$system_file,, drop = FALSE]
     n <- nrow(pages)
     if (n < 2L) {
         proposals <- empty_dedup_result()
@@ -49,8 +48,7 @@ dedup <- function(vault = default_vault(), threshold = 0.7) {
     }
 
     titles <- ifelse(!is.na(pages$title) & nzchar(pages$title),
-                     tolower(trimws(pages$title)),
-                     tolower(pages$node_id))
+                     tolower(trimws(pages$title)), tolower(pages$node_id))
 
     page_a <- character(0L)
     page_b <- character(0L)
@@ -61,7 +59,7 @@ dedup <- function(vault = default_vault(), threshold = 0.7) {
     for (i in seq_len(n - 1L)) {
         for (j in seq.int(i + 1L, n)) {
             jw_dist <- stringdist::stringdist(titles[i], titles[j],
-                                              method = "jw")
+                method = "jw")
             sim <- 1 - jw_dist
             jacc <- tag_jaccard(pages$tags[[i]], pages$tags[[j]])
             score <- 0.6 * sim + 0.4 * jacc
@@ -70,9 +68,8 @@ dedup <- function(vault = default_vault(), threshold = 0.7) {
                 page_b <- c(page_b, pages$path[j])
                 title_sim <- c(title_sim, sim)
                 tag_overlap <- c(
-                    tag_overlap,
-                    length(intersect(pages$tags[[i]],
-                                     pages$tags[[j]])))
+                                 tag_overlap,
+                                 length(intersect(pages$tags[[i]], pages$tags[[j]])))
                 combined <- c(combined, score)
             }
         }
@@ -84,8 +81,8 @@ dedup <- function(vault = default_vault(), threshold = 0.7) {
                             combined_score = combined,
                             stringsAsFactors = FALSE)
     if (nrow(proposals) > 0L) {
-        proposals <- proposals[order(-proposals$combined_score), ,
-                               drop = FALSE]
+        proposals <- proposals[order(-proposals$combined_score),,
+            drop = FALSE]
         rownames(proposals) <- NULL
     }
     write_dedup_proposals(vault, proposals)
@@ -95,10 +92,8 @@ dedup <- function(vault = default_vault(), threshold = 0.7) {
 #' @noRd
 empty_dedup_result <- function() {
     data.frame(page_a = character(0L), page_b = character(0L),
-               title_similarity = numeric(0L),
-               tag_overlap = integer(0L),
-               combined_score = numeric(0L),
-               stringsAsFactors = FALSE)
+               title_similarity = numeric(0L), tag_overlap = integer(0L),
+               combined_score = numeric(0L), stringsAsFactors = FALSE)
 }
 
 #' Jaccard overlap of two tag sets
@@ -123,13 +118,9 @@ write_dedup_proposals <- function(vault, proposals) {
     out_path <- file.path(out_dir, "dedup.md")
 
     if (nrow(proposals) == 0L) {
-        writeLines(c("---",
-                     "title: Dedup proposals",
-                     sprintf("updated: %s", now_ts()),
-                     "---",
-                     "",
-                     "# Dedup proposals",
-                     "",
+        writeLines(c("---", "title: Dedup proposals",
+                     sprintf("updated: %s", now_ts()), "---", "",
+                     "# Dedup proposals", "",
                      "No candidate duplicates above threshold."),
                    out_path)
         return(invisible(out_path))
@@ -142,8 +133,7 @@ write_dedup_proposals <- function(vault, proposals) {
                "",
                "# Dedup proposals",
                "",
-               sprintf("%d candidate pair(s) above threshold.",
-                       nrow(proposals)),
+               sprintf("%d candidate pair(s) above threshold.", nrow(proposals)),
                "")
     for (i in seq_len(nrow(proposals))) {
         lines <- c(lines,
@@ -214,7 +204,7 @@ tags <- function(vault = default_vault(), taxonomy = NULL,
     }
 
     reg <- vault_registry(vault)
-    pages <- reg[!reg$system_file, , drop = FALSE]
+    pages <- reg[!reg$system_file,, drop = FALSE]
 
     all_tags <- unlist(pages$tags, use.names = FALSE)
     if (is.null(all_tags)) {
@@ -245,9 +235,8 @@ tags <- function(vault = default_vault(), taxonomy = NULL,
             unk_tags <- used$tag[unk_mask]
             suggestion <- vapply(unk_tags,
                                  function(t) {
-                                     near_miss(t, allowed,
-                                               near_miss_threshold)
-                                 },
+                near_miss(t, allowed, near_miss_threshold)
+            },
                                  character(1L))
             data.frame(tag = unk_tags,
                        count = used$count[unk_mask],
@@ -262,8 +251,7 @@ tags <- function(vault = default_vault(), taxonomy = NULL,
         sort(setdiff(allowed, used$tag))
     }
 
-    write_tags_proposals(vault, used, unknown, unused_taxonomy,
-                         taxonomy)
+    write_tags_proposals(vault, used, unknown, unused_taxonomy, taxonomy)
     invisible(list(used = used, unknown = unknown,
                    unused_taxonomy = unused_taxonomy))
 }
@@ -280,8 +268,9 @@ read_taxonomy <- function(taxonomy_path) {
     # across R's default regex engine. `\\s` is not.
     pat <- "^[[:space:]]*[-*][[:space:]]+`?([^`[:space:]]+)`?[[:space:]]*$"
     m <- regmatches(lines, regexec(pat, lines))
-    tags <- vapply(m, function(x) if (length(x) >= 2L) x[[2L]]
-                   else NA_character_, character(1L))
+    tags <- vapply(m,
+                   function(x) if (length(x) >= 2L) x[[2L]] else NA_character_,
+                   character(1L))
     tags <- tags[!is.na(tags) & nzchar(tags)]
     unique(tags)
 }
@@ -308,19 +297,14 @@ near_miss <- function(tag, allowed, threshold) {
 
 #' Write the tags proposals report
 #' @noRd
-write_tags_proposals <- function(vault, used, unknown,
-                                 unused_taxonomy, taxonomy_path) {
+write_tags_proposals <- function(vault, used, unknown, unused_taxonomy,
+                                 taxonomy_path) {
     out_dir <- file.path(vault, "_proposals")
     dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
     out_path <- file.path(out_dir, "tags.md")
 
-    lines <- c("---",
-               "title: Tag audit",
-               sprintf("updated: %s", now_ts()),
-               "---",
-               "",
-               "# Tag audit",
-               "")
+    lines <- c("---", "title: Tag audit", sprintf("updated: %s", now_ts()),
+               "---", "", "# Tag audit", "")
 
     if (is.null(taxonomy_path) || !file.exists(taxonomy_path)) {
         lines <- c(lines,
@@ -347,8 +331,7 @@ write_tags_proposals <- function(vault, used, unknown,
             suggestion <- if (is.na(unknown$suggestion[i])) {
                 ""
             } else {
-                sprintf(" -- did you mean `%s`?",
-                        unknown$suggestion[i])
+                sprintf(" -- did you mean `%s`?", unknown$suggestion[i])
             }
             lines <- c(lines, sprintf("- `%s` (%d)%s",
                                       unknown$tag[i], unknown$count[i],
@@ -368,3 +351,4 @@ write_tags_proposals <- function(vault, used, unknown,
     writeLines(lines, out_path)
     invisible(out_path)
 }
+
diff --git a/R/autoresearch.R b/R/autoresearch.R
@@ -77,11 +77,9 @@ autoresearch <- function(topic, vault = default_vault(),
     if (!is.character(topic) || length(topic) != 1L || !nzchar(topic)) {
         stop("`topic` must be a single non-empty string.", call. = FALSE)
     }
-    if (!is.null(slug) &&
-        (!is.character(slug) || length(slug) != 1L)) {
+    if (!is.null(slug) && (!is.character(slug) || length(slug) != 1L)) {
         stop("`slug` must be NULL or a single character value (got ",
-             typeof(slug), " of length ", length(slug), ").",
-             call. = FALSE)
+             typeof(slug), " of length ", length(slug), ").", call. = FALSE)
     }
     vault <- normalizePath(vault, mustWork = TRUE)
     if (!file.exists(file.path(vault, "schema.md"))) {
@@ -99,7 +97,7 @@ autoresearch <- function(topic, vault = default_vault(),
     fetch_backend <- fetch_backend %||% .autoresearch_default_fetch_backend()
     if (is.null(model_backend)) {
         model_backend <- .autoresearch_default_model_backend(provider = provider,
-                                                             model = model)
+            model = model)
     }
     model_backend <- .verbose_model_backend(model_backend, verbose)
     t_start <- Sys.time()
@@ -134,8 +132,8 @@ autoresearch <- function(topic, vault = default_vault(),
         .ar_msg(verbose, "round ", round, ": running ", nrow(round_queries),
                 " search ", if (nrow(round_queries) == 1L) "query" else "queries")
         round_search <- autoresearch_run_searches(round_queries,
-                                                  search_backend, program,
-                                                  verbose = verbose)
+            search_backend, program,
+            verbose = verbose)
         round_search$round <- rep.int(as.integer(round), nrow(round_search))
         search_results <- .autoresearch_bind_rows(search_results, round_search)
         .ar_msg(verbose, "round ", round, ": ", nrow(round_search),
@@ -144,32 +142,31 @@ autoresearch <- function(topic, vault = default_vault(),
         .ar_msg(verbose, "round ", round, ": selecting sources from ",
                 nrow(round_search), " results")
         round_selected <- autoresearch_select_sources(topic, round_search,
-                                                      program, model_backend)
+            program, model_backend)
         if (nrow(round_selected) > 0L && nrow(selected) > 0L) {
             round_selected <- round_selected[
                 !round_selected$url %in% selected$url,, drop = FALSE]
         }
-        round_selected$round <- rep.int(as.integer(round),
-                                        nrow(round_selected))
+        round_selected$round <- rep.int(as.integer(round), nrow(round_selected))
         selected <- .autoresearch_bind_rows(selected, round_selected)
         .ar_msg(verbose, "round ", round, ": selected ", nrow(round_selected),
                 " new ", if (nrow(round_selected) == 1L) "source" else "sources")
 
         .ar_msg(verbose, "round ", round, ": fetching ",
                 nrow(round_selected), " ",
-                if (nrow(round_selected) == 1L) "source" else "sources")
+            if (nrow(round_selected) == 1L) "source" else "sources")
         round_sources <- autoresearch_fetch_and_ingest(round_selected,
-                                                       fetch_backend, vault,
-                                                       topic, force = force,
-                                                       verbose = verbose)
+            fetch_backend, vault,
+            topic, force = force,
+            verbose = verbose)
         round_sources$round <- rep.int(as.integer(round), nrow(round_sources))
         sources <- .autoresearch_bind_rows(sources, round_sources)
 
         .ar_msg(verbose, "round ", round, ": extracting evidence from ",
                 nrow(round_sources), " ",
-                if (nrow(round_sources) == 1L) "source" else "sources")
+            if (nrow(round_sources) == 1L) "source" else "sources")
         round_claims <- autoresearch_extract_claims(topic, round_sources,
-                                                    program, model_backend)
+            program, model_backend)
         round_claims$round <- rep.int(as.integer(round), nrow(round_claims))
         claims <- .autoresearch_bind_rows(claims, round_claims)
         .ar_msg(verbose, "round ", round, ": extracted ", nrow(round_claims),
@@ -189,20 +186,20 @@ autoresearch <- function(topic, vault = default_vault(),
         }
         .ar_msg(verbose, "round ", round, ": analyzing gaps")
         gap_plan <- autoresearch_analyze_gaps(topic, claims, sources,
-                                              all_queries, program,
-                                              model_backend, round)
+            all_queries, program,
+            model_backend, round)
         gaps <- .autoresearch_bind_rows(gaps, gap_plan$gaps)
         next_queries <- gap_plan$queries
         .ar_msg(verbose, "round ", round, ": ", nrow(gap_plan$gaps),
                 " gaps, ", nrow(gap_plan$queries),
                 " follow-up ",
-                if (nrow(gap_plan$queries) == 1L) "query" else "queries")
+            if (nrow(gap_plan$queries) == 1L) "query" else "queries")
     }
 
     existing_pages <- autoresearch_existing_pages(vault)
     .ar_msg(verbose, "vault has ", nrow(existing_pages),
             " existing wiki ",
-            if (nrow(existing_pages) == 1L) "page" else "pages")
+        if (nrow(existing_pages) == 1L) "page" else "pages")
 
     .ar_msg(verbose, "planning pages from ", nrow(claims), " claims and ",
             nrow(sources), " sources")
@@ -212,22 +209,22 @@ autoresearch <- function(topic, vault = default_vault(),
             " ", if (nrow(pages$pages) == 1L) "page" else "pages")
     pages$pages <- .ar_apply_user_slug(pages$pages, slug, verbose)
     pages$pages <- .ar_dedupe_planned_slugs(pages$pages, topic, vault,
-                                            verbose = verbose)
+        verbose = verbose)
 
     if (isTRUE(update) && nrow(pages$pages) > 0L) {
         update_targets <- .ar_update_targets(pages$pages, vault)
         if (length(update_targets) > 0L) {
             .ar_msg(verbose, "revising ", length(update_targets),
                     " existing ",
-                    if (length(update_targets) == 1L) "page" else "pages",
+                if (length(update_targets) == 1L) "page" else "pages",
                     ": ", paste(update_targets, collapse = ", "))
         } else {
             .ar_msg(verbose, "no existing pages to revise; writing fresh")
         }
         pages$pages <- autoresearch_revise_pages(pages$pages, topic, claims,
-                                                 sources, vault, program,
-                                                 model_backend,
-                                                 verbose = verbose)
+            sources, vault, program,
+            model_backend,
+            verbose = verbose)
     }
 
     .ar_msg(verbose, "writing ", nrow(pages$pages),
@@ -383,8 +380,7 @@ print.pensar_research <- function(x, ...) {
         stop("TAVILY_API_KEY not set", call. = FALSE)
     }
     if (!requireNamespace("jsonlite", quietly = TRUE)) {
-        stop("Tavily search requires the 'jsonlite' package.",
-             call. = FALSE)
+        stop("Tavily search requires the 'jsonlite' package.", call. = FALSE)
     }
     body <- jsonlite::toJSON(
                              list(api_key = api_key, query = query, max_results = as.integer(n)),
@@ -423,3 +419,4 @@ print.pensar_research <- function(x, ...) {
                stringsAsFactors = FALSE
     )
 }
+
diff --git a/R/autoresearch_backends.R b/R/autoresearch_backends.R
@@ -137,17 +137,17 @@
             "Return JSON: {\"claims\":[{\"source_path\":\"...\",\"source_slug\":\"...\",\"claim\":\"...\",\"confidence\":\"low|medium|high\",\"quote\":\"...\"}]}",
             payload, sep = "\n\n"),
                    analyze_gaps = paste(
-            "Review the current source-grounded claims and identify important research gaps.",
-            "Return empty arrays if the topic is sufficiently covered or if additional searches would duplicate prior queries.",
-            "Return JSON: {\"gaps\":[{\"gap\":\"...\",\"reason\":\"...\"}],\"queries\":[{\"query\":\"...\",\"angle\":\"...\",\"gap\":\"...\"}]}",
-            payload, sep = "\n\n"),
+                                        "Review the current source-grounded claims and identify important research gaps.",
+                                        "Return empty arrays if the topic is sufficiently covered or if additional searches would duplicate prior queries.",
+                                        "Return JSON: {\"gaps\":[{\"gap\":\"...\",\"reason\":\"...\"}],\"queries\":[{\"query\":\"...\",\"angle\":\"...\",\"gap\":\"...\"}]}",
+                                        payload, sep = "\n\n"),
                    plan_pages = paste(
                                       "Draft the wiki pages for the research run.",
                                       "Every non-obvious claim must cite raw source wikilinks.",
                                       "Use only source-grounded claims, quotes, source metadata, and existing_pages. Raw source bodies are intentionally unavailable at this stage.",
                                       "Prefer updating an existing page when existing_pages contains a matching node_id, title, alias, or page_uid.",
                                       sprintf("Slugs MUST be derived from the actual research topic, not the literal word 'topic'. Kebab-case, ASCII letters/digits/hyphens only. A reasonable default for this run's synthesis page is '%s'.",
-                                              paste0("Research-", slugify(input$topic))),
+                paste0("Research-", slugify(input$topic))),
                                       "Return JSON: {\"headline\":\"<one-sentence headline>\",\"pages\":[{\"slug\":\"<kebab-slug-derived-from-topic>\",\"title\":\"<human title for the page>\",\"type\":\"analysis\",\"source\":\"<autoresearch session reference>\",\"body\":\"<markdown body>\"}]}",
                                       payload, sep = "\n\n"),
                    revise_page = paste(
@@ -185,11 +185,9 @@
 #' @noRd
 .heuristic_plan_queries <- function(input, program) {
     topic <- input$topic
-    base <- c(topic,
-              paste(topic, "overview"),
+    base <- c(topic, paste(topic, "overview"),
               paste(topic, "primary sources"),
-              paste(topic, "official documentation"),
-              paste(topic, "review"))
+              paste(topic, "official documentation"), paste(topic, "review"))
     base <- unique(base)
     max_n <- min(length(base), program$max_queries_per_round)
     list(queries = lapply(seq_len(max_n), function(i) {
@@ -248,12 +246,9 @@
     if (!nzchar(new_draft)) {
         return(list(body = existing))
     }
-    revised <- paste(existing,
-                     "",
-                     sprintf("## Update %s", as.character(Sys.Date())),
-                     "",
-                     new_draft,
-                     sep = "\n")
+    revised <- paste(existing, "",
+                     sprintf("## Update %s", as.character(Sys.Date())), "",
+                     new_draft, sep = "\n")
     list(body = revised)
 }
 
@@ -331,3 +326,4 @@
     }
     substr(pieces[[1L]], 1L, 240L)
 }
+