diff --git a/R/audits.R b/R/audits.R index 53d1075..3c472ea 100644 --- a/R/audits.R +++ b/R/audits.R @@ -34,13 +34,12 @@ #' @export dedup <- function(vault = default_vault(), threshold = 0.7) { if (!requireNamespace("stringdist", quietly = TRUE)) { - stop("dedup() requires the 'stringdist' package.", - call. = FALSE) + stop("dedup() requires the 'stringdist' package.", call. = FALSE) } vault <- normalizePath(vault, mustWork = TRUE) reg <- vault_registry(vault) - pages <- reg[!reg$system_file, , drop = FALSE] + pages <- reg[!reg$system_file,, drop = FALSE] n <- nrow(pages) if (n < 2L) { proposals <- empty_dedup_result() @@ -49,8 +48,7 @@ dedup <- function(vault = default_vault(), threshold = 0.7) { } titles <- ifelse(!is.na(pages$title) & nzchar(pages$title), - tolower(trimws(pages$title)), - tolower(pages$node_id)) + tolower(trimws(pages$title)), tolower(pages$node_id)) page_a <- character(0L) page_b <- character(0L) @@ -61,7 +59,7 @@ dedup <- function(vault = default_vault(), threshold = 0.7) { for (i in seq_len(n - 1L)) { for (j in seq.int(i + 1L, n)) { jw_dist <- stringdist::stringdist(titles[i], titles[j], - method = "jw") + method = "jw") sim <- 1 - jw_dist jacc <- tag_jaccard(pages$tags[[i]], pages$tags[[j]]) score <- 0.6 * sim + 0.4 * jacc @@ -70,9 +68,8 @@ dedup <- function(vault = default_vault(), threshold = 0.7) { page_b <- c(page_b, pages$path[j]) title_sim <- c(title_sim, sim) tag_overlap <- c( - tag_overlap, - length(intersect(pages$tags[[i]], - pages$tags[[j]]))) + tag_overlap, + length(intersect(pages$tags[[i]], pages$tags[[j]]))) combined <- c(combined, score) } } @@ -84,8 +81,8 @@ dedup <- function(vault = default_vault(), threshold = 0.7) { combined_score = combined, stringsAsFactors = FALSE) if (nrow(proposals) > 0L) { - proposals <- proposals[order(-proposals$combined_score), , - drop = FALSE] + proposals <- proposals[order(-proposals$combined_score),, + drop = FALSE] rownames(proposals) <- NULL } write_dedup_proposals(vault, proposals) @@ -95,10 +92,8 @@ dedup <- function(vault = default_vault(), threshold = 0.7) { #' @noRd empty_dedup_result <- function() { data.frame(page_a = character(0L), page_b = character(0L), - title_similarity = numeric(0L), - tag_overlap = integer(0L), - combined_score = numeric(0L), - stringsAsFactors = FALSE) + title_similarity = numeric(0L), tag_overlap = integer(0L), + combined_score = numeric(0L), stringsAsFactors = FALSE) } #' Jaccard overlap of two tag sets @@ -123,13 +118,9 @@ write_dedup_proposals <- function(vault, proposals) { out_path <- file.path(out_dir, "dedup.md") if (nrow(proposals) == 0L) { - writeLines(c("---", - "title: Dedup proposals", - sprintf("updated: %s", now_ts()), - "---", - "", - "# Dedup proposals", - "", + writeLines(c("---", "title: Dedup proposals", + sprintf("updated: %s", now_ts()), "---", "", + "# Dedup proposals", "", "No candidate duplicates above threshold."), out_path) return(invisible(out_path)) @@ -142,8 +133,7 @@ write_dedup_proposals <- function(vault, proposals) { "", "# Dedup proposals", "", - sprintf("%d candidate pair(s) above threshold.", - nrow(proposals)), + sprintf("%d candidate pair(s) above threshold.", nrow(proposals)), "") for (i in seq_len(nrow(proposals))) { lines <- c(lines, @@ -214,7 +204,7 @@ tags <- function(vault = default_vault(), taxonomy = NULL, } reg <- vault_registry(vault) - pages <- reg[!reg$system_file, , drop = FALSE] + pages <- reg[!reg$system_file,, drop = FALSE] all_tags <- unlist(pages$tags, use.names = FALSE) if (is.null(all_tags)) { @@ -245,9 +235,8 @@ tags <- function(vault = default_vault(), taxonomy = NULL, unk_tags <- used$tag[unk_mask] suggestion <- vapply(unk_tags, function(t) { - near_miss(t, allowed, - near_miss_threshold) - }, + near_miss(t, allowed, near_miss_threshold) + }, character(1L)) data.frame(tag = unk_tags, count = used$count[unk_mask], @@ -262,8 +251,7 @@ tags <- function(vault = default_vault(), taxonomy = NULL, sort(setdiff(allowed, used$tag)) } - write_tags_proposals(vault, used, unknown, unused_taxonomy, - taxonomy) + write_tags_proposals(vault, used, unknown, unused_taxonomy, taxonomy) invisible(list(used = used, unknown = unknown, unused_taxonomy = unused_taxonomy)) } @@ -280,8 +268,9 @@ read_taxonomy <- function(taxonomy_path) { # across R's default regex engine. `\\s` is not. pat <- "^[[:space:]]*[-*][[:space:]]+`?([^`[:space:]]+)`?[[:space:]]*$" m <- regmatches(lines, regexec(pat, lines)) - tags <- vapply(m, function(x) if (length(x) >= 2L) x[[2L]] - else NA_character_, character(1L)) + tags <- vapply(m, + function(x) if (length(x) >= 2L) x[[2L]] else NA_character_, + character(1L)) tags <- tags[!is.na(tags) & nzchar(tags)] unique(tags) } @@ -308,19 +297,14 @@ near_miss <- function(tag, allowed, threshold) { #' Write the tags proposals report #' @noRd -write_tags_proposals <- function(vault, used, unknown, - unused_taxonomy, taxonomy_path) { +write_tags_proposals <- function(vault, used, unknown, unused_taxonomy, + taxonomy_path) { out_dir <- file.path(vault, "_proposals") dir.create(out_dir, recursive = TRUE, showWarnings = FALSE) out_path <- file.path(out_dir, "tags.md") - lines <- c("---", - "title: Tag audit", - sprintf("updated: %s", now_ts()), - "---", - "", - "# Tag audit", - "") + lines <- c("---", "title: Tag audit", sprintf("updated: %s", now_ts()), + "---", "", "# Tag audit", "") if (is.null(taxonomy_path) || !file.exists(taxonomy_path)) { lines <- c(lines, @@ -347,8 +331,7 @@ write_tags_proposals <- function(vault, used, unknown, suggestion <- if (is.na(unknown$suggestion[i])) { "" } else { - sprintf(" -- did you mean `%s`?", - unknown$suggestion[i]) + sprintf(" -- did you mean `%s`?", unknown$suggestion[i]) } lines <- c(lines, sprintf("- `%s` (%d)%s", unknown$tag[i], unknown$count[i], @@ -368,3 +351,4 @@ write_tags_proposals <- function(vault, used, unknown, writeLines(lines, out_path) invisible(out_path) } + diff --git a/R/autoresearch.R b/R/autoresearch.R index 4ca9b76..6329739 100644 --- a/R/autoresearch.R +++ b/R/autoresearch.R @@ -77,11 +77,9 @@ autoresearch <- function(topic, vault = default_vault(), if (!is.character(topic) || length(topic) != 1L || !nzchar(topic)) { stop("`topic` must be a single non-empty string.", call. = FALSE) } - if (!is.null(slug) && - (!is.character(slug) || length(slug) != 1L)) { + if (!is.null(slug) && (!is.character(slug) || length(slug) != 1L)) { stop("`slug` must be NULL or a single character value (got ", - typeof(slug), " of length ", length(slug), ").", - call. = FALSE) + typeof(slug), " of length ", length(slug), ").", call. = FALSE) } vault <- normalizePath(vault, mustWork = TRUE) if (!file.exists(file.path(vault, "schema.md"))) { @@ -99,7 +97,7 @@ autoresearch <- function(topic, vault = default_vault(), fetch_backend <- fetch_backend %||% .autoresearch_default_fetch_backend() if (is.null(model_backend)) { model_backend <- .autoresearch_default_model_backend(provider = provider, - model = model) + model = model) } model_backend <- .verbose_model_backend(model_backend, verbose) t_start <- Sys.time() @@ -134,8 +132,8 @@ autoresearch <- function(topic, vault = default_vault(), .ar_msg(verbose, "round ", round, ": running ", nrow(round_queries), " search ", if (nrow(round_queries) == 1L) "query" else "queries") round_search <- autoresearch_run_searches(round_queries, - search_backend, program, - verbose = verbose) + search_backend, program, + verbose = verbose) round_search$round <- rep.int(as.integer(round), nrow(round_search)) search_results <- .autoresearch_bind_rows(search_results, round_search) .ar_msg(verbose, "round ", round, ": ", nrow(round_search), @@ -144,32 +142,31 @@ autoresearch <- function(topic, vault = default_vault(), .ar_msg(verbose, "round ", round, ": selecting sources from ", nrow(round_search), " results") round_selected <- autoresearch_select_sources(topic, round_search, - program, model_backend) + program, model_backend) if (nrow(round_selected) > 0L && nrow(selected) > 0L) { round_selected <- round_selected[ !round_selected$url %in% selected$url,, drop = FALSE] } - round_selected$round <- rep.int(as.integer(round), - nrow(round_selected)) + round_selected$round <- rep.int(as.integer(round), nrow(round_selected)) selected <- .autoresearch_bind_rows(selected, round_selected) .ar_msg(verbose, "round ", round, ": selected ", nrow(round_selected), " new ", if (nrow(round_selected) == 1L) "source" else "sources") .ar_msg(verbose, "round ", round, ": fetching ", nrow(round_selected), " ", - if (nrow(round_selected) == 1L) "source" else "sources") + if (nrow(round_selected) == 1L) "source" else "sources") round_sources <- autoresearch_fetch_and_ingest(round_selected, - fetch_backend, vault, - topic, force = force, - verbose = verbose) + fetch_backend, vault, + topic, force = force, + verbose = verbose) round_sources$round <- rep.int(as.integer(round), nrow(round_sources)) sources <- .autoresearch_bind_rows(sources, round_sources) .ar_msg(verbose, "round ", round, ": extracting evidence from ", nrow(round_sources), " ", - if (nrow(round_sources) == 1L) "source" else "sources") + if (nrow(round_sources) == 1L) "source" else "sources") round_claims <- autoresearch_extract_claims(topic, round_sources, - program, model_backend) + program, model_backend) round_claims$round <- rep.int(as.integer(round), nrow(round_claims)) claims <- .autoresearch_bind_rows(claims, round_claims) .ar_msg(verbose, "round ", round, ": extracted ", nrow(round_claims), @@ -189,20 +186,20 @@ autoresearch <- function(topic, vault = default_vault(), } .ar_msg(verbose, "round ", round, ": analyzing gaps") gap_plan <- autoresearch_analyze_gaps(topic, claims, sources, - all_queries, program, - model_backend, round) + all_queries, program, + model_backend, round) gaps <- .autoresearch_bind_rows(gaps, gap_plan$gaps) next_queries <- gap_plan$queries .ar_msg(verbose, "round ", round, ": ", nrow(gap_plan$gaps), " gaps, ", nrow(gap_plan$queries), " follow-up ", - if (nrow(gap_plan$queries) == 1L) "query" else "queries") + if (nrow(gap_plan$queries) == 1L) "query" else "queries") } existing_pages <- autoresearch_existing_pages(vault) .ar_msg(verbose, "vault has ", nrow(existing_pages), " existing wiki ", - if (nrow(existing_pages) == 1L) "page" else "pages") + if (nrow(existing_pages) == 1L) "page" else "pages") .ar_msg(verbose, "planning pages from ", nrow(claims), " claims and ", nrow(sources), " sources") @@ -212,22 +209,22 @@ autoresearch <- function(topic, vault = default_vault(), " ", if (nrow(pages$pages) == 1L) "page" else "pages") pages$pages <- .ar_apply_user_slug(pages$pages, slug, verbose) pages$pages <- .ar_dedupe_planned_slugs(pages$pages, topic, vault, - verbose = verbose) + verbose = verbose) if (isTRUE(update) && nrow(pages$pages) > 0L) { update_targets <- .ar_update_targets(pages$pages, vault) if (length(update_targets) > 0L) { .ar_msg(verbose, "revising ", length(update_targets), " existing ", - if (length(update_targets) == 1L) "page" else "pages", + if (length(update_targets) == 1L) "page" else "pages", ": ", paste(update_targets, collapse = ", ")) } else { .ar_msg(verbose, "no existing pages to revise; writing fresh") } pages$pages <- autoresearch_revise_pages(pages$pages, topic, claims, - sources, vault, program, - model_backend, - verbose = verbose) + sources, vault, program, + model_backend, + verbose = verbose) } .ar_msg(verbose, "writing ", nrow(pages$pages), @@ -383,8 +380,7 @@ print.pensar_research <- function(x, ...) { stop("TAVILY_API_KEY not set", call. = FALSE) } if (!requireNamespace("jsonlite", quietly = TRUE)) { - stop("Tavily search requires the 'jsonlite' package.", - call. = FALSE) + stop("Tavily search requires the 'jsonlite' package.", call. = FALSE) } body <- jsonlite::toJSON( list(api_key = api_key, query = query, max_results = as.integer(n)), @@ -423,3 +419,4 @@ print.pensar_research <- function(x, ...) { stringsAsFactors = FALSE ) } + diff --git a/R/autoresearch_backends.R b/R/autoresearch_backends.R index 62db39e..34f7ded 100644 --- a/R/autoresearch_backends.R +++ b/R/autoresearch_backends.R @@ -137,17 +137,17 @@ "Return JSON: {\"claims\":[{\"source_path\":\"...\",\"source_slug\":\"...\",\"claim\":\"...\",\"confidence\":\"low|medium|high\",\"quote\":\"...\"}]}", payload, sep = "\n\n"), analyze_gaps = paste( - "Review the current source-grounded claims and identify important research gaps.", - "Return empty arrays if the topic is sufficiently covered or if additional searches would duplicate prior queries.", - "Return JSON: {\"gaps\":[{\"gap\":\"...\",\"reason\":\"...\"}],\"queries\":[{\"query\":\"...\",\"angle\":\"...\",\"gap\":\"...\"}]}", - payload, sep = "\n\n"), + "Review the current source-grounded claims and identify important research gaps.", + "Return empty arrays if the topic is sufficiently covered or if additional searches would duplicate prior queries.", + "Return JSON: {\"gaps\":[{\"gap\":\"...\",\"reason\":\"...\"}],\"queries\":[{\"query\":\"...\",\"angle\":\"...\",\"gap\":\"...\"}]}", + payload, sep = "\n\n"), plan_pages = paste( "Draft the wiki pages for the research run.", "Every non-obvious claim must cite raw source wikilinks.", "Use only source-grounded claims, quotes, source metadata, and existing_pages. Raw source bodies are intentionally unavailable at this stage.", "Prefer updating an existing page when existing_pages contains a matching node_id, title, alias, or page_uid.", sprintf("Slugs MUST be derived from the actual research topic, not the literal word 'topic'. Kebab-case, ASCII letters/digits/hyphens only. A reasonable default for this run's synthesis page is '%s'.", - paste0("Research-", slugify(input$topic))), + paste0("Research-", slugify(input$topic))), "Return JSON: {\"headline\":\"\",\"pages\":[{\"slug\":\"\",\"title\":\"\",\"type\":\"analysis\",\"source\":\"\",\"body\":\"\"}]}", payload, sep = "\n\n"), revise_page = paste( @@ -185,11 +185,9 @@ #' @noRd .heuristic_plan_queries <- function(input, program) { topic <- input$topic - base <- c(topic, - paste(topic, "overview"), + base <- c(topic, paste(topic, "overview"), paste(topic, "primary sources"), - paste(topic, "official documentation"), - paste(topic, "review")) + paste(topic, "official documentation"), paste(topic, "review")) base <- unique(base) max_n <- min(length(base), program$max_queries_per_round) list(queries = lapply(seq_len(max_n), function(i) { @@ -248,12 +246,9 @@ if (!nzchar(new_draft)) { return(list(body = existing)) } - revised <- paste(existing, - "", - sprintf("## Update %s", as.character(Sys.Date())), - "", - new_draft, - sep = "\n") + revised <- paste(existing, "", + sprintf("## Update %s", as.character(Sys.Date())), "", + new_draft, sep = "\n") list(body = revised) } @@ -331,3 +326,4 @@ } substr(pieces[[1L]], 1L, 240L) } + diff --git a/R/autoresearch_steps.R b/R/autoresearch_steps.R index 4d84201..6f9c442 100644 --- a/R/autoresearch_steps.R +++ b/R/autoresearch_steps.R @@ -61,7 +61,8 @@ autoresearch_select_sources <- function(topic, search_results, program, #' @noRd autoresearch_fetch_and_ingest <- function(selected, fetch_backend, vault, - topic, force = FALSE, verbose = FALSE) { + topic, force = FALSE, + verbose = FALSE) { if (nrow(selected) == 0L) { return(.empty_sources()) } @@ -81,12 +82,12 @@ autoresearch_fetch_and_ingest <- function(selected, fetch_backend, vault, } else { .ar_msg(verbose, " fetch ", i, "/", nrow(selected), ": ", url) fetched <- tryCatch( - .validate_autoresearch_fetch_result(fetch_backend(url)), - error = function(e) { - .ar_msg(verbose, " fetch ", i, "/", nrow(selected), - " skipped: ", conditionMessage(e)) - NULL - }) + .validate_autoresearch_fetch_result(fetch_backend(url)), + error = function(e) { + .ar_msg(verbose, " fetch ", i, "/", nrow(selected), + " skipped: ", conditionMessage(e)) + NULL + }) if (is.null(fetched)) { next } @@ -107,8 +108,7 @@ autoresearch_fetch_and_ingest <- function(selected, fetch_backend, vault, slug = tools::file_path_sans_ext(basename(rel)), content_type = content_type, injection_flag = length(injection_reasons) > 0L, - injection_reasons = paste(injection_reasons, - collapse = "; "), + injection_reasons = paste(injection_reasons, collapse = "; "), body = I(list(body)), stringsAsFactors = FALSE ) @@ -147,8 +147,8 @@ autoresearch_analyze_gaps <- function(topic, claims, sources, queries, res <- model_backend("analyze_gaps", list(topic = topic, claims = claims, - sources = .autoresearch_source_records( - sources, include_body = FALSE), + sources = .autoresearch_source_records(sources, + include_body = FALSE), previous_queries = queries, completed_round = as.integer(round)), program) @@ -163,8 +163,7 @@ autoresearch_analyze_gaps <- function(topic, claims, sources, queries, gaps <- .empty_gaps() } - planned <- .list_to_df(res$queries, - columns = c("query", "angle", "gap")) + planned <- .list_to_df(res$queries, columns = c("query", "angle", "gap")) if (nrow(planned) == 0L && !is.null(res$gap_queries)) { planned <- .list_to_df(res$gap_queries, columns = c("query", "angle", "gap")) @@ -214,8 +213,8 @@ autoresearch_plan_pages <- function(topic, claims, sources, existing_pages, res <- model_backend("plan_pages", list(topic = topic, claims = claims, - sources = .autoresearch_source_records( - sources, include_body = FALSE), + sources = .autoresearch_source_records(sources, + include_body = FALSE), existing_pages = existing_pages), program) pages <- .list_to_df(res$pages, @@ -272,8 +271,8 @@ autoresearch_plan_pages <- function(topic, claims, sources, existing_pages, return(TRUE) } normalized %in% c("topic", "research-topic", "research_topic", - "research:topic", "research", "page", - "research-page", "kebab-slug-derived-from-topic", + "research:topic", "research", "page", "research-page", + "kebab-slug-derived-from-topic", "") } @@ -314,9 +313,13 @@ autoresearch_plan_pages <- function(topic, claims, sources, existing_pages, return(planned) } analysis_idx <- which(planned$type == "analysis") - idx <- if (length(analysis_idx) > 0L) analysis_idx[[1L]] else 1L - .ar_msg(verbose, " user-supplied slug: forcing synthesis row ", - idx, " to '", forced_slug, "' (overlap guard skipped)") + if (length(analysis_idx) > 0L) { + idx <- analysis_idx[[1L]] + } else { + idx <- 1L + } + .ar_msg(verbose, " user-supplied slug: forcing synthesis row ", idx, + " to '", forced_slug, "' (overlap guard skipped)") planned$slug[[idx]] <- forced_slug planned$user_forced[[idx]] <- TRUE planned @@ -374,17 +377,16 @@ autoresearch_revise_pages <- function(planned, topic, claims, sources, vault, slug <- planned$slug[[i]] path <- file.path(vault, "wiki", paste0(slug, ".md")) forced <- "user_forced" %in% names(planned) && - isTRUE(planned$user_forced[[i]]) + isTRUE(planned$user_forced[[i]]) if (file.exists(path) && !forced) { - existing_title <- tryCatch( - parse_frontmatter(path)$title %||% slug, - error = function(e) slug) + existing_title <- tryCatch(parse_frontmatter(path)$title %||% slug, + error = function(e) slug) if (!.ar_titles_overlap(planned$title[[i]], existing_title)) { other_slugs <- planned$slug[-i] alt_slug <- .ar_unique_slug(slug, topic, vault, - planned$title[[i]], - also_taken = other_slugs) + planned$title[[i]], + also_taken = other_slugs) .ar_msg(verbose, " slug '", slug, "' collides with unrelated page '", existing_title, @@ -411,7 +413,7 @@ autoresearch_revise_pages <- function(planned, topic, claims, sources, vault, new_draft_body = planned$body[[i]], claims = claims, sources = .autoresearch_source_records( - sources, include_body = FALSE)), + sources, include_body = FALSE)), program) revised <- if (!is.null(res$body) && nzchar(res$body)) { as.character(res$body) @@ -474,7 +476,7 @@ autoresearch_revise_pages <- function(planned, topic, claims, sources, vault, also_taken = character()) { slug_taken <- function(s) { file.exists(file.path(vault, "wiki", paste0(s, ".md"))) || - s %in% also_taken + s %in% also_taken } base <- paste0("Research-", slugify(topic)) base_path <- file.path(vault, "wiki", paste0(base, ".md")) @@ -483,8 +485,8 @@ autoresearch_revise_pages <- function(planned, topic, claims, sources, vault, } if (file.exists(base_path) && !(base %in% also_taken)) { existing_title <- tryCatch( - parse_frontmatter(base_path)$title %||% base, - error = function(e) base) + parse_frontmatter(base_path)$title %||% base, + error = function(e) base) if (.ar_titles_overlap(planned_title, existing_title)) { return(base) } @@ -507,8 +509,7 @@ autoresearch_write_pages <- function(pages, vault, program, overwrite = TRUE, action = character(), type = character(), title = character(), stringsAsFactors = FALSE)) } - required_tags <- unique(as.character(program$required_tags %||% - "research")) + required_tags <- unique(as.character(program$required_tags %||% "research")) required_tags <- required_tags[nzchar(required_tags)] if (length(required_tags) == 0L) { required_tags <- "research" @@ -658,7 +659,7 @@ autoresearch_write_pages <- function(pages, vault, program, overwrite = TRUE, #' @noRd .autoresearch_source_records <- function(sources, include_body = FALSE, - body_chars = 4000L) { + body_chars = 4000L) { cols <- c("url", "title", "path", "slug", "content_type", "injection_flag", "injection_reasons") if (is.null(sources) || nrow(sources) == 0L) { @@ -686,12 +687,12 @@ autoresearch_write_pages <- function(pages, vault, program, overwrite = TRUE, .prompt_injection_reasons <- function(text) { text <- tolower(.plain_text(paste(text, collapse = " "))) patterns <- c( - ignore_previous = "ignore (all )?(previous|prior|above) instructions", - disregard_previous = "disregard (all )?(previous|prior|above) instructions", - system_prompt = "system prompt|developer message|hidden instructions", - tool_use = "call (the )?tool|use (the )?tool|execute (this )?command", - exfiltrate = "exfiltrate|api key|password|secret token", - role_override = "you are now|new instructions|follow these instructions" + ignore_previous = "ignore (all )?(previous|prior|above) instructions", + disregard_previous = "disregard (all )?(previous|prior|above) instructions", + system_prompt = "system prompt|developer message|hidden instructions", + tool_use = "call (the )?tool|use (the )?tool|execute (this )?command", + exfiltrate = "exfiltrate|api key|password|secret token", + role_override = "you are now|new instructions|follow these instructions" ) hits <- names(patterns)[vapply(patterns, function(pat) { grepl(pat, text, perl = TRUE) @@ -731,8 +732,7 @@ autoresearch_write_pages <- function(pages, vault, program, overwrite = TRUE, .empty_sources <- function() { data.frame(url = character(), title = character(), path = character(), slug = character(), content_type = character(), - injection_flag = logical(), - injection_reasons = character(), + injection_flag = logical(), injection_reasons = character(), body = I(list()), stringsAsFactors = FALSE) } @@ -751,3 +751,4 @@ autoresearch_write_pages <- function(pages, vault, program, overwrite = TRUE, sources = character(), aliases = character(), tags = character(), stringsAsFactors = FALSE) } + diff --git a/R/git.R b/R/git.R index ea811f5..31d5e09 100644 --- a/R/git.R +++ b/R/git.R @@ -110,8 +110,7 @@ vault_is_pensar_owned <- function(path) { return(TRUE) } tracked <- suppressWarnings(system2("git", - c("-C", path, "ls-tree", "-r", "HEAD", - "--name-only"), + c("-C", path, "ls-tree", "-r", "HEAD", "--name-only"), stdout = TRUE, stderr = FALSE)) if (length(tracked) == 0L) { return(TRUE) diff --git a/R/index.R b/R/index.R index fb67b82..1efbebb 100644 --- a/R/index.R +++ b/R/index.R @@ -107,20 +107,18 @@ update_index_adopted <- function(vault) { sorted_types <- sort(unique(type_col)) for (t in sorted_types) { in_type_idx <- type_col == t - in_type <- page_rows[in_type_idx, , drop = FALSE] + in_type <- page_rows[in_type_idx,, drop = FALSE] in_type_links <- link_text[in_type_idx] - lines <- c(lines, sprintf("## %s (%d)", t, nrow(in_type)), - "") + lines <- c(lines, sprintf("## %s (%d)", t, nrow(in_type)), "") for (i in seq_len(nrow(in_type))) { title <- if (!is.na(in_type$title[i]) && - nzchar(in_type$title[i])) { + nzchar(in_type$title[i])) { in_type$title[i] } else { in_type$node_id[i] } lines <- c(lines, - sprintf("- [[%s]] -- %s", - in_type_links[i], title)) + sprintf("- [[%s]] -- %s", in_type_links[i], title)) } lines <- c(lines, "") } diff --git a/R/ingest_agent_context.R b/R/ingest_agent_context.R index 0d20b72..0948503 100644 --- a/R/ingest_agent_context.R +++ b/R/ingest_agent_context.R @@ -44,16 +44,14 @@ #' unlink(v, recursive = TRUE) #' } #' @export -ingest_agent_context <- function(agent = c("claude", "codex", - "corteza"), +ingest_agent_context <- function(agent = c("claude", "codex", "corteza"), vault = default_vault(), - project_dir = getwd(), - workspace_dir = NULL, ...) { + project_dir = getwd(), workspace_dir = NULL, + ...) { agent <- match.arg(agent) if (!requireNamespace("saber", quietly = TRUE)) { stop("ingest_agent_context() requires the 'saber' package.\n", - " Install with: install.packages('saber')", - call. = FALSE) + " Install with: install.packages('saber')", call. = FALSE) } # Dynamic resolution: gates on the agent_context export so older # saber versions (pre-0.4 on CRAN) that lack the function fail @@ -81,10 +79,10 @@ ingest_agent_context <- function(agent = c("claude", "codex", } source_id <- sprintf("saber::agent_context(%s)", agent) - title <- sprintf("%s context %s", agent, - format(Sys.Date(), "%Y-%m-%d")) + title <- sprintf("%s context %s", agent, format(Sys.Date(), "%Y-%m-%d")) fp <- ingest(content = context, type = "chats", source = source_id, title = title, tags = c("agent-context", agent), vault = vault) invisible(substring(fp, nchar(vault) + 2L)) } + diff --git a/R/ingest_url.R b/R/ingest_url.R index bac0285..ac25208 100644 --- a/R/ingest_url.R +++ b/R/ingest_url.R @@ -209,8 +209,8 @@ content_type_allowed <- function(ctype) { #' @noRd extract_html_title <- function(html) { m <- regmatches(html, - regexec("]*>([\\s\\S]*?)", html, - ignore.case = TRUE, perl = TRUE))[[1L]] + regexec("]*>([\\s\\S]*?)", html, ignore.case = TRUE, + perl = TRUE))[[1L]] if (length(m) < 2L) { return(NULL) } @@ -226,3 +226,4 @@ extract_html_title <- function(html) { title <- gsub(">", ">", title, fixed = TRUE) title } + diff --git a/R/lint.R b/R/lint.R index bcb7400..c711098 100644 --- a/R/lint.R +++ b/R/lint.R @@ -26,14 +26,13 @@ lint <- function(vault = default_vault(), min_cluster_size = 3L) { # link extraction all agree with find_page() / outlinks() / # backlinks() on path-style links and aliases. reg <- vault_registry(vault) - page_rows <- reg[!reg$system_file, , drop = FALSE] + page_rows <- reg[!reg$system_file,, drop = FALSE] all_md <- file.path(vault, page_rows$path) page_names <- page_rows$node_id page_paths <- page_rows$path is_wiki <- startsWith(normalizePath(all_md, mustWork = FALSE), - normalizePath(file.path(vault, "wiki"), - mustWork = FALSE)) + normalizePath(file.path(vault, "wiki"), mustWork = FALSE)) # Build link graph: resolve every outbound link to its target path. # Broken links are the ones that don't resolve at all. diff --git a/R/manifest.R b/R/manifest.R index e9e54ab..d4ad198 100644 --- a/R/manifest.R +++ b/R/manifest.R @@ -62,10 +62,9 @@ read_manifest <- function(vault) { } parsed$version <- parsed$version %||% 1L parsed$created <- parsed$created %||% format(Sys.Date(), "%Y-%m-%d") - parsed$sources <- coerce_manifest_map(parsed$sources, "sources", - fp) + parsed$sources <- coerce_manifest_map(parsed$sources, "sources", fp) parsed$address_map <- coerce_manifest_map(parsed$address_map, - "address_map", fp) + "address_map", fp) parsed } @@ -83,8 +82,7 @@ coerce_manifest_map <- function(x, field, fp) { } if (!is.list(x) || (length(x) > 0L && is.null(names(x)))) { warning("manifest field '", field, "' at ", fp, - " is not a named list; treating as empty", - call. = FALSE) + " is not a named list; treating as empty", call. = FALSE) return(list()) } x @@ -133,7 +131,7 @@ update_manifest <- function(vault, source = NULL, path = NULL, ingested_at = NULL) { vault <- normalizePath(vault, mustWork = TRUE) other_fields_set <- !is.null(source) || !is.null(page_uid) || - !is.null(address) || !is.null(hash) || !is.null(ingested_at) + !is.null(address) || !is.null(hash) || !is.null(ingested_at) if (is.null(path)) { if (other_fields_set) { stop("update_manifest(): `path` is required when any of ", diff --git a/R/outlinks.R b/R/outlinks.R index f45e3f0..093400b 100644 --- a/R/outlinks.R +++ b/R/outlinks.R @@ -60,8 +60,8 @@ outlinks <- function(page, vault = default_vault()) { resolve_target_path <- function(query, vault) { fp <- withCallingHandlers(find_page(query, vault), warning = function(w) { - invokeRestart("muffleWarning") - }) + invokeRestart("muffleWarning") + }) if (is.null(fp)) { return(NA_character_) } @@ -115,12 +115,11 @@ find_page <- function(page, vault) { # `wiki/tags.md` rather than `_proposals/tags.md`. Exact-path # queries above can still target system files when the caller # writes the full path. - content <- reg[!reg$system_file, , drop = FALSE] + content <- reg[!reg$system_file,, drop = FALSE] uid_match <- !is.na(content$page_uid) & content$page_uid == page if (any(uid_match)) { - return(file.path(vault, - content$path[which(uid_match)[1L]])) + return(file.path(vault, content$path[which(uid_match)[1L]])) } nid_match <- content$node_id == page @@ -131,8 +130,7 @@ find_page <- function(page, vault) { if (nmatches > 1L) { candidates <- sort(content$path[nid_match]) warning("ambiguous wikilink: '", page, "' matches ", nmatches, - " pages: ", paste(candidates, collapse = ", "), - call. = FALSE) + " pages: ", paste(candidates, collapse = ", "), call. = FALSE) return(file.path(vault, candidates[1L])) } @@ -140,8 +138,7 @@ find_page <- function(page, vault) { function(a) is.character(a) && page %in% a, logical(1L)) if (any(alias_match)) { - return(file.path(vault, - content$path[which(alias_match)[1L]])) + return(file.path(vault, content$path[which(alias_match)[1L]])) } NULL diff --git a/R/registry.R b/R/registry.R index e220ab3..a333918 100644 --- a/R/registry.R +++ b/R/registry.R @@ -136,8 +136,8 @@ build_registry_row <- function(filepath, vault) { # returns backslashes (so `dirname(filepath) == vault` was false). rel_dir <- dirname(rel_path) is_root_ctrl <- basename(filepath) %in% - c("schema.md", "index.md", "log.md") && - identical(rel_dir, ".") + c("schema.md", "index.md", "log.md") && + identical(rel_dir, ".") in_proposals <- identical(rel_dir, "_proposals") system_file <- is_root_ctrl || in_proposals diff --git a/R/retrieval.R b/R/retrieval.R index 3b20c44..74af175 100644 --- a/R/retrieval.R +++ b/R/retrieval.R @@ -39,9 +39,9 @@ search_pages <- function(query, vault = default_vault(), type = NULL, } # Drop system control files (schema.md, log.md, index.md) so a # query like "vault" doesn't surface the seeded index/log/schema. - reg <- reg[!reg$system_file, , drop = FALSE] + reg <- reg[!reg$system_file,, drop = FALSE] if (!is.null(type)) { - reg <- reg[!is.na(reg$type) & reg$type == type, , drop = FALSE] + reg <- reg[!is.na(reg$type) & reg$type == type,, drop = FALSE] } if (nrow(reg) == 0L) { return(empty_search_result()) @@ -82,8 +82,7 @@ search_pages <- function(query, vault = default_vault(), type = NULL, } if (isTRUE(in_body)) { body <- extract_body(file.path(vault, reg$path[i])) - if (nzchar(body) && - grepl(pat, tolower(body), fixed = TRUE)) { + if (nzchar(body) && grepl(pat, tolower(body), fixed = TRUE)) { paths <- c(paths, reg$path[i]) nodes <- c(nodes, reg$node_id[i]) titles <- c(titles, title %||% NA_character_) @@ -92,9 +91,8 @@ search_pages <- function(query, vault = default_vault(), type = NULL, } } } - data.frame(path = paths, node_id = nodes, title = titles, - type = types, matched_in = where, - stringsAsFactors = FALSE) + data.frame(path = paths, node_id = nodes, title = titles, type = types, + matched_in = where, stringsAsFactors = FALSE) } #' @noRd @@ -128,8 +126,7 @@ empty_search_result <- function() { #' names(ctx) #' unlink(v, recursive = TRUE) #' @export -page_context <- function(name, vault = default_vault(), - body_chars = 300L) { +page_context <- function(name, vault = default_vault(), body_chars = 300L) { vault <- normalizePath(vault, mustWork = TRUE) fp <- find_page(name, vault) if (is.null(fp)) { @@ -137,26 +134,25 @@ page_context <- function(name, vault = default_vault(), } rel <- substring(fp, nchar(vault) + 2L) reg <- vault_registry(vault) - row <- reg[reg$path == rel, , drop = FALSE] + row <- reg[reg$path == rel,, drop = FALSE] fm <- parse_frontmatter(fp) body <- extract_body(fp, body_chars) ol <- tryCatch(outlinks(name, vault = vault), error = function(e) { - data.frame(target = character(0L), - exists = logical(0L), - stringsAsFactors = FALSE) - }) + data.frame(target = character(0L), exists = logical(0L), + stringsAsFactors = FALSE) + }) bl <- backlinks(name, vault = vault) structure( - list(path = rel, - node_id = row$node_id %||% name, - frontmatter = fm, - body_head = body, - outlinks = ol, - backlinks = bl), - class = "pensar_page_context" + list(path = rel, + node_id = row$node_id %||% name, + frontmatter = fm, + body_head = body, + outlinks = ol, + backlinks = bl), + class = "pensar_page_context" ) } @@ -193,15 +189,15 @@ related_pages <- function(name, vault = default_vault(), k = 10L) { target_rel <- substring(fp, nchar(vault) + 2L) reg <- vault_registry(vault) - self_row <- reg[reg$path == target_rel, , drop = FALSE] + self_row <- reg[reg$path == target_rel,, drop = FALSE] if (nrow(self_row) == 0L) { return(empty_related_result()) } target_tags <- self_row$tags[[1L]] target_links <- self_row$links_out[[1L]] - others <- reg[reg$path != target_rel & !reg$system_file, , - drop = FALSE] + others <- reg[reg$path != target_rel & !reg$system_file,, + drop = FALSE] if (nrow(others) == 0L) { return(empty_related_result()) } @@ -213,19 +209,16 @@ related_pages <- function(name, vault = default_vault(), k = 10L) { scores <- integer(nrow(others)) for (i in seq_len(nrow(others))) { - shared_tags <- length(intersect(target_tags, - others$tags[[i]])) - peer_link_paths <- resolved_link_set(others$links_out[[i]], - vault) - shared_links <- length(intersect(target_link_paths, - peer_link_paths)) + shared_tags <- length(intersect(target_tags, others$tags[[i]])) + peer_link_paths <- resolved_link_set(others$links_out[[i]], vault) + shared_links <- length(intersect(target_link_paths, peer_link_paths)) scores[i] <- shared_tags + shared_links } keep <- scores > 0L if (!any(keep)) { return(empty_related_result()) } - others <- others[keep, , drop = FALSE] + others <- others[keep,, drop = FALSE] scores <- scores[keep] ord <- order(-scores, others$path) top <- utils::head(ord, k) @@ -244,8 +237,7 @@ resolved_link_set <- function(links, vault) { if (length(links) == 0L) { return(character(0L)) } - resolved <- vapply(links, resolve_target_path, character(1L), - vault = vault) + resolved <- vapply(links, resolve_target_path, character(1L), vault = vault) unique(resolved[!is.na(resolved)]) } @@ -289,8 +281,7 @@ recent_activity <- function(vault = default_vault(), days = 7L) { op <- vapply(parsed, function(m) m[[3L]], character(1L)) msg <- vapply(parsed, function(m) m[[4L]], character(1L)) - ts <- as.POSIXct(ts_str, format = "%Y-%m-%dT%H:%M:%S", - tz = "") + ts <- as.POSIXct(ts_str, format = "%Y-%m-%dT%H:%M:%S", tz = "") cutoff <- Sys.time() - as.difftime(days, units = "days") keep <- !is.na(ts) & ts >= cutoff if (!any(keep)) { @@ -300,8 +291,8 @@ recent_activity <- function(vault = default_vault(), days = 7L) { op <- op[keep] msg <- msg[keep] ord <- order(ts, decreasing = TRUE) - data.frame(timestamp = ts[ord], operation = op[ord], - message = msg[ord], stringsAsFactors = FALSE) + data.frame(timestamp = ts[ord], operation = op[ord], message = msg[ord], + stringsAsFactors = FALSE) } #' @noRd @@ -335,3 +326,4 @@ extract_body <- function(filepath, n_chars = NULL) { substring(body, 1L, n_chars) } } + diff --git a/R/skills.R b/R/skills.R index 00b8baa..68b90d2 100644 --- a/R/skills.R +++ b/R/skills.R @@ -32,3 +32,4 @@ pensar_skill_path <- function(skill = NULL) { } candidate } + diff --git a/R/vault.R b/R/vault.R index 10563f4..1040895 100644 --- a/R/vault.R +++ b/R/vault.R @@ -86,8 +86,7 @@ init_vault <- function(path = default_vault(), rproj = TRUE, # log entry into a pre-existing user log.md. log_existed_before <- file.exists(file.path(path, "log.md")) - writeLines(adopted_schema_template(), - file.path(path, "schema.md")) + writeLines(adopted_schema_template(), file.path(path, "schema.md")) if (!log_existed_before) { writeLines(log_seed(), file.path(path, "log.md")) } diff --git a/R/wiki_write.R b/R/wiki_write.R index 2f53cd6..36a7941 100644 --- a/R/wiki_write.R +++ b/R/wiki_write.R @@ -70,8 +70,7 @@ write_wiki_page <- function(slug, frontmatter, body, vault = default_vault(), #' #' @noRd .merge_wiki_frontmatter <- function(path, new) { - existing <- tryCatch(parse_frontmatter(path), - error = function(e) list()) + existing <- tryCatch(parse_frontmatter(path), error = function(e) list()) if (!is.list(existing)) { existing <- list() }