-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Description
#' Run an R script with provenance collection and summarize inputs/outputs to YAML
#'
#' Executes an R script under `rdtLite::prov.run()`, parses the resulting
#' PROV-JSON to find the files the script read and wrote, writes that summary
#' to a YAML file, and removes the provenance directory that was generated.
#'
#' The provenance directory is created in the current working directory as
#' `prov_<script name without extension>`. It is created with
#' `overwrite = TRUE` and deleted when the function exits (including on
#' error), so an existing directory of that name in the working directory
#' will not survive the call.
#'
#' @param script_path Character string. The relative or absolute path to the R script to be executed.
#' @param output_yaml_path Character string. The path where the output YAML summary should be saved.
#'
#' @return `NULL`, invisibly. The function is called for its side effects
#'   (running the script and creating a YAML file).
#' @importFrom rdtLite prov.run
#' @importFrom jsonlite fromJSON
#' @importFrom yaml write_yaml
#' @importFrom tools file_path_sans_ext
#' @importFrom fs path_abs
#' @export
run_and_summarize <- function(script_path, output_yaml_path) {
  # 1. Setup paths
  if (!file.exists(script_path)) {
    stop(sprintf("Script not found: %s", script_path), call. = FALSE)
  }

  # Absolute path of the script, recorded in the YAML report.
  # as.character() drops the fs path class so YAML gets a plain string.
  full_script_path <- as.character(fs::path_abs(script_path))

  # rdtLite creates a folder named "prov_" + script name (without extension)
  # inside `prov.dir`.
  script_name <- tools::file_path_sans_ext(basename(script_path))
  prov_dir_name <- paste0("prov_", script_name)

  # Resolve everything against the working directory as it is *now*, so that
  # a script which calls setwd() cannot break the lookup or the cleanup.
  prov_root <- getwd()
  prov_dir_path <- file.path(prov_root, prov_dir_name)
  prov_json_path <- file.path(prov_dir_path, "prov.json")

  # 5. Cleanup (registered up front so it also runs when a later step fails)
  on.exit(
    if (dir.exists(prov_dir_path)) {
      cat(sprintf("Cleaning up provenance directory: %s\n", prov_dir_name))
      unlink(prov_dir_path, recursive = TRUE)
    },
    add = TRUE
  )

  # 2. Run the script using rdtLite
  cat(sprintf("Running %s with provenance collection...\n", script_path))
  rdtLite::prov.run(script_path, prov.dir = prov_root, overwrite = TRUE)

  # 3. Parse the generated JSON
  if (!file.exists(prov_json_path)) {
    stop("Error: prov.json was not found. The script may have failed to run.")
  }
  # simplifyVector = FALSE keeps every JSON object as a named list, which is
  # the shape the extraction below relies on.
  prov_data <- jsonlite::fromJSON(prov_json_path, simplifyVector = FALSE)
  entities <- prov_data$entity
  used_block <- prov_data$used
  generated_block <- prov_data$wasGeneratedBy

  # --- Helper Logic (Internal) ---
  # Collect the locations of all "File" entities referenced by a relationship
  # block (`used` or `wasGeneratedBy`). Returns a list of
  # `list(location = ...)` records, empty when nothing matches.
  extract_files <- function(relationship_block, entity_key_label) {
    file_list <- list()
    for (rel_item in relationship_block) {
      entity_id <- rel_item[[entity_key_label]]
      # Skip records without a usable entity reference; `[[NULL]]` would error.
      if (!is.character(entity_id) || length(entity_id) != 1L) {
        next
      }
      entity_details <- entities[[entity_id]]
      if (!is.list(entity_details)) {
        next
      }
      if (identical(entity_details[["rdt:type"]], "File")) {
        # Only the location is reported (the entity name is deliberately omitted).
        file_list[[length(file_list) + 1L]] <- list(
          location = entity_details[["rdt:location"]]
        )
      }
    }
    file_list
  }

  # Extract Inputs and Outputs
  inputs <- extract_files(used_block, "prov:entity")
  outputs <- extract_files(generated_block, "prov:entity")

  # 4. Save to YAML
  yaml_data <- list(
    script = full_script_path,
    inputs = inputs,
    outputs = outputs
  )
  yaml::write_yaml(yaml_data, output_yaml_path)
  cat(sprintf("Summary saved to %s\n", output_yaml_path))

  invisible(NULL)
}
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels