diff --git a/DESCRIPTION b/DESCRIPTION
index d3c893c37..ec0d5f41f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -118,6 +118,7 @@ Collate:
     'expressions.R'
     'fill-variable.R'
     'filters.R'
+    'findVariables.R'
     'folders.R'
     'fork-and-merge.R'
     'formula.R'
diff --git a/NAMESPACE b/NAMESPACE
index 637ae241f..ad7662db8 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -191,6 +191,7 @@ export(exportDataset)
 export(exportDeck)
 export(extendDataset)
 export(filter)
+export(findVariables)
 export(flattenOrder)
 export(flipArrays)
 export(folder)
diff --git a/NEWS.md b/NEWS.md
index ed556b167..54e69ef87 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,7 @@
 # crunch 1.30.4 (Development Version)
 
+* New function `findVariables` accepts a Crunch dataset or variable folder and returns a data.frame whose rows correspond to variables and their location (#641).
+
 # crunch 1.30.3
 
 * Fix typo which relied on partial argument matching when using the variable catalog cache (#625, thanks @rossellhayes)
diff --git a/R/findVariables.R b/R/findVariables.R
new file mode 100644
index 000000000..d5736193c
--- /dev/null
+++ b/R/findVariables.R
@@ -0,0 +1,87 @@
+#' Find variables and their paths in a Crunch dataset or folder
+#'
+#' Returns a data.frame whose rows correspond to Crunch variables found in \code{x}.
+#' By default, only top-level, non-hidden, non-private variables in \code{x} are returned.
+#'
+#' @param x Crunch dataset or variable folder
+#' @param deep Defaults to \code{FALSE}, \code{TRUE} recursively examines any subfolders as well
+#' @param include.hidden Defaults to \code{FALSE}, \code{TRUE} finds any hidden variables as well
+#' @param include.private Defaults to \code{FALSE}, \code{TRUE} finds any private variables as well
+#'
+#' @return Data.frame with one row per Crunch variable and columns \code{alias} (Crunch variable alias),
+#' \code{path} (location of the variable, with " | " indicating nesting,
+#' e.g. "Foo | Bar" indicates that the variable can be found in the folder "Bar" and that "Bar" is located in folder "Foo"),
+#' \code{hidden} (\code{TRUE} or \code{FALSE}), \code{private} (\code{TRUE} or \code{FALSE})
+#' @export
+findVariables <- function(x, deep = FALSE, include.hidden = FALSE, include.private = FALSE) {
+    if (is.dataset(x)) {
+        # Search the folder that cd() returns for the dataset; paths start unprefixed
+        x <- cd(x, ".")
+        startpath <- ""
+    } else if (is.folder(x)) {
+        startpath <- name(x)
+    } else {
+        halt('`x` should be "CrunchDataset" or "VariableFolder", not "', paste(class(x), collapse = ", "), '"')
+    }
+    if (!isTRUE(deep) && !isFALSE(deep)) {
+        halt("`deep` should be TRUE or FALSE")
+    }
+    if (!isTRUE(include.hidden) && !isFALSE(include.hidden)) {
+        halt("`include.hidden` should be TRUE or FALSE")
+    }
+    if (!isTRUE(include.private) && !isFALSE(include.private)) {
+        halt("`include.private` should be TRUE or FALSE")
+    }
+    # `deep` is forwarded to every lookup so that `include.hidden` and
+    # `include.private` are honored whether or not subfolders are searched
+    res <- .flagVariables(x, startpath, deep)
+    if (include.hidden) {
+        res <- rbind(res, .flagVariables(hiddenFolder(x), startpath, deep, hidden = TRUE))
+    }
+    if (include.private) {
+        res <- rbind(res, .flagVariables(privateFolder(x), startpath, deep, private = TRUE))
+    }
+    res
+}
+
+# Gather the variables found in folder `x` into a single data.frame and label
+# every row with the given `hidden` and `private` flags
+.flagVariables <- function(x, path, deep, hidden = FALSE, private = FALSE) {
+    res <- do.call(rbind, .findVariables(x, path, deep = deep))
+    res$hidden <- rep(hidden, nrow(res))
+    res$private <- rep(private, nrow(res))
+    res
+}
+
+# Build a list of data.frames (columns `alias` and `path`), one for folder `x`
+# and, when `deep` is TRUE, one for each folder nested below it. `path` is the
+# label recorded for variables directly in `x`; subfolder names are appended
+# to it with " | "
+.findVariables <- function(x, path, deep = TRUE) {
+    vars <- variables(x)
+    # stringsAsFactors = FALSE keeps `alias` and `path` as character on R < 4.0
+    res <- list(data.frame(
+        alias = aliases(vars),
+        path = rep(path, length(vars)),
+        stringsAsFactors = FALSE
+    ))
+    if (!deep) {
+        return(res)
+    }
+    dirs <- x[types(x) %in% "folder"]
+    if (length(dirs) == 0) {
+        return(res)
+    }
+    dirnames <- names(dirs)
+    res2 <- lapply(seq_along(dirnames), function(i) {
+        # Avoid a leading separator when the starting path is empty
+        if (identical(path, "")) {
+            new_path <- dirnames[i]
+        } else {
+            new_path <- paste(path, dirnames[i], sep = " | ")
+        }
+        .findVariables(dirs[[i]], new_path)
+    })
+    # Each recursive call returns a list of data.frames; flatten one level
+    c(res, unlist(res2, recursive = FALSE))
+}
diff --git a/R/folders.R b/R/folders.R
index ec0ace394..aaddbee7e 100644
--- a/R/folders.R
+++ b/R/folders.R
@@ -389,6 +389,7 @@ copyFolders <- function(source, target) {
 # Recursively get all variables below a folder
 # TODO: Use trampoline? https://community.rstudio.com/t/tidiest-way-to-do-recursion-safely-in-r/1408
 # My initial tests say it's slower, but is safer if we ever expect a large number of folders
+# NOTE: see ?findVariables for a similar exported function
 variablesBelowFolder <- function(folder) {
     vars <- variables(folder)
     dirs <- folder[types(folder) %in% "folder"]
diff --git a/man/findVariables.Rd b/man/findVariables.Rd
new file mode 100644
index 000000000..63fce2fd1
--- /dev/null
+++ b/man/findVariables.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/findVariables.R
+\name{findVariables}
+\alias{findVariables}
+\title{Find variables and their paths in a Crunch dataset or folder}
+\usage{
+findVariables(x, deep = FALSE, include.hidden = FALSE, include.private = FALSE)
+}
+\arguments{
+\item{x}{Crunch dataset or variable folder}
+
+\item{deep}{Defaults to \code{FALSE}, \code{TRUE} recursively examines any subfolders as well}
+
+\item{include.hidden}{Defaults to \code{FALSE}, \code{TRUE} finds any hidden variables as well}
+
+\item{include.private}{Defaults to \code{FALSE}, \code{TRUE} finds any private variables as well}
+}
+\value{
+Data.frame with one row per Crunch variable and columns \code{alias} (Crunch variable alias),
+\code{path} (location of the variable, with " | " indicating nesting,
+e.g. "Foo | Bar" indicates that the variable can be found in the folder "Bar" and that "Bar" is located in folder "Foo"),
+\code{hidden} (\code{TRUE} or \code{FALSE}), \code{private} (\code{TRUE} or \code{FALSE})
+}
+\description{
+Returns a data.frame whose rows correspond to Crunch variables found in \code{x}.
+By default, only top-level, non-hidden, non-private variables in \code{x} are returned.
+}
diff --git a/man/hide.Rd b/man/hide.Rd
index 49661ba08..6cb2209d0 100644
--- a/man/hide.Rd
+++ b/man/hide.Rd
@@ -142,9 +142,9 @@ back in the main variable catalog).
 \item \code{hide()} / \code{privatize()} - take a \code{CrunchVariable} or \code{VariableCatalog} and
 make them hidden/private. (\code{unhide()} / \code{deprivatize()} put them
 back in the main variable catalog).
-\item \code{hiddenFolder()} / \code{privateFolder()} / \code{publicFolder()} - take a dataset and return a folder that
-contains the public/hidden/private variables. This folder is like other \code{CrunchFolder}s and
-so you can use \code{\link[=mkdir]{mkdir()}} to create subfolders and \code{\link[=mv]{mv()}} to move them in/out.
+\item \code{hiddenFolder()} / \code{privateFolder()} / \code{publicFolder()} - take a dataset and return a folder
+that contains the public/hidden/private variables. This folder is like other \code{CrunchFolder}s
+and so you can use \code{\link[=mkdir]{mkdir()}} to create subfolders and \code{\link[=mv]{mv()}} to move them in/out.
 \item \code{hiddenVariables()} / \code{privateVariabiles()} - return a character vector of variables
 that are hidden/private. You can assign into the catalog to add variables or
 assign to \code{NULL} to remove all of them.