Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[187192596] Find variables and their paths in a Crunch dataset or variable folder #641

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ Collate:
'expressions.R'
'fill-variable.R'
'filters.R'
'findVariables.R'
'folders.R'
'fork-and-merge.R'
'formula.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ export(exportDataset)
export(exportDeck)
export(extendDataset)
export(filter)
export(findVariables)
export(flattenOrder)
export(flipArrays)
export(folder)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# crunch 1.30.4 (Development Version)

* New function `findVariables` accepts a Crunch dataset or variable folder and returns a data.frame whose rows correspond to variables and their location (#641).

# crunch 1.30.3
* Fix typo which relied on partial argument matching when using the variable catalog cache
(#625, thanks @rossellhayes)
Expand Down
78 changes: 78 additions & 0 deletions R/findVariables.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#' Find variables and their paths in a Crunch dataset or folder
#'

Check warning on line 2 in R/findVariables.R

View workflow job for this annotation

GitHub Actions / test-coverage

file=R/findVariables.R,line=2,col=3,[trailing_whitespace_linter] Trailing whitespace is superfluous.
#' Returns a data.frame whose rows correspond to Crunch variables found in \code{x}.
#' By default, only top-level, non-hidden, non-private variables in \code{x} are returned.
#'

Check warning on line 5 in R/findVariables.R

View workflow job for this annotation

GitHub Actions / test-coverage

file=R/findVariables.R,line=5,col=3,[trailing_whitespace_linter] Trailing whitespace is superfluous.
#' @param x Crunch dataset or variable folder
#' @param deep Defaults to \code{FALSE}, \code{TRUE} recursively examines any subfolders as well
#' @param include.hidden Defaults to \code{FALSE}, \code{TRUE} finds any hidden variables as well
#' @param include.private Defaults to \code{FALSE}, \code{TRUE} finds any private variables as well
#'
#' @return Data.frame with one row per Crunch variable and columns \code{alias}
#' (Crunch variable alias),

Check warning on line 11 in R/findVariables.R

View workflow job for this annotation

GitHub Actions / test-coverage

file=R/findVariables.R,line=11,col=101,[line_length_linter] Lines should not be more than 100 characters. This line is 104 characters.
#' \code{path} (location of the variable, with " | " indicating nesting,
#' e.g. "Foo | Bar" indicates that the variable can be found in the folder "Bar"
#' and that "Bar" is located in folder "Foo"),

Check warning on line 13 in R/findVariables.R

View workflow job for this annotation

GitHub Actions / test-coverage

file=R/findVariables.R,line=13,col=101,[line_length_linter] Lines should not be more than 100 characters. This line is 124 characters.
#' \code{hidden} (\code{TRUE} or \code{FALSE}), \code{private} (\code{TRUE} or \code{FALSE})
#' @export
findVariables <- function(x, deep = FALSE, include.hidden = FALSE, include.private = FALSE) {
    # Resolve the folder to search and the path label given to its own variables.
    if (is.dataset(x)) {
        # Start from the dataset's folder; its top-level variables get an empty path.
        x <- cd(x, ".")
        startpath <- ""
    } else if (is.folder(x)) {
        startpath <- name(x)
    } else {
        halt(
            '`x` should be "CrunchDataset" or "VariableFolder", not "',
            paste(class(x), collapse = ", "), '"'
        )
    }

    # Each switch must be exactly TRUE or FALSE (NA, NULL and vectors are rejected).
    flags <- list(
        deep = deep,
        include.hidden = include.hidden,
        include.private = include.private
    )
    for (flag in names(flags)) {
        if (!isTRUE(flags[[flag]]) && !isFALSE(flags[[flag]])) {
            halt("`", flag, "` should be TRUE or FALSE")
        }
    }

    # Collect the variables of `folder` (recursing into subfolders only when
    # `deep` is TRUE) and tag every resulting row with the given flags.
    collect <- function(folder, hidden, private) {
        if (deep) {
            found <- do.call(rbind, .findVariables(folder, startpath))
        } else {
            vars <- aliases(variables(folder))
            found <- data.frame(alias = vars, path = rep(startpath, length(vars)))
        }
        found$hidden <- rep(hidden, nrow(found))
        found$private <- rep(private, nrow(found))
        found
    }

    res <- collect(x, hidden = FALSE, private = FALSE)
    # The hidden/private switches are honored whether or not `deep` is set;
    # previously they were silently ignored when `deep = FALSE`.
    # NOTE(review): hidden/private rows reuse `startpath`, so their paths are
    # expressed relative to the hidden/private folder, not to `x` -- confirm
    # that this labelling is intended.
    if (include.hidden) {
        res <- rbind(res, collect(hiddenFolder(x), hidden = TRUE, private = FALSE))
    }
    if (include.private) {
        res <- rbind(res, collect(privateFolder(x), hidden = FALSE, private = TRUE))
    }
    res
}

# Recursive worker behind findVariables().
#
# Returns an unnamed list of data.frames (columns `alias` and `path`): the
# first holds the variables sitting directly in `x`, labelled with `path`;
# the rest come from walking each subfolder of `x`, whose label is the
# subfolder name appended to `path` with " | " (or the bare subfolder name
# when `path` is the empty string).
.findVariables <- function(x, path) {
    direct <- variables(x)
    found <- list(data.frame(alias = aliases(direct), path = rep(path, length(direct))))

    subfolders <- x[types(x) %in% "folder"]
    if (length(subfolders) > 0) {
        subnames <- names(subfolders)
        at_root <- identical(path, "")
        nested <- lapply(seq_along(subnames), function(k) {
            child_path <- if (at_root) {
                subnames[k]
            } else {
                paste(path, subnames[k], sep = " | ")
            }
            .findVariables(subfolders[[k]], child_path)
        })
        # Flatten exactly one level: list of lists of data.frames -> list of data.frames
        found <- c(found, unlist(nested, recursive = FALSE))
    }
    found
}
1 change: 1 addition & 0 deletions R/folders.R
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ copyFolders <- function(source, target) {
# Recursively get all variables below a folder
# TODO: Use trampoline? https://community.rstudio.com/t/tidiest-way-to-do-recursion-safely-in-r/1408
# My initial tests say it's slower, but is safer if we ever expect a large number of folders
# NOTE: see ?findVariables for a similar exported function
variablesBelowFolder <- function(folder) {
vars <- variables(folder)
dirs <- folder[types(folder) %in% "folder"]
Expand Down
27 changes: 27 additions & 0 deletions man/findVariables.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/hide.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading