Skip to content

Commit

Permalink
Merge branch 'default-new-ds-project-188559300' into rel-1736548784-1…
Browse files Browse the repository at this point in the history
…736548884
  • Loading branch information
crunchbot committed Jan 10, 2025
2 parents 1f59d1d + 30c98e1 commit 6be88f7
Show file tree
Hide file tree
Showing 342 changed files with 10,773 additions and 10,167 deletions.
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,6 @@ importFrom(crayon,italic)
importFrom(crayon,make_style)
importFrom(crayon,red)
importFrom(crayon,underline)
importFrom(curl,curl_escape)
importFrom(curl,curl_version)
importFrom(grDevices,col2rgb)
importFrom(grDevices,colors)
Expand Down
21 changes: 18 additions & 3 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,22 @@
# crunch 1.31.0 (Development Version)
* Variables are now created as materialized by default instead of derived. If you prefer the
old behavior set environment variable `R_CRUNCH_DEFAULT_DERIVED` or option
`crunch.default.derived` to TRUE. See `?toVariable` for more information (#648).
* Variables can now be created as materialized by default instead of derived,
by setting environment variable `R_CRUNCH_DEFAULT_DERIVED` or option
`crunch.default.derived` to `FALSE`. See `?toVariable` for more information (#648).

* The concept of a personal folder is being removed from the API imminently. This has
a few implications for rcrunch:

* All datasets must be created with a project (e.g. via the `project` argument of `newDataset()`)

* Dataset forks will be created in the same folder as their parent

* Because loading datasets by name doesn't work for datasets in projects, it's not really
possible to load a dataset by name without specifying the full project path.

* To make things easier, it is possible to set a default project path with environment
variable `R_CRUNCH_DEFAULT_PROJECT` or option `crunch.default.project`. This will be used
as the default project folder when creating and loading datasets. Forks will still be put
next to parents.

# crunch 1.30.3
* Fix typo which relied on partial argument matching when using the variable catalog cache
Expand Down
4 changes: 2 additions & 2 deletions R/R-to-variable.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ setGeneric("toVariable", function(x, ...) standardGeneric("toVariable"))
#' materialized on creation (saved as data, which can have performance benefits
#' in certain situations) and when `TRUE` indicates it should remain derived
#' (saved as an expression that can update along with the underlying data)
#' Defaults to `FALSE` unless `envOrOption('crunch.default.derived')` has been set.
#' Defaults to `TRUE` unless `envOrOption('crunch.default.derived')` has been set.
setMethod("toVariable", "CrunchVarOrExpr", function(
x,
...,
Expand Down Expand Up @@ -233,5 +233,5 @@ categoriesFromLevels <- function(level_vect) {
}

derivedVariableDefault <- function() {
envOrOption("crunch.default.derived", FALSE, expect_lgl = TRUE)
envOrOption("crunch.default.derived", TRUE, expect_lgl = TRUE)
}
4 changes: 2 additions & 2 deletions R/append-dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
#' variables, appended to it.
#' @examples
#' \dontrun{
#' ds <- loadDataset("Survey, 2016")
#' new_wave <- loadDataset("Survey, 2017")
#' ds <- loadDataset("Survey, 2016", project = "client 1")
#' new_wave <- loadDataset("Survey, 2017", project = "client 1")
#' ds <- appendDataset(ds, new_wave)
#' }
#' @export
Expand Down
2 changes: 1 addition & 1 deletion R/archive-and-publish.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#' is.published<- publish
#' @examples
#' \dontrun{
#' ds <- loadDataset("mtcars")
#' ds <- loadDataset("mtcars", project = "current")
#' is.draft(ds) # FALSE
#' is.published(ds) # TRUE
#' identical(is.draft(ds), !is.published(ds))
Expand Down
2 changes: 1 addition & 1 deletion R/as-data-frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ textCsvParser <- function(col, ...) col
#' provided to the function, and each row represents an entity.
#' @examples
#' \dontrun{
#' ds <- loadDataset("iris")
#' ds <- loadDataset("iris", project = "ACME")
#' vars <- variables(ds)
#' var_df <- as.data.frame(vars, keys = TRUE)
#' # With row names
Expand Down
3 changes: 3 additions & 0 deletions R/auth.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,13 @@ setupCrunchAuth <- function(id) {
if (is.null(key)) {
halt("Could not find key in `envOrOption('", paste0("crunch.api.key.", id), "')`")
}
# Allowed to be NULL
default_project <- envOrOption(paste0("crunch.default.project.", id))

set_crunch_opts(
crunch.api = api,
crunch.api.key = key,
crunch.default.project = default_project,
.source = paste0("setupCrunchAuth('", id, "')")
)
}
Expand Down
4 changes: 2 additions & 2 deletions R/cut.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
#' it as a derived variable on the server.
#' @examples
#' \dontrun{
#' ds <- loadDataset("mtcars")
#' ds <- loadDataset("mtcars", project = "p1")
#' ds$cat_var <- cut(ds$mpg,
#' breaks = c(10, 15, 20),
#' labels = c("small", "medium"), name = "Fuel efficiency"
Expand Down Expand Up @@ -198,7 +198,7 @@ generateNumCutLabels <- function(dig.lab, breaks, nb, right, include.lowest) {
#' it as a derived variable on the server.
#' @examples
#' \dontrun{
#' ds <- loadDataset("example")
#' ds <- loadDataset("example", project = "client 1")
#' ds$month_cat <- cut(ds$date, breaks = "month", name = "monthly")
#' ds$four_weeks_cat <- cut(ds$date, breaks = "4 weeks", name = "four week categorical date")
#'
Expand Down
9 changes: 9 additions & 0 deletions R/dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -560,3 +560,12 @@ setDashboardURL <- function(x, value) {
#' @rdname dashboard
#' @export
"dashboard<-" <- setDashboardURL


# `rootFolder()` is ambiguous for a dataset (variable folders vs. project
# folders), so this method always signals an error that points the caller at
# the two explicit alternatives.
setMethod("rootFolder", "CrunchDataset", function(x) {
    # The fragments are joined without a separator, so each one that is
    # followed by more text must end with its own trailing space.
    halt(
        "Can't find root folder of a dataset. ",
        "To find the root variable folder use ",
        "`rootFolder(allVariables(ds))` or to find the root project folder use ",
        "`rootFolder(folder(ds))`"
    )
})
7 changes: 5 additions & 2 deletions R/delete.R
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,14 @@ setMethod("delete", "ANY", function(x, ...) {
#' `CrunchDataset`. Unless `x` is a parsed folder path, it can only be of
#' length 1--for your protection, this function is not vectorized.
#' @param ... additional parameters passed to [delete()]
#' @param project `ProjectFolder` entity, character name (path) to a project.
#' Defaults to the project set in `envOrOption('crunch.default.project')`
#' or "./" (the project root), if the default is not set.
#' @return (Invisibly) the API response from deleting the dataset
#' @seealso [delete()]; [cd()] for details of parsing and walking dataset
#' folder/project paths.
#' @export
deleteDataset <- function(x, ...) {
deleteDataset <- function(x, ..., project = defaultCrunchProject("./")) {
if (is.dataset(x)) {
return(delete(x, ...))
}
Expand All @@ -178,7 +181,7 @@ deleteDataset <- function(x, ...) {
}
} else {
# Assume it is a path or name
found <- lookupDataset(x)
found <- lookupDataset(x, project = project)
if (length(found) != 1) {
halt(
dQuote(x), " identifies ", length(found),
Expand Down
10 changes: 6 additions & 4 deletions R/folders.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#' `mkdir()` does not do
#' @examples
#' \dontrun{
#' ds <- loadDataset("Example survey")
#' ds <- loadDataset("Example survey", project = "Studies")
#' ds <- mv(ds, c("gender", "age", "educ"), "Demographics")
#' ds <- mkdir(ds, "Key Performance Indicators/Brand X")
#' # These can also be chained together
Expand Down Expand Up @@ -129,7 +129,7 @@ setName <- function(object, nm) {
#' directory in your local file system, which `cd()` does not do
#' @examples
#' \dontrun{
#' ds <- loadDataset("Example survey")
#' ds <- loadDataset("Example survey", project = "Studies")
#' demo <- cd(ds, "Demographics")
#' names(demo)
#' # Or with %>%
Expand Down Expand Up @@ -185,7 +185,7 @@ cd <- function(x, path, create = FALSE) {
#' from your local file system, which `rmdir()` does not do
#' @examples
#' \dontrun{
#' ds <- loadDataset("Example survey")
#' ds <- loadDataset("Example survey", project = "Studies")
#' rmdir(ds, "Demographics")
#' # Or with %>%
#' require(magrittr)
Expand Down Expand Up @@ -216,7 +216,7 @@ rmdir <- function(x, path) {
#' @export
#' @examples
#' \dontrun{
#' ds <- loadDataset("Example survey")
#' ds <- loadDataset("Example survey", project = "Studies")
#' folder(ds$income) <- "Demographics/Economic"
#' folder(ds$income)
#' ## [1] "Demographics" "Economic"
Expand All @@ -228,6 +228,8 @@ folder <- function(x) {
cls <- class(x)
} else if (is.variable(x)) {
cls <- "VariableFolder"
} else if (is.dataset(x)) {
cls <- "ProjectFolder"
} else {
halt("No folder for object of class ", class(x))
}
Expand Down
9 changes: 6 additions & 3 deletions R/fork-and-merge.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,16 @@ forks <- function(dataset) {
#' @param draft logical: Should the dataset be a draft, visible only to
#' those with edit permissions? Default is `FALSE`.
#' @param ... Additional dataset metadata to provide to the fork
#' @param project A `ProjectFolder` object, string path that could be passed to [`cd()`]
#' relative to the root project, or a URL for a `ProjectFolder`. Defaults to the same
#' folder as the existing dataset.
#' @return The new fork, a `CrunchDataset`.
#' @seealso [mergeFork()]
#' @export
forkDataset <- function(dataset, name = defaultForkName(dataset), draft = FALSE, ...) {
forkDataset <- function(dataset, name = defaultForkName(dataset), draft = FALSE, ..., project = folder(dataset)) {
## TODO: add owner field, default to self(me())
fork_url <- crPOST(shojiURL(dataset, "catalogs", "forks"),
body = toJSON(wrapEntity(name = name, is_published = !draft, ...))
body = toJSON(wrapEntity(name = name, is_published = !draft, ..., project = resolveProjectURL(project)))
)
dropOnly(sessionURL("datasets"))
invisible(loadDatasetFromURL(fork_url))
Expand Down Expand Up @@ -73,7 +76,7 @@ defaultForkName <- function(dataset) {
#' @seealso [forkDataset()]
#' @examples
#' \dontrun{
#' ds <- loadDataset("My survey")
#' ds <- loadDataset("My survey", project = "Studies")
#' fork <- forkDataset(ds)
#' # Do stuff to fork
#' ds <- mergeFork(ds, fork)
Expand Down
69 changes: 41 additions & 28 deletions R/get-datasets.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#' names()
#' # The assignment method lets you move a dataset to a project
#' proj <- cd(projects(), "Important Clients")
#' ds <- loadDataset("New important client survey")
#' ds <- loadDataset("New important client survey", project = "Studies")
#' datasets(proj) <- ds
#' }
datasets <- function(x = getAPIRoot()) {
Expand Down Expand Up @@ -116,42 +116,40 @@ listDatasets <- function(kind = c("active", "all", "archived"),
#' and analysis as if the dataset were fully resident on your computer, without
#' having to pull data locally.
#'
#' You can specify a dataset to load by its human-friendly "name", possibly also
#' by indicating a project (folder) to find it in. This makes code more
#' You can specify a dataset to load by its human-friendly "name", within
#' the project (folder) to find it in. This makes code more
#' readable, but it does mean that if the dataset is renamed or moved to a
#' different folder, your code may no longer work. The fastest, most reliable
#' way to use `loadDataset()` is to provide a URL to the dataset--the dataset's
#' URL will never change.
#'
#' @param dataset character, the name or path to a Crunch dataset to load, or a
#' @param dataset character, the path to a Crunch dataset to load, or a
#' dataset URL. If `dataset` is a path to a dataset in a project, the path will
#' be be parsed and walked, relative to `project` if specified, and the
#' function will look for the dataset inside that project. If no path is
#' specified and no `project` provided, the function will call a search API to
#' do an exact string match on dataset names.
#' be parsed and walked, relative to `project`, and the function will look
#' for the dataset inside that project. If `dataset` is just a string and `project`
#' is set to `NULL`, the function will assume that `dataset` is the dataset id.
#' @param kind character specifying whether to look in active, archived, or all
#' datasets. Default is "active", i.e. non-archived.
#' @param project `ProjectFolder` entity, character name (path) to a project, or
#' `NULL`, the default. If a Project entity or reference is supplied, either
#' here or as a path in `dataset`, the dataset lookup will be limited to that
#' project only.
#' @param project `ProjectFolder` entity, character name (path) to a project.
#' Defaults to the project set in `envOrOption('crunch.default.project')`
#' or "./" (the project root), if the default is not set.
#' @param refresh logical: should the function check the Crunch API for new
#' datasets? Default is `FALSE`.
#' @return An object of class `CrunchDataset`.
#'
#' @examples
#' \dontrun{
#' ds <- loadDatasets("A special dataset")
#' ds2 <- loadDatasets("~/My dataset")
#' ds3 <- loadDataset("My dataset", project = "~") # Same as ds2
#' ds <- loadDataset("A special dataset", project = "Studies")
#' ds2 <- loadDataset("~/My dataset", project = "Studies")
#' ds3 <- loadDataset("My dataset", project = projects()[["Studies"]]) # Same as ds2
#' ds4 <- loadDataset("https://app.crunch.io/api/datasets/bd3ad2/")
#' }
#' @export
#' @seealso See [cd()] for details of parsing and walking dataset folder/project
#' paths.
loadDataset <- function(dataset,
kind = c("active", "all", "archived"),
project = NULL,
project = defaultCrunchProject("."),
refresh = FALSE) {
if (inherits(dataset, "DatasetTuple")) {
return(entity(dataset))
Expand All @@ -174,6 +172,12 @@ loadDataset <- function(dataset,
archived = archived(found)
)
if (length(found) == 0) {
if (missing(project)) {
warn_once(
"Finding datasets by name without specifying a path is no longer supported.",
option = "find.dataset.no.project"
)
}
halt(dQuote(dataset), " not found")
}
## This odd selecting behavior handles the multiple matches case
Expand Down Expand Up @@ -239,27 +243,36 @@ lookupDataset <- function(x, project = NULL) {
# `project`
dspath <- parseFolderPath(x)
x <- tail(dspath, 1)

if (length(dspath) == 1 && is.null(project)) {
# If don't have a project, query by name
return(findDatasetsByName(x))
# This code path used to use the datasets by_name endpoint. However
# As of 2024-11, that endpoint is no longer very useful because it only
# surfaces datasets that are in personal folders (going away very soon) &
# direct dataset shares (deprecated).
# So we use this to load by dataset id, a nice convenience feature.
# To get here, a user had to explicitly set `project=NULL` so they're
# presumably not here accidentally
pseudo_shoji <- tryCatch({
ds_base_url <- absoluteURL("datasets/", envOrOption("crunch.api"))
ds_entity <- crGET(paste0(ds_base_url, "/", x))
# Need to make this pseudo DatasetCatalog to match old API call (because `loadDataset`
# will later pass it through `active()`/`archived()`)
structure(list(
self = ds_base_url,
index = setNames(list(ds_entity$body), ds_entity$self)
), class = "shoji")
}, error = function(...) NULL) # But if we don't find it just return empty catalog

return(DatasetCatalog(pseudo_shoji))
}

# Resolve `project`
if (is.null(project)) {
project <- projects()
} else if (!is.project(project)) {
## Project name, URL, or index
project <- projects()[[project]]
}
if (is.null(project)) {
## Means a project was specified (like by name) but it didn't exist
halt(
"Project ", deparseAndFlatten(eval.parent(Call$project)),
" is not valid"
)
project <- ProjectFolder(crGET(resolveProjectURL(project)))
}

# If there is a path in `x`, walk it within `project`
if (length(dspath) > 1) {
project <- cd(project, dspath[-length(dspath)])
}
Expand Down
5 changes: 3 additions & 2 deletions R/misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,8 @@ vectorOrList <- function(obj, type) {
#' | crunch.warn.private | R_CRUNCH_WARN_PRIVATE | TRUE | Whether to warn when using a private variable |
#' | crunch.names.includes.hidden.private.variables | R_NAMES_INCLUDES_HIDDEN_PRIVATE_VARIABLES| TRUE | Whether to include hidden/private variables from names(ds) |
#' | crunch.order.var.catalog | R_CRUNCH_ORDER_VAR_CATALOG | TRUE | Whether to set the variable catalog in the order of the hierarchical order |
#' | crunch.default.derived | R_CRUNCH_DEFAULT_DERIVED. | FALSE | Whether to create variables from expressions that are derived (TRUE) or materialized (FALSE) |
#' | crunch.default.derived | R_CRUNCH_DEFAULT_DERIVED | TRUE | Whether to create variables from expressions that are derived (TRUE) or materialized (FALSE) |
#' | crunch.default.project | R_CRUNCH_DEFAULT_PROJECT | - | Path to a project folder to put new datasets in by default. |
#' | crunch.delimiter | R_CRUNCH_DELIMITER | "/" | What to use as a delimiter when printing folder paths |
#' | crunch.check.updates | R_CRUNCH_CHECK_UPDATES | TRUE | Whether to check for updates to the crunch package |
#' | crunch.debug | R_CRUNCH_DEBUG | FALSE | Whether to print verbose information for debugging |
Expand Down Expand Up @@ -292,7 +293,7 @@ get_crunch_opt <- function(opt) {
}

set_crunch_opt <- function(opt, value, source = NULL) {
if (!is.null(source)) value <- structure(value, source = source)
if (!is.null(source) && !is.null(value)) value <- structure(value, source = source)
CRUNCH_OPTIONS[[opt]] <- value
}

Expand Down
Loading

0 comments on commit 6be88f7

Please sign in to comment.