Merge branch 'default-new-ds-project-188559300' into rel-1736548784-1…

…736548884
Crunch-io · Jan 10, 2025 · 6be88f7 · 6be88f7
2 parents 1f59d1d + 30c98e1
commit 6be88f7
Show file tree

Hide file tree

Showing 342 changed files with 10,773 additions and 10,167 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -577,7 +577,6 @@ importFrom(crayon,italic)
 importFrom(crayon,make_style)
 importFrom(crayon,red)
 importFrom(crayon,underline)
-importFrom(curl,curl_escape)
 importFrom(curl,curl_version)
 importFrom(grDevices,col2rgb)
 importFrom(grDevices,colors)

diff --git a/NEWS.md b/NEWS.md
@@ -1,7 +1,22 @@
 # crunch 1.31.0 (Development Version)
-* Variables are now created as materialized by default instead of derived. If you prefer the 
-  old behavior set environment variable `R_CRUNCH_DEFAULT_DERIVED` or option 
-  `crunch.default.derived` to TRUE. See `?toVariable` for more information (#648).
+* Variables can now be created as materialized by default instead of derived, 
+  by setting environment variable `R_CRUNCH_DEFAULT_DERIVED` or option 
+  `crunch.default.derived` to `FALSE`. See `?toVariable` for more information (#648).
+
+* The concept of a personal folder is being removed from the API imminently. This has
+  a few implications for rcrunch:
+
+  * All datasets must be created with a project (e.g. via the `project` argument of `newDataset()`).
+
+  * Dataset forks will be created in the same folder as their parent
+
+  * Because loading datasets by name doesn't work for datasets in projects, it's not really
+  possible to load a dataset by name without specifying the full project path.
+
+  * To make things easier, it is possible to set a default project path with environment
+  variable `R_CRUNCH_DEFAULT_PROJECT` or option `crunch.default.project`. This will be used
+  as the default project folder when creating and loading datasets. Forks will still be put
+  next to parents.
 
 # crunch 1.30.3
 * Fix typo which relied on partial argument matching when using the variable catalog cache

diff --git a/R/R-to-variable.R b/R/R-to-variable.R
@@ -46,7 +46,7 @@ setGeneric("toVariable", function(x, ...) standardGeneric("toVariable"))
 #' materialized on creation (saved as data, which can have performance benefits
 #' in certain situation) and when `TRUE` indicates it should remain derived
 #' (saved as an expression that can update along with the underlying data)
-#' Defaults to `FALSE` unless `envOrOption('crunch.default.derived')` has been set.
+#' Defaults to `TRUE` unless `envOrOption('crunch.default.derived')` has been set.
 setMethod("toVariable", "CrunchVarOrExpr", function(
         x,
         ...,
@@ -233,5 +233,5 @@ categoriesFromLevels <- function(level_vect) {
 }
 
 derivedVariableDefault <- function() {
-    envOrOption("crunch.default.derived", FALSE, expect_lgl = TRUE)
+    envOrOption("crunch.default.derived", TRUE, expect_lgl = TRUE)
 }
diff --git a/R/append-dataset.R b/R/append-dataset.R
@@ -36,8 +36,8 @@
 #' variables, appended to it.
 #' @examples
 #' \dontrun{
-#' ds <- loadDataset("Survey, 2016")
-#' new_wave <- loadDataset("Survey, 2017")
+#' ds <- loadDataset("Survey, 2016", project = "client 1")
+#' new_wave <- loadDataset("Survey, 2017", project = "client 1")
 #' ds <- appendDataset(ds, new_wave)
 #' }
 #' @export

diff --git a/R/archive-and-publish.R b/R/archive-and-publish.R
@@ -17,7 +17,7 @@
 #' is.published<- publish
 #' @examples
 #' \dontrun{
-#' ds <- loadDataset("mtcars")
+#' ds <- loadDataset("mtcars", project = "current")
 #' is.draft(ds) # FALSE
 #' is.published(ds) # TRUE
 #' identical(is.draft(ds), !is.published(ds))

diff --git a/R/as-data-frame.R b/R/as-data-frame.R
@@ -261,7 +261,7 @@ textCsvParser <- function(col, ...) col
 #' provided to the function, and each row represents a entity.
 #' @examples
 #' \dontrun{
-#' ds <- loadDataset("iris")
+#' ds <- loadDataset("iris", project = "ACME")
 #' vars <- variables(ds)
 #' var_df <- as.data.frame(vars, keys = TRUE)
 #' # With row names

diff --git a/R/auth.R b/R/auth.R
@@ -133,10 +133,13 @@ setupCrunchAuth <- function(id) {
     if (is.null(key)) {
         halt("Could not find key in `envOrOption('", paste0("crunch.api.key.", id), "')`")
     }
+    # Allowed to be NULL
+    default_project <- envOrOption(paste0("crunch.default.project.", id))
 
     set_crunch_opts(
         crunch.api = api,
         crunch.api.key = key,
+        crunch.default.project = default_project,
         .source = paste0("setupCrunchAuth('", id, "')")
     )
 }

diff --git a/R/cut.R b/R/cut.R
@@ -37,7 +37,7 @@
 #' it as a derived variable on the server.
 #' @examples
 #' \dontrun{
-#' ds <- loadDataset("mtcars")
+#' ds <- loadDataset("mtcars", project = "p1")
 #' ds$cat_var <- cut(ds$mpg,
 #'     breaks = c(10, 15, 20),
 #'     labels = c("small", "medium"), name = "Fuel efficiency"
@@ -198,7 +198,7 @@ generateNumCutLabels <- function(dig.lab, breaks, nb, right, include.lowest) {
 #' it as a derived variable on the server.
 #' @examples
 #' \dontrun{
-#' ds <- loadDataset("example")
+#' ds <- loadDataset("example", project = "client 1")
 #' ds$month_cat <- cut(ds$date, breaks = "month", name = "monthly")
 #' ds$four_weeks_cat <- cut(ds$date, breaks = "4 weeks", name = "four week categorical date")
 #'

diff --git a/R/dataset.R b/R/dataset.R
@@ -560,3 +560,12 @@ setDashboardURL <- function(x, value) {
 #' @rdname dashboard
 #' @export
 "dashboard<-" <- setDashboardURL
+
+# A dataset sits in two folder trees (variables and projects), so refuse and point to both options.
+setMethod("rootFolder", "CrunchDataset", function(x) {
+    halt(
+        "Can't find root folder of a dataset. To find the root variable folder use ",
+        "`rootFolder(allVariables(ds))` or to find the root project folder use ",
+        "`rootFolder(folder(ds))`"
+    )
+})
diff --git a/R/delete.R b/R/delete.R
@@ -161,11 +161,14 @@ setMethod("delete", "ANY", function(x, ...) {
 #' `CrunchDataset`. Unless `x` is a parsed folder path, it can only be of
 #' length 1--for your protection, this function is not vectorized.
 #' @param ... additional parameters passed to [delete()]
+#' @param project A `ProjectFolder` entity or a character name (path) of a project.
+#' Defaults to the project set in `envOrOption('crunch.default.project')`
+#' or "./" (the project root), if the default is not set.
 #' @return (Invisibly) the API response from deleting the dataset
 #' @seealso [delete()]; [cd()] for details of parsing and walking dataset
 #' folder/project paths.
 #' @export
-deleteDataset <- function(x, ...) {
+deleteDataset <- function(x, ..., project = defaultCrunchProject("./")) {
     if (is.dataset(x)) {
         return(delete(x, ...))
     }
@@ -178,7 +181,7 @@ deleteDataset <- function(x, ...) {
             }
         } else {
             # Assume it is a path or name
-            found <- lookupDataset(x)
+            found <- lookupDataset(x, project = project)
             if (length(found) != 1) {
                 halt(
                     dQuote(x), " identifies ", length(found),

diff --git a/R/folders.R b/R/folders.R
@@ -32,7 +32,7 @@
 #' `mkdir()` does not do
 #' @examples
 #' \dontrun{
-#' ds <- loadDataset("Example survey")
+#' ds <- loadDataset("Example survey", project = "Studies")
 #' ds <- mv(ds, c("gender", "age", "educ"), "Demographics")
 #' ds <- mkdir(ds, "Key Performance Indicators/Brand X")
 #' # These can also be chained together
@@ -129,7 +129,7 @@ setName <- function(object, nm) {
 #' directory in your local file system, which `cd()` does not do
 #' @examples
 #' \dontrun{
-#' ds <- loadDataset("Example survey")
+#' ds <- loadDataset("Example survey", project = "Studies")
 #' demo <- cd(ds, "Demographics")
 #' names(demo)
 #' # Or with %>%
@@ -185,7 +185,7 @@ cd <- function(x, path, create = FALSE) {
 #' from your local file system, which `rmdir()` does not do
 #' @examples
 #' \dontrun{
-#' ds <- loadDataset("Example survey")
+#' ds <- loadDataset("Example survey", project = "Studies")
 #' rmdir(ds, "Demographics")
 #' # Or with %>%
 #' require(magrittr)
@@ -216,7 +216,7 @@ rmdir <- function(x, path) {
 #' @export
 #' @examples
 #' \dontrun{
-#' ds <- loadDataset("Example survey")
+#' ds <- loadDataset("Example survey", project = "Studies")
 #' folder(ds$income) <- "Demographics/Economic"
 #' folder(ds$income)
 #' ## [1] "Demographics"    "Economic"
@@ -228,6 +228,8 @@ folder <- function(x) {
         cls <- class(x)
     } else if (is.variable(x)) {
         cls <- "VariableFolder"
+    } else if (is.dataset(x)) {
+        cls <- "ProjectFolder"
     } else {
         halt("No folder for object of class ", class(x))
     }

diff --git a/R/fork-and-merge.R b/R/fork-and-merge.R
@@ -24,13 +24,16 @@ forks <- function(dataset) {
 #' @param draft logical: Should the dataset be a draft, visible only to
 #' those with edit permissions? Default is `FALSE`.
 #' @param ... Additional dataset metadata to provide to the fork
+#' @param project A `ProjectFolder` object, string path that could be passed to [`cd()`]
+#' relative to the root project, or a URL for a `ProjectFolder`. Defaults to the same
+#' folder as the existing dataset.
 #' @return The new fork, a `CrunchDataset`.
 #' @seealso [mergeFork()]
 #' @export
-forkDataset <- function(dataset, name = defaultForkName(dataset), draft = FALSE, ...) {
+forkDataset <- function(dataset, name = defaultForkName(dataset), draft = FALSE, ..., project = folder(dataset)) {
     ## TODO: add owner field, default to self(me())
     fork_url <- crPOST(shojiURL(dataset, "catalogs", "forks"),
-        body = toJSON(wrapEntity(name = name, is_published = !draft, ...))
+        body = toJSON(wrapEntity(name = name, is_published = !draft, ..., project = resolveProjectURL(project)))
     )
     dropOnly(sessionURL("datasets"))
     invisible(loadDatasetFromURL(fork_url))
@@ -73,7 +76,7 @@ defaultForkName <- function(dataset) {
 #' @seealso [forkDataset()]
 #' @examples
 #' \dontrun{
-#' ds <- loadDataset("My survey")
+#' ds <- loadDataset("My survey", project = "Studies")
 #' fork <- forkDataset(ds)
 #' # Do stuff to fork
 #' ds <- mergeFork(ds, fork)

diff --git a/R/get-datasets.R b/R/get-datasets.R
@@ -25,7 +25,7 @@
 #'     names()
 #' # The assignment method lets you move a dataset to a project
 #' proj <- cd(projects(), "Important Clients")
-#' ds <- loadDataset("New important client survey")
+#' ds <- loadDataset("New important client survey", project = "Studies")
 #' datasets(proj) <- ds
 #' }
 datasets <- function(x = getAPIRoot()) {
@@ -116,42 +116,40 @@ listDatasets <- function(kind = c("active", "all", "archived"),
 #' and analysis as if the dataset were fully resident on your computer, without
 #' having to pull data locally.
 #'
-#' You can specify a dataset to load by its human-friendly "name", possibly also
-#' by indicating a project (folder) to find it in. This makes code more
+#' You can specify a dataset to load by its human-friendly "name", within
+#' the project (folder) to find it in. This makes code more
 #' readable, but it does mean that if the dataset is renamed or moved to a
 #' different folder, your code may no longer work. The fastest, most reliable
 #' way to use `loadDataset()` is to provide a URL to the dataset--the dataset's
 #' URL will never change.
 #'
-#' @param dataset character, the name or path to a Crunch dataset to load, or a
+#' @param dataset character, the path to a Crunch dataset to load, or a
 #' dataset URL. If `dataset` is a path to a dataset in a project, the path will
-#' be be parsed and walked, relative to `project` if specified, and the
-#' function will look for the dataset inside that project. If no path is
-#' specified and no `project` provided, the function will call a search API to
-#' do an exact string match on dataset names.
+#' be parsed and walked, relative to `project`, and the function will look
+#' for the dataset inside that project. If `dataset` is just a string and `project`
+#' is set to `NULL`, the function will assume that `dataset` is the dataset id.
 #' @param kind character specifying whether to look in active, archived, or all
 #' datasets. Default is "active", i.e. non-archived.
-#' @param project `ProjectFolder` entity, character name (path) to a project, or
-#' `NULL`, the default. If a Project entity or reference is supplied, either
-#' here or as a path in `dataset`, the dataset lookup will be limited to that
-#' project only.
+#' @param project A `ProjectFolder` entity or a character name (path) of a project.
+#' Defaults to the project set in `envOrOption('crunch.default.project')`
+#' or "./" (the project root), if the default is not set.
 #' @param refresh logical: should the function check the Crunch API for new
 #' datasets? Default is `FALSE`.
 #' @return An object of class `CrunchDataset`.
 #'
 #' @examples
 #' \dontrun{
-#' ds <- loadDatasets("A special dataset")
-#' ds2 <- loadDatasets("~/My dataset")
-#' ds3 <- loadDataset("My dataset", project = "~") # Same as ds2
+#' ds <- loadDataset("A special dataset", project = "Studies")
+#' ds2 <- loadDataset("~/My dataset", project = "Studies")
+#' ds3 <- loadDataset("My dataset", project = projects()[["Studies"]]) # Same as ds2
 #' ds4 <- loadDataset("https://app.crunch.io/api/datasets/bd3ad2/")
 #' }
 #' @export
 #' @seealso See [cd()] for details of parsing and walking dataset folder/project
 #' paths.
 loadDataset <- function(dataset,
                         kind = c("active", "all", "archived"),
-                        project = NULL,
+                        project = defaultCrunchProject("."),
                         refresh = FALSE) {
     if (inherits(dataset, "DatasetTuple")) {
         return(entity(dataset))
@@ -174,6 +172,12 @@ loadDataset <- function(dataset,
             archived = archived(found)
         )
         if (length(found) == 0) {
+            if (missing(project)) {
+                warn_once(
+                    "Finding datasets by name without specifying a path is no longer supported.",
+                    option = "find.dataset.no.project"
+                )
+            }
             halt(dQuote(dataset), " not found")
         }
         ## This odd selecting behavior handles the multiple matches case
@@ -239,27 +243,36 @@ lookupDataset <- function(x, project = NULL) {
     # `project`
     dspath <- parseFolderPath(x)
     x <- tail(dspath, 1)
+
     if (length(dspath) == 1 && is.null(project)) {
-        # If don't have a project, query by name
-        return(findDatasetsByName(x))
+        # This code path used to use the datasets by_name endpoint. However,
+        # as of 2024-11, that endpoint is no longer very useful because it only
+        # surfaces datasets that are in personal folders (going away very soon) &
+        # direct dataset shares (deprecated).
+        # So we use this to load by dataset id, a nice convenience feature.
+        # To get here, a user had to explicitly set `project=NULL` so they're
+        # presumably not here accidentally
+        pseudo_shoji <- tryCatch({
+            ds_base_url <- absoluteURL("datasets/", envOrOption("crunch.api"))
+            ds_entity <- crGET(paste0(ds_base_url, "/", x))
+            # Need to make this pseudo DatasetCatalog to match old API call (because `loadDataset`
+            # will later pass it through `active()`/`archived()`)
+            structure(list(
+                self = ds_base_url,
+                index = setNames(list(ds_entity$body), ds_entity$self)
+            ), class = "shoji")
+        }, error = function(...) NULL) # But if we don't find it just return empty catalog
+
+        return(DatasetCatalog(pseudo_shoji))
     }
 
     # Resolve `project`
     if (is.null(project)) {
         project <- projects()
     } else if (!is.project(project)) {
-        ## Project name, URL, or index
-        project <- projects()[[project]]
-    }
-    if (is.null(project)) {
-        ## Means a project was specified (like by name) but it didn't exist
-        halt(
-            "Project ", deparseAndFlatten(eval.parent(Call$project)),
-            " is not valid"
-        )
+        project <- ProjectFolder(crGET(resolveProjectURL(project)))
     }
 
-    # If there is a path in `x`, walk it within `project`
     if (length(dspath) > 1) {
         project <- cd(project, dspath[-length(dspath)])
     }

diff --git a/R/misc.R b/R/misc.R
@@ -211,7 +211,8 @@ vectorOrList <- function(obj, type) {
 #' | crunch.warn.private          | R_CRUNCH_WARN_PRIVATE          | TRUE          | Whether to warn when using a private variable                               |
 #' | crunch.names.includes.hidden.private.variables | R_NAMES_INCLUDES_HIDDEN_PRIVATE_VARIABLES| TRUE | Whether to include hidden/private variables from names(ds) |
 #' | crunch.order.var.catalog     | R_CRUNCH_ORDER_VAR_CATALOG     | TRUE          | Whether to set the variable catalog in the order of the hierarchical order  |
-#' | crunch.default.derived       | R_CRUNCH_DEFAULT_DERIVED.      | FALSE         | Whether to create variables from expressions that are derived (TRUE) or materialized (FALSE) |
+#' | crunch.default.derived       | R_CRUNCH_DEFAULT_DERIVED       | TRUE          | Whether to create variables from expressions that are derived (TRUE) or materialized (FALSE) |
+#' | crunch.default.project       | R_CRUNCH_DEFAULT_PROJECT       | -             | Path to a project folder to put new datasets in by default.                 |
 #' | crunch.delimiter             | R_CRUNCH_DELIMITER             | "/"           | What to use as a delimiter when printing folder paths                       |
 #' | crunch.check.updates         | R_CRUNCH_CHECK_UPDATES         | TRUE          | Whether to check for updates to the crunch package                          |
 #' | crunch.debug                 | R_CRUNCH_DEBUG                 | FALSE         | Whether to print verbose information for debugging                          |
@@ -292,7 +293,7 @@ get_crunch_opt <- function(opt) {
 }
 
 set_crunch_opt <- function(opt, value, source = NULL) {
-    if (!is.null(source)) value <- structure(value, source = source)
+    if (!is.null(source) && !is.null(value)) value <- structure(value, source = source)
     CRUNCH_OPTIONS[[opt]] <- value
 }