Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add derived subvar #453

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 139 additions & 6 deletions R/add-subvariable.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,30 +14,38 @@
#' @export
addSubvariable <- function(variable, subvariable) {
stopifnot(is.Array(variable))
if (!is.derived(variable)) {
addSubvariablePrimary(variable, subvariable)
} else {
addSubvariableDerived(variable, subvariable)
}
}

#' @rdname addSubvariable
#' @export
addSubvariables <- addSubvariable

addSubvariablePrimary <- function(variable, subvariable) {
new.urls <- addSubvarDef(variable, subvariable)

## Store these for post workaround
subvar.urls <- subvariableURLs(tuple(variable))

## Do the adding
crPATCH(shojiURL(variable, "catalogs", "subvariables"),
body = toJSON(sapply(new.urls, emptyObject, simplify = FALSE))
body = toJSON(sapply(new.urls, emptyObject, simplify = FALSE))
)

## Workaround because apparently bind/rebind isn't retaining the order
crPATCH(self(variable),
body = toJSON(list(subvariables = I(c(subvar.urls, new.urls))))
body = toJSON(list(subvariables = I(c(subvar.urls, new.urls))))
)

## Refresh and return
dropCache(datasetReference(variable))
return(invisible(refresh(variable)))
}

#' @rdname addSubvariable
#' @export
addSubvariables <- addSubvariable

addSubvarDef <- function(var, subvar) {
## Input can be a variable, subvariable, dataset subset or
## a mixed or uniform list of variables and subvariables this
Expand Down Expand Up @@ -67,3 +75,128 @@ addSubvarDef <- function(var, subvar) {
}
return(as.character(out))
}


addSubvariableDerived <- function(variable, subvariable) {
if (is.VarDef(subvariable) | is.variable(subvariable)) subvariable <- list(subvariable)
if (is.catalog(subvariable) | is.dataset(subvariable)) subvariable <- lapply(
seq_along(subvariable),
function(var_num) subvariable[[var_num]]
)
# bypass `derivation(variable)` because `select` zcl function has ids
# not urls and so absolutifyURL mangles url
# TODO: use `derivation()` when select has relative urls (pivotal ticket: ???)
old_deriv <- CrunchExpr(expression = entity(variable)@body$derivation)

if (isSelectDerivation(old_deriv)) {
new_deriv <- addToSelectDerivation(old_deriv, subvariable)
} else if (isSelectCatDerivation(old_deriv)) {
new_deriv <- addToSelectCatDerivation(old_deriv, subvariable, categories(variable))
} else {
halt("Could not add subvariable because did not recognize variable derivation structure")
}

derivation(variable) <- new_deriv
# We don't get metadata from original variable like we would if we were creating
# subvariable during original derivation...
# TODO: It would be nice to update name in the same POST as updating derivation
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a derivation can have a subreferences field? Is that the best way?

# to ensure consistency, but I don't see how yet
# TODO: Also, the alias is really ugly for newly created subvariables
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a way to get the server's naming logic? I tried just adding the alias explicitly, hoping that the server would dedup for me, but this resulted in a failure.

# TODO: Do subvars have other metadata (description/notes), do we need to copy it as well?
new_name_pos <- seq(length(subvariables(variable)) - length(subvariable))
names(subvariables(variable))[-new_name_pos] <- vapply(subvariable, name, character(1))
dropCache(datasetReference(variable))
return(invisible(refresh(variable)))
}

# If select derivation, can put both existing variables and var defs inside select
isSelectDerivation <- function(deriv) {
deriv@expression[["function"]] == "array" &&
deriv@expression[["args"]][[1]][["function"]] == "select"
}

addToSelectDerivation <- function(deriv, new_vars) {
new_vars <- lapply(new_vars, varUrlOrExpression)

new_deriv <- deriv
current_map <- new_deriv@expression$args[[1]]$args[[1]]$map
max_map_name <- max(as.numeric(names(current_map)))

new_deriv@expression$args[[1]]$args[[1]]$map <- c(
current_map,
setNames(new_vars, seq_along(new_vars) + max_map_name)
)

new_deriv@expression$args[[1]]$args[[2]]$value <- c(
new_deriv@expression$args[[1]]$args[[2]]$value,
lapply(seq_along(new_vars) + max_map_name, as.character)
)

new_deriv
}


# if select_cat derivation, existing categorical variables without selections must not add
# any new categories to existing array. var defs cannot be checked but are assumed to also
# have the same categories
# TODO: this does not allow you to add a new subvar where you deliberately choose the
# selected categories which would be nice
isSelectCatDerivation <- function(deriv) {
deriv@expression[["function"]] == "select_categories" &&
deriv@expression[["args"]][[1]][["function"]] == "array" &&
deriv@expression[["args"]][[1]][["args"]][[1]][["function"]] == "select"
}

addToSelectCatDerivation <- function(deriv, new_vars, existing_cats) {
new_vars_are_expressions <- vapply(new_vars, is.VarDef, logical(1))
checkNewSubvarCats(new_vars[!new_vars_are_expressions], existing_cats)
new_vars <- lapply(new_vars, varUrlOrExpression)

new_deriv <- deriv
current_map <- new_deriv@expression$args[[1]]$args[[1]]$args[[1]]$map
max_map_name <- max(as.numeric(names(current_map)))

new_deriv@expression$args[[1]]$args[[1]]$args[[1]]$map <- c(
current_map,
setNames(new_vars, seq_along(new_vars) + max_map_name
)
)

new_deriv@expression$args[[1]]$args[[1]]$args[[2]]$value <- c(
new_deriv@expression$args[[1]]$args[[1]]$args[[2]]$value,
lapply(seq_along(new_vars) + max_map_name, as.character)
)

new_deriv
}

varUrlOrExpression <- function(var) {
if (is.VarDef(var)) {
out <- var$derivation
out$references <- var[names(var) != "derivation"]
out
} else {
list(variable = self(var))
}
}

checkNewSubvarCats <- function(vars, cats) {
new_cat_names <- lapply(vars, function(var) {
setdiff(names(categories(var)), names(cats))
})

if (any(lengths(new_cat_names) > 0)) {
var_aliases <- vapply(vars[lengths(new_cat_names) > 0], alias, character(1))
cats_for_vars <- vapply(
new_cat_names[lengths(new_cat_names) > 0],
function(cats) paste(cats, collapse = ", "),
character(1)
)
msg <- paste0(
"Some existing variables have categories not already present in the MR variable, so ",
"cannot add subvariables.\n ",
paste0(var_aliases, "(", cats_for_vars, ")", collapse = ", ")
)
halt(msg)
}
}
13 changes: 13 additions & 0 deletions R/variable-definition.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,16 @@ is.VariableDefinition <- function(x) inherits(x, "VariableDefinition")
is.VarDef <- is.VariableDefinition

setOldClass("VariableDefinition")

#' @rdname describe-entity
#' @export
setMethod("name", "VariableDefinition", function(x) x$name)

#' @rdname describe-entity
#' @export
setMethod("description", "VariableDefinition", function(x) x$description %||% "")

#' @rdname describe-entity
#' @export
setMethod("notes", "VariableDefinition", function(x) x$notes %||% "")
setMethod("notes", "VariableDefinition", function(x) x$notes %||% "")
99 changes: 99 additions & 0 deletions inst/app.crunch.io/api/datasets/40ccf1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
{
"element": "shoji:entity",
"self": "https://app.crunch.io/api/datasets/40ccf1/",
"catalogs": {
"folders": "https://app.crunch.io/api/datasets/40ccf1/folders/",
"users": "https://app.crunch.io/api/datasets/40ccf1/users/",
"views": "https://app.crunch.io/api/datasets/40ccf1/views/",
"variables": "https://app.crunch.io/api/datasets/40ccf1/variables/",
"actions": "https://app.crunch.io/api/datasets/40ccf1/actions/",
"boxdata": "https://app.crunch.io/api/datasets/40ccf1/boxdata/",
"filters": "https://app.crunch.io/api/datasets/40ccf1/filters/",
"scripts": "https://app.crunch.io/api/datasets/40ccf1/scripts/",
"permissions": "https://app.crunch.io/api/datasets/40ccf1/permissions/",
"batches": "https://app.crunch.io/api/datasets/40ccf1/batches/",
"tags": "https://app.crunch.io/api/datasets/40ccf1/tags/",
"teams": "https://app.crunch.io/api/datasets/40ccf1/teams/",
"savepoints": "https://app.crunch.io/api/datasets/40ccf1/savepoints/",
"multitables": "https://app.crunch.io/api/datasets/40ccf1/multitables/",
"forks": "https://app.crunch.io/api/datasets/40ccf1/forks/",
"decks": "https://app.crunch.io/api/datasets/40ccf1/decks/",
"parent": "https://app.crunch.io/api/datasets/",
"project": "https://app.crunch.io/api/projects/personal/",
"variables_private": "https://app.crunch.io/api/datasets/40ccf1/variables/private/"
},
"fragments": {
"preferences": "https://app.crunch.io/api/datasets/40ccf1/preferences/",
"stream": "https://app.crunch.io/api/datasets/40ccf1/stream/",
"settings": "https://app.crunch.io/api/datasets/40ccf1/settings/",
"exclusion": "https://app.crunch.io/api/datasets/40ccf1/exclusion/",
"publish": "https://app.crunch.io/api/datasets/40ccf1/publish/",
"state": "https://app.crunch.io/api/datasets/40ccf1/state/",
"table": "https://app.crunch.io/api/datasets/40ccf1/table/",
"pk": "https://app.crunch.io/api/datasets/40ccf1/pk/",
"schema": "https://app.crunch.io/api/datasets/40ccf1/schema/"
},
"views": {
"second_order_analysis": "https://app.crunch.io/api/datasets/40ccf1/second_order_analysis/",
"cube": "https://app.crunch.io/api/datasets/40ccf1/cube/",
"export": "https://app.crunch.io/api/datasets/40ccf1/export/",
"summary": "https://app.crunch.io/api/datasets/40ccf1/summary/",
"applied_filters": "https://app.crunch.io/api/datasets/40ccf1/filters/applied/"
},
"description": "Detail for a given dataset",
"body": {
"maintainer": "https://app.crunch.io/api/users/27a158/",
"current_editor": "https://app.crunch.io/api/users/27a158/",
"creation_time": "2020-05-22T20:48:13.905000",
"streaming": "no",
"owner": "https://app.crunch.io/api/users/27a158/",
"logo": {
"large": "",
"small": "",
"favicon": ""
},
"id": "40ccf1",
"size": {
"rows": 5,
"unfiltered_rows": 5,
"columns": 7
},
"palette": {
"brand": {
"message": "#722580",
"primary": "#005283",
"secondary": "#107f65"
}
},
"app_settings": {

},
"start_date": null,
"owner_name": "Greg",
"description": "",
"end_date": null,
"access_time": "2020-05-22T23:43:42.945000",
"current_editor_name": "Greg",
"view_of": null,
"modification_time": "2020-05-22T23:44:35.013000",
"archived": false,
"permissions": {
"edit": true,
"view": true
},
"account": "https://app.crunch.io/api/accounts/00001/",
"name": "combine into array test",
"notes": "",
"is_published": true
},
"orders": {
"variables_weights": "https://app.crunch.io/api/datasets/40ccf1/variables/weights/",
"variables_hier": "https://app.crunch.io/api/datasets/40ccf1/variables/hier/",
"variables_personal": "https://app.crunch.io/api/datasets/40ccf1/variables/personal/"
},
"urls": {
"owner_url": "https://app.crunch.io/api/projects/378ad4/",
"editor_url": "https://app.crunch.io/api/users/27a158/",
"user_url": "https://app.crunch.io/api/users/27a158/"
}
}
Loading