diff --git a/.github/workflows/pkgdown_deploy.yml b/.github/workflows/pkgdown_deploy.yml index 76f4062..52d2820 100644 --- a/.github/workflows/pkgdown_deploy.yml +++ b/.github/workflows/pkgdown_deploy.yml @@ -27,7 +27,7 @@ jobs: shell: Rscript {0} - name: Restore R package cache - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ${{ env.R_LIBS_USER }} key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} diff --git a/.github/workflows/test-coverage.yml b/.github/workflows/test-coverage.yml index b2889c5..684eca0 100644 --- a/.github/workflows/test-coverage.yml +++ b/.github/workflows/test-coverage.yml @@ -30,7 +30,7 @@ jobs: shell: Rscript {0} - name: Restore R package cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ${{ env.R_LIBS_USER }} key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} diff --git a/DESCRIPTION b/DESCRIPTION index 01a3899..2109d25 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: austraits Title: Helpful functions to access the AusTraits database and wrangle data from other traits.build databases -Version: 3.0.1 +Version: 3.0.2 Authors@R: c(person(given = "Daniel", family = "Falster", diff --git a/R/checks.R b/R/checks.R new file mode 100644 index 0000000..4540177 --- /dev/null +++ b/R/checks.R @@ -0,0 +1,105 @@ +# Check missingness + +check_arg_missingness <- function(database, col, col_value){ + if(rlang::is_missing(database) | rlang::is_missing(col) | rlang::is_missing(col_value)) + cli::cli_abort(c( + "x" = "`database`, `col` or `col_value` must be supplied!" + ) + ) +} + +# Check if table name exists +# Note that users can supply the $traits table to database, for that reason table can be NA +# First check if table is NA + +check_table_name_exists <- function(database, table){ + if(!is.na(table) & !tibble::is_tibble(database)){ # database is NOT $traits and `table` is supplied + if(! names(database) %in% table |> any()){ # Does any names of the tables in database contain `table` + cli::cli_abort( + c( + "x" = "`{table}` is not a valid table name", + "i" = "Check `names(database)` and try again!" + ) + ) + } + } +} + +# Check if col exists in specified table when database is traits.build object +check_col_exists_in_table <- function(database, table, col){ + # If traits table supplied and no table is specified + if(tibble::is_tibble(database)){ + if(! names(database) %in% col |> any()){ # Does any names in table contain `col` + cli::cli_abort(c( + "x" = "`{col}` is not a valid column name in the `traits` table", + "i" = "Check `names(database$traits)` and try again!" + ) + ) + } + } else( + if(! names(database[[table]]) %in% col |> any()){ # Does any names in table contain `col` + cli::cli_abort(c( + "x" = "`{col}` is not a valid column name in the `{table}` table", + "i" = "Check `names(database${table})` and try again!" + ) + ) + } + ) +} + +# Check if col_value exists in the col after attempted extraction +# Accommodating for multiple values supplied AND partial matching + +check_col_value_exists <- function(ret, table, col, col_value){ + if(tibble::is_tibble(ret)){ + if(nrow(ret) == 0) + cli::cli_abort(c( + "x" = "`{col_value}` is not a valid value in `{col}` column of the `traits` table", + "i" = "Check spelling of `{col_value}` and try again!" + ) + ) + } else( + + if(nrow(ret$traits) == 0) + cli::cli_abort(c( + "x" = "`{col_value}` is not a valid value in `{col}` column of the `{table}` table", + "i" = "Check spelling of `{col_value}` and try again!" + ) + ) + ) +} + +# +# # Get possible col values +# available_values <- database[[table]][col] |> dplyr::pull() |> unique() +# +# # Check if there are non-matches +# if(length(col_value) > 1) +# concat_col_value <- paste(col_value, collapse = "|") +# +# partial_matches <- stringr::str_detect(available_values, concat_col_value) +# +# if(length(partial_matches) > 1) +# +# # Prompt user which one is non-match +# if(length(no_match) >= 1){ +# cli::cli_warn(c("x" = "`{no_match}` is not a valid value in `{col}` of the `{table}` table")) +# } + +# # Check if col_value exists in the col +# # Accommodating for multiple values supplied +# +# # Get possible col values +# available_values <- database[[table]][col] |> dplyr::pull() |> unique() +# +# # Check if there are non-matches +# no_match <- col_value[which(! col_value %in% available_values)] +# +# # Identify matches +# matches <- col_value[which(col_value %in% available_values)] +# +# # Prompt user which one is non-match +# if(length(no_match) > 0){ +# cli::cli_warn("`{no_match}` is not a valid value in `{col}` of the `{table}` table") +# cli::cli_alert_success("Continuing data extraction for {.val {matches}}") +# } diff --git a/R/extract_data.R b/R/extract_data.R index 42d7086..dc19081 100644 --- a/R/extract_data.R +++ b/R/extract_data.R @@ -21,6 +21,9 @@ #' } extract_data <- function(database, table = NA, col, col_value) { + # Check missingness + check_arg_missingness(database, col, col_value) + # Check compatability status <- check_compatibility(database, single_table_allowed = TRUE) @@ -29,9 +32,14 @@ extract_data <- function(database, table = NA, col, col_value) { function_not_supported(database) } + # Check table value is valid + check_table_name_exists(database, table) + # If just the traits table is read in if (tibble::is_tibble(database)) { + check_col_exists_in_table(database, table, col) + indicies_tmp <- purrr::map(col_value, ~{ stringr::str_which(database[[col]], pattern = stringr::regex(.x, ignore_case = TRUE)) @@ -42,10 +50,16 @@ extract_data <- function(database, table = NA, col, col_value) { # Trim traits, based on the columns identified ret <- database %>% dplyr::slice(found_indicies) + + check_col_value_exists(ret, table, col, col_value) # If a full traits.build database is read in } else { + + # Check if col exists in table within database + check_col_exists_in_table(database, table, col) + # Proceed to extraction database$contexts <- database$contexts %>% tidyr::separate_longer_delim(link_vals, delim = ", ") database$contexts_tmp <- split(database$contexts, database$contexts$link_id) @@ -235,10 +249,18 @@ extract_data <- function(database, table = NA, col, col_value) { } - # Assign class - attr(ret, "class") <- "traits.build" - - ret + # Check full database is provided, assign class + if(!tibble::is_tibble(ret)){ + + # Check if extraction was successful based on col value + check_col_value_exists(ret, table, col, col_value) + + # Assign class + attr(ret, "class") <- "traits.build" + } + + ret + } diff --git a/R/helper.R b/R/helper.R deleted file mode 100644 index 3d3eefb..0000000 --- a/R/helper.R +++ /dev/null @@ -1,30 +0,0 @@ - - -test_database_structure <- function(database, taxa = NA, dataset_id = NA, n_row = NA) { - - table_names <- c("traits", "locations", "contexts", "methods", "excluded_data", "taxonomic_updates", "taxa", "contributors", - "sources", "definitions", "schema", "metadata", "build_info") - - expect_type(database, "list") - # should this be "traits.build or austraits?? - #expect_equal(class(database), "austraits") - expect_equal(class(database), "traits.build") - - expect_equal(names(database), table_names) - - expect_contains(database$traits$taxon_name |> unique(), database$taxa$taxon_name |> unique()) - expect_contains(database$traits$dataset_id |> unique(), database$methods$dataset_id |> unique()) - expect_contains(paste(database$traits$dataset_id, database$traits$trait_name) |> unique(), paste(database$methods$dataset_id, database$methods$trait_name) |> unique()) - - if(!is.na(taxa)) { - expect_contains(database$traits$taxon_name |> unique(), taxa |> unique()) - } - - if(!is.na(dataset_id)) { - expect_contains(database$traits$dataset_id |> unique(), dataset_id |> unique()) - } - - if(!is.na(n_row)) { - expect_equal(database$traits |> nrow(), n_row) - } -} diff --git a/R/print.traits.build.R b/R/print.traits.build.R index e0d4b7c..70ffac8 100644 --- a/R/print.traits.build.R +++ b/R/print.traits.build.R @@ -14,6 +14,8 @@ print.traits.build <- function(x, ...){ nspecies <- unique(x$traits$taxon_name) %>% length() ntraits <- unique(x$traits$trait_name) %>% length() + if(tibble::is_tibble(x)) return(x) + if(check_compatibility(x)){ database_name <- x$metadata$title diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R new file mode 100644 index 0000000..e379b25 --- /dev/null +++ b/tests/testthat/helper.R @@ -0,0 +1,28 @@ + + +test_database_structure <- function(database, taxa = NA, dataset_id = NA, n_row = NA) { + + table_names <- c("traits", "locations", "contexts", "methods", "excluded_data", "taxonomic_updates", "taxa", "contributors", + "sources", "definitions", "schema", "metadata", "build_info") + + testthat::expect_type(database, "list") + testthat::expect_equal(class(database), "traits.build") + + testthat::expect_equal(names(database), table_names) + + testthat::expect_contains(database$traits$taxon_name |> unique(), database$taxa$taxon_name |> unique()) + testthat::expect_contains(database$traits$dataset_id |> unique(), database$methods$dataset_id |> unique()) + testthat::expect_contains(paste(database$traits$dataset_id, database$traits$trait_name) |> unique(), paste(database$methods$dataset_id, database$methods$trait_name) |> unique()) + + if(!is.na(taxa)) { + testthat::expect_contains(database$traits$taxon_name |> unique(), taxa |> unique()) + } + + if(!is.na(dataset_id)) { + testthat::expect_contains(database$traits$dataset_id |> unique(), dataset_id |> unique()) + } + + if(!is.na(n_row)) { + testthat::expect_equal(database$traits |> nrow(), n_row) + } +} diff --git a/tests/testthat/test-extract_.R b/tests/testthat/test-extract_.R index c30ef77..f473a73 100644 --- a/tests/testthat/test-extract_.R +++ b/tests/testthat/test-extract_.R @@ -10,6 +10,19 @@ taxon_name = "Banskia serrata" test_that("Error message is triggered", { expect_error(austraits_5.0.0_lite %>% extract_taxa()) + expect_error(extract_taxa()) + expect_error(extract_data(austraits_5.0.0_lite)) + expect_error(extract_data(austraits_5.0.0_lite, + table = "taxonomy", + col = "genus", + col_value = "Acacia")) + expect_error(extract_data(austraits_5.0.0_lite, + table = "taxa", + col = "genusss", + col_value = "Acacia")) + expect_error(extract_data(at_six$traits, + col = "basis_of record", + col_value = "field lab")) }) test_extract_error <- function(austraits){ @@ -144,12 +157,10 @@ test_that("extracts using generalised extract function behaves as expected - ext test_that("extracts for which there are no matches work`", { context_property_test <- "platypus" - expect_message(extract_data(database = austraits_5.0.0_lite, table = "contexts", col = "context_property", col_value = context_property_test)) - expect_equal(nrow(extract_data(database = austraits_5.0.0_lite, table = "contexts", col = "context_property", col_value = context_property_test)$traits), 0) + expect_error(extract_data(database = austraits_5.0.0_lite, table = "contexts", col = "context_property", col_value = context_property_test)) location_property_test <- "green flowers" - expect_message(extract_data(database = austraits_5.0.0_lite, table = "locations", col = "location_property", col_value = location_property_test)) - expect_equal(nrow(extract_data(database = austraits_5.0.0_lite, table = "locations", col = "location_property", col_value = location_property_test)$traits), 0) + expect_error(extract_data(database = austraits_5.0.0_lite, table = "locations", col = "location_property", col_value = location_property_test)) }) test_that("extracts using generalised extract function behaves as expected - extracting by `context_property`", { @@ -221,8 +232,9 @@ test_that("Extract function works when just traits table is read in", { expect_equal(length(extract_data(database = austraits_5.0.0_lite$traits, col = "dataset_id", col_value = dataset_id)), 26) expect_silent(extract_dataset(database = austraits_5.0.0_lite$traits, dataset_id = dataset_id)) expect_equal(length(extract_dataset(database = austraits_5.0.0_lite$traits, dataset_id = dataset_id)), 26) - expect_silent(extract_taxa(database = austraits_5.0.0_lite$traits, genus = "Banksia")) - expect_equal(length(extract_taxa(database = austraits_5.0.0_lite$traits, genus = "Banksia")), 26) + expect_silent(jointaxa_then_extract <- (austraits_5.0.0_lite %>% join_taxa())$traits) + expect_silent(extract_data(database = jointaxa_then_extract, col = "genus", col_value = "Banksia")) + expect_equal(length(extract_data(database = jointaxa_then_extract, col = "genus", col_value = "Banksia")), 30) expect_silent(extract_trait(database = austraits_5.0.0_lite$traits, trait_name = "photosyn")) expect_equal(length(extract_trait(database = austraits_5.0.0_lite$traits, trait_name = "photosyn")), 26) expect_silent(join_then_extract <- (austraits_5.0.0_lite %>% join_location_coordinates())$traits)