diff --git a/R/extract_data.R b/R/extract_data.R index dc19081..0810a0c 100644 --- a/R/extract_data.R +++ b/R/extract_data.R @@ -19,7 +19,7 @@ #' extract_data(database = traits.build_database, table = "traits", #' col = "trait_name", col_value = "leaf_area") #' } -extract_data <- function(database, table = NA, col, col_value) { +extract_data <- function(database, table = NA, col, col_value, partial_matches_allowed = TRUE) { # Check missingness check_arg_missingness(database, col, col_value) @@ -40,10 +40,16 @@ extract_data <- function(database, table = NA, col, col_value) { check_col_exists_in_table(database, table, col) - indicies_tmp <- purrr::map(col_value, ~{ - stringr::str_which(database[[col]], - pattern = stringr::regex(.x, ignore_case = TRUE)) - }) + if (partial_matches_allowed == TRUE) { + indicies_tmp <- purrr::map(col_value, ~{ + stringr::str_which(database[[col]], + pattern = stringr::regex(.x, ignore_case = TRUE)) + }) + } else { + indicies_tmp <- purrr::map(col_value, ~{ + which(database[[col]] == .x) + }) + } found_indicies <- purrr::reduce(indicies_tmp, union) @@ -164,10 +170,16 @@ extract_data <- function(database, table = NA, col, col_value) { # chose columns to select, ensuring "value" isn't among the columns, since it has a different meaning for each table columns_to_select <- intersect(setdiff(names(database$traits), "value"), names(database[[table[[i]]]])) - indicies_tmp <- purrr::map(col_value, ~{ - stringr::str_which(database[[table[[i]]]][[col]], - pattern = stringr::regex(.x, ignore_case = TRUE)) - }) + if (partial_matches_allowed == TRUE) { + indicies_tmp <- purrr::map(col_value, ~{ + stringr::str_which(database[[table[[i]]]][[col]], + pattern = stringr::regex(.x, ignore_case = TRUE)) + }) + } else { + indicies_tmp <- purrr::map(col_value, ~{ + which(database[[table[[i]]]][[col]] == .x) + }) + } found_indicies <- purrr::reduce(indicies_tmp, union) @@ -224,6 +236,7 @@ extract_data <- function(database, table = NA, col, col_value) { ret[["treatment_context_id"]]) %>% dplyr::select(-dplyr::any_of(c("entity_context_id", "method_context_id", "plot_context_id", "temporal_context_id", "treatment_context_id"))) %>% dplyr::group_by(dataset_id, category, link_id, value, description) %>% + dplyr::distinct(link_vals, .keep_all = TRUE) %>% dplyr::mutate(link_vals = paste0(link_vals, collapse = ", ")) %>% dplyr::ungroup() %>% dplyr::distinct() diff --git a/R/extract_dataset.R b/R/extract_dataset.R index e7bb123..413232f 100644 --- a/R/extract_dataset.R +++ b/R/extract_dataset.R @@ -24,7 +24,7 @@ #' @export -extract_dataset <- function(database, dataset_id) { +extract_dataset <- function(database, dataset_id, partial_matches_allowed = TRUE) { # Check compatability status <- check_compatibility(database, single_table_allowed = TRUE) @@ -34,7 +34,7 @@ extract_dataset <- function(database, dataset_id) { function_not_supported(database) } - extract_data(database, "traits", "dataset_id", col_value = dataset_id) + extract_data(database, "traits", "dataset_id", col_value = dataset_id, partial_matches_allowed = partial_matches_allowed) } diff --git a/R/extract_taxa.R b/R/extract_taxa.R index 8145ab8..e2ab8dd 100644 --- a/R/extract_taxa.R +++ b/R/extract_taxa.R @@ -25,7 +25,7 @@ #' @author Fonti Kar - f.kar@unsw.edu.au #' @export -extract_taxa <- function(database, family = NULL, genus = NULL, taxon_name = NULL){ +extract_taxa <- function(database, family = NULL, genus = NULL, taxon_name = NULL, partial_matches_allowed = TRUE){ # Check compatability status <- check_compatibility(database, single_table_allowed = TRUE) @@ -40,15 +40,15 @@ extract_taxa <- function(database, family = NULL, genus = NULL, taxon_name = NUL } if( ! is.null(family) ){ - return(extract_data(database, "taxa", "family", col_value = family)) + return(extract_data(database, "taxa", "family", col_value = family, partial_matches_allowed = partial_matches_allowed)) } if( ! is.null(genus) ){ - return(extract_data(database, "taxa", "genus", col_value = genus)) + return(extract_data(database, "taxa", "genus", col_value = genus, partial_matches_allowed = partial_matches_allowed)) } if( ! is.null(taxon_name)) - return(extract_data(database, "traits", "taxon_name", col_value = taxon_name)) + return(extract_data(database, "traits", "taxon_name", col_value = taxon_name, partial_matches_allowed = partial_matches_allowed)) } diff --git a/R/extract_trait.R b/R/extract_trait.R index b304643..ee7bf9f 100644 --- a/R/extract_trait.R +++ b/R/extract_trait.R @@ -26,7 +26,7 @@ -extract_trait <- function(database, trait_names, taxon_names=NULL) { +extract_trait <- function(database, trait_names, taxon_names=NULL, partial_matches_allowed = TRUE) { # Check compatability status <- check_compatibility(database, single_table_allowed = TRUE) @@ -35,10 +35,10 @@ extract_trait <- function(database, trait_names, taxon_names=NULL) { function_not_supported(database) } - ret <- extract_data(database, "traits", "trait_name", col_value = trait_names) + ret <- extract_data(database, "traits", "trait_name", col_value = trait_names, partial_matches_allowed = partial_matches_allowed) if(!is.null(taxon_names)) - ret <- extract_data(ret, "traits", "taxon_name", col_value = taxon_names) + ret <- extract_data(ret, "traits", "taxon_name", col_value = taxon_names, partial_matches_allowed = partial_matches_allowed) return(ret) } diff --git a/tests/testthat/test-extract_.R b/tests/testthat/test-extract_.R index f473a73..20ea3aa 100644 --- a/tests/testthat/test-extract_.R +++ b/tests/testthat/test-extract_.R @@ -242,3 +242,27 @@ test_that("Extract function works when just traits table is read in", { expect_silent(extract_data(database = join_then_extract, col = "longitude (deg)", col_value = "145")) }) +test_that("Extract function works with partial match parameter", { + expect_silent(extract_data(database = austraits_5.0.0_lite, table = "traits", col = "dataset_id", col_value = "Falster")) + expect_silent(extract_data(database = austraits_5.0.0_lite, table = "traits", col = "dataset_id", col_value = "Falster", partial_matches_allowed = T)) + expect_error(extract_data(database = austraits_5.0.0_lite, table = "traits", col = "dataset_id", col_value = "Falster", partial_matches_allowed = F)) + expect_silent(extract_data(database = austraits_5.0.0_lite, table = "traits", col = "dataset_id", col_value = "Falster_2003", partial_matches_allowed = F)) + expect_silent(extract_data(database = austraits_5.0.0_lite$traits, col = "dataset_id", col_value = "Falster")) + expect_silent(Falster_partial <- extract_data(database = austraits_5.0.0_lite$traits, col = "dataset_id", col_value = "Falster", partial_matches_allowed = T)) + expect_error(extract_data(database = austraits_5.0.0_lite$traits, col = "dataset_id", col_value = "Falster", partial_matches_allowed = F)) + expect_silent(Falster_exact <- extract_data(database = austraits_5.0.0_lite$traits, col = "dataset_id", col_value = "Falster_2003", partial_matches_allowed = F)) + expect_equal(nrow(Falster_partial %>% dplyr::distinct(dataset_id)), 3) + expect_equal(nrow(Falster_exact %>% dplyr::distinct(dataset_id)), 1) + expect_silent(extract_dataset(database = austraits_5.0.0_lite, "Falster", partial_matches_allowed = T)) + expect_error(extract_dataset(database = austraits_5.0.0_lite, "Falster", partial_matches_allowed = F)) + expect_silent(extract_dataset(database = austraits_5.0.0_lite, "Falster_2003", partial_matches_allowed = F)) + expect_silent(extract_trait(database = austraits_5.0.0_lite, "photosynth", partial_matches_allowed = T)) + expect_error(extract_trait(database = austraits_5.0.0_lite, "photosynth", partial_matches_allowed = F)) + expect_silent(extract_trait(database = austraits_5.0.0_lite, "photosynthetic_pathway", partial_matches_allowed = F)) + expect_silent(Acacia_dealbata_partial <- extract_taxa(database = austraits_5.0.0_lite, taxon_name = "Acacia dealbata", partial_matches_allowed = T)) + expect_silent(Acacia_dealbata_exact <- extract_taxa(database = austraits_5.0.0_lite, taxon_name = "Acacia dealbata", partial_matches_allowed = F)) + expect_gt(nrow(Acacia_dealbata_partial$traits),nrow(Acacia_dealbata_exact$traits)) + expect_equal(Acacia_dealbata_partial$traits %>% dplyr::distinct(taxon_name) %>% nrow(), 3) + expect_equal(Acacia_dealbata_exact$traits %>% dplyr::distinct(taxon_name) %>% nrow(), 1) +}) +