diff --git a/R/distance_to_centroid.R b/R/distance_to_centroid.R index b72d0bbb..a075af0a 100644 --- a/R/distance_to_centroid.R +++ b/R/distance_to_centroid.R @@ -1,26 +1,59 @@ #' Distance to group centroid #' #' `distance_to_centroid` calculates the distance of each relocation to the -#' centroid of the spatiotemporal group identified by `group_pts`. The -#' function expects a `data.table` with relocation data appended with a -#' `group` column from `group_pts` and centroid columns from -#' `centroid_group`. Relocation data should be in planar coordinates -#' provided in two columns representing the X and Y coordinates. -#' -#' The `DT` must be a `data.table`. If your data is a -#' `data.frame`, you can convert it by reference using -#' [data.table::setDT()] or by reassigning using +#' centroid of the spatiotemporal group identified by `group_pts`. The function +#' expects a `data.table` with relocation data appended with a `group` column +#' from `group_pts` and centroid columns from `centroid_group`. Relocation data +#' should be provided in two columns representing the X and Y coordinates, or in +#' a geometry column prepared by the helper function [get_geometry()]. +#' +#' The `DT` must be a `data.table`. If your data is a `data.frame`, you can +#' convert it by reference using [data.table::setDT()] or by reassigning using #' [data.table::data.table()]. #' -#' This function expects a `group` column present generated with the -#' `group_pts` function and centroid coordinate columns generated with the -#' `centroid_group` function. The `coords` and `group` arguments -#' expect the names of columns in `DT` which correspond to the X and Y -#' coordinates and group columns. The `return_rank` argument controls if -#' the rank of each individual's distance to the group centroid is also -#' returned. The `ties.method` argument is passed to -#' `data.table::frank`, see details at -#' [`?data.table::frank()`][data.table::frank]. +#' This function expects a `group` column present generated with the `group_pts` +#' function and centroid coordinate column(s) generated with the +#' `centroid_group` function. The `group` arguments expect the names of columns +#' in `DT` which correspond to the group column. The `return_rank` argument +#' controls if the rank of each individual's distance to the group centroid is +#' also returned. The `ties.method` argument is passed to `data.table::frank`, +#' see details at [`?data.table::frank()`][data.table::frank]. +#' +#' See below under "Interface" for details on providing coordinates and under +#' "Distance function" for details on underlying distance function used. +#' +#' @section Interface: +#' Two interfaces are available for providing coordinates: +#' +#' 1. Provide `coords` and `crs`. The `coords` argument expects the names of +#' the X and Y coordinate columns. The `crs` argument expects a character +#' string or numeric defining the coordinate reference system to be passed to +#' [sf::st_crs]. For example, for UTM zone 36S (EPSG 32736), the crs argument +#' is `crs = "EPSG:32736"` or `crs = 32736`. See +#' for a list of EPSG codes. +#' 2. (New!) Provide `geometry`. The `geometry` argument allows the user to +#' supply a `geometry` column that represents the coordinates as a simple +#' feature geometry list column. This interface expects the user to prepare +#' their input DT with [get_geometry()]. To use this interface, leave the +#' `coords` and `crs` arguments `NULL`, and the default argument for `geometry` +#' ('geometry') will be used directly. +#' +#' @section Distance function: +#' +#' The underlying distance function used depends on the crs of the coordinates +#' or geometry provided. +#' +#' - If the crs is longlat degrees (as determined by +#' [sf::st_is_longlat()]), the distance function is [sf::st_distance()] which +#' passes to [s2::s2_distance()] if [sf::sf_use_s2()] is TRUE and +#' [lwgeom::st_geod_distance()] if [sf::sf_use_s2()] is FALSE. The distance +#' returned has units set according to the crs. +#' +#' - If the crs is not longlat degrees (eg. NULL, NA_crs_, or projected), the +#' distance function used is [stats::dist()], maintaining expected behaviour +#' from previous versions. The distance returned does not have units set. +#' +#' Note: in both cases, if the coordinates are NA then the result will be NA. #' #' @param DT input data.table with centroid columns generated by eg. #' `centroid_group` @@ -30,16 +63,16 @@ #' FALSE #' @param ties.method see [`?data.table::frank()`][data.table::frank] #' @inheritParams group_pts +#' @inheritParams direction_step #' -#' @return `distance_to_centroid` returns the input `DT` appended with -#' a `distance_centroid` column indicating the distance to group centroid -#' and, optionally, a `rank_distance_centroid` column indicating the -#' within group rank distance to group centroid (if `return_rank = -#' TRUE`). +#' @return `distance_to_centroid` returns the input `DT` appended with a +#' `distance_centroid` column indicating the distance to the group centroid +#' and, optionally, a `rank_distance_centroid` column indicating the within +#' group rank distance to the group centroid (if `return_rank = TRUE`). #' #' A message is returned when `distance_centroid` and optional -#' `rank_distance_centroid` columns already exist in the input `DT`, -#' because they will be overwritten. +#' `rank_distance_centroid` columns already exist in the input `DT`, because +#' they will be overwritten. #' #' See details for appending outputs using modify-by-reference in the #' [FAQ](https://docs.ropensci.org/spatsoc/articles/faq.html). @@ -47,7 +80,7 @@ #' @export #' @family Distance functions #' @family Centroid functions -#' @seealso [centroid_group], [group_pts] +#' @seealso [centroid_group], [group_pts], [sf::st_distance()] #' @references #' See examples of using distance to group centroid: #' * \doi{doi:10.1016/j.anbehav.2021.08.004} @@ -80,60 +113,114 @@ #' DT, #' coords = c('X', 'Y'), #' group = 'group', +#' crs = 32736, #' return_rank = TRUE #' ) distance_to_centroid <- function( DT = NULL, coords = NULL, group = 'group', + crs = NULL, return_rank = FALSE, - ties.method = NULL) { + ties.method = NULL, + geometry = 'geometry') { # Due to NSE notes in R CMD check - distance_centroid <- rank_distance_centroid <- NULL + geo <- cent <- x <- y <- x_centroid <- y_centroid <- NULL assert_not_null(DT) assert_is_data_table(DT) - assert_are_colnames(DT, coords) - assert_length(coords, 2) - assert_col_inherits(DT, coords, 'numeric') - assert_not_null(return_rank) - if ('distance_centroid' %in% colnames(DT)) { - message('distance_centroid column will be overwritten by this function') - data.table::set(DT, j = 'distance_centroid', value = NULL) - } + out <- 'distance_centroid' - xcol <- data.table::first(coords) - ycol <- data.table::last(coords) - pre <- 'centroid_' - centroid_xcol <- paste0(pre, xcol) - centroid_ycol <- paste0(pre, ycol) - centroid_coords <- c(centroid_xcol, centroid_ycol) + if (is.null(coords)) { + if (!is.null(crs)) { + message('crs argument is ignored when coords are null, using geometry') + } + + assert_are_colnames(DT, geometry, ', did you run get_geometry()?') + assert_col_inherits(DT, geometry, 'sfc_POINT') + centroid_col <- 'centroid' + assert_are_colnames(DT, centroid_col, ', did you run centroid_group?') + assert_col_inherits(DT, centroid_col, 'sfc_POINT') - assert_are_colnames(DT, centroid_coords, ', did you run centroid_group?') - assert_col_inherits(DT, centroid_coords, 'numeric') + if (out %in% colnames(DT)) { + message(out, ' column will be overwritten by this function') + data.table::set(DT, j = out, value = NULL) + } + crs <- sf::st_crs(DT[[geometry]]) + use_dist <- isFALSE(sf::st_is_longlat(crs)) || identical(crs, sf::NA_crs_) - DT[, distance_centroid := - sqrt((.SD[[xcol]] - .SD[[centroid_xcol]])^2 + - (.SD[[ycol]] - .SD[[centroid_ycol]])^2)] + DT[, c(out) := calc_distance( + geometry_a = geo, + geometry_b = cent, + use_dist = use_dist + ), + env = list(geo = geometry, cent = centroid_col) + ] + + } else { + if (is.null(crs)) { + crs <- sf::NA_crs_ + } + + assert_are_colnames(DT, coords) + assert_length(coords, 2) + assert_col_inherits(DT, coords, 'numeric') + + xcol <- data.table::first(coords) + ycol <- data.table::last(coords) + pre <- 'centroid_' + xcol_centroid <- paste0(pre, xcol) + ycol_centroid <- paste0(pre, ycol) + coords_centroid <- c(xcol_centroid, ycol_centroid) + + assert_are_colnames(DT, coords_centroid, ', did you run centroid_group?') + assert_col_inherits(DT, coords_centroid, 'numeric') + + if (out %in% colnames(DT)) { + message(out, ' column will be overwritten by this function') + data.table::set(DT, j = out, value = NULL) + } + + use_dist <- isFALSE(sf::st_is_longlat(crs)) || identical(crs, sf::NA_crs_) + + DT[, + c(out) := calc_distance( + x_a = x, + y_a = y, + x_b = x_centroid, + y_b = y_centroid, + crs = crs, + use_dist = use_dist + ), + env = list( + x = xcol, y = ycol, + x_centroid = xcol_centroid, y_centroid = ycol_centroid + ) + ] + + } if (return_rank) { assert_not_null(group) assert_are_colnames(DT, group, ', did you run group_pts?') - if ('rank_distance_centroid' %in% colnames(DT)) { + out_rank <- 'rank_distance_centroid' + if (out_rank %in% colnames(DT)) { message( - 'rank_distance_centroid column will be overwritten by this function' + out_rank, ' column will be overwritten by this function' ) - data.table::set(DT, j = 'rank_distance_centroid', value = NULL) + data.table::set(DT, j = out_rank, value = NULL) } - DT[, rank_distance_centroid := - data.table::frank(distance_centroid, ties.method = ties.method), - by = c(group)] + DT[, c(out_rank) := + data.table::frank(out, ties.method = ties.method, na.last = 'keep'), + by = group, + env = list(out = out, group = group)] } + return(DT[]) } diff --git a/R/distance_to_leader.R b/R/distance_to_leader.R index 641598af..be2d4e9f 100644 --- a/R/distance_to_leader.R +++ b/R/distance_to_leader.R @@ -1,43 +1,47 @@ #' Distance to group leader #' #' `distance_to_leader` calculates the distance to the leader of each -#' spatiotemporal group. The function expects a `data.table` with -#' relocation data appended with a `rank_position_group_direction` column -#' indicating the ranked position along the group direction generated with -#' `leader_direction_group(return_rank = TRUE)`. Relocation data should be -#' in planar coordinates provided in two columns representing the X and Y -#' coordinates. -#' -#' The `DT` must be a `data.table`. If your data is a -#' `data.frame`, you can convert it by reference using -#' [data.table::setDT()] or by reassigning using +#' spatiotemporal group. The function expects a `data.table` with relocation +#' data appended with a `rank_position_group_direction` column indicating the +#' ranked position along the group direction generated with +#' `leader_direction_group(return_rank = TRUE)`. Relocation data should be in +#' two columns representing the X and Y coordinates, or in a geometry column +#' prepared by the helper function [get_geometry()]. +#' +#' The `DT` must be a `data.table`. If your data is a `data.frame`, you can +#' convert it by reference using [data.table::setDT()] or by reassigning using #' [data.table::data.table()]. #' -#' This function expects a `rank_position_group_direction` column -#' generated with `leader_direction_group(return_rank = TRUE)`, -#' a `group` column generated with the -#' `group_pts` function. The `coords` and `group` arguments -#' expect the names of columns in `DT` which correspond to the X and Y -#' coordinates and group columns. +#' This function expects a `rank_position_group_direction` column generated with +#' `leader_direction_group(return_rank = TRUE)`, a `group` column generated with +#' the `group_pts` function. The `group` argument expect the names of the column +#' in `DT` which corresponds to the group column. +#' +#' See below under "Interface" for details on providing coordinates and under +#' "Distance function" for details on underlying distance function used. +#' +#' @inheritSection distance_to_centroid Interface +#' @inheritSection distance_to_centroid Distance function #' #' @param DT input data.table with 'rank_position_group_direction' column #' generated by `leader_direction_group` and group column generated by #' `group_pts` #' @inheritParams leader_direction_group +#' @inheritParams group_pts #' -#' @return `distance_to_leader` returns the input `DT` appended with -#' a `distance_leader` column indicating the distance to the group -#' leader. +#' @return `distance_to_leader` returns the input `DT` appended with a +#' `distance_leader` column indicating the distance to the group leader. #' -#' A message is returned when the `distance_leader` column already -#' exist in the input `DT` because it will be overwritten. +#' A message is returned when the `distance_leader` column already exist in +#' the input `DT` because it will be overwritten. #' #' See details for appending outputs using modify-by-reference in the #' [FAQ](https://docs.ropensci.org/spatsoc/articles/faq.html). #' #' @export #' @family Distance functions -#' @seealso [direction_to_leader], [leader_direction_group], [group_pts] +#' @seealso [direction_to_leader], [leader_direction_group], [group_pts], +#' [sf::st_distance()] #' @references #' #' See examples of using distance to leader and position within group: @@ -88,62 +92,134 @@ #' ) #' #' # Calculate distance to leader -#' distance_to_leader(DT, coords = c('X', 'Y')) +#' distance_to_leader(DT, coords = c('X', 'Y'), crs = 32736) distance_to_leader <- function( DT = NULL, coords = NULL, - group = 'group') { + group = 'group', + crs = NULL, + geometry = 'geometry') { + # Due to NSE notes - distance_leader <- zzz_N_by_group <- rank_position_group_direction <- - has_leader <- . <- NULL + geo <- lead <- x <- y <- x_leader <- y_leader <- . <- + rank_position_group_direction <- has_leader <- distance_leader <- NULL assert_not_null(DT) assert_is_data_table(DT) assert_not_null(group) assert_are_colnames(DT, group) - assert_are_colnames(DT, coords) - assert_length(coords, 2) - assert_col_inherits(DT, coords, 'numeric') - leader_col <- 'rank_position_group_direction' - assert_are_colnames(DT, leader_col, - ', did you run leader_direction_group(return_rank = TRUE)?') + assert_are_colnames( + DT, leader_col, + ', did you run leader_direction_group(return_rank = TRUE)?' + ) assert_col_inherits(DT, leader_col, 'numeric') + check_leaderless <- DT[, .( + has_leader = any(rank_position_group_direction == 1)), + by = group, + env = list(group = group)][!(has_leader)] + out_col <- 'distance_leader' - if (out_col %in% colnames(DT)) { - message( - paste0(out_col, ' column will be overwritten by this function') - ) - data.table::set(DT, j = out_col, value = NULL) - } - DT[, zzz_N_by_group := .N, by = c(group)] + if (is.null(coords)) { + if (!is.null(crs)) { + message('crs argument is ignored when coords are null, using geometry') + } - check_leaderless <- DT[, .( - has_leader = any(rank_position_group_direction == 1)), - by = c(group)][!(has_leader)] + assert_are_colnames(DT, geometry, ', did you run get_geometry()?') + assert_col_inherits(DT, geometry, 'sfc_POINT') - if (check_leaderless[, .N > 0]) { - warning( - 'groups found missing leader (rank_position_group_direction == 1): \n', - check_leaderless[, paste(group, collapse = ', ')] - ) - } + zzz_geometry_leader <- 'zzz_geometry_leader' + DT[, c(zzz_geometry_leader) := + sf::st_sf(rep(geo[which(rank_position_group_direction == 1)], .N)), + env = list(geo = geometry, group = group), + by = group] + + if (check_leaderless[, .N > 0]) { + warning( + 'groups found missing leader (rank_position_group_direction == 1): \n', + check_leaderless[, paste(group, collapse = ', ')] + ) + } + + if (out_col %in% colnames(DT)) { + message( + paste0(out_col, ' column will be overwritten by this function') + ) + data.table::set(DT, j = out_col, value = NULL) + } - DT[!group %in% check_leaderless$group, - c(out_col) := fifelse( - zzz_N_by_group > 1, - as.matrix( - stats::dist(cbind(.SD[[1]], .SD[[2]])) - )[, which(.SD[[3]] == 1)], - 0 - ), - .SDcols = c(coords, 'rank_position_group_direction'), - by = c(group)] - - data.table::set(DT, j = 'zzz_N_by_group', value = NULL) + crs <- sf::st_crs(DT[[geometry]]) + use_dist <- isFALSE(sf::st_is_longlat(crs)) || identical(crs, sf::NA_crs_) + + DT[!group %in% check_leaderless$group, c(out_col) := calc_distance( + geometry_a = geo, + geometry_b = lead, + use_dist = use_dist + ), + env = list( + geo = geometry, lead = zzz_geometry_leader + )] + + data.table::set(DT, j = zzz_geometry_leader, value = NULL) + + } else { + assert_are_colnames(DT, coords) + assert_length(coords, 2) + assert_col_inherits(DT, coords, 'numeric') + + if (is.null(crs)) { + crs <- sf::NA_crs_ + } + + xcol <- data.table::first(coords) + ycol <- data.table::last(coords) + pre <- 'zzz_leader_' + zzz_xcol_leader <- paste0(pre, xcol) + zzz_ycol_leader <- paste0(pre, ycol) + zzz_coords_leader <- c(zzz_xcol_leader, zzz_ycol_leader) + + DT[, c(zzz_coords_leader) := + .SD[which(rank_position_group_direction == 1)], + .SDcols = c(coords), + env = list(group = group), + by = group] + + if (check_leaderless[, .N > 0]) { + warning( + 'groups found missing leader (rank_position_group_direction == 1): \n', + check_leaderless[, paste(group, collapse = ', ')] + ) + } + + if (out_col %in% colnames(DT)) { + message( + paste0(out_col, ' column will be overwritten by this function') + ) + data.table::set(DT, j = out_col, value = NULL) + } + + use_dist <- isFALSE(sf::st_is_longlat(crs)) || identical(crs, sf::NA_crs_) + + DT[!group %in% check_leaderless$group, + c(out_col) := calc_distance( + x_a = x, + y_a = y, + x_b = x_leader, + y_b = y_leader, + crs = crs, + use_dist = use_dist + ), + env = list( + x = xcol, y = ycol, x_leader = zzz_xcol_leader, y_leader = zzz_ycol_leader + ) + ] + + data.table::set(DT, j = zzz_coords_leader, value = NULL) + + } return(DT[]) } diff --git a/R/edge_dist.R b/R/edge_dist.R index 1832481c..f4dfefec 100644 --- a/R/edge_dist.R +++ b/R/edge_dist.R @@ -1,69 +1,75 @@ #' Distance based edge-lists #' -#' `edge_dist` returns edge-lists defined by a spatial distance within the -#' user defined threshold. The function expects a `data.table` with -#' relocation data, individual identifiers and a threshold argument. The -#' threshold argument is used to specify the criteria for distance between -#' points which defines a group. Relocation data should be in two columns -#' representing the X and Y coordinates. -#' -#' The `DT` must be a `data.table`. If your data is a -#' `data.frame`, you can convert it by reference using -#' [data.table::setDT()]. -#' -#' The `id`, `coords`, `timegroup` (and optional `splitBy`) -#' arguments expect the names of a column in `DT` which correspond to the -#' individual identifier, X and Y coordinates, timegroup (generated by -#' `group_times`) and additional grouping columns. -#' -#' If provided, the `threshold` must be provided in the units of the -#' coordinates and must be larger than 0. If the `threshold` is NULL, the -#' distance to all other individuals will be returned. The coordinates must be -#' planar coordinates (e.g.: UTM). In the case of UTM, `threshold = 50` -#' would indicate a 50 m distance threshold. -#' -#' The `timegroup` argument is required to define the temporal groups -#' within which edges are calculated. The intended framework is to group rows -#' temporally with `group_times` then spatially with -#' `edge_dist`. If you have already calculated temporal groups without -#' `group_times`, you can pass this column to the `timegroup` -#' argument. Note that the expectation is that each individual will be observed -#' only once per timegroup. Caution that accidentally including huge numbers of -#' rows within timegroups can overload your machine since all pairwise distances -#' are calculated within each timegroup. -#' -#' The `splitBy` argument offers further control over grouping. If within -#' your `DT`, you have multiple populations, subgroups or other distinct -#' parts, you can provide the name of the column which identifies them to -#' `splitBy`. `edge_dist` will only consider rows within each -#' `splitBy` subgroup. +#' `edge_dist` returns edge-lists defined by a spatial distance within the user +#' defined threshold. The function expects a `data.table` with relocation data, +#' individual identifiers and a threshold argument. The threshold argument is +#' used to specify the criteria for distance between points which defines a +#' group. Relocation data should be in two columns representing the X and Y +#' coordinates, or in a geometry column prepared by the helper function +#' [get_geometry()]. +#' +#' The `DT` must be a `data.table`. If your data is a `data.frame`, you can +#' convert it by reference using [data.table::setDT()]. +#' +#' The `id`, `timegroup` (and optional `splitBy`) arguments expect the names of +#' columns in `DT` which correspond to the individual identifier, and timegroup +#' (generated by `group_times`) and additional grouping columns. +#' +#' The `threshold` provided should match the units of the coordinates. The +#' `threshold` can be provided with units specified using the units package (eg. +#' `threshold = units::set_units(10, m)`) which will be checked against the +#' units of the coordinates using the `crs`. If units are not specified, the +#' `threshold` is assumed to be in the units of the coordinates. +#' +#' The `timegroup` argument is required to define the temporal groups within +#' which edges are calculated. The intended framework is to group rows +#' temporally with `group_times` then spatially with `edge_dist`. If you have +#' already calculated temporal groups without `group_times`, you can pass this +#' column to the `timegroup` argument. Note that the expectation is that each +#' individual will be observed only once per timegroup. Caution that +#' accidentally including huge numbers of rows within timegroups can overload +#' your machine since all pairwise distances are calculated within each +#' timegroup. +#' +#' The `splitBy` argument offers further control over grouping. If within your +#' `DT`, you have multiple populations, subgroups or other distinct parts, you +#' can provide the name of the column which identifies them to `splitBy`. +#' `edge_dist` will only consider rows within each `splitBy` subgroup. +#' +#' See below under "Interface" for details on providing coordinates and under +#' "Distance function" for details on underlying distance function used. +#' +#' @inheritSection distance_to_centroid Interface +#' @inheritSection distance_to_centroid Distance function #' #' @inheritParams group_pts +#' @inheritParams direction_step #' @param returnDist logical indicating if the distance between individuals -#' should be returned. If FALSE (default), only ID1, ID2 columns (and -#' timegroup, splitBy columns if provided) are returned. If TRUE, another -#' column "distance" is returned indicating the distance between ID1 and ID2. +#' should be returned. If FALSE (default), only individual columns (and +#' timegroup, splitBy columns if provided) are returned. If TRUE, a column +#' "distance" is also returned indicating the distance between individuals in +#' the units of the `crs`, or if `crs = NULL` no units are set. #' @param fillNA logical indicating if NAs should be returned for individuals #' that were not within the threshold distance of any other. If TRUE, NAs are #' returned. If FALSE, only edges between individuals within the threshold #' distance are returned. #' -#' @return `edge_dist` returns a `data.table` with columns ID1, ID2, -#' timegroup (if supplied) and any columns provided in splitBy. If -#' 'returnDist' is TRUE, column 'distance' is returned indicating the distance -#' between ID1 and ID2. +#' @return `edge_dist` returns a `data.table` with columns ID1, ID2, timegroup +#' (if supplied) and any columns provided in splitBy. If 'returnDist' is TRUE, +#' column 'distance' is returned indicating the distance between ID1 and ID2. #' #' The ID1 and ID2 columns represent the edges defined by the spatial (and #' temporal with `group_times`) thresholds. #' -#' Note: unlike many other functions (eg. `group_pts`) in `spatsoc`, -#' `edge_dist` needs to be reassigned. See details in -#' [FAQ](https://docs.ropensci.org/spatsoc/articles/faq.html). +#' Note: unlike many other functions (eg. `group_pts`) in `spatsoc`, +#' `edge_dist` needs to be reassigned. See details in +#' [FAQ](https://docs.ropensci.org/spatsoc/articles/faq.html). #' #' @export #' #' @family Edge-list generation #' @family Direction functions +#' @seealso [sf::st_distance()] #' #' @examples #' # Load data.table @@ -86,6 +92,7 @@ #' id = 'ID', #' coords = c('X', 'Y'), #' timegroup = 'timegroup', +#' crs = 32736, #' returnDist = TRUE, #' fillNA = TRUE #' ) @@ -95,7 +102,9 @@ edge_dist <- function( id = NULL, coords = NULL, timegroup, + crs = NULL, splitBy = NULL, + geometry = 'geometry', returnDist = FALSE, fillNA = TRUE) { # due to NSE notes in R CMD check @@ -104,11 +113,6 @@ edge_dist <- function( assert_not_null(DT) assert_is_data_table(DT) assert_not_missing(threshold) - assert_inherits(threshold, c('numeric', 'NULL')) - - if (is.numeric(threshold)) { - assert_relation(threshold, `>`, 0) - } assert_not_null(id) @@ -118,9 +122,6 @@ edge_dist <- function( check_cols <- c(timegroup, id, coords, splitBy) assert_are_colnames(DT, check_cols) - assert_length(coords, 2) - assert_col_inherits(DT, coords, 'numeric') - if (any(unlist(lapply(DT[, .SD, .SDcols = timegroup], class)) %in% c('POSIXct', 'POSIXlt', 'Date', 'IDate', 'ITime', 'character'))) { warning( @@ -155,50 +156,150 @@ edge_dist <- function( data.table::setnames(DT, 'splitBy', 'split_by') } - if (is.null(threshold)) { - edges <- DT[, { - - distMatrix <- - as.matrix(stats::dist(.SD[, 2:3], method = 'euclidean')) - diag(distMatrix) <- NA - - if (returnDist) { - l <- data.table::data.table( - ID1 = .SD[[1]][rep(seq_len(nrow(distMatrix)), ncol(distMatrix))], - ID2 = .SD[[1]][rep(seq_len(ncol(distMatrix)), each = nrow(distMatrix))], - distance = c(distMatrix) - )[ID1 != ID2] - } else { - l <- data.table::data.table( - ID1 = .SD[[1]][rep(seq_len(nrow(distMatrix)), ncol(distMatrix))], - ID2 = .SD[[1]][rep(seq_len(ncol(distMatrix)), each = nrow(distMatrix))] - )[ID1 != ID2] + + if (is.null(coords)) { + if (!is.null(crs)) { + message('crs argument is ignored when coords are null, using geometry') + } + + assert_are_colnames(DT, geometry, ', did you run get_geometry()?') + assert_col_inherits(DT, geometry, 'sfc_POINT') + + crs <- sf::st_crs(DT[[geometry]]) + + use_dist <- isFALSE(sf::st_is_longlat(crs)) || identical(crs, sf::NA_crs_) + + if (is.null(threshold)) { + edges <- DT[, { + distMatrix <- calc_distance( + geometry_a = geo, + use_dist = use_dist + ) + diag(distMatrix) <- NA + + if (returnDist) { + l <- data.table::data.table( + ID1 = id[rep(seq_len(nrow(distMatrix)), ncol(distMatrix))], + ID2 = id[rep(seq_len(ncol(distMatrix)), each = nrow(distMatrix))], + distance = c(distMatrix) + )[ID1 != ID2] + } else { + l <- data.table::data.table( + ID1 = id[rep(seq_len(nrow(distMatrix)), ncol(distMatrix))], + ID2 = id[rep(seq_len(ncol(distMatrix)), each = nrow(distMatrix))] + )[ID1 != ID2] + } + l + }, + by = splitBy, + env = list(geo = geometry, id = id)] + } else { + assert_threshold(threshold, crs) + + if (!inherits(threshold, 'units') && !identical(crs, sf::NA_crs_) && + !use_dist) { + threshold <- units::as_units(threshold, units(sf::st_crs(crs)$SemiMajor)) } - l - }, - by = splitBy, .SDcols = c(id, coords)] + + edges <- DT[, { + distMatrix <- calc_distance( + geometry_a = geo, + use_dist = use_dist + ) + diag(distMatrix) <- NA + + w <- which(distMatrix < threshold, arr.ind = TRUE) + + if (returnDist) { + l <- list(ID1 = id[w[, 1]], + ID2 = id[w[, 2]], + distance = distMatrix[w]) + } else { + l <- list(ID1 = id[w[, 1]], + ID2 = id[w[, 2]]) + } + l + }, + by = splitBy, + env = list(geo = geometry, id = id)] + } } else { - edges <- DT[, { + if (is.null(crs)) { + crs <- sf::NA_crs_ + } + + assert_length(coords, 2) + assert_col_inherits(DT, coords, 'numeric') + + xcol <- data.table::first(coords) + ycol <- data.table::last(coords) - distMatrix <- - as.matrix(stats::dist(.SD[, 2:3], method = 'euclidean')) - diag(distMatrix) <- NA + if (is.null(crs)) { + crs <- sf::NA_crs_ + } - w <- which(distMatrix < threshold, arr.ind = TRUE) + use_dist <- isFALSE(sf::st_is_longlat(crs)) || identical(crs, sf::NA_crs_) - if (returnDist) { - l <- list(ID1 = .SD[[1]][w[, 1]], - ID2 = .SD[[1]][w[, 2]], - distance = distMatrix[w]) - } else { - l <- list(ID1 = .SD[[1]][w[, 1]], - ID2 = .SD[[1]][w[, 2]]) + if (is.null(threshold)) { + edges <- DT[, { + distMatrix <- calc_distance( + x_a = x, + y_a = y, + crs = crs, + use_dist = use_dist + ) + diag(distMatrix) <- NA + + if (returnDist) { + l <- data.table::data.table( + ID1 = id[rep(seq_len(nrow(distMatrix)), ncol(distMatrix))], + ID2 = id[rep(seq_len(ncol(distMatrix)), each = nrow(distMatrix))], + distance = c(distMatrix) + )[ID1 != ID2] + } else { + l <- data.table::data.table( + ID1 = id[rep(seq_len(nrow(distMatrix)), ncol(distMatrix))], + ID2 = id[rep(seq_len(ncol(distMatrix)), each = nrow(distMatrix))] + )[ID1 != ID2] + } + l + }, + by = splitBy, + env = list(x = xcol, y = ycol, id = id)] + } else { + + assert_threshold(threshold, crs) + + if (!inherits(threshold, 'units') && !identical(crs, sf::NA_crs_) && + !use_dist) { + threshold <- units::as_units(threshold, units(sf::st_crs(crs)$SemiMajor)) } - l - }, - by = splitBy, .SDcols = c(id, coords)] - } + edges <- DT[, { + distMatrix <- calc_distance( + x_a = x, + y_a = y, + crs = crs, + use_dist = use_dist + ) + diag(distMatrix) <- NA + + w <- which(distMatrix < threshold, arr.ind = TRUE) + + if (returnDist) { + l <- list(ID1 = id[w[, 1]], + ID2 = id[w[, 2]], + distance = distMatrix[w]) + } else { + l <- list(ID1 = id[w[, 1]], + ID2 = id[w[, 2]]) + } + l + }, + by = splitBy, + env = list(id = id, x = xcol, y = ycol)] + } + } if (fillNA) { merge(edges, diff --git a/R/edge_nn.R b/R/edge_nn.R index a252938e..6332fbd6 100644 --- a/R/edge_nn.R +++ b/R/edge_nn.R @@ -1,64 +1,64 @@ #' Nearest neighbour based edge-lists #' -#' `edge_nn` returns edge-lists defined by the nearest neighbour. The -#' function expects a `data.table` with relocation data, individual -#' identifiers and a threshold argument. The threshold argument is used to -#' specify the criteria for distance between points which defines a group. -#' Relocation data should be in two columns representing the X and Y -#' coordinates. -#' -#' The `DT` must be a `data.table`. If your data is a -#' `data.frame`, you can convert it by reference using -#' [data.table::setDT()]. -#' -#' The `id`, `coords`, `timegroup` (and optional `splitBy`) -#' arguments expect the names of a column in `DT` which correspond to the -#' individual identifier, X and Y coordinates, timegroup (generated by -#' `group_times`) and additional grouping columns. -#' -#' The `threshold` must be provided in the units of the coordinates. The -#' `threshold` must be larger than 0. The coordinates must be planar -#' coordinates (e.g.: UTM). In the case of UTM, a `threshold = 50` would -#' indicate a 50 m distance threshold. -#' -#' The `timegroup` argument is required to define the temporal groups -#' within which edge nearest neighbours are calculated. The intended framework -#' is to group rows temporally with `group_times` then spatially -#' with `edge_nn`. If you have already calculated temporal groups without -#' `group_times`, you can pass this column to the `timegroup` -#' argument. Note that the expectation is that each individual will be observed -#' only once per timegroup. Caution that accidentally including huge numbers of -#' rows within timegroups can overload your machine since all pairwise distances -#' are calculated within each timegroup. -#' -#' The `splitBy` argument offers further control over grouping. If within -#' your `DT`, you have multiple populations, subgroups or other distinct -#' parts, you can provide the name of the column which identifies them to -#' `splitBy`. `edge_nn` will only consider rows within each -#' `splitBy` subgroup. +#' `edge_nn` returns edge-lists defined by the nearest neighbour. The function +#' expects a `data.table` with relocation data, individual identifiers and a +#' threshold argument. The threshold argument is used to specify the criteria +#' for distance between points which defines a group. Relocation data should be +#' in two columns representing the X and Y coordinates, or in a geometry column +#' prepared by the helper function [get_geometry()]. +#' +#' The `DT` must be a `data.table`. If your data is a `data.frame`, you can +#' convert it by reference using [data.table::setDT()]. +#' +#' The `id`, `timegroup` (and optional `splitBy`) arguments expect the names of +#' columns in `DT` which correspond to the individual identifier, and timegroup +#' (generated by `group_times`) and additional grouping columns. +#' +#' If a `threshold` is provided, it should match the units of the coordinates. +#' The `threshold` can be provided with units specified using the units package +#' (eg. `threshold = units::set_units(10, m)`) which will be checked against the +#' units of the coordinates using the `crs`. If units are not specified, the +#' `threshold` is assumed to be in the units of the coordinates. +#' +#' The `timegroup` argument is required to define the temporal groups within +#' which edge nearest neighbours are calculated. The intended framework is to +#' group rows temporally with `group_times` then spatially with `edge_nn`. If +#' you have already calculated temporal groups without `group_times`, you can +#' pass this column to the `timegroup` argument. Note that the expectation is +#' that each individual will be observed only once per timegroup. Caution that +#' accidentally including huge numbers of rows within timegroups can overload +#' your machine since all pairwise distances are calculated within each +#' timegroup. +#' +#' The `splitBy` argument offers further control over grouping. If within your +#' `DT`, you have multiple populations, subgroups or other distinct parts, you +#' can provide the name of the column which identifies them to `splitBy`. +#' `edge_nn` will only consider rows within each `splitBy` subgroup. +#' +#' See below under "Interface" for details on providing coordinates and under +#' "Distance function" for details on underlying distance function used. +#' +#' @inheritSection distance_to_centroid Interface +#' @inheritSection distance_to_centroid Distance function #' #' @param threshold (optional) spatial distance threshold to set maximum #' distance between an individual and their neighbour. -#' @param returnDist logical indicating if the distance between individuals -#' should be returned. If FALSE (default), only ID, NN columns (and timegroup, -#' splitBy columns if provided) are returned. If TRUE, another column -#' "distance" is returned indicating the distance between ID and NN. #' @inheritParams group_pts +#' @inheritParams edge_dist #' -#' @return `edge_nn` returns a `data.table` with three columns: -#' timegroup, ID and NN. If 'returnDist' is TRUE, column 'distance' is -#' returned indicating the distance between ID and NN. -#' -#' The ID and NN columns represent the edges defined by the nearest neighbours -#' (and temporal thresholds with `group_times`). +#' @return `edge_nn` returns a `data.table` with three columns: timegroup, ID +#' and NN. If 'returnDist' is TRUE, a column 'distance' is returned indicating +#' the distance between ID and NN. The ID and NN columns represent the edges +#' defined by the nearest neighbours (and temporal thresholds with +#' `group_times`). #' #' If an individual was alone in a timegroup or splitBy, or did not have any #' neighbours within the threshold distance, they are assigned NA for nearest #' neighbour. #' -#' Note: unlike many other functions (eg. `group_pts`) in `spatsoc`, -#' `edge_nn` needs to be reassigned. See details in -#' [FAQ](https://docs.ropensci.org/spatsoc/articles/faq.html). +#' Note: unlike many other functions (eg. `group_pts`) in `spatsoc`, `edge_nn` +#' needs to be reassigned. See details in +#' [FAQ](https://docs.ropensci.org/spatsoc/articles/faq.html). #' #' #' @export @@ -101,8 +101,10 @@ edge_nn <- function( id = NULL, coords = NULL, timegroup, + crs = NULL, splitBy = NULL, threshold = NULL, + geometry = 'geometry', returnDist = FALSE) { # NSE N <- NULL @@ -110,22 +112,14 @@ edge_nn <- function( assert_not_null(DT) assert_is_data_table(DT) - if (!is.null(threshold)) { - assert_inherits(threshold, 'numeric') - assert_relation(threshold, `>`, 0) - } - assert_not_null(id) assert_not_missing(timegroup) assert_not_null(timegroup) - check_cols <- c(timegroup, id, coords, splitBy) + check_cols <- c(timegroup, id, splitBy) assert_are_colnames(DT, check_cols) - assert_length(coords, 2) - assert_col_inherits(DT, coords, 'numeric') - if (any(unlist(lapply(DT[, .SD, .SDcols = timegroup], class)) %in% c('POSIXct', 'POSIXlt', 'Date', 'IDate', 'ITime', 'character'))) { warning( @@ -148,7 +142,7 @@ edge_nn <- function( } splitBy <- c(splitBy, timegroup) - if (DT[, .N, by = c(id, splitBy, timegroup)][N > 1, sum(N)] != 0) { + if (DT[, .N, by = c(id, splitBy)][N > 1, sum(N)] != 0) { warning( strwrap( prefix = " ", @@ -160,31 +154,130 @@ edge_nn <- function( ) } - DT[, { + if (is.null(coords)) { + if (!is.null(crs)) { + message('crs argument is ignored when coords are null, using geometry') + } + + assert_are_colnames(DT, geometry, ', did you run get_geometry()?') + assert_col_inherits(DT, geometry, 'sfc_POINT') + + crs <- sf::st_crs(DT[[geometry]]) + + use_dist <- isFALSE(sf::st_is_longlat(crs)) || identical(crs, sf::NA_crs_) - distMatrix <- - as.matrix(stats::dist(.SD[, 2:3], method = 'euclidean')) - diag(distMatrix) <- NA + if (!is.null(threshold)) { + assert_threshold(threshold, crs) - if (is.null(threshold)) { - wm <- apply(distMatrix, MARGIN = 2, which.min) - } else { - distMatrix[distMatrix > threshold] <- NA - wm <- apply(distMatrix, MARGIN = 2, - function(x) ifelse(sum(!is.na(x)) > 0, which.min(x), NA)) + if (!inherits(threshold, 'units') && !identical(crs, sf::NA_crs_) && + !use_dist) { + threshold <- units::as_units(threshold, units(sf::st_crs(crs)$SemiMajor)) + } } - if (returnDist) { - w <- wm + (length(wm) * (as.numeric(names(wm)) - 1)) - l <- list(ID = .SD[[1]][as.numeric(names(wm))], - NN = .SD[[1]][wm], - distance = distMatrix[w]) - } else { - l <- list(ID = .SD[[1]][as.numeric(names(wm))], - NN = .SD[[1]][wm]) + DT[, { + + distMatrix <- calc_distance( + geometry_a = geo, + use_dist = use_dist + ) + diag(distMatrix) <- NA + + if (!is.null(threshold)) { + distMatrix[distMatrix > threshold] <- NA + } + wm <- as.numeric(apply(distMatrix, MARGIN = 2, which.min)) + + out_id <- id + if (all(is.na(wm))) { + out_nn <- NA_character_ + if (returnDist) { + out_dist <- NA_real_ + } + } else { + out_nn <- id[wm] + if (returnDist) { + w <- wm + (length(wm) * (seq_along(wm) - 1)) + out_dist <- distMatrix[w] + } + } + + l <- list(ID = out_id, + NN = out_nn) + + if (returnDist) { + l <- c(l, list(distance = out_dist)) + } + + l + }, + by = c(splitBy), + env = list(geo = geometry, id = id)] + } else { + if (is.null(crs)) { + crs <- sf::NA_crs_ + } + + assert_length(coords, 2) + assert_are_colnames(DT, coords) + assert_col_inherits(DT, coords, 'numeric') + + xcol <- data.table::first(coords) + ycol <- data.table::last(coords) + + use_dist <- isFALSE(sf::st_is_longlat(crs)) || identical(crs, sf::NA_crs_) + + if (!is.null(threshold)) { + assert_threshold(threshold, crs) + + if (!inherits(threshold, 'units') && !identical(crs, sf::NA_crs_) && + !use_dist) { + threshold <- units::as_units(threshold, units(sf::st_crs(crs)$SemiMajor)) + } } - l - }, - by = splitBy, .SDcols = c(id, coords)] + + DT[, { + + distMatrix <- calc_distance( + x_a = x, + y_a = y, + crs = crs, + use_dist = use_dist + ) + diag(distMatrix) <- NA + + if (!is.null(threshold)) { + distMatrix[distMatrix > threshold] <- NA + } + wm <- as.numeric(apply(distMatrix, MARGIN = 2, which.min)) + + out_id <- id + if (all(is.na(wm))) { + out_nn <- NA_character_ + if (returnDist) { + out_dist <- NA_real_ + } + } else { + out_nn <- id[wm] + if (returnDist) { + w <- wm + (length(wm) * (seq_along(wm) - 1)) + out_dist <- distMatrix[w] + } + } + + l <- list(ID = out_id, + NN = out_nn) + + if (returnDist) { + l <- c(l, list(distance = out_dist)) + } + + l + }, + by = c(splitBy), + env = list(x = xcol, y = ycol, id = id)] + } + + } diff --git a/R/group_pts.R b/R/group_pts.R index b686e560..0455676a 100644 --- a/R/group_pts.R +++ b/R/group_pts.R @@ -4,22 +4,25 @@ #' `data.table` with relocation data, individual identifiers and a #' threshold argument. The threshold argument is used to specify the criteria #' for distance between points which defines a group. Relocation data should be -#' in two columns representing the X and Y coordinates. +#' in two columns representing the X and Y coordinates, or in a +#' geometry column prepared by the helper function +#' [get_geometry()]. #' #' The `DT` must be a `data.table`. If your data is a #' `data.frame`, you can convert it by reference using #' [data.table::setDT()] or by reassigning using #' [data.table::data.table()]. #' -#' The `id`, `coords`, `timegroup` (and optional `splitBy`) -#' arguments expect the names of a column in `DT` which correspond to the -#' individual identifier, X and Y coordinates, timegroup (typically generated by +#' The `id`, `timegroup` (and optional `splitBy`) +#' arguments expect the names of columns in `DT` which correspond to the +#' individual identifier, and timegroup (generated by #' `group_times`) and additional grouping columns. #' -#' The `threshold` must be provided in the units of the coordinates. The -#' `threshold` must be larger than 0. The coordinates must be planar -#' coordinates (e.g.: UTM). In the case of UTM, `threshold = 50` would -#' indicate a 50 m distance threshold. +#' The `threshold` provided should match the units of the coordinates. The +#' `threshold` can be provided with units specified using the units package (eg. +#' `threshold = units::set_units(10, m)`) which will be checked against the +#' units of the coordinates using the `crs`. If units are not specified, the +#' `threshold` is assumed to be in the units of the coordinates. #' #' The `timegroup` argument is required to define the temporal groups #' within which spatial groups are calculated. The intended framework is to @@ -32,13 +35,18 @@ #' rows within timegroups can overload your machine since all pairwise distances #' are calculated within each timegroup. #' -#' #' The `splitBy` argument offers further control over grouping. If within #' your `DT`, you have multiple populations, subgroups or other distinct #' parts, you can provide the name of the column which identifies them to #' `splitBy`. The grouping performed by `group_pts` will only consider #' rows within each `splitBy` subgroup. #' +#' See below under "Interface" for details on providing coordinates and under +#' "Distance function" for details on underlying distance function used. +#' +#' @inheritSection distance_to_centroid Interface +#' @inheritSection distance_to_centroid Distance function +#' #' @return `group_pts` returns the input `DT` appended with a #' `group` column. #' @@ -62,6 +70,10 @@ #' Note: the order is assumed X followed by Y column names. #' @param timegroup timegroup field in the DT within which the grouping will be #' calculated +#' @param crs numeric or character defining the coordinate reference +#' system to be passed to [sf::st_crs]. For example, either +#' `crs = "EPSG:32736"` or `crs = 32736`. If `crs = NULL`, the `crs` will be +#' internally set to `sf::NA_crs_`. #' @param splitBy (optional) character string or vector of grouping column #' name(s) upon which the grouping will be calculated #' @@ -100,28 +112,20 @@ group_pts <- function( id = NULL, coords = NULL, timegroup, - splitBy = NULL) { + crs = NULL, + splitBy = NULL, + geometry = 'geometry') { # due to NSE notes in R CMD check N <- withinGroup <- ..id <- ..coords <- group <- NULL assert_not_null(DT) assert_is_data_table(DT) - - assert_not_null(threshold) - assert_inherits(threshold, 'numeric') - assert_relation(threshold, `>`, 0) - assert_not_null(id) - - assert_length(coords, 2) - + assert_not_null(threshold) assert_not_missing(timegroup) - - check_colnames <- c(timegroup, id, coords, splitBy) + check_colnames <- c(timegroup, id, splitBy) assert_are_colnames(DT, check_colnames) - assert_col_inherits(DT, coords, 'numeric') - if (!is.null(timegroup)) { if (any(unlist(lapply(DT[, .SD, .SDcols = timegroup], class)) %in% c('POSIXct', 'POSIXlt', 'Date', 'IDate', 'ITime', 'character'))) { @@ -136,11 +140,6 @@ group_pts <- function( } } - if ('group' %in% colnames(DT)) { - message('group column will be overwritten by this function') - data.table::set(DT, j = 'group', value = NULL) - } - if (DT[, .N, by = c(id, splitBy, timegroup)][N > 1, sum(N)] != 0) { warning( strwrap( @@ -153,19 +152,91 @@ group_pts <- function( ) } - DT[, withinGroup := { - distMatrix <- - as.matrix(stats::dist(cbind( - get(..coords[1]), get(..coords[2]) - ), - method = 'euclidean')) - graphAdj <- - igraph::graph_from_adjacency_matrix(distMatrix <= threshold) - igraph::components(graphAdj)$membership - }, - by = c(splitBy, timegroup), .SDcols = c(coords, id)] - DT[, group := .GRP, - by = c(splitBy, timegroup, 'withinGroup')] + if (is.null(coords)) { + if (!is.null(crs)) { + message('crs argument is ignored when coords are null, using geometry') + } + + assert_are_colnames(DT, geometry, ', did you run get_geometry()?') + assert_col_inherits(DT, geometry, 'sfc_POINT') + + crs <- sf::st_crs(DT[[geometry]]) + assert_threshold(threshold, crs) + + use_dist <- isFALSE(sf::st_is_longlat(crs)) || identical(crs, sf::NA_crs_) + + if (!inherits(threshold, 'units') && !identical(crs, sf::NA_crs_) && + !use_dist) { + threshold <- units::as_units(threshold, units(sf::st_crs(crs)$SemiMajor)) + } + + DT[!sf::st_is_empty(geo), withinGroup := { + distMatrix <- calc_distance( + geometry_a = geo, + use_dist = use_dist + ) + graphAdj <- + igraph::graph_from_adjacency_matrix(distMatrix <= threshold) + igraph::components(graphAdj)$membership + }, + by = c(splitBy, timegroup), + env = list(geo = geometry)] + + if ('group' %in% colnames(DT)) { + message('group column will be overwritten by this function') + data.table::set(DT, j = 'group', value = NULL) + } + + DT[!sf::st_is_empty(geo), group := .GRP, + by = c(splitBy, timegroup, 'withinGroup'), + env = list(geo = geometry)] + } else { + if (is.null(crs)) { + crs <- sf::NA_crs_ + } + + assert_threshold(threshold, crs) + assert_length(coords, 2) + assert_are_colnames(DT, coords) + assert_col_inherits(DT, coords, 'numeric') + + xcol <- data.table::first(coords) + ycol <- data.table::last(coords) + + if ('group' %in% colnames(DT)) { + message('group column will be overwritten by this function') + data.table::set(DT, j = 'group', value = NULL) + } + + use_dist <- isFALSE(sf::st_is_longlat(crs)) || identical(crs, sf::NA_crs_) + + if (!inherits(threshold, 'units') && !identical(crs, sf::NA_crs_) && + !use_dist) { + threshold <- units::as_units(threshold, units(sf::st_crs(crs)$SemiMajor)) + } + + DT[!is.na(x) & !is.na(y), + withinGroup := { + distMatrix <- calc_distance( + x_a = x, y_a = y, crs = crs, + use_dist = use_dist + ) + graphAdj <- + igraph::graph_from_adjacency_matrix(distMatrix <= threshold) + igraph::components(graphAdj)$membership + }, + by = c(splitBy, timegroup), + env = list(x = xcol, y = ycol) + ] + + DT[!is.na(x) & !is.na(y), + group := .GRP, + by = c(splitBy, timegroup, 'withinGroup'), + env = list(x = xcol, y = ycol) + ] + } + data.table::set(DT, j = 'withinGroup', value = NULL) + return(DT[]) } diff --git a/R/internal.R b/R/internal.R index a410f0aa..1bf3c9c4 100644 --- a/R/internal.R +++ b/R/internal.R @@ -99,7 +99,7 @@ assert_not_null <- function(x, ...) { return(invisible(NULL)) } -assert_relation <- function(x, fun, y, ...) { +assert_relation <- function(x, fun, y, ..., n = 1) { if (!(fun(x, y))) { rlang::abort( paste0( @@ -110,11 +110,10 @@ assert_relation <- function(x, fun, y, ...) { rlang::caller_arg(y), ... ), - call = rlang::caller_env() + call = rlang::caller_env(n = n) ) - } else { - return(invisible(NULL)) } + return(invisible(NULL)) } assert_threshold <- function(threshold = NULL, crs = NULL) { @@ -125,7 +124,7 @@ assert_threshold <- function(threshold = NULL, crs = NULL) { assert_units_match(threshold, sf::st_crs(crs)$SemiMajor, n = 2) assert_relation(threshold, `>`, units::as_units(0, units(threshold))) } - } else { + } else if (inherits(threshold, 'numeric')){ if (any(is.null(crs), is.na(crs))) { assert_relation(threshold, `>`, 0) } else { @@ -135,6 +134,10 @@ assert_threshold <- function(threshold = NULL, crs = NULL) { n = 2) } return(invisible(NULL)) + } else if (is.null(threshold)) { + return(invisible(NULL)) + } else { + rlang::abort('threshold must be of class numeric, units, or NULL') } } @@ -205,7 +208,6 @@ calc_centroid <- function(geometry, x, y, crs) { } } - #' Calculate direction #' #' **Internal function** - not developed to be used outside of spatsoc functions @@ -290,7 +292,7 @@ calc_direction <- function( #' #' **Internal function** - not developed to be used outside of spatsoc functions #' -#' Calculate distance using [sf::st_distance()] for one of the following combinations: +#' Calculate distance for one of the following combinations: #' - the distance matrix of points in geometry_a #' - the distance matrix of points in x_a, y_a #' - the pairwise distance between points in geometry_a and geometry_b @@ -305,10 +307,26 @@ calc_direction <- function( #' @param y_a,y_b Y coordinate column, numeric #' @param crs crs for x_a, y_a (and if provided, x_b, y_b) coordinates, #' ignored for geometry_a and geometry_b arguments +#' @param use_dist boolean predetermine if distance calculated via dist #' #' @returns #' -#' Distance with unit of measurement, see details in [sf::st_distance()] +#' The underlying distance function used depends on the crs of the coordinates +#' or geometry provided. +#' +#' - If the crs is longlat degrees (as determined by +#' [sf::st_is_longlat()]), the distance function is [sf::st_distance()] which +#' passes to [s2::s2_distance()] if [sf::sf_use_s2()] is TRUE and +#' [lwgeom::st_geod_distance()] if [sf::sf_use_s2()] is FALSE. The distance +#' returned has units set according to the crs. +#' +#' - If the crs is not longlat degrees (eg. NULL, NA_crs_, or projected), the +#' distance function used is [stats::dist()] (or Euclidean distance for +#' pairwise distances), maintaining expected behaviour from previous versions. +#' The distance returned does not have units set. +#' +#' +#' Note: in both cases, if the coordinates are NA then the result will be NA. #' #' @keywords internal #' @examples @@ -322,50 +340,96 @@ calc_direction <- function( #' Y = c(0, 0, 5, 5, 0, 0, NA_real_, NA_real_) #' ) #' # E, N, W, S -#' example[, spatsoc:::calc_distance(x_a = X, y_a = Y, crs = 4326)] +#' example[, spatsoc:::calc_distance(x_a = X, y_a = Y, crs = 4326, use_dist = FALSE)] calc_distance <- function( - geometry_a, geometry_b, - x_a, y_a, - x_b, y_b, - crs) { - if (!missing(geometry_a) && missing(x_a) && missing(y_a) && - missing(x_b) && missing(y_b)) { - if (!missing(geometry_b)) { - # Pairwise - sf::st_distance(geometry_a, geometry_b, by_element = TRUE) - } else { - # Matrix - sf::st_distance(geometry_a, by_element = FALSE) - } - } else if (missing(geometry_a) && !missing(x_a) && !missing(y_a)) { - if (!missing(x_b) && !missing(y_b)) { - # Pairwise - sf::st_distance( - x = sf::st_as_sf(data.frame(x_a, y_a), crs = crs, coords = seq.int(2), - na.fail = FALSE), - y = sf::st_as_sf(data.frame(x_b, y_b), crs = crs, coords = seq.int(2), - na.fail = FALSE), - by_element = TRUE - ) - } else { - # Matrix - sf::st_distance( - x = sf::st_as_sf(data.frame(x_a, y_a), crs = crs, coords = seq.int(2), - na.fail = FALSE), - by_element = FALSE - ) - } - } else { - rlang::abort(c( - 'arguments incorrectly provided, use one of the following combinations:', - '1. geometry_a', - '2. geometry_a and geometry_b', - '3. x_a, y_a', - '4. x_a, y_a, and x_b, y_b' - )) - } + geometry_a, geometry_b, + x_a, y_a, + x_b, y_b, + crs, + use_dist) { + if (!missing(geometry_a) && missing(x_a) && missing(y_a) && + missing(x_b) && missing(y_b)) { + if (!missing(geometry_b)) { + # Pairwise + if (use_dist) { + calc_distance_pairwise(geometry_a, geometry_b) + } else { + sf::st_distance(geometry_a, geometry_b, by_element = TRUE) + } + } else { + # Matrix + if (use_dist) { + coords <- sf::st_coordinates(geometry_a) + m <- as.matrix(stats::dist(coords)) + m[is.na(coords[, 1]),] <- NA_real_ + m[, is.na(coords[, 2])] <- NA_real_ + m + } else { + sf::st_distance(geometry_a, by_element = FALSE) + } + } + } else if (missing(geometry_a) && !missing(x_a) && !missing(y_a)) { + if (!missing(x_b) && !missing(y_b)) { + # Pairwise + if (use_dist) { + calc_distance_pairwise(x_a = x_a, y_a = y_a, x_b = x_b, y_b = y_b) + } else { + sf::st_distance( + x = sf::st_as_sf(data.frame(x_a, y_a), + crs = crs, coords = seq.int(2), + na.fail = FALSE + ), + y = sf::st_as_sf(data.frame(x_b, y_b), + crs = crs, coords = seq.int(2), + na.fail = FALSE + ), + by_element = TRUE + ) + } + } else { + # Matrix + if (use_dist) { + m <- as.matrix(stats::dist(cbind(x_a, y_a))) + m[is.na(x_a),] <- NA_real_ + m[, is.na(y_a)] <- NA_real_ + m + } else { + sf::st_distance( + x = sf::st_as_sf(data.frame(x_a, y_a), + crs = crs, coords = seq.int(2), + na.fail = FALSE + ), + by_element = FALSE + ) + } + } + } else { + rlang::abort(c( + 'arguments incorrectly provided, use one of the following combinations:', + '1. geometry_a', + '2. geometry_a and geometry_b', + '3. x_a, y_a, and crs', + '4. x_a, y_a, x_b, y_b, and crs' + )) + } } +# Internal calc pairwise distance used in internal calc_distance +calc_distance_pairwise <- function(geometry_a, geometry_b, + x_a, y_a, + x_b, y_b) { + if (!missing(geometry_a) && missing(x_a) && missing(y_a) && + !missing(geometry_b)) { + a <- sf::st_coordinates(geometry_a) + b <- sf::st_coordinates(geometry_b) + sqrt((a[, 1] - b[, 1]) ^ 2 + (a[, 2] - b[, 2]) ^ 2) + } else if (missing(geometry_a) && !missing(x_a) && !missing(y_a) && + !missing(x_b) && !missing(y_b)) { + sqrt((x_a - x_b) ^ 2 + (y_a - y_b) ^ 2) + } +} + + #' Difference of two angles measured in radians #' #' **Internal function** - not developed to be used outside of spatsoc functions diff --git a/R/leader_direction_group.R b/R/leader_direction_group.R index a9746efe..7479b3bc 100644 --- a/R/leader_direction_group.R +++ b/R/leader_direction_group.R @@ -159,7 +159,8 @@ leader_direction_group <- function( DT[, rank_position_group_direction := data.table::frank(-position_group_direction, - ties.method = ties.method), + ties.method = ties.method, + na.last = 'keep'), by = c(group)] } diff --git a/man/build_lines.Rd b/man/build_lines.Rd index 4f4b367f..1feac2a9 100644 --- a/man/build_lines.Rd +++ b/man/build_lines.Rd @@ -19,7 +19,8 @@ build_lines( \item{crs}{numeric or character defining the coordinate reference system to be passed to \link[sf:st_crs]{sf::st_crs}. For example, either -\code{crs = "EPSG:32736"} or \code{crs = 32736}.} +\code{crs = "EPSG:32736"} or \code{crs = 32736}. If \code{crs = NULL}, the \code{crs} will be +internally set to \code{sf::NA_crs_}.} \item{id}{character string of ID column name} diff --git a/man/calc_distance.Rd b/man/calc_distance.Rd index 5e9aacde..f2eadc41 100644 --- a/man/calc_distance.Rd +++ b/man/calc_distance.Rd @@ -4,7 +4,7 @@ \alias{calc_distance} \title{Calculate distance} \usage{ -calc_distance(geometry_a, geometry_b, x_a, y_a, x_b, y_b, crs) +calc_distance(geometry_a, geometry_b, x_a, y_a, x_b, y_b, crs, use_dist) } \arguments{ \item{geometry_a, geometry_b}{sfc (simple feature geometry list column) @@ -16,15 +16,31 @@ from \code{\link[=get_geometry]{get_geometry()}}} \item{crs}{crs for x_a, y_a (and if provided, x_b, y_b) coordinates, ignored for geometry_a and geometry_b arguments} + +\item{use_dist}{boolean predetermine if distance calculated via dist} } \value{ -Distance with unit of measurement, see details in \code{\link[sf:geos_measures]{sf::st_distance()}} +The underlying distance function used depends on the crs of the coordinates +or geometry provided. +\itemize{ +\item If the crs is longlat degrees (as determined by +\code{\link[sf:st_is_longlat]{sf::st_is_longlat()}}), the distance function is \code{\link[sf:geos_measures]{sf::st_distance()}} which +passes to \code{\link[s2:s2_is_collection]{s2::s2_distance()}} if \code{\link[sf:s2]{sf::sf_use_s2()}} is TRUE and +\code{\link[lwgeom:geod]{lwgeom::st_geod_distance()}} if \code{\link[sf:s2]{sf::sf_use_s2()}} is FALSE. The distance +returned has units set according to the crs. +\item If the crs is not longlat degrees (eg. NULL, NA_crs_, or projected), the +distance function used is \code{\link[stats:dist]{stats::dist()}} (or Euclidean distance for +pairwise distances), maintaining expected behaviour from previous versions. +The distance returned does not have units set. +} + +Note: in both cases, if the coordinates are NA then the result will be NA. } \description{ \strong{Internal function} - not developed to be used outside of spatsoc functions } \details{ -Calculate distance using \code{\link[sf:geos_measures]{sf::st_distance()}} for one of the following combinations: +Calculate distance for one of the following combinations: \itemize{ \item the distance matrix of points in geometry_a \item the distance matrix of points in x_a, y_a @@ -48,6 +64,6 @@ example <- data.table( Y = c(0, 0, 5, 5, 0, 0, NA_real_, NA_real_) ) # E, N, W, S -example[, spatsoc:::calc_distance(x_a = X, y_a = Y, crs = 4326)] +example[, spatsoc:::calc_distance(x_a = X, y_a = Y, crs = 4326, use_dist = FALSE)] } \keyword{internal} diff --git a/man/direction_step.Rd b/man/direction_step.Rd index 65d29e83..4ba52a92 100644 --- a/man/direction_step.Rd +++ b/man/direction_step.Rd @@ -23,7 +23,8 @@ Note: the order is assumed X followed by Y column names.} \item{crs}{numeric or character defining the coordinate reference system to be passed to \link[sf:st_crs]{sf::st_crs}. For example, either -\code{crs = "EPSG:32736"} or \code{crs = 32736}.} +\code{crs = "EPSG:32736"} or \code{crs = 32736}. If \code{crs = NULL}, the \code{crs} will be +internally set to \code{sf::NA_crs_}.} \item{splitBy}{(optional) character string or vector of grouping column name(s) upon which the grouping will be calculated} diff --git a/man/distance_to_centroid.Rd b/man/distance_to_centroid.Rd index c20fb2c0..3434aff3 100644 --- a/man/distance_to_centroid.Rd +++ b/man/distance_to_centroid.Rd @@ -8,8 +8,10 @@ distance_to_centroid( DT = NULL, coords = NULL, group = "group", + crs = NULL, return_rank = FALSE, - ties.method = NULL + ties.method = NULL, + geometry = "geometry" ) } \arguments{ @@ -22,48 +24,91 @@ Note: the order is assumed X followed by Y column names.} \item{group}{group column name, generated by \code{group_pts}, default 'group'} +\item{crs}{numeric or character defining the coordinate reference +system to be passed to \link[sf:st_crs]{sf::st_crs}. For example, either +\code{crs = "EPSG:32736"} or \code{crs = 32736}. If \code{crs = NULL}, the \code{crs} will be +internally set to \code{sf::NA_crs_}.} + \item{return_rank}{logical if rank distance should also be returned, default FALSE} \item{ties.method}{see \code{\link[data.table:frank]{?data.table::frank()}}} } \value{ -\code{distance_to_centroid} returns the input \code{DT} appended with -a \code{distance_centroid} column indicating the distance to group centroid -and, optionally, a \code{rank_distance_centroid} column indicating the -within group rank distance to group centroid (if \code{return_rank = TRUE}). +\code{distance_to_centroid} returns the input \code{DT} appended with a +\code{distance_centroid} column indicating the distance to the group centroid +and, optionally, a \code{rank_distance_centroid} column indicating the within +group rank distance to the group centroid (if \code{return_rank = TRUE}). A message is returned when \code{distance_centroid} and optional -\code{rank_distance_centroid} columns already exist in the input \code{DT}, -because they will be overwritten. +\code{rank_distance_centroid} columns already exist in the input \code{DT}, because +they will be overwritten. See details for appending outputs using modify-by-reference in the \href{https://docs.ropensci.org/spatsoc/articles/faq.html}{FAQ}. } \description{ \code{distance_to_centroid} calculates the distance of each relocation to the -centroid of the spatiotemporal group identified by \code{group_pts}. The -function expects a \code{data.table} with relocation data appended with a -\code{group} column from \code{group_pts} and centroid columns from -\code{centroid_group}. Relocation data should be in planar coordinates -provided in two columns representing the X and Y coordinates. +centroid of the spatiotemporal group identified by \code{group_pts}. The function +expects a \code{data.table} with relocation data appended with a \code{group} column +from \code{group_pts} and centroid columns from \code{centroid_group}. Relocation data +should be provided in two columns representing the X and Y coordinates, or in +a geometry column prepared by the helper function \code{\link[=get_geometry]{get_geometry()}}. } \details{ -The \code{DT} must be a \code{data.table}. If your data is a -\code{data.frame}, you can convert it by reference using -\code{\link[data.table:setDT]{data.table::setDT()}} or by reassigning using +The \code{DT} must be a \code{data.table}. If your data is a \code{data.frame}, you can +convert it by reference using \code{\link[data.table:setDT]{data.table::setDT()}} or by reassigning using \code{\link[data.table:data.table]{data.table::data.table()}}. -This function expects a \code{group} column present generated with the -\code{group_pts} function and centroid coordinate columns generated with the -\code{centroid_group} function. The \code{coords} and \code{group} arguments -expect the names of columns in \code{DT} which correspond to the X and Y -coordinates and group columns. The \code{return_rank} argument controls if -the rank of each individual's distance to the group centroid is also -returned. The \code{ties.method} argument is passed to -\code{data.table::frank}, see details at -\code{\link[data.table:frank]{?data.table::frank()}}. +This function expects a \code{group} column present generated with the \code{group_pts} +function and centroid coordinate column(s) generated with the +\code{centroid_group} function. The \code{group} arguments expect the names of columns +in \code{DT} which correspond to the group column. The \code{return_rank} argument +controls if the rank of each individual's distance to the group centroid is +also returned. The \code{ties.method} argument is passed to \code{data.table::frank}, +see details at \code{\link[data.table:frank]{?data.table::frank()}}. + +See below under "Interface" for details on providing coordinates and under +"Distance function" for details on underlying distance function used. +} +\section{Interface}{ + +Two interfaces are available for providing coordinates: +\enumerate{ +\item Provide \code{coords} and \code{crs}. The \code{coords} argument expects the names of +the X and Y coordinate columns. The \code{crs} argument expects a character +string or numeric defining the coordinate reference system to be passed to +\link[sf:st_crs]{sf::st_crs}. For example, for UTM zone 36S (EPSG 32736), the crs argument +is \code{crs = "EPSG:32736"} or \code{crs = 32736}. See \url{https://spatialreference.org} +for a list of EPSG codes. +\item (New!) Provide \code{geometry}. The \code{geometry} argument allows the user to +supply a \code{geometry} column that represents the coordinates as a simple +feature geometry list column. This interface expects the user to prepare +their input DT with \code{\link[=get_geometry]{get_geometry()}}. To use this interface, leave the +\code{coords} and \code{crs} arguments \code{NULL}, and the default argument for \code{geometry} +('geometry') will be used directly. } +} + +\section{Distance function}{ + + +The underlying distance function used depends on the crs of the coordinates +or geometry provided. +\itemize{ +\item If the crs is longlat degrees (as determined by +\code{\link[sf:st_is_longlat]{sf::st_is_longlat()}}), the distance function is \code{\link[sf:geos_measures]{sf::st_distance()}} which +passes to \code{\link[s2:s2_is_collection]{s2::s2_distance()}} if \code{\link[sf:s2]{sf::sf_use_s2()}} is TRUE and +\code{\link[lwgeom:geod]{lwgeom::st_geod_distance()}} if \code{\link[sf:s2]{sf::sf_use_s2()}} is FALSE. The distance +returned has units set according to the crs. +\item If the crs is not longlat degrees (eg. NULL, NA_crs_, or projected), the +distance function used is \code{\link[stats:dist]{stats::dist()}}, maintaining expected behaviour +from previous versions. The distance returned does not have units set. +} + +Note: in both cases, if the coordinates are NA then the result will be NA. +} + \examples{ # Load data.table library(data.table) @@ -90,6 +135,7 @@ distance_to_centroid( DT, coords = c('X', 'Y'), group = 'group', + crs = 32736, return_rank = TRUE ) } @@ -102,7 +148,7 @@ See examples of using distance to group centroid: } } \seealso{ -\link{centroid_group}, \link{group_pts} +\link{centroid_group}, \link{group_pts}, \code{\link[sf:geos_measures]{sf::st_distance()}} Other Distance functions: \code{\link{distance_to_leader}()}, diff --git a/man/distance_to_leader.Rd b/man/distance_to_leader.Rd index df1b68af..f0d1db16 100644 --- a/man/distance_to_leader.Rd +++ b/man/distance_to_leader.Rd @@ -4,7 +4,13 @@ \alias{distance_to_leader} \title{Distance to group leader} \usage{ -distance_to_leader(DT = NULL, coords = NULL, group = "group") +distance_to_leader( + DT = NULL, + coords = NULL, + group = "group", + crs = NULL, + geometry = "geometry" +) } \arguments{ \item{DT}{input data.table with 'rank_position_group_direction' column @@ -16,40 +22,82 @@ Note: the order is assumed X followed by Y column names.} \item{group}{group column name, generated by \code{group_pts}, default 'group'} + +\item{crs}{numeric or character defining the coordinate reference +system to be passed to \link[sf:st_crs]{sf::st_crs}. For example, either +\code{crs = "EPSG:32736"} or \code{crs = 32736}. If \code{crs = NULL}, the \code{crs} will be +internally set to \code{sf::NA_crs_}.} } \value{ -\code{distance_to_leader} returns the input \code{DT} appended with -a \code{distance_leader} column indicating the distance to the group -leader. +\code{distance_to_leader} returns the input \code{DT} appended with a +\code{distance_leader} column indicating the distance to the group leader. -A message is returned when the \code{distance_leader} column already -exist in the input \code{DT} because it will be overwritten. +A message is returned when the \code{distance_leader} column already exist in +the input \code{DT} because it will be overwritten. See details for appending outputs using modify-by-reference in the \href{https://docs.ropensci.org/spatsoc/articles/faq.html}{FAQ}. } \description{ \code{distance_to_leader} calculates the distance to the leader of each -spatiotemporal group. The function expects a \code{data.table} with -relocation data appended with a \code{rank_position_group_direction} column -indicating the ranked position along the group direction generated with -\code{leader_direction_group(return_rank = TRUE)}. Relocation data should be -in planar coordinates provided in two columns representing the X and Y -coordinates. +spatiotemporal group. The function expects a \code{data.table} with relocation +data appended with a \code{rank_position_group_direction} column indicating the +ranked position along the group direction generated with +\code{leader_direction_group(return_rank = TRUE)}. Relocation data should be in +two columns representing the X and Y coordinates, or in a geometry column +prepared by the helper function \code{\link[=get_geometry]{get_geometry()}}. } \details{ -The \code{DT} must be a \code{data.table}. If your data is a -\code{data.frame}, you can convert it by reference using -\code{\link[data.table:setDT]{data.table::setDT()}} or by reassigning using +The \code{DT} must be a \code{data.table}. If your data is a \code{data.frame}, you can +convert it by reference using \code{\link[data.table:setDT]{data.table::setDT()}} or by reassigning using \code{\link[data.table:data.table]{data.table::data.table()}}. -This function expects a \code{rank_position_group_direction} column -generated with \code{leader_direction_group(return_rank = TRUE)}, -a \code{group} column generated with the -\code{group_pts} function. The \code{coords} and \code{group} arguments -expect the names of columns in \code{DT} which correspond to the X and Y -coordinates and group columns. +This function expects a \code{rank_position_group_direction} column generated with +\code{leader_direction_group(return_rank = TRUE)}, a \code{group} column generated with +the \code{group_pts} function. The \code{group} argument expect the names of the column +in \code{DT} which corresponds to the group column. + +See below under "Interface" for details on providing coordinates and under +"Distance function" for details on underlying distance function used. +} +\section{Interface}{ + +Two interfaces are available for providing coordinates: +\enumerate{ +\item Provide \code{coords} and \code{crs}. The \code{coords} argument expects the names of +the X and Y coordinate columns. The \code{crs} argument expects a character +string or numeric defining the coordinate reference system to be passed to +\link[sf:st_crs]{sf::st_crs}. For example, for UTM zone 36S (EPSG 32736), the crs argument +is \code{crs = "EPSG:32736"} or \code{crs = 32736}. See \url{https://spatialreference.org} +for a list of EPSG codes. +\item (New!) Provide \code{geometry}. The \code{geometry} argument allows the user to +supply a \code{geometry} column that represents the coordinates as a simple +feature geometry list column. This interface expects the user to prepare +their input DT with \code{\link[=get_geometry]{get_geometry()}}. To use this interface, leave the +\code{coords} and \code{crs} arguments \code{NULL}, and the default argument for \code{geometry} +('geometry') will be used directly. } +} + +\section{Distance function}{ + + +The underlying distance function used depends on the crs of the coordinates +or geometry provided. +\itemize{ +\item If the crs is longlat degrees (as determined by +\code{\link[sf:st_is_longlat]{sf::st_is_longlat()}}), the distance function is \code{\link[sf:geos_measures]{sf::st_distance()}} which +passes to \code{\link[s2:s2_is_collection]{s2::s2_distance()}} if \code{\link[sf:s2]{sf::sf_use_s2()}} is TRUE and +\code{\link[lwgeom:geod]{lwgeom::st_geod_distance()}} if \code{\link[sf:s2]{sf::sf_use_s2()}} is FALSE. The distance +returned has units set according to the crs. +\item If the crs is not longlat degrees (eg. NULL, NA_crs_, or projected), the +distance function used is \code{\link[stats:dist]{stats::dist()}}, maintaining expected behaviour +from previous versions. The distance returned does not have units set. +} + +Note: in both cases, if the coordinates are NA then the result will be NA. +} + \examples{ # Load data.table library(data.table) @@ -93,7 +141,7 @@ leader_direction_group( ) # Calculate distance to leader -distance_to_leader(DT, coords = c('X', 'Y')) +distance_to_leader(DT, coords = c('X', 'Y'), crs = 32736) } \references{ See examples of using distance to leader and position within group: @@ -104,7 +152,8 @@ See examples of using distance to leader and position within group: } } \seealso{ -\link{direction_to_leader}, \link{leader_direction_group}, \link{group_pts} +\link{direction_to_leader}, \link{leader_direction_group}, \link{group_pts}, +\code{\link[sf:geos_measures]{sf::st_distance()}} Other Distance functions: \code{\link{distance_to_centroid}()}, diff --git a/man/edge_direction.Rd b/man/edge_direction.Rd index 1aebbc89..a382c9b1 100644 --- a/man/edge_direction.Rd +++ b/man/edge_direction.Rd @@ -29,7 +29,8 @@ Note: the order is assumed X followed by Y column names.} \item{crs}{numeric or character defining the coordinate reference system to be passed to \link[sf:st_crs]{sf::st_crs}. For example, either -\code{crs = "EPSG:32736"} or \code{crs = 32736}.} +\code{crs = "EPSG:32736"} or \code{crs = 32736}. If \code{crs = NULL}, the \code{crs} will be +internally set to \code{sf::NA_crs_}.} \item{timegroup}{character string of timegroup column name, default "timegroup"} diff --git a/man/edge_dist.Rd b/man/edge_dist.Rd index af402b79..470b48eb 100644 --- a/man/edge_dist.Rd +++ b/man/edge_dist.Rd @@ -10,7 +10,9 @@ edge_dist( id = NULL, coords = NULL, timegroup, + crs = NULL, splitBy = NULL, + geometry = "geometry", returnDist = FALSE, fillNA = TRUE ) @@ -29,13 +31,19 @@ Note: the order is assumed X followed by Y column names.} \item{timegroup}{timegroup field in the DT within which the grouping will be calculated} +\item{crs}{numeric or character defining the coordinate reference +system to be passed to \link[sf:st_crs]{sf::st_crs}. For example, either +\code{crs = "EPSG:32736"} or \code{crs = 32736}. If \code{crs = NULL}, the \code{crs} will be +internally set to \code{sf::NA_crs_}.} + \item{splitBy}{(optional) character string or vector of grouping column name(s) upon which the grouping will be calculated} \item{returnDist}{logical indicating if the distance between individuals -should be returned. If FALSE (default), only ID1, ID2 columns (and -timegroup, splitBy columns if provided) are returned. If TRUE, another -column "distance" is returned indicating the distance between ID1 and ID2.} +should be returned. If FALSE (default), only individual columns (and +timegroup, splitBy columns if provided) are returned. If TRUE, a column +"distance" is also returned indicating the distance between individuals in +the units of the \code{crs}, or if \code{crs = NULL} no units are set.} \item{fillNA}{logical indicating if NAs should be returned for individuals that were not within the threshold distance of any other. If TRUE, NAs are @@ -43,10 +51,9 @@ returned. If FALSE, only edges between individuals within the threshold distance are returned.} } \value{ -\code{edge_dist} returns a \code{data.table} with columns ID1, ID2, -timegroup (if supplied) and any columns provided in splitBy. If -'returnDist' is TRUE, column 'distance' is returned indicating the distance -between ID1 and ID2. +\code{edge_dist} returns a \code{data.table} with columns ID1, ID2, timegroup +(if supplied) and any columns provided in splitBy. If 'returnDist' is TRUE, +column 'distance' is returned indicating the distance between ID1 and ID2. The ID1 and ID2 columns represent the edges defined by the spatial (and temporal with \code{group_times}) thresholds. @@ -56,45 +63,84 @@ Note: unlike many other functions (eg. \code{group_pts}) in \code{spatsoc}, \href{https://docs.ropensci.org/spatsoc/articles/faq.html}{FAQ}. } \description{ -\code{edge_dist} returns edge-lists defined by a spatial distance within the -user defined threshold. The function expects a \code{data.table} with -relocation data, individual identifiers and a threshold argument. The -threshold argument is used to specify the criteria for distance between -points which defines a group. Relocation data should be in two columns -representing the X and Y coordinates. +\code{edge_dist} returns edge-lists defined by a spatial distance within the user +defined threshold. The function expects a \code{data.table} with relocation data, +individual identifiers and a threshold argument. The threshold argument is +used to specify the criteria for distance between points which defines a +group. Relocation data should be in two columns representing the X and Y +coordinates, or in a geometry column prepared by the helper function +\code{\link[=get_geometry]{get_geometry()}}. } \details{ -The \code{DT} must be a \code{data.table}. If your data is a -\code{data.frame}, you can convert it by reference using -\code{\link[data.table:setDT]{data.table::setDT()}}. - -The \code{id}, \code{coords}, \code{timegroup} (and optional \code{splitBy}) -arguments expect the names of a column in \code{DT} which correspond to the -individual identifier, X and Y coordinates, timegroup (generated by -\code{group_times}) and additional grouping columns. - -If provided, the \code{threshold} must be provided in the units of the -coordinates and must be larger than 0. If the \code{threshold} is NULL, the -distance to all other individuals will be returned. The coordinates must be -planar coordinates (e.g.: UTM). In the case of UTM, \code{threshold = 50} -would indicate a 50 m distance threshold. - -The \code{timegroup} argument is required to define the temporal groups -within which edges are calculated. The intended framework is to group rows -temporally with \code{group_times} then spatially with -\code{edge_dist}. If you have already calculated temporal groups without -\code{group_times}, you can pass this column to the \code{timegroup} -argument. Note that the expectation is that each individual will be observed -only once per timegroup. Caution that accidentally including huge numbers of -rows within timegroups can overload your machine since all pairwise distances -are calculated within each timegroup. - -The \code{splitBy} argument offers further control over grouping. If within -your \code{DT}, you have multiple populations, subgroups or other distinct -parts, you can provide the name of the column which identifies them to -\code{splitBy}. \code{edge_dist} will only consider rows within each -\code{splitBy} subgroup. +The \code{DT} must be a \code{data.table}. If your data is a \code{data.frame}, you can +convert it by reference using \code{\link[data.table:setDT]{data.table::setDT()}}. + +The \code{id}, \code{timegroup} (and optional \code{splitBy}) arguments expect the names of +columns in \code{DT} which correspond to the individual identifier, and timegroup +(generated by \code{group_times}) and additional grouping columns. + +The \code{threshold} provided should match the units of the coordinates. The +\code{threshold} can be provided with units specified using the units package (eg. +\code{threshold = units::set_units(10, m)}) which will be checked against the +units of the coordinates using the \code{crs}. If units are not specified, the +\code{threshold} is assumed to be in the units of the coordinates. + +The \code{timegroup} argument is required to define the temporal groups within +which edges are calculated. The intended framework is to group rows +temporally with \code{group_times} then spatially with \code{edge_dist}. If you have +already calculated temporal groups without \code{group_times}, you can pass this +column to the \code{timegroup} argument. Note that the expectation is that each +individual will be observed only once per timegroup. Caution that +accidentally including huge numbers of rows within timegroups can overload +your machine since all pairwise distances are calculated within each +timegroup. + +The \code{splitBy} argument offers further control over grouping. If within your +\code{DT}, you have multiple populations, subgroups or other distinct parts, you +can provide the name of the column which identifies them to \code{splitBy}. +\code{edge_dist} will only consider rows within each \code{splitBy} subgroup. + +See below under "Interface" for details on providing coordinates and under +"Distance function" for details on underlying distance function used. +} +\section{Interface}{ + +Two interfaces are available for providing coordinates: +\enumerate{ +\item Provide \code{coords} and \code{crs}. The \code{coords} argument expects the names of +the X and Y coordinate columns. The \code{crs} argument expects a character +string or numeric defining the coordinate reference system to be passed to +\link[sf:st_crs]{sf::st_crs}. For example, for UTM zone 36S (EPSG 32736), the crs argument +is \code{crs = "EPSG:32736"} or \code{crs = 32736}. See \url{https://spatialreference.org} +for a list of EPSG codes. +\item (New!) Provide \code{geometry}. The \code{geometry} argument allows the user to +supply a \code{geometry} column that represents the coordinates as a simple +feature geometry list column. This interface expects the user to prepare +their input DT with \code{\link[=get_geometry]{get_geometry()}}. To use this interface, leave the +\code{coords} and \code{crs} arguments \code{NULL}, and the default argument for \code{geometry} +('geometry') will be used directly. +} } + +\section{Distance function}{ + + +The underlying distance function used depends on the crs of the coordinates +or geometry provided. +\itemize{ +\item If the crs is longlat degrees (as determined by +\code{\link[sf:st_is_longlat]{sf::st_is_longlat()}}), the distance function is \code{\link[sf:geos_measures]{sf::st_distance()}} which +passes to \code{\link[s2:s2_is_collection]{s2::s2_distance()}} if \code{\link[sf:s2]{sf::sf_use_s2()}} is TRUE and +\code{\link[lwgeom:geod]{lwgeom::st_geod_distance()}} if \code{\link[sf:s2]{sf::sf_use_s2()}} is FALSE. The distance +returned has units set according to the crs. +\item If the crs is not longlat degrees (eg. NULL, NA_crs_, or projected), the +distance function used is \code{\link[stats:dist]{stats::dist()}}, maintaining expected behaviour +from previous versions. The distance returned does not have units set. +} + +Note: in both cases, if the coordinates are NA then the result will be NA. +} + \examples{ # Load data.table library(data.table) @@ -116,11 +162,14 @@ edges <- edge_dist( id = 'ID', coords = c('X', 'Y'), timegroup = 'timegroup', + crs = 32736, returnDist = TRUE, fillNA = TRUE ) } \seealso{ +\code{\link[sf:geos_measures]{sf::st_distance()}} + Other Edge-list generation: \code{\link{edge_alignment}()}, \code{\link{edge_delay}()}, diff --git a/man/edge_nn.Rd b/man/edge_nn.Rd index c6d6882c..ad225f99 100644 --- a/man/edge_nn.Rd +++ b/man/edge_nn.Rd @@ -9,8 +9,10 @@ edge_nn( id = NULL, coords = NULL, timegroup, + crs = NULL, splitBy = NULL, threshold = NULL, + geometry = "geometry", returnDist = FALSE ) } @@ -25,6 +27,11 @@ Note: the order is assumed X followed by Y column names.} \item{timegroup}{timegroup field in the DT within which the grouping will be calculated} +\item{crs}{numeric or character defining the coordinate reference +system to be passed to \link[sf:st_crs]{sf::st_crs}. For example, either +\code{crs = "EPSG:32736"} or \code{crs = 32736}. If \code{crs = NULL}, the \code{crs} will be +internally set to \code{sf::NA_crs_}.} + \item{splitBy}{(optional) character string or vector of grouping column name(s) upon which the grouping will be calculated} @@ -32,65 +39,104 @@ name(s) upon which the grouping will be calculated} distance between an individual and their neighbour.} \item{returnDist}{logical indicating if the distance between individuals -should be returned. If FALSE (default), only ID, NN columns (and timegroup, -splitBy columns if provided) are returned. If TRUE, another column -"distance" is returned indicating the distance between ID and NN.} +should be returned. If FALSE (default), only individual columns (and +timegroup, splitBy columns if provided) are returned. If TRUE, a column +"distance" is also returned indicating the distance between individuals in +the units of the \code{crs}, or if \code{crs = NULL} no units are set.} } \value{ -\code{edge_nn} returns a \code{data.table} with three columns: -timegroup, ID and NN. If 'returnDist' is TRUE, column 'distance' is -returned indicating the distance between ID and NN. - -The ID and NN columns represent the edges defined by the nearest neighbours -(and temporal thresholds with \code{group_times}). +\code{edge_nn} returns a \code{data.table} with three columns: timegroup, ID +and NN. If 'returnDist' is TRUE, a column 'distance' is returned indicating +the distance between ID and NN. The ID and NN columns represent the edges +defined by the nearest neighbours (and temporal thresholds with +\code{group_times}). If an individual was alone in a timegroup or splitBy, or did not have any neighbours within the threshold distance, they are assigned NA for nearest neighbour. -Note: unlike many other functions (eg. \code{group_pts}) in \code{spatsoc}, -\code{edge_nn} needs to be reassigned. See details in +Note: unlike many other functions (eg. \code{group_pts}) in \code{spatsoc}, \code{edge_nn} +needs to be reassigned. See details in \href{https://docs.ropensci.org/spatsoc/articles/faq.html}{FAQ}. } \description{ -\code{edge_nn} returns edge-lists defined by the nearest neighbour. The -function expects a \code{data.table} with relocation data, individual -identifiers and a threshold argument. The threshold argument is used to -specify the criteria for distance between points which defines a group. -Relocation data should be in two columns representing the X and Y -coordinates. +\code{edge_nn} returns edge-lists defined by the nearest neighbour. The function +expects a \code{data.table} with relocation data, individual identifiers and a +threshold argument. The threshold argument is used to specify the criteria +for distance between points which defines a group. Relocation data should be +in two columns representing the X and Y coordinates, or in a geometry column +prepared by the helper function \code{\link[=get_geometry]{get_geometry()}}. } \details{ -The \code{DT} must be a \code{data.table}. If your data is a -\code{data.frame}, you can convert it by reference using -\code{\link[data.table:setDT]{data.table::setDT()}}. - -The \code{id}, \code{coords}, \code{timegroup} (and optional \code{splitBy}) -arguments expect the names of a column in \code{DT} which correspond to the -individual identifier, X and Y coordinates, timegroup (generated by -\code{group_times}) and additional grouping columns. - -The \code{threshold} must be provided in the units of the coordinates. The -\code{threshold} must be larger than 0. The coordinates must be planar -coordinates (e.g.: UTM). In the case of UTM, a \code{threshold = 50} would -indicate a 50 m distance threshold. - -The \code{timegroup} argument is required to define the temporal groups -within which edge nearest neighbours are calculated. The intended framework -is to group rows temporally with \code{group_times} then spatially -with \code{edge_nn}. If you have already calculated temporal groups without -\code{group_times}, you can pass this column to the \code{timegroup} -argument. Note that the expectation is that each individual will be observed -only once per timegroup. Caution that accidentally including huge numbers of -rows within timegroups can overload your machine since all pairwise distances -are calculated within each timegroup. - -The \code{splitBy} argument offers further control over grouping. If within -your \code{DT}, you have multiple populations, subgroups or other distinct -parts, you can provide the name of the column which identifies them to -\code{splitBy}. \code{edge_nn} will only consider rows within each -\code{splitBy} subgroup. +The \code{DT} must be a \code{data.table}. If your data is a \code{data.frame}, you can +convert it by reference using \code{\link[data.table:setDT]{data.table::setDT()}}. + +The \code{id}, \code{timegroup} (and optional \code{splitBy}) arguments expect the names of +columns in \code{DT} which correspond to the individual identifier, and timegroup +(generated by \code{group_times}) and additional grouping columns. + +If a \code{threshold} is provided, it should match the units of the coordinates. +The \code{threshold} can be provided with units specified using the units package +(eg. \code{threshold = units::set_units(10, m)}) which will be checked against the +units of the coordinates using the \code{crs}. If units are not specified, the +\code{threshold} is assumed to be in the units of the coordinates. + +The \code{timegroup} argument is required to define the temporal groups within +which edge nearest neighbours are calculated. The intended framework is to +group rows temporally with \code{group_times} then spatially with \code{edge_nn}. If +you have already calculated temporal groups without \code{group_times}, you can +pass this column to the \code{timegroup} argument. Note that the expectation is +that each individual will be observed only once per timegroup. Caution that +accidentally including huge numbers of rows within timegroups can overload +your machine since all pairwise distances are calculated within each +timegroup. + +The \code{splitBy} argument offers further control over grouping. If within your +\code{DT}, you have multiple populations, subgroups or other distinct parts, you +can provide the name of the column which identifies them to \code{splitBy}. +\code{edge_nn} will only consider rows within each \code{splitBy} subgroup. + +See below under "Interface" for details on providing coordinates and under +"Distance function" for details on underlying distance function used. +} +\section{Interface}{ + +Two interfaces are available for providing coordinates: +\enumerate{ +\item Provide \code{coords} and \code{crs}. The \code{coords} argument expects the names of +the X and Y coordinate columns. The \code{crs} argument expects a character +string or numeric defining the coordinate reference system to be passed to +\link[sf:st_crs]{sf::st_crs}. For example, for UTM zone 36S (EPSG 32736), the crs argument +is \code{crs = "EPSG:32736"} or \code{crs = 32736}. See \url{https://spatialreference.org} +for a list of EPSG codes. +\item (New!) Provide \code{geometry}. The \code{geometry} argument allows the user to +supply a \code{geometry} column that represents the coordinates as a simple +feature geometry list column. This interface expects the user to prepare +their input DT with \code{\link[=get_geometry]{get_geometry()}}. To use this interface, leave the +\code{coords} and \code{crs} arguments \code{NULL}, and the default argument for \code{geometry} +('geometry') will be used directly. } +} + +\section{Distance function}{ + + +The underlying distance function used depends on the crs of the coordinates +or geometry provided. +\itemize{ +\item If the crs is longlat degrees (as determined by +\code{\link[sf:st_is_longlat]{sf::st_is_longlat()}}), the distance function is \code{\link[sf:geos_measures]{sf::st_distance()}} which +passes to \code{\link[s2:s2_is_collection]{s2::s2_distance()}} if \code{\link[sf:s2]{sf::sf_use_s2()}} is TRUE and +\code{\link[lwgeom:geod]{lwgeom::st_geod_distance()}} if \code{\link[sf:s2]{sf::sf_use_s2()}} is FALSE. The distance +returned has units set according to the crs. +\item If the crs is not longlat degrees (eg. NULL, NA_crs_, or projected), the +distance function used is \code{\link[stats:dist]{stats::dist()}}, maintaining expected behaviour +from previous versions. The distance returned does not have units set. +} + +Note: in both cases, if the coordinates are NA then the result will be NA. +} + \examples{ # Load data.table library(data.table) diff --git a/man/group_lines.Rd b/man/group_lines.Rd index 97c1839b..e875b6e0 100644 --- a/man/group_lines.Rd +++ b/man/group_lines.Rd @@ -25,7 +25,8 @@ crs. Use \code{threshold = 0} to compare intersection without buffering.} \item{crs}{numeric or character defining the coordinate reference system to be passed to \link[sf:st_crs]{sf::st_crs}. For example, either -\code{crs = "EPSG:32736"} or \code{crs = 32736}.} +\code{crs = "EPSG:32736"} or \code{crs = 32736}. If \code{crs = NULL}, the \code{crs} will be +internally set to \code{sf::NA_crs_}.} \item{id}{character string of ID column name} diff --git a/man/group_polys.Rd b/man/group_polys.Rd index 17bab6a0..a3ca87df 100644 --- a/man/group_polys.Rd +++ b/man/group_polys.Rd @@ -29,7 +29,8 @@ area and proportion of overlap (when \code{TRUE})} \item{crs}{numeric or character defining the coordinate reference system to be passed to \link[sf:st_crs]{sf::st_crs}. For example, either -\code{crs = "EPSG:32736"} or \code{crs = 32736}.} +\code{crs = "EPSG:32736"} or \code{crs = 32736}. If \code{crs = NULL}, the \code{crs} will be +internally set to \code{sf::NA_crs_}.} \item{id}{character string of ID column name} diff --git a/man/group_pts.Rd b/man/group_pts.Rd index bfa32e7a..665cb886 100644 --- a/man/group_pts.Rd +++ b/man/group_pts.Rd @@ -10,7 +10,9 @@ group_pts( id = NULL, coords = NULL, timegroup, - splitBy = NULL + crs = NULL, + splitBy = NULL, + geometry = "geometry" ) } \arguments{ @@ -27,6 +29,11 @@ Note: the order is assumed X followed by Y column names.} \item{timegroup}{timegroup field in the DT within which the grouping will be calculated} +\item{crs}{numeric or character defining the coordinate reference +system to be passed to \link[sf:st_crs]{sf::st_crs}. For example, either +\code{crs = "EPSG:32736"} or \code{crs = 32736}. If \code{crs = NULL}, the \code{crs} will be +internally set to \code{sf::NA_crs_}.} + \item{splitBy}{(optional) character string or vector of grouping column name(s) upon which the grouping will be calculated} } @@ -51,7 +58,9 @@ See details for appending outputs using modify-by-reference in the \code{data.table} with relocation data, individual identifiers and a threshold argument. The threshold argument is used to specify the criteria for distance between points which defines a group. Relocation data should be -in two columns representing the X and Y coordinates. +in two columns representing the X and Y coordinates, or in a +geometry column prepared by the helper function +\code{\link[=get_geometry]{get_geometry()}}. } \details{ The \code{DT} must be a \code{data.table}. If your data is a @@ -59,15 +68,16 @@ The \code{DT} must be a \code{data.table}. If your data is a \code{\link[data.table:setDT]{data.table::setDT()}} or by reassigning using \code{\link[data.table:data.table]{data.table::data.table()}}. -The \code{id}, \code{coords}, \code{timegroup} (and optional \code{splitBy}) -arguments expect the names of a column in \code{DT} which correspond to the -individual identifier, X and Y coordinates, timegroup (typically generated by +The \code{id}, \code{timegroup} (and optional \code{splitBy}) +arguments expect the names of columns in \code{DT} which correspond to the +individual identifier, and timegroup (generated by \code{group_times}) and additional grouping columns. -The \code{threshold} must be provided in the units of the coordinates. The -\code{threshold} must be larger than 0. The coordinates must be planar -coordinates (e.g.: UTM). In the case of UTM, \code{threshold = 50} would -indicate a 50 m distance threshold. +The \code{threshold} provided should match the units of the coordinates. The +\code{threshold} can be provided with units specified using the units package (eg. +\code{threshold = units::set_units(10, m)}) which will be checked against the +units of the coordinates using the \code{crs}. If units are not specified, the +\code{threshold} is assumed to be in the units of the coordinates. The \code{timegroup} argument is required to define the temporal groups within which spatial groups are calculated. The intended framework is to @@ -85,7 +95,48 @@ your \code{DT}, you have multiple populations, subgroups or other distinct parts, you can provide the name of the column which identifies them to \code{splitBy}. The grouping performed by \code{group_pts} will only consider rows within each \code{splitBy} subgroup. + +See below under "Interface" for details on providing coordinates and under +"Distance function" for details on underlying distance function used. +} +\section{Interface}{ + +Two interfaces are available for providing coordinates: +\enumerate{ +\item Provide \code{coords} and \code{crs}. The \code{coords} argument expects the names of +the X and Y coordinate columns. The \code{crs} argument expects a character +string or numeric defining the coordinate reference system to be passed to +\link[sf:st_crs]{sf::st_crs}. For example, for UTM zone 36S (EPSG 32736), the crs argument +is \code{crs = "EPSG:32736"} or \code{crs = 32736}. See \url{https://spatialreference.org} +for a list of EPSG codes. +\item (New!) Provide \code{geometry}. The \code{geometry} argument allows the user to +supply a \code{geometry} column that represents the coordinates as a simple +feature geometry list column. This interface expects the user to prepare +their input DT with \code{\link[=get_geometry]{get_geometry()}}. To use this interface, leave the +\code{coords} and \code{crs} arguments \code{NULL}, and the default argument for \code{geometry} +('geometry') will be used directly. } +} + +\section{Distance function}{ + + +The underlying distance function used depends on the crs of the coordinates +or geometry provided. +\itemize{ +\item If the crs is longlat degrees (as determined by +\code{\link[sf:st_is_longlat]{sf::st_is_longlat()}}), the distance function is \code{\link[sf:geos_measures]{sf::st_distance()}} which +passes to \code{\link[s2:s2_is_collection]{s2::s2_distance()}} if \code{\link[sf:s2]{sf::sf_use_s2()}} is TRUE and +\code{\link[lwgeom:geod]{lwgeom::st_geod_distance()}} if \code{\link[sf:s2]{sf::sf_use_s2()}} is FALSE. The distance +returned has units set according to the crs. +\item If the crs is not longlat degrees (eg. NULL, NA_crs_, or projected), the +distance function used is \code{\link[stats:dist]{stats::dist()}}, maintaining expected behaviour +from previous versions. The distance returned does not have units set. +} + +Note: in both cases, if the coordinates are NA then the result will be NA. +} + \examples{ # Load data.table library(data.table) diff --git a/tests/testthat/test-assert.R b/tests/testthat/test-assert.R index 3b059315..4c974f4b 100644 --- a/tests/testthat/test-assert.R +++ b/tests/testthat/test-assert.R @@ -56,4 +56,6 @@ test_that('assert_threshold', { expect_error(assert_threshold(units::as_units(1, 'degree'), 4326), 'do not match') expect_silent(assert_threshold(units::as_units(1, 'm'), 4326)) expect_silent(assert_threshold(units::as_units(1, 'm'), 32649)) + expect_silent(assert_threshold(NULL)) + expect_silent(assert_threshold(100, 4326)) }) diff --git a/tests/testthat/test-calc-distance.R b/tests/testthat/test-calc-distance.R index 77e7fb8c..f5b3dc30 100644 --- a/tests/testthat/test-calc-distance.R +++ b/tests/testthat/test-calc-distance.R @@ -10,6 +10,7 @@ crs_lonlat <- 4326 DT <- DT[ID %in% c('A', 'B', 'C')] get_geometry(DT, coords = coords, crs = crs) +DT$proj_geometry <- sf::st_transform(DT$geometry, crs) DT[, dest_geometry := sf::st_centroid(sf::st_union(geometry))] @@ -42,70 +43,124 @@ test_that('arguments provided correctly else error', { ) }) -test_that('units are returned', { - expect_s3_class(DT[, calc_distance(geometry)], 'units') +test_that('units are returned when use_dist is FALSE else numeric', { + expect_s3_class(DT[, calc_distance(geometry, use_dist = FALSE)], 'units') + expect_s3_class(DT[, calc_distance(geometry, st_shift_longitude(geometry), + use_dist = FALSE)], + 'units') expect_s3_class( - DT[, calc_distance(x_a = lonlat_X, y_a = lonlat_Y, crs = crs_lonlat)], + DT[, calc_distance(x_a = lonlat_X, y_a = lonlat_Y, crs = crs_lonlat, + use_dist = FALSE)], 'units' ) - expect_s3_class( - DT[, calc_distance(x_a = X, y_a = Y, crs = crs)], - 'units' + + expect_type( + DT[, calc_distance(x_a = X, y_a = Y, crs = crs, use_dist = TRUE)], + 'double' + ) + expect_type( + DT[, calc_distance(x_a = X, y_a = Y, x_b = X + 100, y_b = Y + 100, + crs = crs, use_dist = TRUE)], + 'double' ) }) test_that('expected dims returned', { - N <- 10 - expect_length(DT[seq.int(N), calc_distance(geometry)], N * N) - expect_length(DT[seq.int(N), calc_distance(geometry, dest_geometry)], N) + N <- 100 + expect_length(DT[seq.int(N), calc_distance(geometry, use_dist = FALSE)], + N * N) + expect_length(DT[seq.int(N), calc_distance(geometry, dest_geometry, + use_dist = FALSE)], + N) expect_length( - DT[seq.int(N), calc_distance(x_a = lonlat_X, y_a = lonlat_Y, crs = crs_lonlat)], + DT[seq.int(N), calc_distance(x_a = lonlat_X, y_a = lonlat_Y, crs = crs_lonlat, + use_dist = FALSE)], N * N ) expect_length( DT[seq.int(N), calc_distance(x_a = lonlat_X, y_a = lonlat_Y, x_b = dest_X, y_b = dest_Y, - crs = crs_lonlat)], + crs = crs_lonlat, + use_dist = FALSE)], + N + ) + + expect_length(DT[seq.int(N), calc_distance(proj_geometry, use_dist = TRUE)], + N * N) + expect_length(DT[seq.int(N), calc_distance(proj_geometry, proj_geometry[1], + use_dist = TRUE)], + N) + + expect_length( + DT[seq.int(N), calc_distance(x_a = lonlat_X, y_a = lonlat_Y, crs = crs_lonlat, + use_dist = TRUE)], + N * N + ) + expect_length( + DT[seq.int(N), calc_distance(x_a = lonlat_X, y_a = lonlat_Y, + x_b = dest_X, y_b = dest_Y, + crs = crs_lonlat, + use_dist = TRUE)], N ) }) test_that('expected range returned', { - expect_gte(DT[, min(calc_distance(geometry))], - units::set_units(0, 'm')) - expect_gte(DT[, min(calc_distance(x_a = X, y_a = Y, crs = crs))], + expect_gte(DT[, min(calc_distance(geometry, use_dist = FALSE))], units::set_units(0, 'm')) - expect_gte(DT[, min(calc_distance(geometry, geometry_b = dest_geometry))], + expect_gte(DT[, min(calc_distance(x_a = X, y_a = Y, crs = crs, + use_dist = TRUE))], + 0) + expect_gte(DT[, min(calc_distance(geometry, geometry_b = dest_geometry, + use_dist = FALSE))], units::set_units(0, 'm')) expect_gte(DT[, min(calc_distance(x_a = lonlat_X, y_a = lonlat_Y, x_b = dest_X, y_b = dest_Y, - crs = crs_lonlat))], - units::set_units(0, 'm')) + crs = crs_lonlat, + use_dist = TRUE))], + 0) }) test_that('NAs returned as expected', { + # NAs returned if use_dist = FALSE, in recent updates to sf + # NAs are not returned if use_dist = TRUE, unless X and Y are both NA + # This is handled in group_pts by subsetting where X and Y are not NA + + X_NA <- copy(DT)[seq.int(100)][sample(.N, 10), X := NA] + res <- X_NA[, calc_distance(x_a = X, y_a = Y, crs = crs, use_dist = TRUE)] + expect_length(res, nrow(X_NA) * nrow(X_NA)) + expect_true(any(is.na(X_NA$X))) + # expect_true(any(is.na(res))) + + Y_NA <- copy(DT)[seq.int(100)][sample(.N, 10), Y := NA] + res <- Y_NA[, calc_distance(x_a = X, y_a = Y, crs = crs, use_dist = TRUE)] + expect_length(res, nrow(Y_NA) * nrow(Y_NA)) + expect_true(any(is.na(Y_NA$Y))) + # expect_true(any(is.na(res))) + X_NA <- copy(DT)[seq.int(100)][sample(.N, 10), X := NA] - res <- X_NA[, calc_distance(x_a = X, y_a = Y, crs = crs)] + res <- X_NA[, calc_distance(x_a = X, y_a = Y, crs = crs, use_dist = FALSE)] expect_length(res, nrow(X_NA) * nrow(X_NA)) expect_true(any(is.na(X_NA$X))) # expect_true(any(is.na(res))) Y_NA <- copy(DT)[seq.int(100)][sample(.N, 10), Y := NA] - res <- Y_NA[, calc_distance(x_a = X, y_a = Y, crs = crs)] + res <- Y_NA[, calc_distance(x_a = X, y_a = Y, crs = crs, use_dist = FALSE)] expect_length(res, nrow(Y_NA) * nrow(Y_NA)) expect_true(any(is.na(Y_NA$Y))) # expect_true(any(is.na(res))) XY_NA <- copy(DT)[seq.int(100)][sample(.N, 10), (lonlat_coords) := NA] - res <- XY_NA[, calc_distance(x_a = lonlat_X, y_a = lonlat_Y, crs = 4326)] + res <- XY_NA[, calc_distance(x_a = lonlat_X, y_a = lonlat_Y, crs = 4326, + use_dist = FALSE)] expect_length(res, nrow(XY_NA) * nrow(XY_NA)) expect_true(any(is.na(c(XY_NA$lonlat_X, XY_NA$lonlat_Y)))) - expect_true(any(is.na(res))) + # expect_true(any(is.na(res))) get_geometry(XY_NA, lonlat_coords, crs_lonlat) - res <- XY_NA[, calc_distance(geometry)] + res <- XY_NA[, calc_distance(geometry, use_dist = FALSE)] expect_length(res, nrow(XY_NA) * nrow(XY_NA)) expect_true(any(sf::st_is_empty(XY_NA$geometry))) - expect_true(any(is.na(res))) + # expect_true(any(is.na(res))) }) diff --git a/tests/testthat/test-distance-to-centroid.R b/tests/testthat/test-distance-to-centroid.R index 171f523c..bbd0fd68 100644 --- a/tests/testthat/test-distance-to-centroid.R +++ b/tests/testthat/test-distance-to-centroid.R @@ -26,7 +26,7 @@ test_that('DT is required', { test_that('arguments required, otherwise error detected', { expect_error(distance_to_centroid(DT, coords = NULL), - 'coords must be length 2') + 'geometry field') expect_error(distance_to_centroid(DT, coords = coords, group = NULL, return_rank = TRUE), 'group must be provided') @@ -55,7 +55,7 @@ test_that('coords are correctly provided or error detected', { 'coords must be of class numeric') copy_DT <- copy(DT)[, centroid_X := as.character(centroid_X)] expect_error(distance_to_centroid(copy_DT, coords = coords), - 'centroid_coords must be of class numeric') + 'coords_centroid must be of class numeric') }) test_that('distance_centroid column succesfully detected', { diff --git a/tests/testthat/test-distance-to-leader.R b/tests/testthat/test-distance-to-leader.R index f0aaf9cc..c782c3b8 100644 --- a/tests/testthat/test-distance-to-leader.R +++ b/tests/testthat/test-distance-to-leader.R @@ -33,7 +33,7 @@ test_that('DT is required', { test_that('arguments required, otherwise error detected', { expect_error(distance_to_leader(DT, coords = NULL, group = group), - 'coords must be length 2') + 'geometry field') expect_error(distance_to_leader(DT, coords = coords, group = NULL), 'group must be provided') }) @@ -124,7 +124,7 @@ expect_DT <- data.table( ID = c('A', 'B'), X = c(0, 10), Y = c(0, 0), - group_direction = rep(as_units(0, 'rad'), 2), + group_direction = rep(units::as_units(0, 'rad'), 2), group = c(1, 1) ) centroid_group(expect_DT, coords = coords) diff --git a/tests/testthat/test-edge-dist.R b/tests/testthat/test-edge-dist.R index 03281f1a..bd948c85 100644 --- a/tests/testthat/test-edge-dist.R +++ b/tests/testthat/test-edge-dist.R @@ -96,13 +96,16 @@ test_that('column names must exist in DT', { test_that('threshold correctly provided or error detected', { copyDT <- copy(DT) - expect_error(edge_dist(DT, threshold = -10, id = 'ID'), + expect_error(edge_dist(DT, threshold = -10, timegroup = 'timegroup', id = 'ID', + coords = c('X', 'Y')), 'threshold must be > 0') - expect_error(edge_dist(DT, threshold = 0, id = 'ID'), + expect_error(edge_dist(DT, threshold = 0, timegroup = 'timegroup', id = 'ID', + coords = c('X', 'Y')), 'threshold must be > 0') - expect_error(edge_dist(DT, threshold = '0', id = 'ID'), + expect_error(edge_dist(DT, threshold = '0', timegroup = 'timegroup', id = 'ID', + coords = c('X', 'Y')), 'threshold must be of class numeric') }) diff --git a/tests/testthat/test-edge-nn.R b/tests/testthat/test-edge-nn.R index 629c3ea9..805bd7c3 100644 --- a/tests/testthat/test-edge-nn.R +++ b/tests/testthat/test-edge-nn.R @@ -88,13 +88,16 @@ test_that('column names must exist in DT', { test_that('threshold correctly provided or error detected', { copyDT <- copy(DT) - expect_error(edge_nn(DT, threshold = -10, id = 'ID'), + expect_error(edge_nn(DT, threshold = -10, timegroup = 'timegroup', id = 'ID', + coords = c('X', 'Y')), 'threshold must be > 0') - expect_error(edge_nn(DT, threshold = 0, id = 'ID'), + expect_error(edge_nn(DT, threshold = 0, timegroup = 'timegroup', id = 'ID', + coords = c('X', 'Y')), 'threshold must be > 0') - expect_error(edge_nn(DT, threshold = '0', id = 'ID'), + expect_error(edge_nn(DT, threshold = '0', timegroup = 'timegroup', id = 'ID', + coords = c('X', 'Y')), 'threshold must be of class numeric') }) diff --git a/tests/testthat/test-group-pts.R b/tests/testthat/test-group-pts.R index 76391449..f4f55c39 100644 --- a/tests/testthat/test-group-pts.R +++ b/tests/testthat/test-group-pts.R @@ -51,7 +51,7 @@ test_that('column names must exist in DT', { threshold = 10, id = 'potato', coords = c('X', 'Y'), - timegroup = NULL + timegroup = 'timegroup' ), 'not present in input DT', fixed = FALSE @@ -64,7 +64,7 @@ test_that('column names must exist in DT', { threshold = 10, id = 'ID', coords = c('potatoX', 'potatoY'), - timegroup = NULL + timegroup = 'timegroup' ), 'not present in input DT', fixed = FALSE @@ -77,7 +77,7 @@ test_that('column names must exist in DT', { threshold = 10, id = 'ID', coords = c('X', 'Y'), - timegroup = NULL, + timegroup = 'timegroup', splitBy = 'potato' ), 'not present in input DT', @@ -102,13 +102,19 @@ test_that('column names must exist in DT', { test_that('threshold correctly provided or error detected', { copyDT <- copy(DT) - expect_error(group_pts(DT, threshold = -10, id = 'ID'), + expect_error(group_pts(DT, threshold = -10, id = 'ID', + coords = c('X', 'Y'), + timegroup = 'timegroup'), 'threshold must be > 0') - expect_error(group_pts(DT, threshold = 0, id = 'ID'), + expect_error(group_pts(DT, threshold = 0, id = 'ID', + coords = c('X', 'Y'), + timegroup = 'timegroup'), 'threshold must be > 0') - expect_error(group_pts(DT, threshold = '0', id = 'ID'), + expect_error(group_pts(DT, threshold = '0', id = 'ID', + coords = c('X', 'Y'), + timegroup = 'timegroup'), 'threshold must be of class numeric') }) @@ -131,7 +137,7 @@ test_that('coords are correctly provided or error detected', { threshold = 10, id = 'ID', coords = c('X', 'ID'), - timegroup = NULL + timegroup = 'timegroup' ), 'coords must be of class numeric' ) @@ -323,3 +329,54 @@ test_that('splitBy argument doesnt use splitBy column', { ) }) + + +test_that('group_pts returns NA for group when X/Y are NA', { + copyDT <- copy(DT) + + n <- 10 + copyDT[sample(.N, n), X := NA] + + expect_equal( + group_pts( + copyDT, + threshold = 10, + id = 'ID', + coords = c('X', 'Y'), + timegroup = 'timegroup' + )[is.na(group), .N], + n + ) + + copyDT <- copy(DT) + + n <- 10 + copyDT[sample(.N, n), Y := NA] + + expect_equal( + group_pts( + copyDT, + threshold = 10, + id = 'ID', + coords = c('X', 'Y'), + timegroup = 'timegroup' + )[is.na(group), .N], + n + ) + + copyDT <- copy(DT) + + n <- 10 + copyDT[sample(.N, n), c('X', 'Y') := NA] + + expect_equal( + group_pts( + copyDT, + threshold = 10, + id = 'ID', + coords = c('X', 'Y'), + timegroup = 'timegroup' + )[is.na(group), .N], + n + ) +})