Skip to content

Commit

Permalink
updated zoomerjoin_euclidean_join function
Browse files Browse the repository at this point in the history
  • Loading branch information
beniaminogreen committed Jul 3, 2024
1 parent ed17cdf commit 87b4558
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 11 deletions.
30 changes: 20 additions & 10 deletions LinkOrgs/R/zoomerjoin_euclidean_join.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,31 @@ zoomerjoin_euclidean_join <- function(
y = NULL, embedy = NULL,
MaxDist = NULL,
AveMatchNumberPerAlias = NULL,
n_bands = 40L,
band_width = 5L,
r = 5L,
...
) {

stopifnot(
"AveMatchNumberPerAlias not supported for zoomerjoin" =
!is.null(AveMatchNumberPerAlias)
)
if(!is.null(AveMatchNumberPerAlias)){
warning("Setting AveMatchNumberPerAlias may result in long runtimes when using zoomerjoin")

threshold <- GetCalibratedDistThres(x = embedy,
y = embedx,
AveMatchNumberPerAlias = AveMatchNumberPerAlias)

} else {
threshold <- MaxDist
}

# Search for zoomerjoin hyperparameters (n_bands, band_width, r) for the chosen distance threshold
hyperparameters <- euclidean_hyper_search(3*threshold, threshold, .01, .995)

n_bands <- hyperparameters$n_bands
band_width <- hyperparameters$band_width
r <- hyperparameters$r

match_table <- zoomerjoin::rust_p_norm_join(
match_table <- zoomerjoin:::rust_p_norm_join(
a_mat = as.matrix(embedx),
b_mat = as.matrix(embedy),
radius = MaxDist,
radius = threshold,
band_width = band_width,
n_bands = n_bands,
r = r,
Expand All @@ -29,5 +39,5 @@ zoomerjoin_euclidean_join <- function(
colnames(x) <- paste(colnames(x), ".x", sep = "")

z <- cbind(x[match_table[,1],], y[match_table[,2],])
return(DeconflictNames(z))
return(z)
}
2 changes: 1 addition & 1 deletion LinkOrgs/R/zoomerjoin_hyperparameter_searches.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ euclidean_hyper_search <- function(d1, d2, p1, p2,
nvars = 3,
max = FALSE,
Domains = matrix(
c(1, 2000, 1, 500, 0, d1),
c(1, 5000, 1, 5000, 0, d1),
ncol = 2,
byrow = TRUE
),
Expand Down

0 comments on commit 87b4558

Please sign in to comment.