From ac9f6c0b535c138043fc1e4aea8dbf66122c5013 Mon Sep 17 00:00:00 2001
From: DidierMurilloF <didier.murilloflorez@ndsu.edu>
Date: Sun, 6 Apr 2025 17:37:20 -0500
Subject: [PATCH 1/3] feat: fine-tunning prep optimization

---
 FielDHub.Rproj               |   1 +
 NAMESPACE                    |   1 +
 R/fct_partially_replicated.R |   6 +-
 R/mod_pREPS.R                |  13 ++
 R/utils_pREP.R               |  12 +-
 R/utils_swap_functions.R     | 242 +++++++++++++++++++++++++++++++----
 man/calculate_lambda.Rd      |  48 +++++++
 man/partially_replicated.Rd  |   2 +
 man/swap_pairs.Rd            |  46 ++++---
 9 files changed, 319 insertions(+), 52 deletions(-)
 create mode 100644 man/calculate_lambda.Rd

diff --git a/FielDHub.Rproj b/FielDHub.Rproj
index 21a4da0..c59b1b5 100644
--- a/FielDHub.Rproj
+++ b/FielDHub.Rproj
@@ -1,4 +1,5 @@
 Version: 1.0
+ProjectId: cfbd1bcc-9ce9-485c-a905-031cd1d58d1f
 
 RestoreWorkspace: Default
 SaveWorkspace: Default
diff --git a/NAMESPACE b/NAMESPACE
index 4584522..dfec4b7 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -9,6 +9,7 @@ export(CRD)
 export(RCBD)
 export(RCBD_augmented)
 export(alpha_lattice)
+export(calculate_lambda)
 export(diagonal_arrangement)
 export(do_optim)
 export(full_factorial)
diff --git a/R/fct_partially_replicated.R b/R/fct_partially_replicated.R
index 75a1ee8..2b43f97 100644
--- a/R/fct_partially_replicated.R
+++ b/R/fct_partially_replicated.R
@@ -121,7 +121,9 @@ partially_replicated <- function(
     seed = NULL, 
     exptName = NULL, 
     locationNames = NULL,
-    multiLocationData = FALSE,  
+    multiLocationData = FALSE, 
+    dist_method = "euclidean",
+    border_penalization = 0.5,
     data = NULL) {
     
     if (all(c("serpentine", "cartesian") != planter)) {
@@ -296,6 +298,8 @@ partially_replicated <- function(
             seed = seed, 
             optim = TRUE, 
             niter = 1000, 
+            dist_method = dist_method,
+            border_penalization = border_penalization,
             data = list_locs[[sites]]
         )
         rows_incidence[sites] <- prep$rows_incidence[length(prep$rows_incidence)]
diff --git a/R/mod_pREPS.R b/R/mod_pREPS.R
index 908c041..cacaf62 100644
--- a/R/mod_pREPS.R
+++ b/R/mod_pREPS.R
@@ -61,6 +61,17 @@ mod_pREPS_ui <- function(id){
 				label = "# of Rep Per Group:",
 				value = "2,1")
 			),
+		sliderInput(ns("border_penalization"), 
+		            label = "Border Penalization", 
+		            min = 0.00, 
+		            max = 1.00, 
+		            value = 0.3),
+		selectInput(
+      ns("optimization_distance_method"), 
+      label = "Optimization Distance Method:", 
+      choices = c("Euclidean" = "euclidean", "Manhattan" = "manhattan"), 
+      selected = "manhattan"
+      ),
 			fluidRow(
 				column(
 					width = 6,
@@ -483,6 +494,8 @@ mod_pREPS_server <- function(id){
             exptName =  expt_name,
             locationNames = site_names, 
             planter = movement_planter,
+            border_penalization = input$border_penalization,
+            dist_method = input$optimization_distance_method,
             data = gen.list 
           )
       })
diff --git a/R/utils_pREP.R b/R/utils_pREP.R
index a754eea..5ea45ed 100644
--- a/R/utils_pREP.R
+++ b/R/utils_pREP.R
@@ -42,7 +42,10 @@ pREP <- function(
     seed = NULL, 
     optim = TRUE, 
     niter = 10000, 
-    data = NULL) {
+    border_penalization = 0.1,
+    dist_method = "euclidean",
+    data = NULL
+    ) {
   
     niter <- 1000
     prep <- TRUE
@@ -173,12 +176,15 @@ pREP <- function(
     }
     # Make numeric each element in the matrix layout1
     field_layout <- apply(layout1, c(1,2), as.numeric)
+    
     ################### Optimization ##########################################
     # Perform an optimization by using the function swap_pairs()
     if (max(table(field_layout)) == 2) {
-        swap <- swap_pairs(X = field_layout, starting_dist = 3, stop_iter = 18)
+        swap <- swap_pairs(X = field_layout, starting_dist = 3, stop_iter = 5, 
+                           dist_method = dist_method, lambda = border_penalization)
     } else {
-        swap <- swap_pairs(X = field_layout, starting_dist = 2, stop_iter = 18)
+        swap <- swap_pairs(X = field_layout, starting_dist = 2, stop_iter = 5, 
+                           dist_method = dist_method, lambda = border_penalization)
     }
     optim_layout <- swap$optim_design
     dups <- table(as.vector(optim_layout))
diff --git a/R/utils_swap_functions.R b/R/utils_swap_functions.R
index 659543f..1df8d89 100644
--- a/R/utils_swap_functions.R
+++ b/R/utils_swap_functions.R
@@ -129,15 +129,167 @@ pairs_distance <- function(X) {
 #' 
 #' 
 #' @export
-swap_pairs <- function(X, starting_dist = 3, stop_iter = 50) {
-    # check if the input X is a matrix
+# swap_pairs <- function(X, starting_dist = 3, stop_iter = 50) {
+#     # check if the input X is a matrix
+#     if (!is.matrix(X)) {
+#         stop("Input must be a matrix")
+#     }
+#     # check if the input matrix X is numeric
+#     if (!is.numeric(X)) {
+#         stop("Matrix elements must be numeric")
+#     }
+#     swap_succeed <- FALSE
+#     input_X <- X
+#     input_freq <- table(input_X)
+#     minDist <- sqrt(sum(dim(X)^2))
+#     designs <- list()
+#     designs[[1]] <- X
+#     distances <- list()
+#     rows_incidence <- numeric()
+#     init_dist <- pairs_distance(X = X)
+#     genos <- unique(init_dist$geno)
+#     distances[[1]] <- init_dist
+#     w <- 2
+#     for (min_dist in seq(starting_dist, minDist, 1)) {
+#         n_iter <- 1
+#         while (n_iter <= stop_iter) {
+#             plotDist <- pairs_distance(X)
+#             rownames(plotDist) <- NULL  #reset for latert use
+#             LowID <- which(plotDist$DIST < min_dist)
+#             low_dist_gens <- unique(plotDist$geno[LowID])
+#             LS <- length(LowID)
+#             if (LS == 0) {
+#                 n_iter <- stop_iter + 1
+#                 break
+#             }
+#             for (genotype in low_dist_gens) {
+#                 indices <- which(X == genotype, arr.ind = TRUE)
+#                 # Get the row and column indices of the cells that contain other values
+#                 other_indices <- which(X != genotype, arr.ind = TRUE)
+#                 for (i in seq_len(nrow(indices))) {
+#                     # Calculate the Euclidean distances to the fixed point
+#                     dist <- apply(other_indices, 1, function(x) sum((x - indices[i, ])^2))
+#                     other_indices <- as.data.frame(other_indices)
+#                     # other_indices <- other_indices[order(other_indices[, 1]),]
+#                     valid_indices <- other_indices[sqrt(dist) >= min_dist, ]
+#                     if (nrow(valid_indices) == 0) break
+#                     valid_indices <- valid_indices[order(valid_indices[, 1]),]
+#                     # Pick a random cell to swap with
+#                     k <- sample(nrow(valid_indices), size = 1)
+#                     # Swap the two occurrences
+#                     X[indices[i,1], indices[i,2]] <- X[valid_indices[k,1], valid_indices[k,2]]
+#                     X[valid_indices[k,1], valid_indices[k,2]] <- genotype
+#                 }
+#             }
+#             n_iter <- n_iter + 1
+#         }
+#         if (min(pairs_distance(X)$DIST) < min_dist) {
+#             break
+#         } else {
+#             swap_succeed <- TRUE
+#             output_freq <- table(X)
+#             if (!all(input_freq == output_freq)) {
+#               stop("The swap function changed the frequency of some integers.")
+#             }
+#             frequency_rows <- as.data.frame(search_matrix_values(X = X, values_search = genos))
+#             df <- frequency_rows |> 
+#                 dplyr::filter(Times >= 2)
+#             rows_incidence[w - 1] <- nrow(df)
+#             designs[[w]] <- X
+#             distances[[w]] <- pairs_distance(X)
+#             w <- w + 1
+#         }
+#     }
+#     optim_design = designs[[length(designs)]] # return the last (better) design
+#     pairwise_distance <- pairs_distance(optim_design)
+#     min_distance = min(pairwise_distance$DIST)
+#     if (!swap_succeed) {
+#         optim_design = designs[[1]] # return the last (better) design
+#         distances[[1]] <- pairs_distance(optim_design)
+#         pairwise_distance <- pairs_distance(optim_design)
+#         min_distance = min(pairwise_distance$DIST)
+#         frequency_rows <- as.data.frame(search_matrix_values(X = optim_design, values_search = genos))
+#         df <- frequency_rows |> 
+#             dplyr::filter(Times >= 2)
+#         rows_incidence[1] <- nrow(df)
+#     }
+#     return(
+#         list(
+#             rows_incidence = rows_incidence,
+#             optim_design = optim_design, 
+#             designs = designs, 
+#             distances = distances,
+#             min_distance = min_distance,
+#             pairwise_distance = pairwise_distance
+#         )
+#     )
+# }
+
+# Function to calculate dynamic lambda based on matrix size
+calculate_lambda <- function(X, lambda_base = 0.1) {
+    # Get matrix dimensions
+    n <- nrow(X)
+    m <- ncol(X)
+    
+    # Calculate the maximum Euclidean distance from the center of the matrix
+    # Center coordinates
+    center <- c(n / 2, m / 2)
+    # Maximum distance is from the center to a corner, e.g. (1,1)
+    max_dist <- sqrt((1 - center[1])^2 + (1 - center[2])^2)
+    
+    # Alternatively, you could compute the maximum as the distance to the furthest corner:
+    # corners <- matrix(c(1, 1, n, 1, 1, m, n, m), ncol = 2, byrow = TRUE)
+    # max_dist <- max(sqrt(rowSums((t(t(corners) - center))^2)))
+    
+    # Dynamically compute lambda as a function of matrix size.
+    lambda <- lambda_base / max_dist
+    return(lambda)
+}
+
+
+#' @title Swap pairs in a matrix of integers
+#' 
+#' @description Modifies the input matrix \code{X} to ensure that the distance between any two occurrences
+#' of the same integer is at least a distance \code{d}, by swapping one of the occurrences with a
+#' candidate cell of a different integer. The function starts with \code{starting_dist = 3} and increases it
+#' by \code{1} until the algorithm no longer converges or \code{stop_iter} iterations have been performed.
+#' This version evaluates candidate swaps using both the mean pairwise distance and a centrality penalty,
+#' and it uses candidate sampling to reduce computation.
+#' 
+#' @param X A matrix of integers.
+#' @param starting_dist The minimum starting distance to enforce between pairs of occurrences of the same integer. Default is 3.
+#' @param stop_iter The maximum number of iterations to perform. Default is 50.
+#' @param lambda A tuning parameter for the centrality penalty. Default is 0.1.
+#' @param dist_method The method used for distance calculation. Options are "euclidean" (default) and "manhattan".
+#' @param candidate_sample_size Maximum number of candidate cells to evaluate per swap. Default is 5.
+#' 
+#' @return A list containing:
+#' \item{optim_design}{The modified matrix.}
+#' \item{designs}{A list of all intermediate designs, starting from the input matrix.}
+#' \item{distances}{A list of all pair distances for each intermediate design.}
+#' \item{min_distance}{The minimum distance between pairs of occurrences of the same integer in the final design.}
+#' \item{pairwise_distance}{A data frame with the pairwise distances for the final design.}
+#' \item{rows_incidence}{A vector recording the number of rows with repeated integers for each iteration.}
+#' 
+#' @examples
+#' set.seed(123)
+#' X <- matrix(sample(c(rep(1:10, 2), 11:50), replace = FALSE), ncol = 10)
+#' B <- swap_pairs(X, starting_dist = 3, stop_iter = 50, lambda = 0.1, dist_method = "manhattan", candidate_sample_size = 5)
+#' B$optim_design
+#' 
+#' @export
+swap_pairs <- function(X, starting_dist = 3, stop_iter = 10, lambda = 0.5,
+                       dist_method = "euclidean", candidate_sample_size = 4) {
+    # Check if the input X is a matrix and numeric.
     if (!is.matrix(X)) {
         stop("Input must be a matrix")
     }
-    # check if the input matrix X is numeric
     if (!is.numeric(X)) {
         stop("Matrix elements must be numeric")
     }
+
+    # lambda <- calculate_lambda(X, lambda_base = lambda)
+
     swap_succeed <- FALSE
     input_X <- X
     input_freq <- table(input_X)
@@ -146,15 +298,18 @@ swap_pairs <- function(X, starting_dist = 3, stop_iter = 50) {
     designs[[1]] <- X
     distances <- list()
     rows_incidence <- numeric()
+
     init_dist <- pairs_distance(X = X)
     genos <- unique(init_dist$geno)
     distances[[1]] <- init_dist
     w <- 2
+
+    # Main loop: Increase the minimum distance from starting_dist to maximum possible.
     for (min_dist in seq(starting_dist, minDist, 1)) {
         n_iter <- 1
         while (n_iter <= stop_iter) {
             plotDist <- pairs_distance(X)
-            rownames(plotDist) <- NULL  #reset for latert use
+            rownames(plotDist) <- NULL  # Reset rownames.
             LowID <- which(plotDist$DIST < min_dist)
             low_dist_gens <- unique(plotDist$geno[LowID])
             LS <- length(LowID)
@@ -162,62 +317,94 @@ swap_pairs <- function(X, starting_dist = 3, stop_iter = 50) {
                 n_iter <- stop_iter + 1
                 break
             }
+
+            # Compute the center of the matrix for the centrality penalty.
+            center <- c(nrow(X) / 2, ncol(X) / 2)
+
             for (genotype in low_dist_gens) {
                 indices <- which(X == genotype, arr.ind = TRUE)
-                # Get the row and column indices of the cells that contain other values
                 other_indices <- which(X != genotype, arr.ind = TRUE)
+
                 for (i in seq_len(nrow(indices))) {
-                    # Calculate the Euclidean distances to the fixed point
-                    dist <- apply(other_indices, 1, function(x) sum((x - indices[i, ])^2))
-                    other_indices <- as.data.frame(other_indices)
-                    # other_indices <- other_indices[order(other_indices[, 1]),]
-                    valid_indices <- other_indices[sqrt(dist) >= min_dist, ]
+                    # Calculate distances from the current occurrence to all other indices.
+                    if (dist_method == "euclidean") {
+                        d <- sqrt(apply(other_indices, 1, function(x) sum((x - indices[i, ])^2)))
+                    } else if (dist_method == "manhattan") {
+                        d <- apply(other_indices, 1, function(x) sum(abs(x - indices[i, ])))
+                    } else {
+                        stop("Invalid distance method specified. Use 'euclidean' or 'manhattan'.")
+                    }
+
+                    other_indices_df <- as.data.frame(other_indices)
+                    valid_indices <- other_indices_df[d >= min_dist, ]
                     if (nrow(valid_indices) == 0) break
-                    valid_indices <- valid_indices[order(valid_indices[, 1]),]
-                    # Pick a random cell to swap with
-                    k <- sample(nrow(valid_indices), size = 1)
-                    # Swap the two occurrences
-                    X[indices[i,1], indices[i,2]] <- X[valid_indices[k,1], valid_indices[k,2]]
-                    X[valid_indices[k,1], valid_indices[k,2]] <- genotype
+
+                    if (nrow(valid_indices) > candidate_sample_size) {
+                        sample_idx <- sample(seq_len(nrow(valid_indices)), candidate_sample_size)
+                        candidate_set <- valid_indices[sample_idx, , drop = FALSE]
+                    } else {
+                        candidate_set <- valid_indices
+                    }
+
+                    candidate_scores <- numeric(nrow(candidate_set))
+                    for (j in seq_len(nrow(candidate_set))) {
+                        X_temp <- X
+                        X_temp[indices[i, 1], indices[i, 2]] <- X_temp[candidate_set[j, 1], candidate_set[j, 2]]
+                        X_temp[candidate_set[j, 1], candidate_set[j, 2]] <- genotype
+
+                        candidate_mean <- mean(pairs_distance(X_temp)$DIST)
+
+                        candidate_center_dist <- sqrt(sum((c(candidate_set[j, 1], candidate_set[j, 2]) - center)^2))
+
+                        candidate_scores[j] <- candidate_mean - lambda * candidate_center_dist
+                    }
+
+                    best_candidate_index <- which.max(candidate_scores)
+                    best_candidate <- as.numeric(unlist(candidate_set[best_candidate_index, , drop = TRUE]))
+
+                    X[indices[i, 1], indices[i, 2]] <- X[best_candidate[1], best_candidate[2]]
+                    X[best_candidate[1], best_candidate[2]] <- genotype
                 }
             }
             n_iter <- n_iter + 1
         }
+
         if (min(pairs_distance(X)$DIST) < min_dist) {
             break
         } else {
             swap_succeed <- TRUE
             output_freq <- table(X)
             if (!all(input_freq == output_freq)) {
-              stop("The swap function changed the frequency of some integers.")
+                stop("The swap function changed the frequency of some integers.")
             }
             frequency_rows <- as.data.frame(search_matrix_values(X = X, values_search = genos))
-            df <- frequency_rows |> 
-                dplyr::filter(Times >= 2)
+            df <- frequency_rows |> dplyr::filter(Times >= 2)
             rows_incidence[w - 1] <- nrow(df)
             designs[[w]] <- X
             distances[[w]] <- pairs_distance(X)
             w <- w + 1
         }
     }
-    optim_design = designs[[length(designs)]] # return the last (better) design
+
+    optim_design <- designs[[length(designs)]]
     pairwise_distance <- pairs_distance(optim_design)
-    min_distance = min(pairwise_distance$DIST)
+    min_distance <- min(pairwise_distance$DIST)
+
     if (!swap_succeed) {
-        optim_design = designs[[1]] # return the last (better) design
+        optim_design <- designs[[1]]
         distances[[1]] <- pairs_distance(optim_design)
         pairwise_distance <- pairs_distance(optim_design)
-        min_distance = min(pairwise_distance$DIST)
+        min_distance <- min(pairwise_distance$DIST)
         frequency_rows <- as.data.frame(search_matrix_values(X = optim_design, values_search = genos))
-        df <- frequency_rows |> 
-            dplyr::filter(Times >= 2)
+        df <- frequency_rows |> dplyr::filter(Times >= 2)
         rows_incidence[1] <- nrow(df)
     }
+
     return(
         list(
             rows_incidence = rows_incidence,
-            optim_design = optim_design, 
-            designs = designs, 
+            optim_design = optim_design,
+            designs = designs,
             distances = distances,
             min_distance = min_distance,
             pairwise_distance = pairwise_distance
@@ -225,6 +412,7 @@ swap_pairs <- function(X, starting_dist = 3, stop_iter = 50) {
     )
 }
 
+
 #' @title Search Matrix Values
 #'
 #' @description Search for values in a matrix and return the row number, value, and frequency.
diff --git a/man/calculate_lambda.Rd b/man/calculate_lambda.Rd
new file mode 100644
index 0000000..5a15463
--- /dev/null
+++ b/man/calculate_lambda.Rd
@@ -0,0 +1,48 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils_swap_functions.R
+\name{calculate_lambda}
+\alias{calculate_lambda}
+\title{Swap pairs in a matrix of integers}
+\usage{
+calculate_lambda(X, lambda_base = 0.1)
+}
+\arguments{
+\item{X}{A matrix of integers.}
+
+\item{starting_dist}{The minimum starting distance to enforce between pairs of occurrences
+of the same integer. Default is 3.}
+
+\item{stop_iter}{The maximum number of iterations to perform. Default is 100.}
+}
+\value{
+A list containing the following elements:
+\item{optim_design}{The modified matrix.}
+\item{designs}{A list of all intermediate designs, starting from the input matrix.}
+\item{distances}{A list of all pair distances for each intermediate design.}
+\item{min_distance}{An integer indicating the minimum distance between pairs of occurrences of the same integer.}
+\item{pairwise_distance}{A data frame with the pairwise distances for the final design.}
+}
+\description{
+Modifies the input matrix \code{X} to ensure that the distance between any two occurrences
+of the same integer is at least a dist \code{d}, by swapping one of the occurrences with a
+random occurrence of a different integer that is at least \code{d} away. The function
+starts with \code{starting_dist = 3} and increases it by \code{1} until the algorithm no longer 
+converges or \code{stop_iter} iterations have been performed.
+}
+\examples{
+# Create a matrix X with the numbers 1 to 10 are twice and 11 to 50 are once.
+# The matrix has 6 rows and 10 columns
+set.seed(123)
+X <- matrix(sample(c(rep(1:10, 2), 11:50), replace = FALSE), ncol = 10)
+X
+# Swap pairs
+B <- swap_pairs(X, starting_dist = 3)
+B$optim_design
+B$designs
+B$distances
+
+
+}
+\author{
+Jean-Marc Montpetit [aut], Didier Murillo [aut]
+}
diff --git a/man/partially_replicated.Rd b/man/partially_replicated.Rd
index ddd68d1..e69861f 100644
--- a/man/partially_replicated.Rd
+++ b/man/partially_replicated.Rd
@@ -16,6 +16,8 @@ partially_replicated(
   exptName = NULL,
   locationNames = NULL,
   multiLocationData = FALSE,
+  dist_method = "euclidean",
+  border_penalization = 0.1,
   data = NULL
 )
 }
diff --git a/man/swap_pairs.Rd b/man/swap_pairs.Rd
index c51f2a1..795c3f1 100644
--- a/man/swap_pairs.Rd
+++ b/man/swap_pairs.Rd
@@ -4,45 +4,49 @@
 \alias{swap_pairs}
 \title{Swap pairs in a matrix of integers}
 \usage{
-swap_pairs(X, starting_dist = 3, stop_iter = 50)
+swap_pairs(
+  X,
+  starting_dist = 3,
+  stop_iter = 10,
+  lambda = 0.5,
+  dist_method = "euclidean",
+  candidate_sample_size = 4
+)
 }
 \arguments{
 \item{X}{A matrix of integers.}
 
-\item{starting_dist}{The minimum starting distance to enforce between pairs of occurrences
-of the same integer. Default is 3.}
+\item{starting_dist}{The minimum starting distance to enforce between pairs of occurrences of the same integer. Default is 3.}
 
-\item{stop_iter}{The maximum number of iterations to perform. Default is 100.}
+\item{stop_iter}{The maximum number of iterations to perform. Default is 50.}
+
+\item{lambda}{A tuning parameter for the centrality penalty. Default is 0.1.}
+
+\item{dist_method}{The method used for distance calculation. Options are "euclidean" (default) and "manhattan".}
+
+\item{candidate_sample_size}{Maximum number of candidate cells to evaluate per swap. Default is 5.}
 }
 \value{
-A list containing the following elements:
+A list containing:
 \item{optim_design}{The modified matrix.}
 \item{designs}{A list of all intermediate designs, starting from the input matrix.}
 \item{distances}{A list of all pair distances for each intermediate design.}
-\item{min_distance}{An integer indicating the minimum distance between pairs of occurrences of the same integer.}
+\item{min_distance}{The minimum distance between pairs of occurrences of the same integer in the final design.}
 \item{pairwise_distance}{A data frame with the pairwise distances for the final design.}
+\item{rows_incidence}{A vector recording the number of rows with repeated integers for each iteration.}
 }
 \description{
 Modifies the input matrix \code{X} to ensure that the distance between any two occurrences
-of the same integer is at least a dist \code{d}, by swapping one of the occurrences with a
-random occurrence of a different integer that is at least \code{d} away. The function
-starts with \code{starting_dist = 3} and increases it by \code{1} until the algorithm no longer 
-converges or \code{stop_iter} iterations have been performed.
+of the same integer is at least a distance \code{d}, by swapping one of the occurrences with a
+candidate cell of a different integer. The function starts with \code{starting_dist = 3} and increases it
+by \code{1} until the algorithm no longer converges or \code{stop_iter} iterations have been performed.
+This version evaluates candidate swaps using both the mean pairwise distance and a centrality penalty,
+and it uses candidate sampling to reduce computation.
 }
 \examples{
-# Create a matrix X with the numbers 1 to 10 are twice and 11 to 50 are once.
-# The matrix has 6 rows and 10 columns
 set.seed(123)
 X <- matrix(sample(c(rep(1:10, 2), 11:50), replace = FALSE), ncol = 10)
-X
-# Swap pairs
-B <- swap_pairs(X, starting_dist = 3)
+B <- swap_pairs(X, starting_dist = 3, stop_iter = 50, lambda = 0.1, dist_method = "manhattan", candidate_sample_size = 5)
 B$optim_design
-B$designs
-B$distances
 
-
-}
-\author{
-Jean-Marc Montpetit [aut], Didier Murillo [aut]
 }

From 32cb2a24b49bd19ac91ce29602f34891e33851ff Mon Sep 17 00:00:00 2001
From: DidierMurilloF <didier.murilloflorez@ndsu.edu>
Date: Sun, 6 Apr 2025 20:55:52 -0500
Subject: [PATCH 2/3] feat: add ui components for new parameters

---
 NAMESPACE                   |   1 -
 R/mod_multi_loc_prep.R      |  14 ++++
 R/utils_swap_functions.R    | 154 ------------------------------------
 man/calculate_lambda.Rd     |  48 -----------
 man/partially_replicated.Rd |   2 +-
 5 files changed, 15 insertions(+), 204 deletions(-)
 delete mode 100644 man/calculate_lambda.Rd

diff --git a/NAMESPACE b/NAMESPACE
index dfec4b7..4584522 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -9,7 +9,6 @@ export(CRD)
 export(RCBD)
 export(RCBD_augmented)
 export(alpha_lattice)
-export(calculate_lambda)
 export(diagonal_arrangement)
 export(do_optim)
 export(full_factorial)
diff --git a/R/mod_multi_loc_prep.R b/R/mod_multi_loc_prep.R
index d84e833..165f7ee 100644
--- a/R/mod_multi_loc_prep.R
+++ b/R/mod_multi_loc_prep.R
@@ -87,6 +87,18 @@ mod_multi_loc_preps_ui <- function(id){
                     )
                 )
             ),
+            
+            sliderInput(ns("border_penalization_prep"), 
+                        label = "Border Penalization", 
+                        min = 0.00, 
+                        max = 1.00, 
+                        value = 0.3),
+            selectInput(
+              ns("optimization_distance_method_prep"), 
+              label = "Optimization Distance Method:", 
+              choices = c("Euclidean" = "euclidean", "Manhattan" = "manhattan"), 
+              selected = "manhattan"
+            ),
             fluidRow(
                 column(
                     width = 6, 
@@ -884,6 +896,8 @@ mod_multi_loc_preps_server <- function(id){
                 planter = movement_planter,
                 seed = preps_seed, 
                 multiLocationData = TRUE,
+                dist_method = input$optimization_distance_method_prep,
+                border_penalization = input$border_penalization_prep,
                 data = entry_list
             )
         })
diff --git a/R/utils_swap_functions.R b/R/utils_swap_functions.R
index 1df8d89..0233e5e 100644
--- a/R/utils_swap_functions.R
+++ b/R/utils_swap_functions.R
@@ -92,160 +92,6 @@ pairs_distance <- function(X) {
     return(plotDist)
 }
 
-#' @title Swap pairs in a matrix of integers
-#' 
-#' @description Modifies the input matrix \code{X} to ensure that the distance between any two occurrences
-#' of the same integer is at least a dist \code{d}, by swapping one of the occurrences with a
-#' random occurrence of a different integer that is at least \code{d} away. The function
-#' starts with \code{starting_dist = 3} and increases it by \code{1} until the algorithm no longer 
-#' converges or \code{stop_iter} iterations have been performed. 
-#' 
-#' 
-#' @param X A matrix of integers.
-#' @param starting_dist The minimum starting distance to enforce between pairs of occurrences
-#'  of the same integer. Default is 3.
-#' @param stop_iter The maximum number of iterations to perform. Default is 100.
-#' 
-#' @return A list containing the following elements:
-#' \item{optim_design}{The modified matrix.}
-#' \item{designs}{A list of all intermediate designs, starting from the input matrix.}
-#' \item{distances}{A list of all pair distances for each intermediate design.}
-#' \item{min_distance}{An integer indicating the minimum distance between pairs of occurrences of the same integer.}
-#' \item{pairwise_distance}{A data frame with the pairwise distances for the final design.}
-#' 
-#' @author Jean-Marc Montpetit [aut], Didier Murillo [aut]
-#' 
-#' @examples
-#' # Create a matrix X with the numbers 1 to 10 are twice and 11 to 50 are once.
-#' # The matrix has 6 rows and 10 columns
-#' set.seed(123)
-#' X <- matrix(sample(c(rep(1:10, 2), 11:50), replace = FALSE), ncol = 10)
-#' X
-#' # Swap pairs
-#' B <- swap_pairs(X, starting_dist = 3)
-#' B$optim_design
-#' B$designs
-#' B$distances
-#' 
-#' 
-#' @export
-# swap_pairs <- function(X, starting_dist = 3, stop_iter = 50) {
-#     # check if the input X is a matrix
-#     if (!is.matrix(X)) {
-#         stop("Input must be a matrix")
-#     }
-#     # check if the input matrix X is numeric
-#     if (!is.numeric(X)) {
-#         stop("Matrix elements must be numeric")
-#     }
-#     swap_succeed <- FALSE
-#     input_X <- X
-#     input_freq <- table(input_X)
-#     minDist <- sqrt(sum(dim(X)^2))
-#     designs <- list()
-#     designs[[1]] <- X
-#     distances <- list()
-#     rows_incidence <- numeric()
-#     init_dist <- pairs_distance(X = X)
-#     genos <- unique(init_dist$geno)
-#     distances[[1]] <- init_dist
-#     w <- 2
-#     for (min_dist in seq(starting_dist, minDist, 1)) {
-#         n_iter <- 1
-#         while (n_iter <= stop_iter) {
-#             plotDist <- pairs_distance(X)
-#             rownames(plotDist) <- NULL  #reset for latert use
-#             LowID <- which(plotDist$DIST < min_dist)
-#             low_dist_gens <- unique(plotDist$geno[LowID])
-#             LS <- length(LowID)
-#             if (LS == 0) {
-#                 n_iter <- stop_iter + 1
-#                 break
-#             }
-#             for (genotype in low_dist_gens) {
-#                 indices <- which(X == genotype, arr.ind = TRUE)
-#                 # Get the row and column indices of the cells that contain other values
-#                 other_indices <- which(X != genotype, arr.ind = TRUE)
-#                 for (i in seq_len(nrow(indices))) {
-#                     # Calculate the Euclidean distances to the fixed point
-#                     dist <- apply(other_indices, 1, function(x) sum((x - indices[i, ])^2))
-#                     other_indices <- as.data.frame(other_indices)
-#                     # other_indices <- other_indices[order(other_indices[, 1]),]
-#                     valid_indices <- other_indices[sqrt(dist) >= min_dist, ]
-#                     if (nrow(valid_indices) == 0) break
-#                     valid_indices <- valid_indices[order(valid_indices[, 1]),]
-#                     # Pick a random cell to swap with
-#                     k <- sample(nrow(valid_indices), size = 1)
-#                     # Swap the two occurrences
-#                     X[indices[i,1], indices[i,2]] <- X[valid_indices[k,1], valid_indices[k,2]]
-#                     X[valid_indices[k,1], valid_indices[k,2]] <- genotype
-#                 }
-#             }
-#             n_iter <- n_iter + 1
-#         }
-#         if (min(pairs_distance(X)$DIST) < min_dist) {
-#             break
-#         } else {
-#             swap_succeed <- TRUE
-#             output_freq <- table(X)
-#             if (!all(input_freq == output_freq)) {
-#               stop("The swap function changed the frequency of some integers.")
-#             }
-#             frequency_rows <- as.data.frame(search_matrix_values(X = X, values_search = genos))
-#             df <- frequency_rows |> 
-#                 dplyr::filter(Times >= 2)
-#             rows_incidence[w - 1] <- nrow(df)
-#             designs[[w]] <- X
-#             distances[[w]] <- pairs_distance(X)
-#             w <- w + 1
-#         }
-#     }
-#     optim_design = designs[[length(designs)]] # return the last (better) design
-#     pairwise_distance <- pairs_distance(optim_design)
-#     min_distance = min(pairwise_distance$DIST)
-#     if (!swap_succeed) {
-#         optim_design = designs[[1]] # return the last (better) design
-#         distances[[1]] <- pairs_distance(optim_design)
-#         pairwise_distance <- pairs_distance(optim_design)
-#         min_distance = min(pairwise_distance$DIST)
-#         frequency_rows <- as.data.frame(search_matrix_values(X = optim_design, values_search = genos))
-#         df <- frequency_rows |> 
-#             dplyr::filter(Times >= 2)
-#         rows_incidence[1] <- nrow(df)
-#     }
-#     return(
-#         list(
-#             rows_incidence = rows_incidence,
-#             optim_design = optim_design, 
-#             designs = designs, 
-#             distances = distances,
-#             min_distance = min_distance,
-#             pairwise_distance = pairwise_distance
-#         )
-#     )
-# }
-
-# Function to calculate dynamic lambda based on matrix size
-calculate_lambda <- function(X, lambda_base = 0.1) {
-    # Get matrix dimensions
-    n <- nrow(X)
-    m <- ncol(X)
-    
-    # Calculate the maximum Euclidean distance from the center of the matrix
-    # Center coordinates
-    center <- c(n / 2, m / 2)
-    # Maximum distance is from the center to a corner, e.g. (1,1)
-    max_dist <- sqrt((1 - center[1])^2 + (1 - center[2])^2)
-    
-    # Alternatively, you could compute the maximum as the distance to the furthest corner:
-    # corners <- matrix(c(1, 1, n, 1, 1, m, n, m), ncol = 2, byrow = TRUE)
-    # max_dist <- max(sqrt(rowSums((t(t(corners) - center))^2)))
-    
-    # Dynamically compute lambda as a function of matrix size.
-    lambda <- lambda_base / max_dist
-    return(lambda)
-}
-
 
 #' @title Swap pairs in a matrix of integers
 #' 
diff --git a/man/calculate_lambda.Rd b/man/calculate_lambda.Rd
deleted file mode 100644
index 5a15463..0000000
--- a/man/calculate_lambda.Rd
+++ /dev/null
@@ -1,48 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/utils_swap_functions.R
-\name{calculate_lambda}
-\alias{calculate_lambda}
-\title{Swap pairs in a matrix of integers}
-\usage{
-calculate_lambda(X, lambda_base = 0.1)
-}
-\arguments{
-\item{X}{A matrix of integers.}
-
-\item{starting_dist}{The minimum starting distance to enforce between pairs of occurrences
-of the same integer. Default is 3.}
-
-\item{stop_iter}{The maximum number of iterations to perform. Default is 100.}
-}
-\value{
-A list containing the following elements:
-\item{optim_design}{The modified matrix.}
-\item{designs}{A list of all intermediate designs, starting from the input matrix.}
-\item{distances}{A list of all pair distances for each intermediate design.}
-\item{min_distance}{An integer indicating the minimum distance between pairs of occurrences of the same integer.}
-\item{pairwise_distance}{A data frame with the pairwise distances for the final design.}
-}
-\description{
-Modifies the input matrix \code{X} to ensure that the distance between any two occurrences
-of the same integer is at least a dist \code{d}, by swapping one of the occurrences with a
-random occurrence of a different integer that is at least \code{d} away. The function
-starts with \code{starting_dist = 3} and increases it by \code{1} until the algorithm no longer 
-converges or \code{stop_iter} iterations have been performed.
-}
-\examples{
-# Create a matrix X with the numbers 1 to 10 are twice and 11 to 50 are once.
-# The matrix has 6 rows and 10 columns
-set.seed(123)
-X <- matrix(sample(c(rep(1:10, 2), 11:50), replace = FALSE), ncol = 10)
-X
-# Swap pairs
-B <- swap_pairs(X, starting_dist = 3)
-B$optim_design
-B$designs
-B$distances
-
-
-}
-\author{
-Jean-Marc Montpetit [aut], Didier Murillo [aut]
-}
diff --git a/man/partially_replicated.Rd b/man/partially_replicated.Rd
index e69861f..4d93308 100644
--- a/man/partially_replicated.Rd
+++ b/man/partially_replicated.Rd
@@ -17,7 +17,7 @@ partially_replicated(
   locationNames = NULL,
   multiLocationData = FALSE,
   dist_method = "euclidean",
-  border_penalization = 0.1,
+  border_penalization = 0.5,
   data = NULL
 )
 }

From d7c22996bfc37225f57ef7d979ef71ed4877a0d5 Mon Sep 17 00:00:00 2001
From: DidierMurilloF <didier.murilloflorez@ndsu.edu>
Date: Wed, 4 Jun 2025 09:45:43 -0500
Subject: [PATCH 3/3] feat: fine tuning prep optimization

---
 R/mod_multi_loc_prep.R | 26 +++++++++++++-------------
 R/mod_pREPS.R          | 26 +++++++++++++-------------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/R/mod_multi_loc_prep.R b/R/mod_multi_loc_prep.R
index 165f7ee..b63264e 100644
--- a/R/mod_multi_loc_prep.R
+++ b/R/mod_multi_loc_prep.R
@@ -88,17 +88,17 @@ mod_multi_loc_preps_ui <- function(id){
                 )
             ),
             
-            sliderInput(ns("border_penalization_prep"), 
-                        label = "Border Penalization", 
-                        min = 0.00, 
-                        max = 1.00, 
-                        value = 0.3),
-            selectInput(
-              ns("optimization_distance_method_prep"), 
-              label = "Optimization Distance Method:", 
-              choices = c("Euclidean" = "euclidean", "Manhattan" = "manhattan"), 
-              selected = "manhattan"
-            ),
+            # sliderInput(ns("border_penalization_prep"), 
+            #             label = "Border Penalization", 
+            #             min = 0.00, 
+            #             max = 1.00, 
+            #             value = 0.3),
+            # selectInput(
+            #   ns("optimization_distance_method_prep"), 
+            #   label = "Optimization Distance Method:", 
+            #   choices = c("Euclidean" = "euclidean", "Manhattan" = "manhattan"), 
+            #   selected = "manhattan"
+            # ),
             fluidRow(
                 column(
                     width = 6, 
@@ -896,8 +896,8 @@ mod_multi_loc_preps_server <- function(id){
                 planter = movement_planter,
                 seed = preps_seed, 
                 multiLocationData = TRUE,
-                dist_method = input$optimization_distance_method_prep,
-                border_penalization = input$border_penalization_prep,
+                dist_method = "euclidean", #input$optimization_distance_method_prep,
+                border_penalization = 0.5, #input$border_penalization_prep,
                 data = entry_list
             )
         })
diff --git a/R/mod_pREPS.R b/R/mod_pREPS.R
index cacaf62..d4b4d3d 100644
--- a/R/mod_pREPS.R
+++ b/R/mod_pREPS.R
@@ -61,17 +61,17 @@ mod_pREPS_ui <- function(id){
 				label = "# of Rep Per Group:",
 				value = "2,1")
 			),
-		sliderInput(ns("border_penalization"), 
-		            label = "Border Penalization", 
-		            min = 0.00, 
-		            max = 1.00, 
-		            value = 0.3),
-		selectInput(
-      ns("optimization_distance_method"), 
-      label = "Optimization Distance Method:", 
-      choices = c("Euclidean" = "euclidean", "Manhattan" = "manhattan"), 
-      selected = "manhattan"
-      ),
+# 		sliderInput(ns("border_penalization"), 
+# 		            label = "Border Penalization", 
+# 		            min = 0.00, 
+# 		            max = 1.00, 
+# 		            value = 0.3),
+# 		selectInput(
+#       ns("optimization_distance_method"), 
+#       label = "Optimization Distance Method:", 
+#       choices = c("Euclidean" = "euclidean", "Manhattan" = "manhattan"), 
+#       selected = "manhattan"
+#       ),
 			fluidRow(
 				column(
 					width = 6,
@@ -494,8 +494,8 @@ mod_pREPS_server <- function(id){
             exptName =  expt_name,
             locationNames = site_names, 
             planter = movement_planter,
-            border_penalization = input$border_penalization,
-            dist_method = input$optimization_distance_method,
+            border_penalization = 0.5, #input$border_penalization,
+            dist_method = "euclidean", # input$optimization_distance_method,
             data = gen.list 
           )
       })