DidierMurilloF · DidierMurilloF · Jun 4, 2025 · Apr 6, 2025 · Apr 7, 2025 · Jun 4, 2025
diff --git a/FielDHub.Rproj b/FielDHub.Rproj
@@ -1,4 +1,5 @@
 Version: 1.0
+
 ProjectId: ebb268b6-9f41-4fe0-aee9-2286cc906b17
 
 RestoreWorkspace: Default

diff --git a/R/fct_partially_replicated.R b/R/fct_partially_replicated.R
@@ -121,7 +121,9 @@ partially_replicated <- function(
     seed = NULL, 
     exptName = NULL, 
     locationNames = NULL,
-    multiLocationData = FALSE,  
+    multiLocationData = FALSE, 
+    dist_method = "euclidean",
+    border_penalization = 0.5,
     data = NULL) {
 
     if (all(c("serpentine", "cartesian") != planter)) {
@@ -296,6 +298,8 @@ partially_replicated <- function(
             seed = seed, 
             optim = TRUE, 
             niter = 1000, 
+            dist_method = dist_method,
+            border_penalization = border_penalization,
             data = list_locs[[sites]]
         )
         rows_incidence[sites] <- prep$rows_incidence[length(prep$rows_incidence)]

diff --git a/R/mod_multi_loc_prep.R b/R/mod_multi_loc_prep.R
@@ -87,6 +87,18 @@ mod_multi_loc_preps_ui <- function(id){
                     )
                 )
             ),
+
+            # sliderInput(ns("border_penalization_prep"), 
+            #             label = "Border Penalization", 
+            #             min = 0.00, 
+            #             max = 1.00, 
+            #             value = 0.3),
+            # selectInput(
+            #   ns("optimization_distance_method_prep"), 
+            #   label = "Optimization Distance Method:", 
+            #   choices = c("Euclidean" = "euclidean", "Manhattan" = "manhattan"), 
+            #   selected = "manhattan"
+            # ),
             fluidRow(
                 column(
                     width = 6, 
@@ -884,6 +896,8 @@ mod_multi_loc_preps_server <- function(id){
                 planter = movement_planter,
                 seed = preps_seed, 
                 multiLocationData = TRUE,
+                dist_method = "euclidean", #input$optimization_distance_method_prep,
+                border_penalization = 0.5, #input$border_penalization_prep,
                 data = entry_list
             )
         })

diff --git a/R/mod_pREPS.R b/R/mod_pREPS.R
@@ -61,6 +61,17 @@ mod_pREPS_ui <- function(id){
 				label = "# of Rep Per Group:",
 				value = "2,1")
 			),
+# 		sliderInput(ns("border_penalization"), 
+# 		            label = "Border Penalization", 
+# 		            min = 0.00, 
+# 		            max = 1.00, 
+# 		            value = 0.3),
+# 		selectInput(
+#       ns("optimization_distance_method"), 
+#       label = "Optimization Distance Method:", 
+#       choices = c("Euclidean" = "euclidean", "Manhattan" = "manhattan"), 
+#       selected = "manhattan"
+#       ),
 			fluidRow(
 				column(
 					width = 6,
@@ -483,6 +494,8 @@ mod_pREPS_server <- function(id){
             exptName =  expt_name,
             locationNames = site_names, 
             planter = movement_planter,
+            border_penalization = 0.5, #input$border_penalization,
+            dist_method = "euclidean", # input$optimization_distance_method,
             data = gen.list 
           )
       })

diff --git a/R/utils_pREP.R b/R/utils_pREP.R
@@ -42,7 +42,10 @@ pREP <- function(
     seed = NULL, 
     optim = TRUE, 
     niter = 10000, 
-    data = NULL) {
+    border_penalization = 0.1,
+    dist_method = "euclidean",
+    data = NULL
+    ) {
 
     niter <- 1000
     prep <- TRUE
@@ -173,12 +176,15 @@ pREP <- function(
     }
     # Make numeric each element in the matrix layout1
     field_layout <- apply(layout1, c(1,2), as.numeric)
+
     ################### Optimization ##########################################
     # Perform an optimization by using the function swap_pairs()
     if (max(table(field_layout)) == 2) {
-        swap <- swap_pairs(X = field_layout, starting_dist = 3, stop_iter = 18)
+        swap <- swap_pairs(X = field_layout, starting_dist = 3, stop_iter = 5, 
+                           dist_method = dist_method, lambda = border_penalization)
     } else {
-        swap <- swap_pairs(X = field_layout, starting_dist = 2, stop_iter = 18)
+        swap <- swap_pairs(X = field_layout, starting_dist = 2, stop_iter = 5, 
+                           dist_method = dist_method, lambda = border_penalization)
     }
     optim_layout <- swap$optim_design
     dups <- table(as.vector(optim_layout))

diff --git a/R/utils_swap_functions.R b/R/utils_swap_functions.R
@@ -92,52 +92,50 @@ pairs_distance <- function(X) {
     return(plotDist)
 }
 
+
 #' @title Swap pairs in a matrix of integers
 #' 
 #' @description Modifies the input matrix \code{X} to ensure that the distance between any two occurrences
-#' of the same integer is at least a dist \code{d}, by swapping one of the occurrences with a
-#' random occurrence of a different integer that is at least \code{d} away. The function
-#' starts with \code{starting_dist = 3} and increases it by \code{1} until the algorithm no longer 
-#' converges or \code{stop_iter} iterations have been performed. 
-#' 
+#' of the same integer is at least a distance \code{d}, by swapping one of the occurrences with a
+#' candidate cell of a different integer. The function starts with \code{starting_dist = 3} and increases it
+#' by \code{1} until the algorithm no longer converges or \code{stop_iter} iterations have been performed.
+#' This version evaluates candidate swaps using both the mean pairwise distance and a centrality penalty,
+#' and it uses candidate sampling to reduce computation.
 #' 
 #' @param X A matrix of integers.
-#' @param starting_dist The minimum starting distance to enforce between pairs of occurrences
-#'  of the same integer. Default is 3.
-#' @param stop_iter The maximum number of iterations to perform. Default is 100.
+#' @param starting_dist The minimum starting distance to enforce between pairs of occurrences of the same integer. Default is 3.
+#' @param stop_iter The maximum number of iterations to perform. Default is 50.
+#' @param lambda A tuning parameter for the centrality penalty. Default is 0.1.
+#' @param dist_method The method used for distance calculation. Options are "euclidean" (default) and "manhattan".
+#' @param candidate_sample_size Maximum number of candidate cells to evaluate per swap. Default is 5.
 #' 
-#' @return A list containing the following elements:
+#' @return A list containing:
 #' \item{optim_design}{The modified matrix.}
 #' \item{designs}{A list of all intermediate designs, starting from the input matrix.}
 #' \item{distances}{A list of all pair distances for each intermediate design.}
-#' \item{min_distance}{An integer indicating the minimum distance between pairs of occurrences of the same integer.}
+#' \item{min_distance}{The minimum distance between pairs of occurrences of the same integer in the final design.}
 #' \item{pairwise_distance}{A data frame with the pairwise distances for the final design.}
-#' 
-#' @author Jean-Marc Montpetit [aut], Didier Murillo [aut]
+#' \item{rows_incidence}{A vector recording the number of rows with repeated integers for each iteration.}
 #' 
 #' @examples
-#' # Create a matrix X with the numbers 1 to 10 are twice and 11 to 50 are once.
-#' # The matrix has 6 rows and 10 columns
 #' set.seed(123)
 #' X <- matrix(sample(c(rep(1:10, 2), 11:50), replace = FALSE), ncol = 10)
-#' X
-#' # Swap pairs
-#' B <- swap_pairs(X, starting_dist = 3)
+#' B <- swap_pairs(X, starting_dist = 3, stop_iter = 50, lambda = 0.1, dist_method = "manhattan", candidate_sample_size = 5)
 #' B$optim_design
-#' B$designs
-#' B$distances
-#' 
 #' 
 #' @export
-swap_pairs <- function(X, starting_dist = 3, stop_iter = 50) {
-    # check if the input X is a matrix
+swap_pairs <- function(X, starting_dist = 3, stop_iter = 10, lambda = 0.5,
+                       dist_method = "euclidean", candidate_sample_size = 4) {
+    # Check if the input X is a matrix and numeric.
     if (!is.matrix(X)) {
         stop("Input must be a matrix")
     }
-    # check if the input matrix X is numeric
     if (!is.numeric(X)) {
         stop("Matrix elements must be numeric")
     }
+
+    # lambda <- calculate_lambda(X, lambda_base = lambda)
+
     swap_succeed <- FALSE
     input_X <- X
     input_freq <- table(input_X)
@@ -146,85 +144,121 @@ swap_pairs <- function(X, starting_dist = 3, stop_iter = 50) {
     designs[[1]] <- X
     distances <- list()
     rows_incidence <- numeric()
+
     init_dist <- pairs_distance(X = X)
     genos <- unique(init_dist$geno)
     distances[[1]] <- init_dist
     w <- 2
+
+    # Main loop: Increase the minimum distance from starting_dist to maximum possible.
     for (min_dist in seq(starting_dist, minDist, 1)) {
         n_iter <- 1
         while (n_iter <= stop_iter) {
             plotDist <- pairs_distance(X)
-            rownames(plotDist) <- NULL  #reset for latert use
+            rownames(plotDist) <- NULL  # Reset rownames.
             LowID <- which(plotDist$DIST < min_dist)
             low_dist_gens <- unique(plotDist$geno[LowID])
             LS <- length(LowID)
             if (LS == 0) {
                 n_iter <- stop_iter + 1
                 break
             }
+
+            # Compute the center of the matrix for the centrality penalty.
+            center <- c(nrow(X) / 2, ncol(X) / 2)
+
             for (genotype in low_dist_gens) {
                 indices <- which(X == genotype, arr.ind = TRUE)
-                # Get the row and column indices of the cells that contain other values
                 other_indices <- which(X != genotype, arr.ind = TRUE)
+
                 for (i in seq_len(nrow(indices))) {
-                    # Calculate the Euclidean distances to the fixed point
-                    dist <- apply(other_indices, 1, function(x) sum((x - indices[i, ])^2))
-                    other_indices <- as.data.frame(other_indices)
-                    # other_indices <- other_indices[order(other_indices[, 1]),]
-                    valid_indices <- other_indices[sqrt(dist) >= min_dist, ]
+                    # Calculate distances from the current occurrence to all other indices.
+                    if (dist_method == "euclidean") {
+                        d <- sqrt(apply(other_indices, 1, function(x) sum((x - indices[i, ])^2)))
+                    } else if (dist_method == "manhattan") {
+                        d <- apply(other_indices, 1, function(x) sum(abs(x - indices[i, ])))
+                    } else {
+                        stop("Invalid distance method specified. Use 'euclidean' or 'manhattan'.")
+                    }
+
+                    other_indices_df <- as.data.frame(other_indices)
+                    valid_indices <- other_indices_df[d >= min_dist, ]
                     if (nrow(valid_indices) == 0) break
-                    valid_indices <- valid_indices[order(valid_indices[, 1]),]
-                    # Pick a random cell to swap with
-                    k <- sample(nrow(valid_indices), size = 1)
-                    # Swap the two occurrences
-                    X[indices[i,1], indices[i,2]] <- X[valid_indices[k,1], valid_indices[k,2]]
-                    X[valid_indices[k,1], valid_indices[k,2]] <- genotype
+
+                    if (nrow(valid_indices) > candidate_sample_size) {
+                        sample_idx <- sample(seq_len(nrow(valid_indices)), candidate_sample_size)
+                        candidate_set <- valid_indices[sample_idx, , drop = FALSE]
+                    } else {
+                        candidate_set <- valid_indices
+                    }
+
+                    candidate_scores <- numeric(nrow(candidate_set))
+                    for (j in seq_len(nrow(candidate_set))) {
+                        X_temp <- X
+                        X_temp[indices[i, 1], indices[i, 2]] <- X_temp[candidate_set[j, 1], candidate_set[j, 2]]
+                        X_temp[candidate_set[j, 1], candidate_set[j, 2]] <- genotype
+
+                        candidate_mean <- mean(pairs_distance(X_temp)$DIST)
+
+                        candidate_center_dist <- sqrt(sum((c(candidate_set[j, 1], candidate_set[j, 2]) - center)^2))
+
+                        candidate_scores[j] <- candidate_mean - lambda * candidate_center_dist
+                    }
+
+                    best_candidate_index <- which.max(candidate_scores)
+                    best_candidate <- as.numeric(unlist(candidate_set[best_candidate_index, , drop = TRUE]))
+
+                    X[indices[i, 1], indices[i, 2]] <- X[best_candidate[1], best_candidate[2]]
+                    X[best_candidate[1], best_candidate[2]] <- genotype
                 }
             }
             n_iter <- n_iter + 1
         }
+
         if (min(pairs_distance(X)$DIST) < min_dist) {
             break
         } else {
             swap_succeed <- TRUE
             output_freq <- table(X)
             if (!all(input_freq == output_freq)) {
-              stop("The swap function changed the frequency of some integers.")
+                stop("The swap function changed the frequency of some integers.")
             }
             frequency_rows <- as.data.frame(search_matrix_values(X = X, values_search = genos))
-            df <- frequency_rows |> 
-                dplyr::filter(Times >= 2)
+            df <- frequency_rows |> dplyr::filter(Times >= 2)
             rows_incidence[w - 1] <- nrow(df)
             designs[[w]] <- X
             distances[[w]] <- pairs_distance(X)
             w <- w + 1
         }
     }
-    optim_design = designs[[length(designs)]] # return the last (better) design
+
+    optim_design <- designs[[length(designs)]]
     pairwise_distance <- pairs_distance(optim_design)
-    min_distance = min(pairwise_distance$DIST)
+    min_distance <- min(pairwise_distance$DIST)
+
     if (!swap_succeed) {
-        optim_design = designs[[1]] # return the last (better) design
+        optim_design <- designs[[1]]
         distances[[1]] <- pairs_distance(optim_design)
         pairwise_distance <- pairs_distance(optim_design)
-        min_distance = min(pairwise_distance$DIST)
+        min_distance <- min(pairwise_distance$DIST)
         frequency_rows <- as.data.frame(search_matrix_values(X = optim_design, values_search = genos))
-        df <- frequency_rows |> 
-            dplyr::filter(Times >= 2)
+        df <- frequency_rows |> dplyr::filter(Times >= 2)
         rows_incidence[1] <- nrow(df)
     }
+
     return(
         list(
             rows_incidence = rows_incidence,
-            optim_design = optim_design, 
-            designs = designs, 
+            optim_design = optim_design,
+            designs = designs,
             distances = distances,
             min_distance = min_distance,
             pairwise_distance = pairwise_distance
         )
     )
 }
 
+
 #' @title Search Matrix Values
 #'
 #' @description Search for values in a matrix and return the row number, value, and frequency.

diff --git a/man/partially_replicated.Rd b/man/partially_replicated.Rd