DidierMurilloF · DidierMurilloF · Apr 13, 2023 · Apr 13, 2023
diff --git a/R/fct_do_optim.R b/R/fct_do_optim.R
@@ -71,7 +71,7 @@ do_optim <- function(
         if (design == "sparse") {
             data_input <- stats::na.omit(data[,1:2])
             df_data_checks <- data_input[1:checks,]
-            df_data <- data_input[(checks + 1):nrow(data_input),]
+            df_data <- data_input[(checks + 1):nrow(data_input), ]
             colnames(df_data) <- c("ENTRY", "NAME")
             ENTRY <- as.vector(df_data$ENTRY)
             if (!is.numeric(ENTRY)) stop("ENTRY column should be integer numbers!")
@@ -253,6 +253,7 @@ do_optim <- function(
 #' @param ncols Number of columns in the field.
 #' @param sparse_list (optional) A class "Sparse" object generated by \code{do_optim()} function.
 #' @param seed (optional) Real number that specifies the starting seed to obtain reproducible designs.
+#' @param data (optional) Data frame with 2 columns: \code{ENTRY | NAME }. ENTRY must be numeric.
 #' 
 #' @author Didier Murillo [aut],
 #'         Salvador Gezan [aut],
@@ -293,7 +294,8 @@ sparse_allocation <- function(
     exptName = NULL, 
     locationNames,
     sparse_list, 
-    seed) {
+    seed,
+    data = NULL) {
     # set a random seed if it is missing
     if (missing(seed)) seed <- base::sample.int(10000, size = 1) 
     if (missing(l)) stop("Please, define the number of locations for this design.")
@@ -312,6 +314,7 @@ sparse_allocation <- function(
     if (missing(copies_per_entry)) {
         stop("You must specify the number of reps per plant")
     }
+    if (copies_per_entry >= l) stop("Please, enter copies_per_entry < l")
     if (is.null(checks)) stop("Please, define the number of checks for this design.")
     if (!missing(sparse_list)) {
         if (!inherits(sparse_list, "Sparse")) {
@@ -329,6 +332,16 @@ sparse_allocation <- function(
             seed = seed
         )
     }
+    # Merge user input data
+    if (!is.null(data)) {
+        unrep <- merge_user_data(
+            optim_out = unrep, 
+            data = data, 
+            lines = lines, 
+            add_checks = TRUE, 
+            checks = checks
+        )
+    } 
     # Define field dimensions (rows and columns)
     if (missing(nrows) || missing(ncols)) {
         lines_within_loc <- as.numeric(unrep$size_locations[1])
@@ -392,23 +405,39 @@ sparse_allocation <- function(
 #' @param plotNumber Numeric vector with the starting plot number for each location. By default \code{plotNumber = 101}.
 #' @param exptName (optional) Name of the experiment.
 #' @param locationNames (optional) Name for each location.
-#' @param nrows Number of rows field.
-#' @param ncols Number of columns field.
+#' @param nrows Numeric vector with the number of rows field at each location.
+#' @param ncols Numeric vector with the number of columns field at each location.
 #' @param seed (optional) Real number that specifies the starting seed to obtain reproducible designs.
 #' @param optim_list (optional) A list object of class "MultiPrep"generated by \code{do_optim()} function.
+#' @param data (optional) Data frame with 2 columns: \code{ENTRY | NAME }. ENTRY must be numeric.
 #' 
 #' 
 #' @author Didier Murillo [aut],
 #'         Salvador Gezan [aut],
+#'         Jean-Marc Montpetit [ctb],
 #'         Ana Heilman [ctb]
 #' 
-#' @return A list with four elements.
+#' @return A list of class \code{FielDHub} with several elements.
 #' \itemize{
-#'   \item \code{designs} is a list with each location p-rep randomization.
+#'   \item \code{infoDesign} is a list with information on the design parameters.
+#'   \item \code{layoutRandom} is a matrix with the randomization layout.
+#'   \item \code{plotNumber} is a matrix with the layout plot number.
+#'   \item \code{binaryField} is a matrix with the binary field.
+#'   \item \code{dataEntry} is a data frame with the data input.
+#'   \item \code{genEntries} is a list with the entries for replicated and non-replicated parts.
+#'   \item \code{fieldBook} is a data frame with field book design. This includes the index (Row, Column).
+#'   \item \code{min_pairswise_distance} is a data frame with the minimum pairwise distance between 
+#'              each pair of locations.
+#'   \item \code{reps_info} is a data frame with information on the number of replicated and 
+#'              non-replicated treatments at each location.
+#'   \item \code{pairsDistance} is a data frame with the pairwise distances between each pair of 
+#'              treatments.
+#'   \item \code{treatments_with_reps} is a list with the entries for the replicated part of the design.
+#'   \item \code{treatments_with_no_reps} is a list with the entries for the non-replicated part of the design.
 #'   \item \code{list_locs} is a list with each location list of entries.
 #'   \item \code{allocation} is a matrix with the allocation of treatments.
 #'   \item \code{size_locations} is a data frame with one column for each 
-#'  location and one row with the size of the location.
+#'              location and one row with the size of the location.
 #' }
 #'
 #' @references
@@ -455,7 +484,8 @@ multi_location_prep <- function(
     exptName, 
     locationNames,
     optim_list, 
-    seed) {
+    seed,
+    data = NULL) {
     # set a random seed if it is missing
     if (missing(seed)) seed <- base::sample.int(10000, size = 1)
     if (missing(l)) stop("Please, define the number of locations for this design.")
@@ -492,9 +522,21 @@ multi_location_prep <- function(
             add_checks = add_checks, 
             checks = checks, 
             rep_checks = rep_checks,
-            seed = seed
+            seed = seed,
+            data = data
         )
     }
+    # Merge user input data
+    if (!is.null(data)) {
+        preps <- merge_user_data(
+            optim_out = preps, 
+            data = data, 
+            lines = lines, 
+            add_checks = add_checks, 
+            checks = checks, 
+            rep_checks = rep_checks
+        )
+    } 
     # Set the number of rows and columns
     if (missing(nrows) || missing(ncols)) {
         lines_within_loc <- as.numeric(preps$size_locations[1])
@@ -511,8 +553,8 @@ multi_location_prep <- function(
         df_choices <- data.frame(choices = unlist(choices), diff_dim = dif)
         df_choices <- df_choices[order(df_choices$diff_dim, decreasing = FALSE), ]
         dimensions <- unlist(strsplit(df_choices[1,1], " x "))
-        nrows <- as.numeric(dimensions[1])
-        ncols <- as.numeric(dimensions[2])
+        nrows <- rep(as.numeric(dimensions[1]), times = l)
+        ncols <- rep(as.numeric(dimensions[2]), times = l)
     }
     # Generate the p-rep randomization
     design_randomization <- partially_replicated(
@@ -548,4 +590,83 @@ multi_location_prep <- function(
     )
     class(output) <- "FielDHub"
     return(invisible(output))
-}
+}
+
+#' Merge user data with optimization output
+#'
+#' This function merges user data with optimization output to prepare input
+#' data for randomization. It accepts the output from the optimization function
+#' `do_optim()` and user data with entries and corresponding line names.
+#' It returns a modified `optim_out` object containing the merged data.
+#'
+#' @param optim_out Output object from the optimization function `do_optim()`.
+#' @param data A data frame containing entries and corresponding names.
+#' @param lines Number of entries.
+#' @param add_checks A boolean indicating whether to add checks to the input data.
+#' @param checks A numeric vector containing the check entries.
+#' @param rep_checks A numeric vector containing replicated checks.
+#'
+#' @return The modified `optim_out` object containing merged data.
+#' @noRd
+merge_user_data <- function(
+    optim_out,
+    data, 
+    lines,
+    add_checks = FALSE, 
+    checks, 
+    rep_checks = NULL) {
+    if (!is.null(data)) {
+        data_entry <- data[, 1:2]
+        data_entry <- na.omit(data_entry) 
+        colnames(data_entry) <- c("ENTRY", "NAME")
+        vlookUp_entry <- 1:lines
+        if (add_checks) input_checks <- checks else input_checks <- 0
+        if (!is.null(rep_checks)) {
+            if (length(rep_checks) != input_checks) {
+                stop("Length of checks does not match replications!")
+            }
+        }
+        data_prep_no_checks <- data_entry[(input_checks + 1):nrow(data_entry), ]
+        entries_in_file <- nrow(data_prep_no_checks)
+        if (entries_in_file != lines) {
+            stop("Input lines does not match number of lines in input data!")
+        }
+        max_entry <- max(data_entry$ENTRY)
+        vlookUp_entry <- c((max_entry + 1):((max_entry + input_checks)), 1:lines)
+        prep_data_input <- data_entry
+        locs <- length(optim_out$list_locs)
+        merged_list_locs <- setNames(
+            vector("list", length = locs), 
+            nm = paste0("LOC", 1:locs)
+        )
+        locs_range <- 1:locs
+        for (LOC in locs_range) {
+            iter_loc <- optim_out$list_locs[[LOC]]
+            data_input_mutated <- prep_data_input %>%
+                dplyr::mutate(
+                    ENTRY_list = ENTRY,
+                    ENTRY = vlookUp_entry
+                ) %>%
+                dplyr::select(ENTRY_list, ENTRY, NAME) %>%
+                dplyr::left_join(y = iter_loc, by = "ENTRY")
+
+                if (inherits(optim_out, "MultiPrep")) {
+                    data_input_mutated <- data_input_mutated %>% 
+                        dplyr::select(ENTRY_list, NAME.x, REPS) %>%
+                        dplyr::arrange(dplyr::desc(REPS)) %>%
+                        dplyr::rename(ENTRY = ENTRY_list, NAME = NAME.x)
+                }
+
+                if (inherits(optim_out, "Sparse")) {
+                    data_input_mutated <- data_input_mutated %>% 
+                        stats::na.omit() %>%
+                        dplyr::select(ENTRY_list, NAME.x) %>%
+                        dplyr::rename(ENTRY = ENTRY_list, NAME = NAME.x)
+                }
+
+            merged_list_locs[[LOC]] <- data_input_mutated
+        }
+        optim_out$list_locs <- merged_list_locs
+        return(optim_out)
+    }
+}
diff --git a/R/fct_optimized_arrangement.R b/R/fct_optimized_arrangement.R
@@ -127,15 +127,15 @@ optimized_arrangement <- function(nrows = NULL, ncols = NULL, lines = NULL,
     } else {
         arg1 <- list(nrows, ncols, lines, l);arg2 <- c(nrows, ncols, lines, l)
         if (base::any(lengths(arg1) != 1) || base::any(arg2 %% 1 != 0) || base::any(arg2 < 1)) {
-        base::stop('"optimized_arrangement()" requires arguments nrows, ncols, and l to be numeric and distint of NULL')
+            base::stop('"optimized_arrangement()" requires arguments nrows, ncols, and l to be numeric and distint of NULL')
         }
     } 
 
     if(is.null(data)) {
         if (length(checks) == 1 && checks > 1) {
         checksEntries <- 1:checks
         checks <- checks
-        }else if (length(checks) > 1) {
+        } else if (length(checks) > 1) {
         checksEntries <- sort(checks)
         checks <- length(checks)
         } else if (length(checks) == 1 && checks == 1) {

diff --git a/R/fct_partially_replicated.R b/R/fct_partially_replicated.R
@@ -5,8 +5,8 @@
 #' between checks is maximized in such a way that each row and column have control plots. 
 #' Note that design generation needs the dimension of the field (number of rows and columns).
 #'
-#' @param nrows Number of rows field.
-#' @param ncols Number of columns field.
+#' @param nrows Numeric vector with the number of rows field at each location.
+#' @param ncols Numeric vector with the number of columns field at each location.
 #' @param repGens Numeric vector with the amount genotypes to replicate.
 #' @param repUnits Numeric vector with the number of reps of each genotype.
 #' @param planter Option for \code{serpentine} or \code{cartesian} movement. By default  \code{planter = 'serpentine'}. 
@@ -26,19 +26,29 @@
 #'         Salvador Gezan [aut],
 #'         Ana Heilman [ctb],
 #'         Thomas Walk [ctb], 
-#'         Johan Aparicio [ctb], 
+#'         Johan Aparicio [ctb],
+#'         Jean-Marc Montpetit [ctb],
 #'         Richard Horsley [ctb]
 #' 
 #' 
 #' 
-#' @return A list with five elements.
+#' @return A list with several elements.
 #' \itemize{
 #'   \item \code{infoDesign} is a list with information on the design parameters.
 #'   \item \code{layoutRandom} is a matrix with the randomization layout.
 #'   \item \code{plotNumber} is a matrix with the layout plot number.
+#'   \item \code{binaryField} is a matrix with the binary field.
 #'   \item \code{dataEntry} is a data frame with the data input.
-#'   \item \code{genEntries} is a list with the entries for replicated and no replicated part.
+#'   \item \code{genEntries} is a list with the entries for replicated and non-replicated parts.
 #'   \item \code{fieldBook} is a data frame with field book design. This includes the index (Row, Column).
+#'   \item \code{min_pairswise_distance} is a data frame with the minimum pairwise distance between 
+#'           each pair of locations.
+#'   \item \code{reps_info} is a data frame with information on the number of replicated and 
+#'           non-replicated treatments at each location.
+#'   \item \code{pairsDistance} is a data frame with the pairwise distances between each pair of 
+#'           treatments.
+#'   \item \code{treatments_with_reps} is a list with the entries for the replicated part of the design.
+#'   \item \code{treatments_with_no_reps} is a list with the entries for the non-replicated part of the design.
 #' }
 #'
 #' @references
@@ -116,6 +126,28 @@ partially_replicated <- function(
     if (is.null(nrows) || is.null(ncols) || !is.numeric(nrows) || !is.numeric(ncols)) {
         base::stop('Basic design parameters missing (nrows, ncols) or is not numeric.')
     }
+    if (length(nrows) != l) {
+        if (length(nrows) < l) {
+            warning("Number of nrows values not matching number of locations", call. = FALSE)
+            # Filling missing nrows values with last provided value
+            nrows <- c(nrows, rep(nrows[length(nrows)], l - length(nrows)))
+        } else {
+            warning("Number of nrows values not matching number of locations", call. = FALSE)
+            # Filling missing nrows values with last provided value
+            nrows <- nrows[1:l]
+        }
+    }
+    if (length(ncols) != l) {
+        if (length(ncols) < l) {
+            warning("Number of ncols values not matching number of locations", call. = FALSE)
+            # Filling missing nrows values with last provided value
+            ncols <- c(ncols, rep(ncols[length(ncols)], l - length(ncols)))
+        } else  {
+            warning("Number of ncols values not matching number of locations", call. = FALSE)
+            # Filling missing nrows values with last provided value
+            ncols <- ncols[1:l]
+        }
+    }
 
     if (is.null(data)) {
         if (is.null(repGens) || is.null(repUnits)) {
@@ -144,17 +176,17 @@ partially_replicated <- function(
         }
     } else stop("Number of locations/sites is missing")
 
-    if (!is.null(data)) {
-        arg1 <- list(nrows, ncols, l);arg2 <- c(nrows, ncols, l)
-        if (base::any(lengths(arg1) != 1) || base::any(arg2 %% 1 != 0) || base::any(arg2 < 1)) {
-        base::stop('"partially_replicated()" requires input nrows, ncols, and l to be numeric and distint of NULL.')
-        }
-    } else {
-        arg1 <- list(nrows, ncols, l);arg2 <- c(nrows, ncols, l)
-        if (base::any(lengths(arg1) != 1) || base::any(arg2 %% 1 != 0) || base::any(arg2 < 1)) {
-        base::stop('"partially_replicated()" requires input nrows, ncols, and l to be numeric and distint of NULL.')
-        }
-    } 
+    # if (!is.null(data)) {
+    #     arg1 <- list(nrows, ncols, l);arg2 <- c(nrows, ncols, l)
+    #     if (base::any(lengths(arg1) != 1) || base::any(arg2 %% 1 != 0) || base::any(arg2 < 1)) {
+    #     base::stop('"partially_replicated()" requires input nrows, ncols, and l to be numeric and distint of NULL.')
+    #     }
+    # } else {
+    #     arg1 <- list(nrows, ncols, l);arg2 <- c(nrows, ncols, l)
+    #     if (base::any(lengths(arg1) != 1) || base::any(arg2 %% 1 != 0) || base::any(arg2 < 1)) {
+    #     base::stop('"partially_replicated()" requires input nrows, ncols, and l to be numeric and distint of NULL.')
+    #     }
+    # } 
     if (!is.null(data)) {
         if (multi_location_data) {
             if (is.data.frame(data)) {
@@ -260,8 +292,8 @@ partially_replicated <- function(
     rows_incidence <- vector(mode = "numeric", length = l)
     for (sites in 1:l) {
         prep <- pREP(
-            nrows = nrows, 
-            ncols = ncols, 
+            nrows = nrows[sites], 
+            ncols = ncols[sites], 
             RepChecks = NULL, 
             checks = NULL, 
             Fillers = 0,