Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add custom field dimensions at each location in preps #33

Merged
merged 1 commit into from
Apr 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 133 additions & 12 deletions R/fct_do_optim.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ do_optim <- function(
if (design == "sparse") {
data_input <- stats::na.omit(data[,1:2])
df_data_checks <- data_input[1:checks,]
df_data <- data_input[(checks + 1):nrow(data_input),]
df_data <- data_input[(checks + 1):nrow(data_input), ]
colnames(df_data) <- c("ENTRY", "NAME")
ENTRY <- as.vector(df_data$ENTRY)
if (!is.numeric(ENTRY)) stop("ENTRY column should be integer numbers!")
Expand Down Expand Up @@ -253,6 +253,7 @@ do_optim <- function(
#' @param ncols Number of columns in the field.
#' @param sparse_list (optional) A class "Sparse" object generated by \code{do_optim()} function.
#' @param seed (optional) Real number that specifies the starting seed to obtain reproducible designs.
#' @param data (optional) Data frame with 2 columns: \code{ENTRY | NAME }. ENTRY must be numeric.
#'
#' @author Didier Murillo [aut],
#' Salvador Gezan [aut],
Expand Down Expand Up @@ -293,7 +294,8 @@ sparse_allocation <- function(
exptName = NULL,
locationNames,
sparse_list,
seed) {
seed,
data = NULL) {
# set a random seed if it is missing
if (missing(seed)) seed <- base::sample.int(10000, size = 1)
if (missing(l)) stop("Please, define the number of locations for this design.")
Expand All @@ -312,6 +314,7 @@ sparse_allocation <- function(
if (missing(copies_per_entry)) {
stop("You must specify the number of reps per plant")
}
if (copies_per_entry >= l) stop("Please, enter copies_per_entry < l")
if (is.null(checks)) stop("Please, define the number of checks for this design.")
if (!missing(sparse_list)) {
if (!inherits(sparse_list, "Sparse")) {
Expand All @@ -329,6 +332,16 @@ sparse_allocation <- function(
seed = seed
)
}
# Merge user input data
if (!is.null(data)) {
unrep <- merge_user_data(
optim_out = unrep,
data = data,
lines = lines,
add_checks = TRUE,
checks = checks
)
}
# Define field dimensions (rows and columns)
if (missing(nrows) || missing(ncols)) {
lines_within_loc <- as.numeric(unrep$size_locations[1])
Expand Down Expand Up @@ -392,23 +405,39 @@ sparse_allocation <- function(
#' @param plotNumber Numeric vector with the starting plot number for each location. By default \code{plotNumber = 101}.
#' @param exptName (optional) Name of the experiment.
#' @param locationNames (optional) Name for each location.
#' @param nrows Number of rows field.
#' @param ncols Number of columns field.
#' @param nrows Numeric vector with the number of rows field at each location.
#' @param ncols Numeric vector with the number of columns field at each location.
#' @param seed (optional) Real number that specifies the starting seed to obtain reproducible designs.
#' @param optim_list (optional) A list object of class "MultiPrep"generated by \code{do_optim()} function.
#' @param data (optional) Data frame with 2 columns: \code{ENTRY | NAME }. ENTRY must be numeric.
#'
#'
#' @author Didier Murillo [aut],
#' Salvador Gezan [aut],
#' Jean-Marc Montpetit [ctb],
#' Ana Heilman [ctb]
#'
#' @return A list with four elements.
#' @return A list of class \code{FielDHub} with several elements.
#' \itemize{
#' \item \code{designs} is a list with each location p-rep randomization.
#' \item \code{infoDesign} is a list with information on the design parameters.
#' \item \code{layoutRandom} is a matrix with the randomization layout.
#' \item \code{plotNumber} is a matrix with the layout plot number.
#' \item \code{binaryField} is a matrix with the binary field.
#' \item \code{dataEntry} is a data frame with the data input.
#' \item \code{genEntries} is a list with the entries for replicated and non-replicated parts.
#' \item \code{fieldBook} is a data frame with field book design. This includes the index (Row, Column).
#' \item \code{min_pairswise_distance} is a data frame with the minimum pairwise distance between
#' each pair of locations.
#' \item \code{reps_info} is a data frame with information on the number of replicated and
#' non-replicated treatments at each location.
#' \item \code{pairsDistance} is a data frame with the pairwise distances between each pair of
#' treatments.
#' \item \code{treatments_with_reps} is a list with the entries for the replicated part of the design.
#' \item \code{treatments_with_no_reps} is a list with the entries for the non-replicated part of the design.
#' \item \code{list_locs} is a list with each location list of entries.
#' \item \code{allocation} is a matrix with the allocation of treatments.
#' \item \code{size_locations} is a data frame with one column for each
#' location and one row with the size of the location.
#' location and one row with the size of the location.
#' }
#'
#' @references
Expand Down Expand Up @@ -455,7 +484,8 @@ multi_location_prep <- function(
exptName,
locationNames,
optim_list,
seed) {
seed,
data = NULL) {
# set a random seed if it is missing
if (missing(seed)) seed <- base::sample.int(10000, size = 1)
if (missing(l)) stop("Please, define the number of locations for this design.")
Expand Down Expand Up @@ -492,9 +522,21 @@ multi_location_prep <- function(
add_checks = add_checks,
checks = checks,
rep_checks = rep_checks,
seed = seed
seed = seed,
data = data
)
}
# Merge user input data
if (!is.null(data)) {
preps <- merge_user_data(
optim_out = preps,
data = data,
lines = lines,
add_checks = add_checks,
checks = checks,
rep_checks = rep_checks
)
}
# Set the number of rows and columns
if (missing(nrows) || missing(ncols)) {
lines_within_loc <- as.numeric(preps$size_locations[1])
Expand All @@ -511,8 +553,8 @@ multi_location_prep <- function(
df_choices <- data.frame(choices = unlist(choices), diff_dim = dif)
df_choices <- df_choices[order(df_choices$diff_dim, decreasing = FALSE), ]
dimensions <- unlist(strsplit(df_choices[1,1], " x "))
nrows <- as.numeric(dimensions[1])
ncols <- as.numeric(dimensions[2])
nrows <- rep(as.numeric(dimensions[1]), times = l)
ncols <- rep(as.numeric(dimensions[2]), times = l)
}
# Generate the p-rep randomization
design_randomization <- partially_replicated(
Expand Down Expand Up @@ -548,4 +590,83 @@ multi_location_prep <- function(
)
class(output) <- "FielDHub"
return(invisible(output))
}
}

#' Merge user data with optimization output
#'
#' This function merges user data with optimization output to prepare input
#' data for randomization. It accepts the output from the optimization function
#' `do_optim()` and user data with entries and corresponding line names.
#' It returns a modified `optim_out` object containing the merged data.
#'
#' @param optim_out Output object from the optimization function `do_optim()`.
#' @param data A data frame containing entries and corresponding names.
#' @param lines Number of entries.
#' @param add_checks A boolean indicating whether to add checks to the input data.
#' @param checks A numeric vector containing the check entries.
#' @param rep_checks A numeric vector containing replicated checks.
#'
#' @return The modified `optim_out` object containing merged data.
#' @noRd
merge_user_data <- function(
optim_out,
data,
lines,
add_checks = FALSE,
checks,
rep_checks = NULL) {
if (!is.null(data)) {
data_entry <- data[, 1:2]
data_entry <- na.omit(data_entry)
colnames(data_entry) <- c("ENTRY", "NAME")
vlookUp_entry <- 1:lines
if (add_checks) input_checks <- checks else input_checks <- 0
if (!is.null(rep_checks)) {
if (length(rep_checks) != input_checks) {
stop("Length of checks does not match replications!")
}
}
data_prep_no_checks <- data_entry[(input_checks + 1):nrow(data_entry), ]
entries_in_file <- nrow(data_prep_no_checks)
if (entries_in_file != lines) {
stop("Input lines does not match number of lines in input data!")
}
max_entry <- max(data_entry$ENTRY)
vlookUp_entry <- c((max_entry + 1):((max_entry + input_checks)), 1:lines)
prep_data_input <- data_entry
locs <- length(optim_out$list_locs)
merged_list_locs <- setNames(
vector("list", length = locs),
nm = paste0("LOC", 1:locs)
)
locs_range <- 1:locs
for (LOC in locs_range) {
iter_loc <- optim_out$list_locs[[LOC]]
data_input_mutated <- prep_data_input %>%
dplyr::mutate(
ENTRY_list = ENTRY,
ENTRY = vlookUp_entry
) %>%
dplyr::select(ENTRY_list, ENTRY, NAME) %>%
dplyr::left_join(y = iter_loc, by = "ENTRY")

if (inherits(optim_out, "MultiPrep")) {
data_input_mutated <- data_input_mutated %>%
dplyr::select(ENTRY_list, NAME.x, REPS) %>%
dplyr::arrange(dplyr::desc(REPS)) %>%
dplyr::rename(ENTRY = ENTRY_list, NAME = NAME.x)
}

if (inherits(optim_out, "Sparse")) {
data_input_mutated <- data_input_mutated %>%
stats::na.omit() %>%
dplyr::select(ENTRY_list, NAME.x) %>%
dplyr::rename(ENTRY = ENTRY_list, NAME = NAME.x)
}

merged_list_locs[[LOC]] <- data_input_mutated
}
optim_out$list_locs <- merged_list_locs
return(optim_out)
}
}
4 changes: 2 additions & 2 deletions R/fct_optimized_arrangement.R
Original file line number Diff line number Diff line change
Expand Up @@ -127,15 +127,15 @@ optimized_arrangement <- function(nrows = NULL, ncols = NULL, lines = NULL,
} else {
arg1 <- list(nrows, ncols, lines, l);arg2 <- c(nrows, ncols, lines, l)
if (base::any(lengths(arg1) != 1) || base::any(arg2 %% 1 != 0) || base::any(arg2 < 1)) {
base::stop('"optimized_arrangement()" requires arguments nrows, ncols, and l to be numeric and distint of NULL')
base::stop('"optimized_arrangement()" requires arguments nrows, ncols, and l to be numeric and distint of NULL')
}
}

if(is.null(data)) {
if (length(checks) == 1 && checks > 1) {
checksEntries <- 1:checks
checks <- checks
}else if (length(checks) > 1) {
} else if (length(checks) > 1) {
checksEntries <- sort(checks)
checks <- length(checks)
} else if (length(checks) == 1 && checks == 1) {
Expand Down
68 changes: 50 additions & 18 deletions R/fct_partially_replicated.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
#' between checks is maximized in such a way that each row and column have control plots.
#' Note that design generation needs the dimension of the field (number of rows and columns).
#'
#' @param nrows Number of rows field.
#' @param ncols Number of columns field.
#' @param nrows Numeric vector with the number of rows field at each location.
#' @param ncols Numeric vector with the number of columns field at each location.
#' @param repGens Numeric vector with the amount genotypes to replicate.
#' @param repUnits Numeric vector with the number of reps of each genotype.
#' @param planter Option for \code{serpentine} or \code{cartesian} movement. By default \code{planter = 'serpentine'}.
Expand All @@ -26,19 +26,29 @@
#' Salvador Gezan [aut],
#' Ana Heilman [ctb],
#' Thomas Walk [ctb],
#' Johan Aparicio [ctb],
#' Johan Aparicio [ctb],
#' Jean-Marc Montpetit [ctb],
#' Richard Horsley [ctb]
#'
#'
#'
#' @return A list with five elements.
#' @return A list with several elements.
#' \itemize{
#' \item \code{infoDesign} is a list with information on the design parameters.
#' \item \code{layoutRandom} is a matrix with the randomization layout.
#' \item \code{plotNumber} is a matrix with the layout plot number.
#' \item \code{binaryField} is a matrix with the binary field.
#' \item \code{dataEntry} is a data frame with the data input.
#' \item \code{genEntries} is a list with the entries for replicated and no replicated part.
#' \item \code{genEntries} is a list with the entries for replicated and non-replicated parts.
#' \item \code{fieldBook} is a data frame with field book design. This includes the index (Row, Column).
#' \item \code{min_pairswise_distance} is a data frame with the minimum pairwise distance between
#' each pair of locations.
#' \item \code{reps_info} is a data frame with information on the number of replicated and
#' non-replicated treatments at each location.
#' \item \code{pairsDistance} is a data frame with the pairwise distances between each pair of
#' treatments.
#' \item \code{treatments_with_reps} is a list with the entries for the replicated part of the design.
#' \item \code{treatments_with_no_reps} is a list with the entries for the non-replicated part of the design.
#' }
#'
#' @references
Expand Down Expand Up @@ -116,6 +126,28 @@ partially_replicated <- function(
if (is.null(nrows) || is.null(ncols) || !is.numeric(nrows) || !is.numeric(ncols)) {
base::stop('Basic design parameters missing (nrows, ncols) or is not numeric.')
}
if (length(nrows) != l) {
if (length(nrows) < l) {
warning("Number of nrows values not matching number of locations", call. = FALSE)
# Filling missing nrows values with last provided value
nrows <- c(nrows, rep(nrows[length(nrows)], l - length(nrows)))
} else {
warning("Number of nrows values not matching number of locations", call. = FALSE)
# Filling missing nrows values with last provided value
nrows <- nrows[1:l]
}
}
if (length(ncols) != l) {
if (length(ncols) < l) {
warning("Number of ncols values not matching number of locations", call. = FALSE)
# Filling missing nrows values with last provided value
ncols <- c(ncols, rep(ncols[length(ncols)], l - length(ncols)))
} else {
warning("Number of ncols values not matching number of locations", call. = FALSE)
# Filling missing nrows values with last provided value
ncols <- ncols[1:l]
}
}

if (is.null(data)) {
if (is.null(repGens) || is.null(repUnits)) {
Expand Down Expand Up @@ -144,17 +176,17 @@ partially_replicated <- function(
}
} else stop("Number of locations/sites is missing")

if (!is.null(data)) {
arg1 <- list(nrows, ncols, l);arg2 <- c(nrows, ncols, l)
if (base::any(lengths(arg1) != 1) || base::any(arg2 %% 1 != 0) || base::any(arg2 < 1)) {
base::stop('"partially_replicated()" requires input nrows, ncols, and l to be numeric and distint of NULL.')
}
} else {
arg1 <- list(nrows, ncols, l);arg2 <- c(nrows, ncols, l)
if (base::any(lengths(arg1) != 1) || base::any(arg2 %% 1 != 0) || base::any(arg2 < 1)) {
base::stop('"partially_replicated()" requires input nrows, ncols, and l to be numeric and distint of NULL.')
}
}
# if (!is.null(data)) {
# arg1 <- list(nrows, ncols, l);arg2 <- c(nrows, ncols, l)
# if (base::any(lengths(arg1) != 1) || base::any(arg2 %% 1 != 0) || base::any(arg2 < 1)) {
# base::stop('"partially_replicated()" requires input nrows, ncols, and l to be numeric and distint of NULL.')
# }
# } else {
# arg1 <- list(nrows, ncols, l);arg2 <- c(nrows, ncols, l)
# if (base::any(lengths(arg1) != 1) || base::any(arg2 %% 1 != 0) || base::any(arg2 < 1)) {
# base::stop('"partially_replicated()" requires input nrows, ncols, and l to be numeric and distint of NULL.')
# }
# }
if (!is.null(data)) {
if (multi_location_data) {
if (is.data.frame(data)) {
Expand Down Expand Up @@ -260,8 +292,8 @@ partially_replicated <- function(
rows_incidence <- vector(mode = "numeric", length = l)
for (sites in 1:l) {
prep <- pREP(
nrows = nrows,
ncols = ncols,
nrows = nrows[sites],
ncols = ncols[sites],
RepChecks = NULL,
checks = NULL,
Fillers = 0,
Expand Down
Loading