Fix: mismatch in randomization for Row-column design

DidierMurilloF · Mar 22, 2024 · 2731291 · 2731291
1 parent 28a244c
commit 2731291
Show file tree

Hide file tree

Showing 7 changed files with 377 additions and 19 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -45,6 +45,7 @@ importFrom(shiny,NS)
 importFrom(shiny,column)
 importFrom(shiny,shinyApp)
 importFrom(shiny,tagList)
+importFrom(stats,aggregate)
 importFrom(stats,dist)
 importFrom(stats,na.omit)
 importFrom(stats,pnorm)

diff --git a/R/fct_do_optim.R b/R/fct_do_optim.R
@@ -7,6 +7,7 @@
 #' @param add_checks Option to add checks. Optional if \code{design = "prep"}
 #' @param checks Number of genotypes checks. 
 #' @param rep_checks Replication for each check.
+#' @param force_balance Get balanced unbalanced locations. By default \code{force_balance = TRUE}.
 #' @param data (optional) Data frame with 2 columns: \code{ENTRY | NAME }. ENTRY must be numeric.
 #' @param seed (optional) Real number that specifies the starting seed to obtain reproducible designs.
 #' 
@@ -44,7 +45,8 @@ do_optim <- function(
  copies_per_entry, 
  add_checks = FALSE, 
  checks = NULL, 
- rep_checks = NULL, 
+ rep_checks = NULL,
+ force_balance = TRUE,
  seed,
  data = NULL) {
  # set a random seed if it is missing
@@ -147,7 +149,7 @@ do_optim <- function(
  # Check if there are unbalanced locations and force them to be balanced
  size_locs <- as.vector(base::colSums(allocation))
  max_size_locs <- max(size_locs)
- if (!all(size_locs == max_size_locs)) {
+ if (!all(size_locs == max_size_locs) & force_balance == TRUE) {
  unbalanced_locs <- which(size_locs != max_size_locs)
  max_swaps <- length(unbalanced_locs)
  k <- nrow(allocation)

diff --git a/R/fct_incomplete_blocks.R b/R/fct_incomplete_blocks.R
@@ -19,7 +19,7 @@
 #' Johan Aparicio [ctb], 
 #' Richard Horsley [ctb]
 #'
-#' @importFrom stats runif na.omit
+#' @importFrom stats runif na.omit aggregate
 #'
 #'
 #' @return A list with two elements.
@@ -81,12 +81,12 @@ incomplete_blocks <- function(t = NULL, k = NULL, r = NULL, l = 1, plotNumber =
  nt <- length(t)
  TRT <- t
  }
- }else if (is.character(t) || is.factor(t)) {
+ } else if (is.character(t) || is.factor(t)) {
  if (length(t) == 1) {
  shiny::validate('incomplete_blocks() requires more than one treatment.')
  } 
  nt <- length(t)
- }else if ((length(t) > 1)) {
+ } else if ((length(t) > 1)) {
  nt <- length(t)
  }
  data_up <- data.frame(list(ENTRY = 1:nt, TREATMENT = paste0("G-", 1:nt)))
@@ -138,20 +138,40 @@ incomplete_blocks <- function(t = NULL, k = NULL, r = NULL, l = 1, plotNumber =
  if (square) {
  mydes <- blocksdesign::blocks(treatments = nt, replicates = r + 1, blocks = list(r + 1, b), seed = NULL)
  ##### Dropping the cyclical REP ######
- rep_to_drop <- mydes$Design %>%
- dplyr::group_by(Level_1, Level_2) %>%
- dplyr::mutate(treatments = as.numeric(treatments)) %>%
- dplyr::summarise(dif = sum(diff(sort(treatments)))/(dplyr::n()-1)) %>%
- dplyr::filter(dif == 1) %>%
- dplyr::pull(Level_1) %>%
- unique()
- if (length(rep_to_drop) > 0) {
- mydes$Design <- mydes$Design %>%
- dplyr::filter(Level_1 != rep_to_drop) %>%
- dplyr::mutate(Level_1 = rep(paste0("B", 1:r), each = nt))
+ # Function to check if treatments are consecutive
+ check_consecutive <- function(treatments) {
+ sorted_treatments <- sort(treatments)
+ all(diff(sorted_treatments) == 1)
+ }
+ # Apply check_consecutive function to each Level_2 group
+ raw_design <- as.data.frame(mydes$Design)
+ raw_design <- raw_design %>%
+ dplyr::mutate(
+ Level_1 = as.character(Level_1),
+ Level_2 = as.character(Level_2),
+ plots = as.integer(plots),
+ treatments = as.integer(treatments)
+ )
+ results <- raw_design %>%
+ dplyr::group_by(Level_1, Level_2) %>%
+ dplyr::summarise(are_consecutive = check_consecutive(treatments), .groups = "drop") %>%
+ dplyr::group_by(Level_1) %>%
+ dplyr::summarise(all_consecutive = all(are_consecutive))
+
+ # Filter Level_1 where all Level_2 levels have consecutive treatments
+ consecutive_levels <- results %>%
+ dplyr::filter(all_consecutive) %>%
+ dplyr::pull(Level_1) %>%
+ unique()
+
+ consecutive_levels_level_1 <- consecutive_levels
+
+ if (length(consecutive_levels_level_1) > 0) {
+ rep_to_drop <- consecutive_levels_level_1[1]
+ mydes$Design <- dplyr::filter(raw_design, Level_1 != rep_to_drop)
  } else {
-  mydes$Design <- mydes$Design %>%
-  dplyr::filter(Level_1 != paste0("B", r + 1)) 
+ mydes$Design <- raw_design %>%
+ dplyr::filter(Level_1 != paste0("B", r + 1)) 
  }
  } else {
  mydes <- blocksdesign::blocks(treatments = nt, replicates = r, blocks = list(r, b), seed = NULL)
@@ -168,6 +188,8 @@ incomplete_blocks <- function(t = NULL, k = NULL, r = NULL, l = 1, plotNumber =
  OutIBD <- dplyr::bind_rows(outIBD_loc)
  OutIBD <- as.data.frame(OutIBD)
  OutIBD$ENTRY <- as.numeric(OutIBD$ENTRY)
+ OutIBD_test <- OutIBD
+ OutIBD_test$ID <- 1:nrow(OutIBD_test)
  if(lookup) {
  OutIBD <- dplyr::inner_join(OutIBD, dataLookUp, by = "ENTRY")
  OutIBD <- OutIBD[,-6]
@@ -178,6 +200,7 @@ incomplete_blocks <- function(t = NULL, k = NULL, r = NULL, l = 1, plotNumber =
  }
  ID <- 1:nrow(OutIBD)
  OutIBD_new <- cbind(ID, OutIBD)
+ validateTreatments(OutIBD_new)
  lambda <- r*(k - 1)/(nt - 1)
  infoDesign <- list(Reps = r, iBlocks = b, NumberTreatments = nt, NumberLocations = l,
  Locations = locationNames, seed = seed, lambda = lambda, 

diff --git a/R/globals.R b/R/globals.R
@@ -29,4 +29,7 @@ utils::globalVariables(c("ENTRY",
  "USER_ENTRY",
  "NAME.x",
  "NAME.y",
- "Times"))
+ "Times", 
+ "all_consecutive", 
+ "are_consecutive", 
+ "plots"))
diff --git a/R/utils_validateTreatments.R b/R/utils_validateTreatments.R
@@ -0,0 +1,106 @@
+validateTreatments <- function(data) {
+ # Group the data by LOCATION, REP, and then TREATMENT, and count the occurrences
+ treatment_counts <- aggregate(ID ~ LOCATION + REP + ENTRY, data=data, FUN=length)
+
+ # Identify any treatment counts greater than 1, indicating duplicates within a LOCATION and REP
+ duplicates <- treatment_counts[treatment_counts$ID > 1, ]
+
+ if (nrow(duplicates) > 0) {
+ stop("There are duplicates within REP for some LOCATIONs:\n")
+ } else {
+ # Check if all treatments are present exactly once within each REP for each LOCATION
+ unique_treatments_per_location_rep <- aggregate(ENTRY ~ LOCATION + REP, data=data, function(x) length(unique(x)))
+ expected_treatments_count <- unique(unique_treatments_per_location_rep$ENTRY)
+
+ # if (length(expected_treatments_count) != 1) {
+ # stop("Not all treatments are present exactly once within each REP for each LOCATION. Here are the details:\n")
+ # } 
+ }
+}
+# 
+# library(blocksdesign)
+# # When producing a randomization for a resolvable row-column design
+# # with 36 entries, 6 reps, 2 rows, and a random seed of 20243, each entry
+# # does not appear in every replicate.
+# # set.seed(20243)
+# nt <- 36
+# r <- 6
+# b <- 2
+# l <- 1
+# k <- 2 #nrows
+# nincblock <- nt*r/k
+# N <- nt * r
+# locationNames <- "FARGO"
+# square <- FALSE
+# if (sqrt(nt) == round(sqrt(nt))) square <- TRUE
+# outIBD_loc <- vector(mode = "list", length = l)
+# for (i in 1:l) {
+# if (square) {
+# mydes <- blocksdesign::blocks(treatments = nt, replicates = r + 1, blocks = list(r + 1, b), seed = NULL)
+# ##### Dropping the cyclical REP ######
+# rep_to_drop <- mydes$Design %>%
+# dplyr::group_by(Level_1, Level_2) %>%
+# dplyr::mutate(treatments = as.numeric(treatments)) %>%
+# dplyr::summarise(dif = sum(diff(sort(treatments)))/(dplyr::n()-1)) %>%
+# dplyr::filter(dif == 1) %>%
+# dplyr::pull(Level_1) %>%
+# unique()
+# print(rep_to_drop)
+# if (length(rep_to_drop) > 0) {
+# mydes$Design <- mydes$Design %>%
+# dplyr::filter(Level_1 != rep_to_drop) %>%
+# dplyr::mutate(Level_1 = rep(paste0("B", 1:r), each = nt))
+# } else {
+# mydes$Design <- mydes$Design %>%
+# dplyr::filter(Level_1 != paste0("B", r + 1))
+# }
+# } else {
+# mydes <- blocksdesign::blocks(treatments = nt, replicates = r, blocks = list(r, b), seed = NULL)
+# }
+# # mydes <- blocksdesign::blocks(treatments = nt, replicates = r, blocks = list(r, b), seed = NULL)
+# ibd_plots <- list(1:216)
+# matdf <- base::data.frame(list(LOCATION = rep(locationNames[i], each = N)))
+# matdf$PLOT <- as.numeric(unlist(ibd_plots[[i]]))
+# matdf$BLOCK <- rep(c(1:r), each = nt)
+# matdf$iBLOCK <- rep(c(1:b), each = k)
+# matdf$UNIT <- rep(c(1:k), nincblock)
+# matdf$TREATMENT <- mydes$Design[,4]
+# colnames(matdf) <- c("LOCATION","PLOT", "REP", "IBLOCK", "UNIT", "ENTRY")
+# outIBD_loc[[i]] <- matdf
+# }
+# OutIBD <- dplyr::bind_rows(outIBD_loc)
+# OutIBD <- as.data.frame(OutIBD)
+# OutIBD$ENTRY <- as.numeric(OutIBD$ENTRY)
+# OutIBD_test <- OutIBD
+# OutIBD_test$ID <- 1:nrow(OutIBD_test)
+# lookup <- FALSE
+# if(lookup) {
+# OutIBD <- dplyr::inner_join(OutIBD, dataLookUp, by = "ENTRY")
+# OutIBD <- OutIBD[,-6]
+# colnames(OutIBD) <- c("LOCATION","PLOT", "REP", "IBLOCK", "UNIT", "TREATMENT")
+# OutIBD <- dplyr::inner_join(OutIBD, data_up, by = "TREATMENT")
+# OutIBD <- OutIBD[, c(1:5,7,6)]
+# colnames(OutIBD) <- c("LOCATION","PLOT", "REP", "IBLOCK", "UNIT", "ENTRY", "TREATMENT")
+# }
+# ID <- 1:nrow(OutIBD)
+# OutIBD_new <- cbind(ID, OutIBD)
+# 
+# 
+# # Load the data frame (replace this with your actual data loading code)
+# data1 <- read.csv("OutIBD_before_merging.csv")
+# View(data1)
+# data <- OutIBD_test
+# 
+# # Example usage:
+# validateTreatments(data)
+# 
+# 
+# data1 <- read.csv("IBD_row_example.csv")
+# View(data1)
+# 
+# data1 <- read.csv("IBD_row_example_before_drop_rep.csv")
+# View(data1)
+
+
+
+