--- title: "R Notebook for short gestation heifer paper" output: github_document: toc: true toc_depth: 3 editor_options: chunk_output_type: inline --- # R Setup ```{r} # List of packages for session .packages = c("ggplot2", "dplyr", "tidyr", "lme4", "lmerTest", "multcompView", "multcomp", "emmeans", "lsmeans", "TH.data", "car", "lubridate" ) # Install CRAN packages (if not already installed) .inst <- .packages %in% installed.packages() if(length(.packages[!.inst]) > 0) install.packages(.packages[!.inst], repos = "http://cran.us.r-project.org", dependencies = TRUE) # Load packages into session lapply(.packages, require, character.only=TRUE, quietly = TRUE) ``` # Raw Data import ```{r cache= TRUE} if(!(exists('AllDataRaw') && is.data.frame(get('AllDataRaw')))) { AllDataRaw <- read.csv2(file = "./Data/TableauExportv2.csv", header = T, strip.white = TRUE, dec = ".", sep = ',', na.strings = c('', 'NA') ) oldColumns <- names(AllDataRaw) newColumns <- gsub("\\.", "", oldColumns, perl=TRUE) #Strange name for AnimalId newColumns[1] <- "AnimalId" #Duplicate name for DaysInMilk newColumns[19] <- "DaysInMilkBin" names(AllDataRaw) <- newColumns AllDataRaw <- AllDataRaw %>% dplyr::arrange( HerdId, AnimalId, Date ) } ``` # Data manipulation ## Descriptives ```{r} AllDataUngrouped <- AllDataRaw %>% dplyr::filter( LactationNumber == 1, # DaysPregnant <= 283, #We drop all above 75th percentile because no interest at this stage, missing inseminations? M305 > 0 #No missing M305 calculations ) AllDataUngrouped %>% count() AllDataUngrouped %>% summarise(count = n_distinct(AnimalId)) AllDataUngrouped %>% summarise(count = n_distinct(HerdId)) ``` ```{r} #We inspect the quantile ranges quantile(AllDataRaw$DaysPregnant, c(0,0.001, 0.01, 0.05, 0.25,0.50,0.75,1)) AllData <- AllDataRaw %>% dplyr::filter( LactationNumber == 1, # DaysPregnant <= 283, #We drop all above 75th percentile because no interest at this stage, missing inseminations? M305 > 0 #No missing M305 calculations ) %>% dplyr::mutate( Date = mdy_hms(Date), #reformat ordering date Year = year(mdy_hms(CalvingDate)), Month = month(mdy_hms(CalvingDate)), DaysPregnantQuantile = case_when( DaysPregnant < 243 ~ "0-1th Pct", DaysPregnant < 267 ~ "1-25th Pct", DaysPregnant < 283 ~ "25-75th Pct", TRUE ~ "75-100 Pct" ) ) %>% dplyr::arrange( HerdId, AnimalId, Date ) %>% dplyr::group_by( AnimalId, HerdId, DaysPregnantQuantile, Year, Month, CalvingDate ) %>% summarise( lastM305 = as.integer(last(M305)), lastDIM = as.integer(last(DaysInMilk)), lastScale = as.numeric(last(Scale)), lastDecay = as.numeric(last(Decay)), lastRamp = as.numeric(last(Ramp)), lastPeakYield = as.numeric(last(PeakMilk)), lastTimeToPeak = as.integer(last(TimeToPeak)) ) ``` # Basic data exploration ```{r} summary(AllData[,c("lastM305", "lastDecay", "lastRamp", "lastScale", "lastPeakYield", "lastTimeToPeak")]) ``` # Basic data visualisation ```{r} op = par(mfrow=c(3, 2)) hist(AllData$lastM305, main = "M305", xlab="") hist(AllData$lastScale, main = "Milkbot scale", xlab="") hist(AllData$lastDecay, main = "Milkbot decay", xlab="") hist(AllData$lastRamp, main = "Milkbot ramp", xlab="") hist(AllData$lastPeakYield, main = "Milkbot peak yield", xlab="") hist(AllData$lastTimeToPeak, main = "Milkbot time to peak", xlab="") ``` # Models build * [Link to model M305](Models/M305.md) * [Link to model Scale](Models/Scale.md) * [Link to model Decay](Models/Decay.md) * [Link to model Ramp](Models/Ramp.md) * [Link to model Peak Yield](Models/PeakYield.md) * [Link to model Time To Peak](Models/TimeToPeak.md)