I think most variables and models are now complete. Data is local, no…

…t yet online
Bovi-analytics · Jun 23, 2019 · f25936f · f25936f
commit f25936f
Show file tree

Hide file tree

Showing 70 changed files with 3,878 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,5 @@
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
+*.csv
diff --git a/DataImport.R b/DataImport.R
@@ -0,0 +1,27 @@
+#data manipulation
+if (!require("dplyr")) {
+ install.packages("dplyr", dependencies = TRUE)
+ library(dplyr)
+}
+
+if(!(exists('AllDataRaw') && is.data.frame(get('AllDataRaw')))) {
+ AllDataRaw <- read.csv2(file = "../Data/TableauExportv2.csv", 
+ header = T,
+ strip.white = TRUE,
+ dec = ".",
+ sep = ',', na.strings = c('', 'NA')
+ )
+ oldColumns <- names(AllDataRaw)
+ newColumns <- gsub("\\.", "", oldColumns, perl=TRUE)
+ #Strange name for AnimalId
+ newColumns[1] <- "AnimalId"
+ #Duplicate name for DaysInMilk
+ newColumns[19] <- "DaysInMilkBin"
+ names(AllDataRaw) <- newColumns
+ AllDataRaw <- AllDataRaw %>% 
+ dplyr::arrange(
+ HerdId,
+ AnimalId,
+ Date
+ )
+}
diff --git a/Models/Decay.Rmd b/Models/Decay.Rmd
@@ -0,0 +1,113 @@
+---
+title: "R Notebook"
+output: 
+ github_document:
+ toc: true
+ toc_depth: 3
+editor_options: 
+ chunk_output_type: inline
+---
+
+```{r include=FALSE}
+#data manipulation
+if (!require("dplyr")) {
+ install.packages("dplyr", dependencies = TRUE)
+ library(dplyr)
+}
+#glmer
+if (!require("lme4")){install.packages("lme4", dependencies = TRUE)
+ library(lme4)
+}
+#lsmeans
+if (!require("lsmeans")){install.packages("lsmeans", dependencies = TRUE)
+ library(lsmeans)
+}
+#lsmeans
+if (!require("lubridate")){install.packages("lsmeans", dependencies = TRUE)
+ library(lubridate)
+}
+```
+
+# Raw Data import
+
+```{r cache= TRUE}
+source('../DataImport.R')
+```
+
+# Data manipulation
+
+```{r}
+#We inspect the quantile ranges
+
+quantile(AllDataRaw$DaysPregnant)
+
+AllData <- AllDataRaw %>% dplyr::filter(
+ LactationNumber == 1,
+ DaysPregnant <= 283, #We drop all above 75th percentile because no interest at this stage, missing inseminations?
+ M305 > 0 #No missing M305 calculations
+ ) %>% 
+ dplyr::mutate(
+ Date = mdy_hms(Date), #reformat ordering date
+ Year = year(mdy_hms(CalvingDate)),
+ Month = month(mdy_hms(CalvingDate)),
+ DaysPregnantQuantile = case_when(
+ DaysPregnant < 275 ~ "0-25th Pct",
+ TRUE ~ "25-75 Pct"
+ )
+ ) %>%
+ dplyr::arrange(
+ HerdId,
+ AnimalId,
+ Date
+ ) %>%
+ dplyr::group_by(
+ AnimalId,
+ HerdId,
+ DaysPregnantQuantile,
+ Year,
+ Month,
+ CalvingDate
+ ) %>% 
+ summarise(
+ Value = as.integer(last(Decay))
+ )
+
+ 
+```
+
+# Model M305
+
+## Base model
+
+```{r}
+baseline <- lmer(
+ Value ~ 1 + (1 | HerdId), 
+ data = AllData
+ )
+qqnorm(residuals(baseline, type = 'pearson'))
+```
+
+## Full model
+
+```{r message=FALSE, warning=FALSE, paged.print=FALSE}
+GLM <- lmer(
+ Value ~ 
+ DaysPregnantQuantile + Year + Month
+ + (1 | HerdId),
+ data = AllData
+ )
+qqnorm(residuals(GLM))
+```
+
+## Comparison of baseline and nested model
+
+```{r}
+anova(GLM,baseline, test="Chisq")
+```
+
+## Least square means
+
+```{r}
+lsmeans(GLM, pairwise~DaysPregnantQuantile, type = "response", adjust="tukey")
+```
+
diff --git a/Models/Decay.html b/Models/Decay.html
diff --git a/Models/Decay.md b/Models/Decay.md
@@ -0,0 +1,140 @@
+R Notebook
+================
+
+- [Raw Data import](#raw-data-import)
+- [Data manipulation](#data-manipulation)
+- [Model M305](#model-m305)
+ - [Base model](#base-model)
+ - [Full model](#full-model)
+ - [Comparison of baseline and nested model](#comparison-of-baseline-and-nested-model)
+ - [Least square means](#least-square-means)
+
+Raw Data import
+===============
+
+``` r
+source('../DataImport.R')
+```
+
+Data manipulation
+=================
+
+``` r
+#We inspect the quantile ranges
+
+quantile(AllDataRaw$DaysPregnant)
+```
+
+ ## 0% 25% 50% 75% 100% 
+ ## 150 275 278 283 297
+
+``` r
+AllData <- AllDataRaw %>% dplyr::filter(
+ LactationNumber == 1,
+ DaysPregnant <= 283, #We drop all above 75th percentile because no interest at this stage, missing inseminations?
+ M305 > 0 #No missing M305 calculations
+ ) %>% 
+ dplyr::mutate(
+ Date = mdy_hms(Date), #reformat ordering date
+ Year = year(mdy_hms(CalvingDate)),
+ Month = month(mdy_hms(CalvingDate)),
+ DaysPregnantQuantile = case_when(
+ DaysPregnant < 275 ~ "0-25th Pct",
+ TRUE ~ "25-75 Pct"
+ )
+ ) %>%
+ dplyr::arrange(
+ HerdId,
+ AnimalId,
+ Date
+ ) %>%
+ dplyr::group_by(
+ AnimalId,
+ HerdId,
+ DaysPregnantQuantile,
+ Year,
+ Month,
+ CalvingDate
+ ) %>% 
+ summarise(
+ Value = as.integer(last(Decay))
+ )
+```
+
+Model M305
+==========
+
+Base model
+----------
+
+``` r
+baseline <- lmer(
+ Value ~ 1 + (1 | HerdId), 
+ data = AllData
+ )
+qqnorm(residuals(baseline, type = 'pearson'))
+```
+
+![](Decay_files/figure-markdown_github/unnamed-chunk-4-1.png)
+
+Full model
+----------
+
+``` r
+GLM <- lmer(
+ Value ~ 
+ DaysPregnantQuantile + Year + Month
+ + (1 | HerdId),
+ data = AllData
+ )
+qqnorm(residuals(GLM))
+```
+
+![](Decay_files/figure-markdown_github/unnamed-chunk-5-1.png)
+
+Comparison of baseline and nested model
+---------------------------------------
+
+``` r
+anova(GLM,baseline, test="Chisq")
+```
+
+ ## refitting model(s) with ML (instead of REML)
+
+ ## Data: AllData
+ ## Models:
+ ## baseline: Value ~ 1 + (1 | HerdId)
+ ## GLM: Value ~ DaysPregnantQuantile + Year + Month + (1 | HerdId)
+ ## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
+ ## baseline 3 -Inf -Inf Inf -Inf 
+ ## GLM 6 -Inf -Inf Inf -Inf 3
+
+Least square means
+------------------
+
+``` r
+lsmeans(GLM, pairwise~DaysPregnantQuantile, type = "response", adjust="tukey")
+```
+
+ ## Warning in vcov.merMod(object, correlation = FALSE): Computed variance-covariance matrix problem: not a positive definite matrix;
+ ## returning NA matrix
+
+ ## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
+ ## To enable adjustments, set emm_options(pbkrtest.limit = 10881) or larger,
+ ## but be warned that this may result in large computation time and memory use.
+
+ ## Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
+ ## To enable adjustments, set emm_options(lmerTest.limit = 10881) or larger,
+ ## but be warned that this may result in large computation time and memory use.
+
+ ## $lsmeans
+ ## DaysPregnantQuantile lsmean SE df asymp.LCL asymp.UCL
+ ## 0-25th Pct 0 NA Inf NA NA
+ ## 25-75 Pct 0 NA Inf NA NA
+ ## 
+ ## Degrees-of-freedom method: asymptotic 
+ ## Confidence level used: 0.95 
+ ## 
+ ## $contrasts
+ ## contrast estimate SE df z.ratio p.value
+ ## 0-25th Pct - 25-75 Pct 0 NA Inf NA NA
diff --git a/Models/Decay_cache/markdown_github/__packages b/Models/Decay_cache/markdown_github/__packages
@@ -0,0 +1,13 @@
+base
+methods
+datasets
+utils
+grDevices
+graphics
+stats
+dplyr
+Matrix
+lme4
+emmeans
+lsmeans
+lubridate
diff --git a/Models/Decay_cache/markdown_github/unnamed-chunk-2_fadad1f0d4fc1c84f9a51f2c8a952595.RData b/Models/Decay_cache/markdown_github/unnamed-chunk-2_fadad1f0d4fc1c84f9a51f2c8a952595.RData
diff --git a/Models/Decay_cache/markdown_github/unnamed-chunk-2_fadad1f0d4fc1c84f9a51f2c8a952595.rdb b/Models/Decay_cache/markdown_github/unnamed-chunk-2_fadad1f0d4fc1c84f9a51f2c8a952595.rdb
diff --git a/Models/Decay_cache/markdown_github/unnamed-chunk-2_fadad1f0d4fc1c84f9a51f2c8a952595.rdx b/Models/Decay_cache/markdown_github/unnamed-chunk-2_fadad1f0d4fc1c84f9a51f2c8a952595.rdx
diff --git a/Models/Decay_files/figure-markdown_github/unnamed-chunk-4-1.png b/Models/Decay_files/figure-markdown_github/unnamed-chunk-4-1.png
diff --git a/Models/Decay_files/figure-markdown_github/unnamed-chunk-5-1.png b/Models/Decay_files/figure-markdown_github/unnamed-chunk-5-1.png