Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Function collection p to w #265

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Prev Previous commit
Next Next commit
Updated help files for functions whose names start with P to W
  • Loading branch information
Tahmina Mojumder committed Sep 1, 2023
commit 045986e07ce120051bf53164a263db3b2902efd4
154 changes: 77 additions & 77 deletions BayesianTools/R/SBC.R
Original file line number Diff line number Diff line change
@@ -1,77 +1,77 @@
#' Simulation-based calibration tests
#'
#' This function performs simulation-based calibration tests based on the idea that posteriors averaged over the prior should yield the prior.
#'
#' @param posteriorList a list with posterior samples. List items must be of a class that is supported by \code{\link{getSample}}. This includes BayesianTools objects, but also matrix and data.frame
#' @param priorDraws a matrix with parameter values, drawn from the prior, that were used to simulate the data underlying the posteriorList. If colnames are provided, these will be used in the plots
#' @param ... arguments to be passed to \code{\link{getSample}}. Consider in particular the thinning option.
#'
#' @return an object of class htest, with the per-parameter ks.test p-values in the p.value slot
#'
#' @details The purpose of this function is to evaluate the results of a simulation-based calibration of an MCMC analysis.
#'
#' Briefly, the idea is to repeatedly
#'
#' 1. sample parameters from the prior,
#' 2. simulate new data based on these parameters,
#' 3. calculate the posterior for these data
#'
#' If the sampler and the likelihood are implemented correctly, the average over all the posterior distributions should then again yield the prior (e.g. Cook et al., 2006).
#'
#' To test if this is the case, we implement the methods suggested by Talts et al., which is to calculate the rank statistics between the parameter draws and the posterior draws, which we then formally evaluate with a qq unif plot, and a ks.test
#'
#' I speculate that a ks.test between the two distributions would likely give an identical result, but this is not noted in Talts et al.
#'
#' Cook, S. R., Gelman, A. and Rubin, D. B. (2006). Validation of Software for Bayesian Models Using Posterior Quantiles. J. Comput. Graph. Stat. 15 675-692.
#'
#' Talts, Sean, Michael Betancourt, Daniel Simpson, Aki Vehtari, and Andrew Gelman. "Validating Bayesian Inference Algorithms with Simulation-Based Calibration." arXiv preprint arXiv:1804.06788 (2018).
#'
#' @note This function was implemented for the tests in Maliet, Odile, Florian Hartig, and Hélène Morlon. "A model with many small shifts for estimating species-specific diversification rates." Nature ecology & evolution 3.7 (2019): 1086-1092. The code linked with this paper provides a further example of its use.
#'
#' @author Florian Hartig
#'
#' @export
#'
calibrationTest <- function(posteriorList, priorDraws, ...){

  # Pool all posterior samples into one matrix (columns = parameters)
  x = mergeChains(posteriorList, ...)

  nPar <- ncol(x)

  # Three diagnostic plots per parameter: overlaid histograms, rank histogram, qq-unif.
  # Restore the graphics state via on.exit so it is reset even if an error occurs.
  oldPar <- par(mfrow = getPanels(nPar * 3))
  on.exit(par(oldPar), add = TRUE)

  res = numeric(nPar)
  names(res) = colnames(priorDraws)

  for(i in seq_len(nPar)){

    # Posterior (red) vs prior (green) marginal densities for parameter i
    hist(x[,i], breaks = 50, freq = FALSE, col = "#99000020", main = colnames(priorDraws)[i])
    hist(priorDraws[,i], breaks = 50, freq = FALSE, col = "#00990020", add = TRUE)

    # Rank statistic: posterior ECDF evaluated at the prior draws.
    # Under a correct sampler / likelihood this should be uniform (Talts et al. 2018).
    cDens = ecdf(x[,i])
    rankDist <- cDens(priorDraws[,i])
    hist(rankDist, breaks = 50, freq = FALSE)
    abline(h = 1, col = "red")

    gap::qqunif(rankDist, pch = 2, bty = "n", logscale = FALSE, col = "black", cex = 0.6, main = colnames(priorDraws)[i], cex.main = 1)

    # Formal two-sample test of posterior samples against prior draws
    res[i] = ks.test(x[,i], priorDraws[,i])$p.value
    legend("topleft", c(paste("KS test: p=", round(res[i], digits = 5)), paste("Deviation ", ifelse(res[i] < 0.05, "significant", "n.s."))), text.col = ifelse(res[i] < 0.05, "red", "black" ), bty="n")
  }

  # Package the per-parameter p-values as an htest object so print() shows a
  # standard hypothesis-test summary
  out = list()
  out$statistic = NULL
  out$method = "ks.test on rank statistics posterior / parameters"
  out$alternative = "two.sided"
  out$p.value = res
  class(out) = "htest"

  return(out)

}


#' Simulation-based calibration tests
#'
#' This function performs simulation-based calibration tests based on the idea that posteriors averaged over the prior should yield the prior.
#'
#' @param posteriorList a list of posterior samples. List items must be of a class that is supported by \code{\link{getSample}}. This includes BayesianTools objects, but also matrix and data.frame objects.
#' @param priorDraws a matrix of parameter values, drawn from the prior, that were used to simulate the data underlying the posteriorList. If colnames are provided, they are used in the plots
#' @param ... arguments to be passed to \code{\link{getSample}}. Consider in particular the thinning option.
#'
#' @return an object of class htest, with the per-parameter ks.test p-values in the p.value slot
#'
#' @details The purpose of this function is to evaluate the results of a simulation-based calibration of an MCMC analysis.
#'
#' Briefly, the idea is to repeatedly
#'
#' 1. sample parameters from the prior,
#' 2. simulate new data based on these parameters,
#' 3. calculate the posterior for these data
#'
#' If the sampler and the likelihood are implemented correctly, the average over all the posterior distributions should then again yield the prior (e.g. Cook et al., 2006).
#'
#' To test if this is the case, we implement the methods suggested by Talts et al., which is to calculate the rank statistics between the parameter draws and the posterior draws, which we then formally evaluate with a qq unif plot, and a ks.test
#'
#' I speculate that a ks.test between the two distributions would likely give an identical result, but this is not noted in Talts et al.
#'
#' Cook, S. R., Gelman, A. and Rubin, D. B. (2006). Validation of Software for Bayesian Models Using Posterior Quantiles. J. Comput. Graph. Stat. 15 675-692.
#'
#' Talts, Sean, Michael Betancourt, Daniel Simpson, Aki Vehtari, and Andrew Gelman. "Validating Bayesian Inference Algorithms with Simulation-Based Calibration." arXiv preprint arXiv:1804.06788 (2018).
#'
#' @note This function was implemented for the tests in Maliet, Odile, Florian Hartig, and Hélène Morlon. "A model with many small shifts for estimating species-specific diversification rates." Nature ecology & evolution 3.7 (2019): 1086-1092. The code linked with this paper provides a further example of its use.
#'
#' @author Florian Hartig
#'
#' @export
#'
calibrationTest <- function(posteriorList, priorDraws, ...){
# pool all posterior samples into a single matrix (columns = parameters)
x = mergeChains(posteriorList, ...)
nPar <- ncol(x)
# three diagnostic plots per parameter (histograms, rank histogram, qq-unif);
# old graphics settings are restored at the end of the function
oldPar <- par(mfrow = getPanels(nPar*3))
res = numeric(nPar)
names(res) = colnames(priorDraws)
for(i in 1:nPar){
# NOTE(review): lim is computed but never used below — candidate for removal
lim = range(x[,i], priorDraws[,i])
# posterior (red) vs prior (green) marginal densities for parameter i
hist(x[,i], breaks = 50, freq = F, col = "#99000020", main = colnames(priorDraws)[i])
hist(priorDraws[,i], breaks = 50, freq = F, col = "#00990020", add = T)
# rank statistic: posterior ECDF evaluated at the prior draws — should be
# uniform if sampler and likelihood are correct (Talts et al. 2018)
cDens = ecdf(x[,i])
rankDist <- cDens(priorDraws[,i])
hist(rankDist, breaks = 50, freq = F)
abline(h = 1, col = "red")
gap::qqunif(rankDist,pch=2,bty="n", logscale = F, col = "black", cex = 0.6, main = colnames(priorDraws)[i], cex.main = 1)
# formal two-sample test of posterior samples against prior draws
res[i] = ks.test(x[,i], priorDraws[,i])$p.value
legend("topleft", c(paste("KS test: p=", round(res[i], digits = 5)), paste("Deviation ", ifelse(res[i] < 0.05, "significant", "n.s."))), text.col = ifelse(res[i] < 0.05, "red", "black" ), bty="n")
}
par(oldPar)
# package the per-parameter p-values as an htest object for standard printing
out = list()
out$statistic = NULL
out$method = "ks.test on rank statistics posterior / parameters"
out$alternative = "two.sided"
out$p.value = res
class(out) = "htest"
return(out)
}
224 changes: 112 additions & 112 deletions BayesianTools/R/SMC.R
Original file line number Diff line number Diff line change
@@ -1,112 +1,112 @@
#' SMC sampler
#' @author Florian Hartig
#' @description Sequential Monte Carlo Sampler
#' @param bayesianSetup either an object of class bayesianSetup created by \code{\link{createBayesianSetup}} (recommended), or a log target function
#' @param initialParticles initial particles - either a draw from the prior, provided as a matrix with the single parameters as columns and each row being one particle (parameter vector), or a numeric value with the number of desired particles. In this case, the sampling option must be provided in the prior of the BayesianSetup.
#' @param iterations number of iterations
#' @param resampling if new particles should be created at each iteration
#' @param resamplingSteps how many resampling (MCMC) steps between the iterations
#' @param proposal optional proposal class
#' @param adaptive should the covariance of the proposal be adapted during sampling
#' @param proposalScale scaling factor for the proposal generation. Can be adapted if there is too much / too little rejection
#' @return a list of class c("smcSampler", "bayesianOutput") containing the setup, the initial and final particles, the last posterior evaluations, the proposal generator and an info list
#' @details The sampler can be used for rejection sampling as well as for sequential Monte Carlo. For the former case set the iterations to one.
#'
#' @note The SMC currently assumes that the initial particle is sampled from the prior. If a better initial estimate of the posterior distribution is available, the sampler should be modified to include this. Currently, however, this is not included in the code, so the appropriate adjustments have to be done by hand.
#' @export
#' @example /inst/examples/SMCHelp.R
smcSampler <- function(bayesianSetup, initialParticles = 1000, iterations = 10, resampling = T, resamplingSteps = 2, proposal = NULL, adaptive = T, proposalScale = 0.5){

if(resamplingSteps < 1) stop("SMC error, resamplingSteps can't be < 1")

# normalize the input to a full BayesianSetup object
setup <- checkBayesianSetup(bayesianSetup)

# bookkeeping for diagnostics
# NOTE(review): resamplingAcceptance is allocated here but never filled in
# within this function
info = list()
info$resamplingAcceptance = matrix(nrow = iterations, ncol = resamplingSteps)
info$survivingParticles = rep(NA, iterations)


# if a particle count was given instead of a particle matrix, draw from the prior
# NOTE(review): inherits(x, "numeric") is FALSE for integer input (e.g. 1000L);
# is.numeric() would cover both — confirm intended
if(inherits(initialParticles, "numeric")){
initialParticles = bayesianSetup$prior$sampler(initialParticles)
}

if (any(is.infinite(setup$prior$density(initialParticles)))) stop("initialParticles outside prior range")

particles <- initialParticles

# NOTE(review): despite the name, this accumulates the number of ACCEPTED
# proposals (sum(accepted) below), so info$rejectionRate reports acceptances
rejectionRate = 0

particleSize = nrow(initialParticles)

acceptanceTarget = round(particleSize / 2)

posterior = matrix(nrow = particleSize, ncol = 3)

numPar <- ncol(initialParticles)

# default proposal: independent normal jumps with fixed scale per parameter
if (is.null(proposal)) proposalGenerator = createProposalGenerator(rep(40,numPar))

# NOTE(review): usedUp is never read or updated below — candidate for removal
usedUp = 0

for (i in 1:iterations){

# returnAll = T: matrix with posterior / likelihood / prior columns
posterior = setup$posterior$density(particles, returnAll = T)

likelihoodValues <- posterior[,2]

# idea - adjust (1/iterations) such that always approx 30% of particles are maintained
#level = sort(likelihoodValues)[acceptanceTarget]
#best = likelihoodValues

# NOTE(review): ll and llCutoff are computed but never used below
ll = likelihoodValues - max(likelihoodValues, na.rm = T)

llCutoff = sort(ll)[acceptanceTarget]



# tempered importance weights: likelihood ratio raised to 1/iterations,
# shifted by the max for numerical stability
relativeL = exp(likelihoodValues - max(likelihoodValues, na.rm = T))^(1/iterations)

# multinomial resampling proportional to the tempered weights
sel = sample.int(n=length(likelihoodValues), size = length(likelihoodValues), replace = T, prob = relativeL)

info$survivingParticles[i] = length(unique(sel))

particles = particles[sel,]

# single-parameter case: subsetting dropped the matrix dimension, restore it
if (numPar == 1) particles = matrix(particles, ncol = 1)

if (resampling == T){

if (adaptive == T){
proposalGenerator = updateProposalGenerator(proposalGenerator, particles)
}

# MCMC rejuvenation moves on the resampled particles
for (j in 1:resamplingSteps){
particlesProposals = proposalGenerator$returnProposalMatrix(particles, scale = proposalScale)

# Metropolis ratio with tempered likelihood (power i/iterations) times prior ratio
jumpProb <- exp(setup$posterior$density(particlesProposals) - likelihoodValues[sel])^(i/iterations) * exp(setup$prior$density(particlesProposals) - setup$prior$density(particles))

accepted <- jumpProb > runif(length(jumpProb), 0 ,1)

rejectionRate = rejectionRate + sum(accepted)

particles[accepted, ] = particlesProposals[accepted, ]


}
}
}

# average accepted-proposal count per rejuvenation step (see naming note above)
info$rejectionRate = rejectionRate / (iterations * resamplingSteps)

out = list(
setup = setup,
initialParticles = initialParticles,
particles = particles,
posteriorValues = posterior,
proposalGenerator = proposalGenerator,
info = info
)

class(out) <- c("smcSampler", "bayesianOutput")
return(out)

}
#' SMC sampler
#' @author Florian Hartig
#' @description Sequential Monte Carlo Sampler
#' @param bayesianSetup either an object of class bayesianSetup created by \code{\link{createBayesianSetup}} (recommended), or a log target function
#' @param initialParticles initial particles - either a draw from the prior, provided as a matrix with the single parameters as columns and each row being one particle (parameter vector), or a numeric value with the number of desired particles. In this case, the sampling option must be provided in the prior of the BayesianSetup.
#' @param iterations number of iterations
#' @param resampling logical, specifies whether new particles should be created at each iteration
#' @param resamplingSteps how many resampling (MCMC) steps between the iterations
#' @param proposal optional, proposal class
#' @param adaptive logical, should the covariance of the proposal be adapted during sampling?
#' @param proposalScale scaling factor for the proposal generation. Can be adapted if there is too much / too little rejection
#' @return a list of class c("smcSampler", "bayesianOutput") containing the setup, the initial and final particles, the last posterior evaluations, the proposal generator and an info list
#' @details The sampler can be used for rejection sampling as well as for sequential Monte Carlo. For the former case set the iterations to one.
#'
#' @note The SMC currently assumes that the initial particle is sampled from the prior. If a better initial estimate of the posterior distribution is available, the sampler should be modified to include this. Currently, however, this is not included in the code, so the appropriate adjustments have to be done by hand.
#' @export
#' @example /inst/examples/SMCHelp.R
smcSampler <- function(bayesianSetup, initialParticles = 1000, iterations = 10, resampling = TRUE, resamplingSteps = 2, proposal = NULL, adaptive = TRUE, proposalScale = 0.5){
  if(resamplingSteps < 1) stop("SMC error, resamplingSteps can't be < 1")
  # normalize the input to a full BayesianSetup object
  setup <- checkBayesianSetup(bayesianSetup)
  # bookkeeping for diagnostics
  info = list()
  info$resamplingAcceptance = matrix(nrow = iterations, ncol = resamplingSteps)
  info$survivingParticles = rep(NA, iterations)
  # if a particle count was given instead of a particle matrix, draw from the
  # prior. is.numeric() (rather than inherits(x, "numeric")) also covers
  # integer counts such as 1000L
  if(is.numeric(initialParticles) && !is.matrix(initialParticles)){
    initialParticles = bayesianSetup$prior$sampler(initialParticles)
  }
  if (any(is.infinite(setup$prior$density(initialParticles)))) stop("initialParticles outside prior range")
  particles <- initialParticles
  # NOTE(review): despite the name, this accumulates the number of ACCEPTED
  # proposals (sum(accepted) below); kept as-is so info$rejectionRate is
  # backward compatible
  rejectionRate = 0
  particleSize = nrow(initialParticles)
  posterior = matrix(nrow = particleSize, ncol = 3)
  numPar <- ncol(initialParticles)
  # default proposal: independent jumps with fixed scale per parameter
  if (is.null(proposal)) proposalGenerator = createProposalGenerator(rep(40,numPar))
  for (i in seq_len(iterations)){
    # returnAll = TRUE: matrix with posterior / likelihood / prior columns
    posterior = setup$posterior$density(particles, returnAll = TRUE)
    likelihoodValues <- posterior[,2]
    # tempered importance weights: likelihood ratio raised to 1/iterations,
    # shifted by the max for numerical stability
    relativeL = exp(likelihoodValues - max(likelihoodValues, na.rm = TRUE))^(1/iterations)
    # multinomial resampling proportional to the tempered weights
    sel = sample.int(n=length(likelihoodValues), size = length(likelihoodValues), replace = TRUE, prob = relativeL)
    info$survivingParticles[i] = length(unique(sel))
    particles = particles[sel,]
    # single-parameter case: subsetting dropped the matrix dimension, restore it
    if (numPar == 1) particles = matrix(particles, ncol = 1)
    if (isTRUE(resampling)){
      if (isTRUE(adaptive)){
        proposalGenerator = updateProposalGenerator(proposalGenerator, particles)
      }
      # MCMC rejuvenation moves on the resampled particles
      for (j in seq_len(resamplingSteps)){
        particlesProposals = proposalGenerator$returnProposalMatrix(particles, scale = proposalScale)
        # Metropolis ratio with tempered likelihood (power i/iterations) times prior ratio
        jumpProb <- exp(setup$posterior$density(particlesProposals) - likelihoodValues[sel])^(i/iterations) * exp(setup$prior$density(particlesProposals) - setup$prior$density(particles))
        accepted <- jumpProb > runif(length(jumpProb), 0 ,1)
        rejectionRate = rejectionRate + sum(accepted)
        particles[accepted, ] = particlesProposals[accepted, ]
      }
    }
  }
  # average accepted-proposal count per rejuvenation step (see naming note above)
  info$rejectionRate = rejectionRate / (iterations * resamplingSteps)
  out = list(
    setup = setup,
    initialParticles = initialParticles,
    particles = particles,
    posteriorValues = posterior,
    proposalGenerator = proposalGenerator,
    info = info
  )
  class(out) <- c("smcSampler", "bayesianOutput")
  return(out)
}
Loading