-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
b159c94
commit b9ef4ca
Showing
18 changed files
with
605 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,19 @@ | ||
Package: bsts | ||
Version: 0.9.5 | ||
Date: 2020-04-29 | ||
Version: 0.9.6 | ||
Date: 2021-04-06 | ||
Title: Bayesian Structural Time Series | ||
Author: Steven L. Scott <[email protected]> | ||
Maintainer: Steven L. Scott <[email protected]> | ||
Description: Time series regression using dynamic linear models fit using | ||
MCMC. See Scott and Varian (2014) <DOI:10.1504/IJMMNO.2014.059942>, among many | ||
other sources. | ||
Depends: BoomSpikeSlab (>= 1.2.3), zoo (>= 1.8), xts, Boom (>= 0.9.6), | ||
Depends: BoomSpikeSlab (>= 1.2.4), zoo (>= 1.8), xts, Boom (>= 0.9.7), | ||
R(>= 3.4.0) | ||
Suggests: testthat | ||
LinkingTo: Boom (>= 0.9.6) | ||
LinkingTo: Boom (>= 0.9.7) | ||
License: LGPL-2.1 | file LICENSE | ||
Encoding: UTF-8 | ||
NeedsCompilation: yes | ||
Packaged: 2020-05-01 14:24:35 UTC; steve | ||
Packaged: 2021-04-07 04:18:51 UTC; steve | ||
Repository: CRAN | ||
Date/Publication: 2020-05-02 15:00:02 UTC | ||
Date/Publication: 2021-04-07 09:40:08 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
update.bsts <- function(model, newdata, trials.or.exposure=1, na.action=na.exclude, seed = NULL) { | ||
## Update the latent state draws for a bsts model in response to new data. | ||
## This is essentially "rolling" the state forward. Parameter draws are not | ||
## updated, but future calls to "predict" jump off from the update state | ||
## rather than the state saved at train time. | ||
|
||
stopifnot(inherits(model, "bsts")) | ||
if (model$has.regression) { | ||
stopifnot(inherits(newdata, "dataframe")) | ||
} else { | ||
stopifnot(is.numeric(newdata)) | ||
} | ||
|
||
update.data <- .FormatBstsUpdateData(model, newdata, trials.or.exposure=trials.or.exposure, na.action=na.action) | ||
|
||
updates <- .Call("analysis_common_r_update_bsts_final_state_", | ||
model, | ||
update.data, | ||
seed = seed) | ||
|
||
stopifnot(is.matrix(updates)) | ||
return(updates) | ||
} | ||
|
||
###====================================================================== | ||
|
||
.FormatBstsUpdateData <- function(object, newdata, trials.or.exposure, na.action) { | ||
formatted.data <- .FormatBstsPredictionData(object, newdata, trials.or.exposure, na.action) | ||
## TODO(steve): This will work for the POC, but lots of edge cases to handle here. | ||
formatted.data$response <- newdata | ||
formatted.data$response.is.observed <- !is.na(newdata) | ||
return(formatted.data) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
library(bsts) | ||
library(testthat) | ||
|
||
test_that("AddAr produces nonzero coefficients", { | ||
sample.size <- 100 | ||
residual.sd <- .001 | ||
# Actual values of the AR coefficients | ||
true.phi <- c(-.7, .3, .15) | ||
ar <- arima.sim(model = list(ar = true.phi), n = sample.size, sd = 3) | ||
## Layer some noise on top of the AR process. | ||
y <- ar + rnorm(sample.size, 0, residual.sd) | ||
ss <- AddAr(list(), lags = 3, sigma.prior = SdPrior(3.0, 1.0)) | ||
# Fit the model with knowledge with residual.sd essentially fixed at the true | ||
# value. | ||
model <- bsts(y, | ||
state.specification = ss, | ||
niter = 10, | ||
prior = SdPrior(residual.sd, 100000)) | ||
|
||
expect_true(any(model$AR3.coefficients != 0)) | ||
}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
library(bsts) | ||
library(testthat) | ||
|
||
test_that("Student work without regression.", { | ||
data(AirPassengers) | ||
y <- log(AirPassengers) | ||
ss <- AddLocalLinearTrend(list(), y) | ||
ss <- AddSeasonal(ss, y, nseasons = 12) | ||
model <- bsts(y, state.specification = ss, niter = 200, ping = -1, | ||
family="student") | ||
full.pred <- predict(model, horizon = 12, burn = 100) | ||
|
||
training <- window(y, end = c(1959, 12)) | ||
small.model <- bsts(training, state.specification = ss, niter = 200, | ||
ping = -1, family="student") | ||
pred.holdout <- predict(small.model, horizon = 12) | ||
expect_equal(length(pred.holdout$original.series), length(y) - 12) | ||
|
||
updated.pred <- predict(small.model, horizon = 12, olddata = y) | ||
expect_equal(updated.pred$original.series, y) | ||
expect_equal(length(updated.pred$original.series), length(full.pred$original.series)) | ||
}) | ||
|
||
|
||
test_that("Student predictions work with regression.", { | ||
data(iclaims) | ||
training <- initial.claims[1:402, ] | ||
holdout1 <- initial.claims[403:450, ] | ||
holdout2 <- initial.claims[451:456, ] | ||
|
||
ss <- AddLocalLinearTrend(list(), training$iclaimsNSA) | ||
ss <- AddSeasonal(ss, training$iclaimsNSA, nseasons = 52) | ||
model <- bsts(iclaimsNSA ~ ., state.specification = ss, data = | ||
training, niter = 100, family = "student") | ||
|
||
## Predict the holdout set given the training set. | ||
## This is really fast, because we can use saved state from the MCMC | ||
## algorithm. | ||
pred.full <- predict(model, newdata = rbind(holdout1, holdout2)) | ||
expect_equal(length(pred.full), 5) | ||
expect_equal(names(pred.full), c("mean", "median", "interval", | ||
"distribution", "original.series")) | ||
expect_equal(length(pred.full$mean), 54) | ||
expect_true(is.matrix(pred.full$distribution)) | ||
expect_equal(ncol(pred.full$distribution), 54) | ||
|
||
## Predict holdout 2, given training and holdout1. | ||
## This is much slower because we need to re-filter the 'olddata' before | ||
## simulating the predictions. | ||
pred.update <- predict(model, newdata = holdout2, | ||
olddata = rbind(training, holdout1)) | ||
|
||
expect_equal(length(pred.update), 5) | ||
expect_equal(names(pred.update), c("mean", "median", "interval", | ||
"distribution", "original.series")) | ||
expect_equal(length(pred.update$mean), 6) | ||
expect_true(is.matrix(pred.update$distribution)) | ||
expect_equal(ncol(pred.update$distribution), 6) | ||
}) | ||
|
||
test_that("Off by one error is solved", { | ||
# Predictions for regression models experienced an off-by-one error. This | ||
# test demonstrates that the error is solved. | ||
|
||
## Simulate data. The pattern is a just a day-of-week pattern with | ||
## substantial day-to-day variation. | ||
N = 7*52 | ||
dateseq <- seq(as.Date("2000-01-02"), length.out = N, by = "days") | ||
proportions <- c(0.005, 0.14, 0.17, 0.23, 0.265, 0.18, 0.01) | ||
pMatrix <- matrix(0, ncol = 1, nrow = N) | ||
for (i in 1:7) { | ||
day.indicator <- which(weekdays(dateseq, FALSE) == weekday.names[i]) | ||
pMatrix[day.indicator, 1] = proportions[i] | ||
} | ||
set.seed(102) | ||
weekly.sim <- arima.sim(list(ma=-0.1), n = 52, sd = 0.01) | ||
weekly.sim <- exp(diffinv(weekly.sim, differences = 1, xi = log(200)))[-1] | ||
# plot(weekly.sim, type = "l") | ||
|
||
seqindx <- c(seq(1, N, by = 7), N + 1) | ||
daily.sim.allocate <- do.call(rbind, lapply(1:(length(seqindx) - 1), function(i){ | ||
cbind(pMatrix[seqindx[i]:(seqindx[i + 1] - 1)] * weekly.sim[i]) | ||
})) | ||
# add a holiday effect | ||
independence <- which(dateseq == "2000-07-04") | ||
daily.sim.allocate[independence] <- daily.sim.allocate[independence] * 0.1 | ||
xreg <- rep(0, N) | ||
xreg[independence] <- 1 | ||
|
||
# colnames(xreg) <- "independence" | ||
dat <- data.frame(y = as.numeric(daily.sim.allocate), independence = xreg, | ||
days = weekdays(dateseq), stringsAsFactors = FALSE) | ||
|
||
rownames(dat) <- dateseq | ||
# dat <- zoo(dat, dateseq) | ||
# plot(dat[,1], type = "l") | ||
tail(dat[1:(independence+24),]) | ||
|
||
## estimation part | ||
spec <- AddLocalLevel(list(), y = as.numeric(dat[1:(independence+24),1])) | ||
spec <- AddSeasonal(spec, y = as.numeric(dat[1:(independence+24),1]), | ||
nseasons = 7) | ||
|
||
train <- 1:(independence + 24) | ||
|
||
# estimate with regressor and intercept | ||
niter <- 250 | ||
seed <- 8675309 | ||
mod1 <- bsts(y~independence, data = dat[train, 1:2], family = "student", | ||
state.specification = spec, niter = niter, seed = seed) | ||
|
||
# estimate with regressor and no intercept | ||
## mod2 <- bsts(y~independence-1, data = dat[train, 1:2], | ||
## state.specification = spec, niter = niter, seed = seed) | ||
|
||
# estimate with no regressor and no intercept | ||
mod3 <- bsts(as.numeric(dat[train, 1]), family = "student", | ||
state.specification = spec, niter = niter, seed = seed) | ||
|
||
## forecast part | ||
test <- (independence + 25):N | ||
horizon <- length(test) | ||
# the newdata has only zeros. | ||
newdata <- dat[test, 2, drop = FALSE] | ||
|
||
p1 <- predict(mod1, newdata = newdata, seed = seed) | ||
# p2 <- predict(mod2, newdata = newdata, seed = seed) | ||
p3 <- predict(mod3, horizon = horizon, seed = seed) | ||
|
||
# Comparison of output. The predictions from models 1 and 2 (which have a | ||
# regression component) are off by one when compared to the model sans | ||
# regression component. | ||
comparison <- data.frame( | ||
day = weekdays(as.Date(rownames(dat[test, 2, drop = FALSE]))), | ||
reg1 = colMeans(p1$distribution), | ||
# reg2 = colMeans(p2$distribution), | ||
ts = colMeans(p3$distribution), | ||
actual = dat[test, 1]) | ||
|
||
## > head(comparison) | ||
## day reg1 reg2 ts actual | ||
## 1 Saturday 3.034669 3.001731 2.038222 2.153567 | ||
## 2 Sunday 1.971062 1.922798 1.437442 1.074628 | ||
## 3 Monday 30.622732 30.597797 29.827300 30.089592 | ||
## 4 Tuesday 36.785562 36.711367 34.199968 36.537362 | ||
## 5 Wednesday 49.437153 49.367559 48.581829 49.432902 | ||
## 6 Thursday 56.878702 56.844302 55.889856 56.955300 | ||
|
||
# In the error state the first row showed reg1 and reg2 with very large values | ||
# because they were using an off-by-1 seasonal effect | ||
expect_true(all(abs(comparison$reg1 - comparison$ts) < 4)) | ||
expect_true(cor(comparison$reg1, comparison$ts) > .98) | ||
}) |
Oops, something went wrong.