version 0.9.6

cran · Apr 7, 2021 · b9ef4ca · b9ef4ca
1 parent b159c94
commit b9ef4ca
Show file tree

Hide file tree

Showing 18 changed files with 605 additions and 42 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,19 +1,19 @@
 Package: bsts
-Version: 0.9.5
-Date: 2020-04-29
+Version: 0.9.6
+Date: 2021-04-06
 Title: Bayesian Structural Time Series
 Author: Steven L. Scott <[email protected]>
 Maintainer: Steven L. Scott <[email protected]>
 Description: Time series regression using dynamic linear models fit using
  MCMC. See Scott and Varian (2014) <DOI:10.1504/IJMMNO.2014.059942>, among many
  other sources.
-Depends: BoomSpikeSlab (>= 1.2.3), zoo (>= 1.8), xts, Boom (>= 0.9.6),
+Depends: BoomSpikeSlab (>= 1.2.4), zoo (>= 1.8), xts, Boom (>= 0.9.7),
  R(>= 3.4.0)
 Suggests: testthat
-LinkingTo: Boom (>= 0.9.6)
+LinkingTo: Boom (>= 0.9.7)
 License: LGPL-2.1 | file LICENSE
 Encoding: UTF-8
 NeedsCompilation: yes
-Packaged: 2020-05-01 14:24:35 UTC; steve
+Packaged: 2021-04-07 04:18:51 UTC; steve
 Repository: CRAN
-Date/Publication: 2020-05-02 15:00:02 UTC
+Date/Publication: 2021-04-07 09:40:08 UTC
diff --git a/MD5 b/MD5
@@ -1,4 +1,4 @@
-758a97a1f5e02321348bc9386de9d546 *DESCRIPTION
+9c8148b5624fac66f8165acdd3ed9668 *DESCRIPTION
 7eb09e6fd83eb49ed22911c3b2f06744 *LICENSE
 f37f71bbbb4895f02ea12ce0d71b23cf *NAMESPACE
 ceb07fe9db975f5c42496dfaa33a5d14 *R/add.ar.R
@@ -9,7 +9,7 @@ f9d3fe80f95e42e2cc4c2444c22bfbfd *R/add.local.linear.trend.R
 f0c5dc316a9e9472ea4073a526f93ac0 *R/add.monthly.annual.cycle.R
 c55b4c89ff05f1a9c4e9fd18b7fdcc54 *R/add.random.walk.holiday.R
 21e49e3ebe10ffac95c24260320a921d *R/add.regression.holiday.R
-be7dbb9f6b44ced309469e03439952ea *R/add.seasonal.R
+19adabb8f2d359cbab78194c4c598a6e *R/add.seasonal.R
 d4f5df46c6a15e0d4e47ede074775446 *R/add.semilocal.linear.trend.R
 f965dd362d2321286bea83acd0b40c2d *R/add.static.intercept.R
 c69a20b8a59408f1c7e47c51c180b36b *R/add.student.local.linear.trend.R
@@ -24,12 +24,13 @@ cd4f60ef6f034426065ac031ee9bd537 *R/format.timestamps.R
 769da1aee699d16ed0209b0f050c111e *R/holiday.R
 958b7e37110dd2b656fa7d8eef0c023d *R/mbsts.R
 14b5d35d71c65d0c33ffa731332c9201 *R/mbsts.plots.R
-c2a346a0d66201f93f85d29b01575365 *R/mixed.frequency.R
+53d1cd68e9ec96ad82ec6ed3d108df5d *R/mixed.frequency.R
 63bc374c03e9d515b6aca12da8729631 *R/plot_seasonal_effect.R
 9855bc34d9b463799387b55ac2184112 *R/plots.R
 408c9fa167d8a809727fb221448cc028 *R/predict.bsts.R
 55b01ea970a54ee73360d9bcd3c1930a *R/predict.mbsts.R
 09187c9baabe7cdb4a0301f38b5648f1 *R/summary.bsts.R
+01a0b0b09f90f4512941278ced2b2eb0 *R/update.bsts.R
 04dc0efa0ef403afd48e252eba7e7271 *R/utils.R
 e707d635a33700be517ad31745786dc7 *data/gdp.RData
 d754a8d5f216044d2bf2d01bac709648 *data/goog.RData
@@ -39,6 +40,7 @@ e96e68697531b9dbeffbded63b4ee8a2 *data/iclaims.RData
 38a19670e134d95d2cc0ed134abe3539 *data/shark.RData
 761daf2618eaf1014ccc6500f3c82e61 *data/turkish.RData
 0465bac211a2e128119a6fbd17f19c56 *inst/tests/tests/testthat.R
+2c297be76520a1e86a59e50ffce13d00 *inst/tests/tests/testthat/test-ar.R
 630452660972de113c484c1dd4d52269 *inst/tests/tests/testthat/test-autoar.R
 172df9e68c0f7876dcaade3768c2d5f9 *inst/tests/tests/testthat/test-date-range.R
 1caabb1def19e5a88b94e2d6ae0cb760 *inst/tests/tests/testthat/test-dirm.R
@@ -52,8 +54,10 @@ abadc45049de045297dc88b776546d08 *inst/tests/tests/testthat/test-poisson.R
 e91ff5f823c6acbff0b7b7d6810a82a8 *inst/tests/tests/testthat/test-prediction.R
 6d0e435f602c43c1c921f580edef8b39 *inst/tests/tests/testthat/test-regressionholiday.R
 ebe710afd159f82025b7c8b9d6e6d237 *inst/tests/tests/testthat/test-seasonal.R
+6c0c26d250eb86a7f004138b65297c90 *inst/tests/tests/testthat/test-student.R
 9f6f10bf2045923b08f20734cef76b22 *inst/tests/tests/testthat/test-trig.R
 0465bac211a2e128119a6fbd17f19c56 *inst/tests/testthat.R
+2c297be76520a1e86a59e50ffce13d00 *inst/tests/testthat/test-ar.R
 630452660972de113c484c1dd4d52269 *inst/tests/testthat/test-autoar.R
 172df9e68c0f7876dcaade3768c2d5f9 *inst/tests/testthat/test-date-range.R
 1caabb1def19e5a88b94e2d6ae0cb760 *inst/tests/testthat/test-dirm.R
@@ -67,6 +71,7 @@ abadc45049de045297dc88b776546d08 *inst/tests/testthat/test-poisson.R
 e91ff5f823c6acbff0b7b7d6810a82a8 *inst/tests/testthat/test-prediction.R
 6d0e435f602c43c1c921f580edef8b39 *inst/tests/testthat/test-regressionholiday.R
 ebe710afd159f82025b7c8b9d6e6d237 *inst/tests/testthat/test-seasonal.R
+6c0c26d250eb86a7f004138b65297c90 *inst/tests/testthat/test-student.R
 9f6f10bf2045923b08f20734cef76b22 *inst/tests/testthat/test-trig.R
 65ad45f30d10d63352989bcab06c428f *man/HarveyCumulator.Rd
 7d1eef9eb650d72d09f64620466950eb *man/MATCH.NumericTimestamps.Rd
@@ -87,14 +92,14 @@ dd0f4a731a23a55e13853e991fd64d2c *man/add.trig.Rd
 98e85cce5007520dfeb7e492ade2722a *man/aggregate.time.series.Rd
 bd91ad411945df7d74b054e3561cc2a6 *man/aggregate.weeks.to.months.Rd
 db4593d8299a0b801fe53df3ecd777da *man/auto.ar.Rd
-7264c8a1cc321da3fac2a94a68e436e1 *man/bsts.Rd
+46167431326cedf0a047ae9a8dc18862 *man/bsts.Rd
 177c7d07d4182091f2d0b810a1dd0819 *man/bsts.options.Rd
 2ceaadc5491d127d9021ff75b21c8e7e *man/compare.bsts.models.Rd
 99b12efa80a168f273cf27497dd8ba2b *man/date.range.Rd
 2400693969a5522e89c8306a7a5e9fe1 *man/descriptive-plots.Rd
 e1fe02375d1426b93dd3bd3489fef5fd *man/diagnostic-plots.Rd
 a5f78dc83c30bdd3316201478760be28 *man/dirm-model-options.Rd
-feecdcbc883a69f293297584f1c88f22 *man/dirm.Rd
+8e7c43796fc2a75d548d73327f611cc8 *man/dirm.Rd
 8d13a17d97c3769c3ebaa2e2ca3a7cce *man/estimate.time.scale.Rd
 4e1a7d996dbfc702d3790389eb8c9626 *man/extend.time.Rd
 ca6575264d7fa72e0166c23af660d9f4 *man/format.timestamps.Rd
@@ -107,7 +112,7 @@ e7b9351ff2f8b0c117c1b2377303603d *man/geometric.sequence.Rd
 cf7b7d18e7252f5eccd0341eb26a6d82 *man/last.day.in.month.Rd
 d001026a7f503a1d7e4c3139b193bfe5 *man/match.week.to.month.Rd
 5c321a7f4189941f9d151f71627da8d2 *man/max.window.width.Rd
-9ddd53fb98bff70c7eb8dbeb5452e2d1 *man/mbsts.Rd
+a5ff12324d7b00902ad46241136f8fba *man/mbsts.Rd
 43b82cce35af62b441d08cda809329a1 *man/mixed.frequency.Rd
 4c9639c7bfb85396081677060adc4541 *man/month.distance.Rd
 129cf408079a8ce4c6fea7e5e1c54515 *man/named.holidays.Rd
@@ -165,24 +170,26 @@ a432aebec2404eb926fe13e9bcb5fb25 *src/state_space_logit_model_manager.cc
 e216be891919e3263441168f35507631 *src/state_space_poisson_model_manager.h
 d12a59708f82f866ed02944f5431bb59 *src/state_space_regression_model_manager.cc
 7ac07cf5d57c124a845c31baeb64f782 *src/state_space_regression_model_manager.h
-06440dfd21a4660c76b6e0bb313dc944 *src/state_space_student_model_manager.cc
+c3e2c5e82908e0b2f98b248f5db8cfae *src/state_space_student_model_manager.cc
 70d7c3d65ab85979568d692b4a217f9f *src/state_space_student_model_manager.h
 b8f60bed801663863a327aa8bd4d367e *src/timestamp_info.cc
 1c682544f82b2cf9331c827ec6c3f0cf *src/timestamp_info.h
 758355f181bebfed943c020342250e5e *src/utils.cc
 9b299499f0d977e24d5e96e61872bf62 *src/utils.h
 0465bac211a2e128119a6fbd17f19c56 *tests/testthat.R
+2c297be76520a1e86a59e50ffce13d00 *tests/testthat/test-ar.R
 630452660972de113c484c1dd4d52269 *tests/testthat/test-autoar.R
 172df9e68c0f7876dcaade3768c2d5f9 *tests/testthat/test-date-range.R
-1caabb1def19e5a88b94e2d6ae0cb760 *tests/testthat/test-dirm.R
+0126f04464776f5b1b05c32d25d25f55 *tests/testthat/test-dirm.R
 3de38eb5bcb72ddf38c3b818d9518c0d *tests/testthat/test-dynamic-regression.R
 236c11dceb0ea37b1966a706f7ac3a62 *tests/testthat/test-goog.R
 8e8e34dbc5a1d1735ec663091bcd03a9 *tests/testthat/test-holidays.R
 8ab76fcd982a9ba00fd8b18dc9c0ae1b *tests/testthat/test-multivariate.R
 b8cf1c7b59eede2825f50fea5feac4c1 *tests/testthat/test-plot-components.R
 abadc45049de045297dc88b776546d08 *tests/testthat/test-poisson.R
-673847af82904f53ddaaaabf8daa45a0 *tests/testthat/test-prediction-errors.R
+5c420ef79df05489ab47c1ee48ee0323 *tests/testthat/test-prediction-errors.R
 e91ff5f823c6acbff0b7b7d6810a82a8 *tests/testthat/test-prediction.R
-6d0e435f602c43c1c921f580edef8b39 *tests/testthat/test-regressionholiday.R
+74913baa6a0f8135591ba1c6d7b7e46e *tests/testthat/test-regressionholiday.R
 ebe710afd159f82025b7c8b9d6e6d237 *tests/testthat/test-seasonal.R
+6c0c26d250eb86a7f004138b65297c90 *tests/testthat/test-student.R
 9f6f10bf2045923b08f20734cef76b22 *tests/testthat/test-trig.R
diff --git a/R/add.seasonal.R b/R/add.seasonal.R
@@ -85,7 +85,7 @@ plot.Seasonal <- function(x,
  style = c("dynamic", "boxplot"),
  ylim = NULL,
  ...) {
- ## S3 method for plotting a RegressionHolidayStateModel
+ ## S3 method for plotting a Seasonal state model.
  ##
  ## Args:
  ## x: An object inheriting from RegressionHolidayStateModel.
@@ -116,7 +116,7 @@ plot.Seasonal <- function(x,
  if (is.null(.FindStateSpecification(state.specification, bsts.object))) {
  stop("The state specification is not part of the bsts object.")
  }
- 
+
  if (state.specification$nseasons == 7 &&
  state.specification$season.duration == 1) {
  PlotDayOfWeekCycle(bsts.object, burn = burn, time = time, ylim = ylim,

diff --git a/R/mixed.frequency.R b/R/mixed.frequency.R
@@ -128,8 +128,8 @@ bsts.mixed <- function(target.series,
 
  if (is.null(regression.prior)) {
  fine.frequency <- nrow(predictors) / length(target.series)
- mean.fine.series <- mean(target.series) / fine.frequency
- sd.fine.series <- sd(target.series) / sqrt(fine.frequency)
+ mean.fine.series <- mean(target.series, na.rm = TRUE) / fine.frequency
+ sd.fine.series <- sd(target.series, na.rm = TRUE) / sqrt(fine.frequency)
  ## By default, don't accept any draws of the residual standard
  ## deviation that are greater than 20% larger than the empirical
  ## SD.

diff --git a/R/update.bsts.R b/R/update.bsts.R
@@ -0,0 +1,33 @@
+update.bsts <- function(model, newdata, trials.or.exposure=1, na.action=na.exclude, seed = NULL) {
+ ## Update the latent state draws for a bsts model in response to new data.
+ ## This is essentially "rolling" the state forward. Parameter draws are not
+ ## updated, but future calls to "predict" jump off from the update state
+ ## rather than the state saved at train time.
+
+ stopifnot(inherits(model, "bsts"))
+ if (model$has.regression) {
+ stopifnot(inherits(newdata, "dataframe"))
+ } else {
+ stopifnot(is.numeric(newdata))
+ }
+
+ update.data <- .FormatBstsUpdateData(model, newdata, trials.or.exposure=trials.or.exposure, na.action=na.action)
+
+ updates <- .Call("analysis_common_r_update_bsts_final_state_",
+ model,
+ update.data,
+ seed = seed)
+
+ stopifnot(is.matrix(updates))
+ return(updates)
+}
+
+###======================================================================
+
+.FormatBstsUpdateData <- function(object, newdata, trials.or.exposure, na.action) {
+ formatted.data <- .FormatBstsPredictionData(object, newdata, trials.or.exposure, na.action)
+ ## TODO(steve): This will work for the POC, but lots of edge cases to handle here.
+ formatted.data$response <- newdata
+ formatted.data$response.is.observed <- !is.na(newdata)
+ return(formatted.data)
+}
diff --git a/inst/tests/tests/testthat/test-ar.R b/inst/tests/tests/testthat/test-ar.R
@@ -0,0 +1,21 @@
+library(bsts)
+library(testthat)
+
+test_that("AddAr produces nonzero coefficients", {
+ sample.size <- 100
+ residual.sd <- .001
+ # Actual values of the AR coefficients
+ true.phi <- c(-.7, .3, .15)
+ ar <- arima.sim(model = list(ar = true.phi), n = sample.size, sd = 3)
+ ## Layer some noise on top of the AR process.
+ y <- ar + rnorm(sample.size, 0, residual.sd)
+ ss <- AddAr(list(), lags = 3, sigma.prior = SdPrior(3.0, 1.0))
+ # Fit the model with knowledge with residual.sd essentially fixed at the true
+ # value.
+ model <- bsts(y,
+ state.specification = ss,
+ niter = 10,
+ prior = SdPrior(residual.sd, 100000))
+
+ expect_true(any(model$AR3.coefficients != 0))
+})
diff --git a/inst/tests/tests/testthat/test-student.R b/inst/tests/tests/testthat/test-student.R
@@ -0,0 +1,153 @@
+library(bsts)
+library(testthat)
+
+test_that("Student work without regression.", {
+ data(AirPassengers)
+ y <- log(AirPassengers)
+ ss <- AddLocalLinearTrend(list(), y)
+ ss <- AddSeasonal(ss, y, nseasons = 12)
+ model <- bsts(y, state.specification = ss, niter = 200, ping = -1,
+ family="student")
+ full.pred <- predict(model, horizon = 12, burn = 100)
+
+ training <- window(y, end = c(1959, 12))
+ small.model <- bsts(training, state.specification = ss, niter = 200,
+ ping = -1, family="student")
+ pred.holdout <- predict(small.model, horizon = 12)
+ expect_equal(length(pred.holdout$original.series), length(y) - 12)
+
+ updated.pred <- predict(small.model, horizon = 12, olddata = y)
+ expect_equal(updated.pred$original.series, y)
+ expect_equal(length(updated.pred$original.series), length(full.pred$original.series))
+})
+
+
+test_that("Student predictions work with regression.", {
+ data(iclaims)
+ training <- initial.claims[1:402, ]
+ holdout1 <- initial.claims[403:450, ]
+ holdout2 <- initial.claims[451:456, ]
+
+ ss <- AddLocalLinearTrend(list(), training$iclaimsNSA)
+ ss <- AddSeasonal(ss, training$iclaimsNSA, nseasons = 52)
+ model <- bsts(iclaimsNSA ~ ., state.specification = ss, data =
+ training, niter = 100, family = "student")
+
+ ## Predict the holdout set given the training set.
+ ## This is really fast, because we can use saved state from the MCMC
+ ## algorithm.
+ pred.full <- predict(model, newdata = rbind(holdout1, holdout2))
+ expect_equal(length(pred.full), 5)
+ expect_equal(names(pred.full), c("mean", "median", "interval",
+ "distribution", "original.series"))
+ expect_equal(length(pred.full$mean), 54)
+ expect_true(is.matrix(pred.full$distribution))
+ expect_equal(ncol(pred.full$distribution), 54)
+
+ ## Predict holdout 2, given training and holdout1.
+ ## This is much slower because we need to re-filter the 'olddata' before
+ ## simulating the predictions.
+ pred.update <- predict(model, newdata = holdout2,
+ olddata = rbind(training, holdout1))
+
+ expect_equal(length(pred.update), 5)
+ expect_equal(names(pred.update), c("mean", "median", "interval",
+ "distribution", "original.series"))
+ expect_equal(length(pred.update$mean), 6)
+ expect_true(is.matrix(pred.update$distribution))
+ expect_equal(ncol(pred.update$distribution), 6)
+})
+
+test_that("Off by one error is solved", {
+ # Predictions for regression models experienced an off-by-one error. This
+ # test demonstrates that the error is solved.
+
+ ## Simulate data. The pattern is a just a day-of-week pattern with
+ ## substantial day-to-day variation.
+ N = 7*52
+ dateseq <- seq(as.Date("2000-01-02"), length.out = N, by = "days")
+ proportions <- c(0.005, 0.14, 0.17, 0.23, 0.265, 0.18, 0.01)
+ pMatrix <- matrix(0, ncol = 1, nrow = N)
+ for (i in 1:7) {
+ day.indicator <- which(weekdays(dateseq, FALSE) == weekday.names[i])
+ pMatrix[day.indicator, 1] = proportions[i]
+ }
+ set.seed(102)
+ weekly.sim <- arima.sim(list(ma=-0.1), n = 52, sd = 0.01)
+ weekly.sim <- exp(diffinv(weekly.sim, differences = 1, xi = log(200)))[-1]
+# plot(weekly.sim, type = "l")
+
+ seqindx <- c(seq(1, N, by = 7), N + 1)
+ daily.sim.allocate <- do.call(rbind, lapply(1:(length(seqindx) - 1), function(i){
+ cbind(pMatrix[seqindx[i]:(seqindx[i + 1] - 1)] * weekly.sim[i])
+ }))
+ # add a holiday effect
+ independence <- which(dateseq == "2000-07-04")
+ daily.sim.allocate[independence] <- daily.sim.allocate[independence] * 0.1
+ xreg <- rep(0, N)
+ xreg[independence] <- 1
+
+ # colnames(xreg) <- "independence"
+ dat <- data.frame(y = as.numeric(daily.sim.allocate), independence = xreg,
+ days = weekdays(dateseq), stringsAsFactors = FALSE)
+
+ rownames(dat) <- dateseq
+ # dat <- zoo(dat, dateseq)
+ # plot(dat[,1], type = "l")
+ tail(dat[1:(independence+24),])
+
+ ## estimation part
+ spec <- AddLocalLevel(list(), y = as.numeric(dat[1:(independence+24),1]))
+ spec <- AddSeasonal(spec, y = as.numeric(dat[1:(independence+24),1]),
+ nseasons = 7)
+
+ train <- 1:(independence + 24)
+
+ # estimate with regressor and intercept
+ niter <- 250
+ seed <- 8675309
+ mod1 <- bsts(y~independence, data = dat[train, 1:2], family = "student",
+ state.specification = spec, niter = niter, seed = seed)
+
+ # estimate with regressor and no intercept
+ ## mod2 <- bsts(y~independence-1, data = dat[train, 1:2],
+ ## state.specification = spec, niter = niter, seed = seed)
+
+ # estimate with no regressor and no intercept
+ mod3 <- bsts(as.numeric(dat[train, 1]), family = "student",
+ state.specification = spec, niter = niter, seed = seed)
+
+ ## forecast part
+ test <- (independence + 25):N
+ horizon <- length(test)
+ # the newdata has only zeros.
+ newdata <- dat[test, 2, drop = FALSE]
+
+ p1 <- predict(mod1, newdata = newdata, seed = seed)
+# p2 <- predict(mod2, newdata = newdata, seed = seed)
+ p3 <- predict(mod3, horizon = horizon, seed = seed)
+
+ # Comparison of output. The predictions from models 1 and 2 (which have a
+ # regression component) are off by one when compared to the model sans
+ # regression component.
+ comparison <- data.frame(
+ day = weekdays(as.Date(rownames(dat[test, 2, drop = FALSE]))),
+ reg1 = colMeans(p1$distribution),
+# reg2 = colMeans(p2$distribution),
+ ts = colMeans(p3$distribution),
+ actual = dat[test, 1])
+
+ ## > head(comparison)
+ ## day reg1 reg2 ts actual
+ ## 1 Saturday 3.034669 3.001731 2.038222 2.153567
+ ## 2 Sunday 1.971062 1.922798 1.437442 1.074628
+ ## 3 Monday 30.622732 30.597797 29.827300 30.089592
+ ## 4 Tuesday 36.785562 36.711367 34.199968 36.537362
+ ## 5 Wednesday 49.437153 49.367559 48.581829 49.432902
+ ## 6 Thursday 56.878702 56.844302 55.889856 56.955300
+
+ # In the error state the first row showed reg1 and reg2 with very large values
+ # because they were using an off-by-1 seasonal effect
+ expect_true(all(abs(comparison$reg1 - comparison$ts) < 4))
+ expect_true(cor(comparison$reg1, comparison$ts) > .98)
+})