From b9ef4ca4df1e187c9c48c37cb09ec67adf1cdcbd Mon Sep 17 00:00:00 2001 From: "Steven L. Scott" Date: Wed, 7 Apr 2021 08:40:08 +0000 Subject: [PATCH] version 0.9.6 --- DESCRIPTION | 12 +- MD5 | 27 ++-- R/add.seasonal.R | 4 +- R/mixed.frequency.R | 4 +- R/update.bsts.R | 33 +++++ inst/tests/tests/testthat/test-ar.R | 21 ++++ inst/tests/tests/testthat/test-student.R | 153 +++++++++++++++++++++++ inst/tests/testthat/test-ar.R | 21 ++++ inst/tests/testthat/test-student.R | 153 +++++++++++++++++++++++ man/bsts.Rd | 4 +- man/dirm.Rd | 2 +- man/mbsts.Rd | 14 +-- src/state_space_student_model_manager.cc | 11 +- tests/testthat/test-ar.R | 21 ++++ tests/testthat/test-dirm.R | 8 +- tests/testthat/test-prediction-errors.R | 2 +- tests/testthat/test-regressionholiday.R | 4 +- tests/testthat/test-student.R | 153 +++++++++++++++++++++++ 18 files changed, 605 insertions(+), 42 deletions(-) create mode 100644 R/update.bsts.R create mode 100644 inst/tests/tests/testthat/test-ar.R create mode 100644 inst/tests/tests/testthat/test-student.R create mode 100644 inst/tests/testthat/test-ar.R create mode 100644 inst/tests/testthat/test-student.R create mode 100644 tests/testthat/test-ar.R create mode 100644 tests/testthat/test-student.R diff --git a/DESCRIPTION b/DESCRIPTION index 746a964..0ad00be 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,19 +1,19 @@ Package: bsts -Version: 0.9.5 -Date: 2020-04-29 +Version: 0.9.6 +Date: 2021-04-06 Title: Bayesian Structural Time Series Author: Steven L. Scott Maintainer: Steven L. Scott Description: Time series regression using dynamic linear models fit using MCMC. See Scott and Varian (2014) , among many other sources. -Depends: BoomSpikeSlab (>= 1.2.3), zoo (>= 1.8), xts, Boom (>= 0.9.6), +Depends: BoomSpikeSlab (>= 1.2.4), zoo (>= 1.8), xts, Boom (>= 0.9.7), R(>= 3.4.0) Suggests: testthat -LinkingTo: Boom (>= 0.9.6) +LinkingTo: Boom (>= 0.9.7) License: LGPL-2.1 | file LICENSE Encoding: UTF-8 NeedsCompilation: yes -Packaged: 2020-05-01 14:24:35 UTC; steve +Packaged: 2021-04-07 04:18:51 UTC; steve Repository: CRAN -Date/Publication: 2020-05-02 15:00:02 UTC +Date/Publication: 2021-04-07 09:40:08 UTC diff --git a/MD5 b/MD5 index b3057eb..1c969d1 100644 --- a/MD5 +++ b/MD5 @@ -1,4 +1,4 @@ -758a97a1f5e02321348bc9386de9d546 *DESCRIPTION +9c8148b5624fac66f8165acdd3ed9668 *DESCRIPTION 7eb09e6fd83eb49ed22911c3b2f06744 *LICENSE f37f71bbbb4895f02ea12ce0d71b23cf *NAMESPACE ceb07fe9db975f5c42496dfaa33a5d14 *R/add.ar.R @@ -9,7 +9,7 @@ f9d3fe80f95e42e2cc4c2444c22bfbfd *R/add.local.linear.trend.R f0c5dc316a9e9472ea4073a526f93ac0 *R/add.monthly.annual.cycle.R c55b4c89ff05f1a9c4e9fd18b7fdcc54 *R/add.random.walk.holiday.R 21e49e3ebe10ffac95c24260320a921d *R/add.regression.holiday.R -be7dbb9f6b44ced309469e03439952ea *R/add.seasonal.R +19adabb8f2d359cbab78194c4c598a6e *R/add.seasonal.R d4f5df46c6a15e0d4e47ede074775446 *R/add.semilocal.linear.trend.R f965dd362d2321286bea83acd0b40c2d *R/add.static.intercept.R c69a20b8a59408f1c7e47c51c180b36b *R/add.student.local.linear.trend.R @@ -24,12 +24,13 @@ cd4f60ef6f034426065ac031ee9bd537 *R/format.timestamps.R 769da1aee699d16ed0209b0f050c111e *R/holiday.R 958b7e37110dd2b656fa7d8eef0c023d *R/mbsts.R 14b5d35d71c65d0c33ffa731332c9201 *R/mbsts.plots.R -c2a346a0d66201f93f85d29b01575365 *R/mixed.frequency.R +53d1cd68e9ec96ad82ec6ed3d108df5d *R/mixed.frequency.R 63bc374c03e9d515b6aca12da8729631 *R/plot_seasonal_effect.R 9855bc34d9b463799387b55ac2184112 *R/plots.R 408c9fa167d8a809727fb221448cc028 *R/predict.bsts.R 55b01ea970a54ee73360d9bcd3c1930a *R/predict.mbsts.R 09187c9baabe7cdb4a0301f38b5648f1 *R/summary.bsts.R +01a0b0b09f90f4512941278ced2b2eb0 *R/update.bsts.R 04dc0efa0ef403afd48e252eba7e7271 *R/utils.R e707d635a33700be517ad31745786dc7 *data/gdp.RData d754a8d5f216044d2bf2d01bac709648 *data/goog.RData @@ -39,6 +40,7 @@ e96e68697531b9dbeffbded63b4ee8a2 *data/iclaims.RData 38a19670e134d95d2cc0ed134abe3539 *data/shark.RData 761daf2618eaf1014ccc6500f3c82e61 *data/turkish.RData 0465bac211a2e128119a6fbd17f19c56 *inst/tests/tests/testthat.R +2c297be76520a1e86a59e50ffce13d00 *inst/tests/tests/testthat/test-ar.R 630452660972de113c484c1dd4d52269 *inst/tests/tests/testthat/test-autoar.R 172df9e68c0f7876dcaade3768c2d5f9 *inst/tests/tests/testthat/test-date-range.R 1caabb1def19e5a88b94e2d6ae0cb760 *inst/tests/tests/testthat/test-dirm.R @@ -52,8 +54,10 @@ abadc45049de045297dc88b776546d08 *inst/tests/tests/testthat/test-poisson.R e91ff5f823c6acbff0b7b7d6810a82a8 *inst/tests/tests/testthat/test-prediction.R 6d0e435f602c43c1c921f580edef8b39 *inst/tests/tests/testthat/test-regressionholiday.R ebe710afd159f82025b7c8b9d6e6d237 *inst/tests/tests/testthat/test-seasonal.R +6c0c26d250eb86a7f004138b65297c90 *inst/tests/tests/testthat/test-student.R 9f6f10bf2045923b08f20734cef76b22 *inst/tests/tests/testthat/test-trig.R 0465bac211a2e128119a6fbd17f19c56 *inst/tests/testthat.R +2c297be76520a1e86a59e50ffce13d00 *inst/tests/testthat/test-ar.R 630452660972de113c484c1dd4d52269 *inst/tests/testthat/test-autoar.R 172df9e68c0f7876dcaade3768c2d5f9 *inst/tests/testthat/test-date-range.R 1caabb1def19e5a88b94e2d6ae0cb760 *inst/tests/testthat/test-dirm.R @@ -67,6 +71,7 @@ abadc45049de045297dc88b776546d08 *inst/tests/testthat/test-poisson.R e91ff5f823c6acbff0b7b7d6810a82a8 *inst/tests/testthat/test-prediction.R 6d0e435f602c43c1c921f580edef8b39 *inst/tests/testthat/test-regressionholiday.R ebe710afd159f82025b7c8b9d6e6d237 *inst/tests/testthat/test-seasonal.R +6c0c26d250eb86a7f004138b65297c90 *inst/tests/testthat/test-student.R 9f6f10bf2045923b08f20734cef76b22 *inst/tests/testthat/test-trig.R 65ad45f30d10d63352989bcab06c428f *man/HarveyCumulator.Rd 7d1eef9eb650d72d09f64620466950eb *man/MATCH.NumericTimestamps.Rd @@ -87,14 +92,14 @@ dd0f4a731a23a55e13853e991fd64d2c *man/add.trig.Rd 98e85cce5007520dfeb7e492ade2722a *man/aggregate.time.series.Rd bd91ad411945df7d74b054e3561cc2a6 *man/aggregate.weeks.to.months.Rd db4593d8299a0b801fe53df3ecd777da *man/auto.ar.Rd -7264c8a1cc321da3fac2a94a68e436e1 *man/bsts.Rd +46167431326cedf0a047ae9a8dc18862 *man/bsts.Rd 177c7d07d4182091f2d0b810a1dd0819 *man/bsts.options.Rd 2ceaadc5491d127d9021ff75b21c8e7e *man/compare.bsts.models.Rd 99b12efa80a168f273cf27497dd8ba2b *man/date.range.Rd 2400693969a5522e89c8306a7a5e9fe1 *man/descriptive-plots.Rd e1fe02375d1426b93dd3bd3489fef5fd *man/diagnostic-plots.Rd a5f78dc83c30bdd3316201478760be28 *man/dirm-model-options.Rd -feecdcbc883a69f293297584f1c88f22 *man/dirm.Rd +8e7c43796fc2a75d548d73327f611cc8 *man/dirm.Rd 8d13a17d97c3769c3ebaa2e2ca3a7cce *man/estimate.time.scale.Rd 4e1a7d996dbfc702d3790389eb8c9626 *man/extend.time.Rd ca6575264d7fa72e0166c23af660d9f4 *man/format.timestamps.Rd @@ -107,7 +112,7 @@ e7b9351ff2f8b0c117c1b2377303603d *man/geometric.sequence.Rd cf7b7d18e7252f5eccd0341eb26a6d82 *man/last.day.in.month.Rd d001026a7f503a1d7e4c3139b193bfe5 *man/match.week.to.month.Rd 5c321a7f4189941f9d151f71627da8d2 *man/max.window.width.Rd -9ddd53fb98bff70c7eb8dbeb5452e2d1 *man/mbsts.Rd +a5ff12324d7b00902ad46241136f8fba *man/mbsts.Rd 43b82cce35af62b441d08cda809329a1 *man/mixed.frequency.Rd 4c9639c7bfb85396081677060adc4541 *man/month.distance.Rd 129cf408079a8ce4c6fea7e5e1c54515 *man/named.holidays.Rd @@ -165,24 +170,26 @@ a432aebec2404eb926fe13e9bcb5fb25 *src/state_space_logit_model_manager.cc e216be891919e3263441168f35507631 *src/state_space_poisson_model_manager.h d12a59708f82f866ed02944f5431bb59 *src/state_space_regression_model_manager.cc 7ac07cf5d57c124a845c31baeb64f782 *src/state_space_regression_model_manager.h -06440dfd21a4660c76b6e0bb313dc944 *src/state_space_student_model_manager.cc +c3e2c5e82908e0b2f98b248f5db8cfae *src/state_space_student_model_manager.cc 70d7c3d65ab85979568d692b4a217f9f *src/state_space_student_model_manager.h b8f60bed801663863a327aa8bd4d367e *src/timestamp_info.cc 1c682544f82b2cf9331c827ec6c3f0cf *src/timestamp_info.h 758355f181bebfed943c020342250e5e *src/utils.cc 9b299499f0d977e24d5e96e61872bf62 *src/utils.h 0465bac211a2e128119a6fbd17f19c56 *tests/testthat.R +2c297be76520a1e86a59e50ffce13d00 *tests/testthat/test-ar.R 630452660972de113c484c1dd4d52269 *tests/testthat/test-autoar.R 172df9e68c0f7876dcaade3768c2d5f9 *tests/testthat/test-date-range.R -1caabb1def19e5a88b94e2d6ae0cb760 *tests/testthat/test-dirm.R +0126f04464776f5b1b05c32d25d25f55 *tests/testthat/test-dirm.R 3de38eb5bcb72ddf38c3b818d9518c0d *tests/testthat/test-dynamic-regression.R 236c11dceb0ea37b1966a706f7ac3a62 *tests/testthat/test-goog.R 8e8e34dbc5a1d1735ec663091bcd03a9 *tests/testthat/test-holidays.R 8ab76fcd982a9ba00fd8b18dc9c0ae1b *tests/testthat/test-multivariate.R b8cf1c7b59eede2825f50fea5feac4c1 *tests/testthat/test-plot-components.R abadc45049de045297dc88b776546d08 *tests/testthat/test-poisson.R -673847af82904f53ddaaaabf8daa45a0 *tests/testthat/test-prediction-errors.R +5c420ef79df05489ab47c1ee48ee0323 *tests/testthat/test-prediction-errors.R e91ff5f823c6acbff0b7b7d6810a82a8 *tests/testthat/test-prediction.R -6d0e435f602c43c1c921f580edef8b39 *tests/testthat/test-regressionholiday.R +74913baa6a0f8135591ba1c6d7b7e46e *tests/testthat/test-regressionholiday.R ebe710afd159f82025b7c8b9d6e6d237 *tests/testthat/test-seasonal.R +6c0c26d250eb86a7f004138b65297c90 *tests/testthat/test-student.R 9f6f10bf2045923b08f20734cef76b22 *tests/testthat/test-trig.R diff --git a/R/add.seasonal.R b/R/add.seasonal.R index 438114e..7eccf6f 100644 --- a/R/add.seasonal.R +++ b/R/add.seasonal.R @@ -85,7 +85,7 @@ plot.Seasonal <- function(x, style = c("dynamic", "boxplot"), ylim = NULL, ...) { - ## S3 method for plotting a RegressionHolidayStateModel + ## S3 method for plotting a Seasonal state model. ## ## Args: ## x: An object inheriting from RegressionHolidayStateModel. @@ -116,7 +116,7 @@ plot.Seasonal <- function(x, if (is.null(.FindStateSpecification(state.specification, bsts.object))) { stop("The state specification is not part of the bsts object.") } - + if (state.specification$nseasons == 7 && state.specification$season.duration == 1) { PlotDayOfWeekCycle(bsts.object, burn = burn, time = time, ylim = ylim, diff --git a/R/mixed.frequency.R b/R/mixed.frequency.R index 489cc25..83ead3c 100644 --- a/R/mixed.frequency.R +++ b/R/mixed.frequency.R @@ -128,8 +128,8 @@ bsts.mixed <- function(target.series, if (is.null(regression.prior)) { fine.frequency <- nrow(predictors) / length(target.series) - mean.fine.series <- mean(target.series) / fine.frequency - sd.fine.series <- sd(target.series) / sqrt(fine.frequency) + mean.fine.series <- mean(target.series, na.rm = TRUE) / fine.frequency + sd.fine.series <- sd(target.series, na.rm = TRUE) / sqrt(fine.frequency) ## By default, don't accept any draws of the residual standard ## deviation that are greater than 20% larger than the empirical ## SD. diff --git a/R/update.bsts.R b/R/update.bsts.R new file mode 100644 index 0000000..ca9a007 --- /dev/null +++ b/R/update.bsts.R @@ -0,0 +1,33 @@ +update.bsts <- function(model, newdata, trials.or.exposure=1, na.action=na.exclude, seed = NULL) { + ## Update the latent state draws for a bsts model in response to new data. + ## This is essentially "rolling" the state forward. Parameter draws are not + ## updated, but future calls to "predict" jump off from the update state + ## rather than the state saved at train time. + + stopifnot(inherits(model, "bsts")) + if (model$has.regression) { + stopifnot(inherits(newdata, "dataframe")) + } else { + stopifnot(is.numeric(newdata)) + } + + update.data <- .FormatBstsUpdateData(model, newdata, trials.or.exposure=trials.or.exposure, na.action=na.action) + + updates <- .Call("analysis_common_r_update_bsts_final_state_", + model, + update.data, + seed = seed) + + stopifnot(is.matrix(updates)) + return(updates) +} + +###====================================================================== + +.FormatBstsUpdateData <- function(object, newdata, trials.or.exposure, na.action) { + formatted.data <- .FormatBstsPredictionData(object, newdata, trials.or.exposure, na.action) + ## TODO(steve): This will work for the POC, but lots of edge cases to handle here. + formatted.data$response <- newdata + formatted.data$response.is.observed <- !is.na(newdata) + return(formatted.data) +} diff --git a/inst/tests/tests/testthat/test-ar.R b/inst/tests/tests/testthat/test-ar.R new file mode 100644 index 0000000..bb607d9 --- /dev/null +++ b/inst/tests/tests/testthat/test-ar.R @@ -0,0 +1,21 @@ +library(bsts) +library(testthat) + +test_that("AddAr produces nonzero coefficients", { + sample.size <- 100 + residual.sd <- .001 + # Actual values of the AR coefficients + true.phi <- c(-.7, .3, .15) + ar <- arima.sim(model = list(ar = true.phi), n = sample.size, sd = 3) + ## Layer some noise on top of the AR process. + y <- ar + rnorm(sample.size, 0, residual.sd) + ss <- AddAr(list(), lags = 3, sigma.prior = SdPrior(3.0, 1.0)) + # Fit the model with knowledge with residual.sd essentially fixed at the true + # value. + model <- bsts(y, + state.specification = ss, + niter = 10, + prior = SdPrior(residual.sd, 100000)) + + expect_true(any(model$AR3.coefficients != 0)) +}) diff --git a/inst/tests/tests/testthat/test-student.R b/inst/tests/tests/testthat/test-student.R new file mode 100644 index 0000000..8e39355 --- /dev/null +++ b/inst/tests/tests/testthat/test-student.R @@ -0,0 +1,153 @@ +library(bsts) +library(testthat) + +test_that("Student work without regression.", { + data(AirPassengers) + y <- log(AirPassengers) + ss <- AddLocalLinearTrend(list(), y) + ss <- AddSeasonal(ss, y, nseasons = 12) + model <- bsts(y, state.specification = ss, niter = 200, ping = -1, + family="student") + full.pred <- predict(model, horizon = 12, burn = 100) + + training <- window(y, end = c(1959, 12)) + small.model <- bsts(training, state.specification = ss, niter = 200, + ping = -1, family="student") + pred.holdout <- predict(small.model, horizon = 12) + expect_equal(length(pred.holdout$original.series), length(y) - 12) + + updated.pred <- predict(small.model, horizon = 12, olddata = y) + expect_equal(updated.pred$original.series, y) + expect_equal(length(updated.pred$original.series), length(full.pred$original.series)) +}) + + +test_that("Student predictions work with regression.", { + data(iclaims) + training <- initial.claims[1:402, ] + holdout1 <- initial.claims[403:450, ] + holdout2 <- initial.claims[451:456, ] + + ss <- AddLocalLinearTrend(list(), training$iclaimsNSA) + ss <- AddSeasonal(ss, training$iclaimsNSA, nseasons = 52) + model <- bsts(iclaimsNSA ~ ., state.specification = ss, data = + training, niter = 100, family = "student") + + ## Predict the holdout set given the training set. + ## This is really fast, because we can use saved state from the MCMC + ## algorithm. + pred.full <- predict(model, newdata = rbind(holdout1, holdout2)) + expect_equal(length(pred.full), 5) + expect_equal(names(pred.full), c("mean", "median", "interval", + "distribution", "original.series")) + expect_equal(length(pred.full$mean), 54) + expect_true(is.matrix(pred.full$distribution)) + expect_equal(ncol(pred.full$distribution), 54) + + ## Predict holdout 2, given training and holdout1. + ## This is much slower because we need to re-filter the 'olddata' before + ## simulating the predictions. + pred.update <- predict(model, newdata = holdout2, + olddata = rbind(training, holdout1)) + + expect_equal(length(pred.update), 5) + expect_equal(names(pred.update), c("mean", "median", "interval", + "distribution", "original.series")) + expect_equal(length(pred.update$mean), 6) + expect_true(is.matrix(pred.update$distribution)) + expect_equal(ncol(pred.update$distribution), 6) +}) + +test_that("Off by one error is solved", { + # Predictions for regression models experienced an off-by-one error. This + # test demonstrates that the error is solved. + + ## Simulate data. The pattern is a just a day-of-week pattern with + ## substantial day-to-day variation. + N = 7*52 + dateseq <- seq(as.Date("2000-01-02"), length.out = N, by = "days") + proportions <- c(0.005, 0.14, 0.17, 0.23, 0.265, 0.18, 0.01) + pMatrix <- matrix(0, ncol = 1, nrow = N) + for (i in 1:7) { + day.indicator <- which(weekdays(dateseq, FALSE) == weekday.names[i]) + pMatrix[day.indicator, 1] = proportions[i] + } + set.seed(102) + weekly.sim <- arima.sim(list(ma=-0.1), n = 52, sd = 0.01) + weekly.sim <- exp(diffinv(weekly.sim, differences = 1, xi = log(200)))[-1] +# plot(weekly.sim, type = "l") + + seqindx <- c(seq(1, N, by = 7), N + 1) + daily.sim.allocate <- do.call(rbind, lapply(1:(length(seqindx) - 1), function(i){ + cbind(pMatrix[seqindx[i]:(seqindx[i + 1] - 1)] * weekly.sim[i]) + })) + # add a holiday effect + independence <- which(dateseq == "2000-07-04") + daily.sim.allocate[independence] <- daily.sim.allocate[independence] * 0.1 + xreg <- rep(0, N) + xreg[independence] <- 1 + + # colnames(xreg) <- "independence" + dat <- data.frame(y = as.numeric(daily.sim.allocate), independence = xreg, + days = weekdays(dateseq), stringsAsFactors = FALSE) + + rownames(dat) <- dateseq + # dat <- zoo(dat, dateseq) + # plot(dat[,1], type = "l") + tail(dat[1:(independence+24),]) + + ## estimation part + spec <- AddLocalLevel(list(), y = as.numeric(dat[1:(independence+24),1])) + spec <- AddSeasonal(spec, y = as.numeric(dat[1:(independence+24),1]), + nseasons = 7) + + train <- 1:(independence + 24) + + # estimate with regressor and intercept + niter <- 250 + seed <- 8675309 + mod1 <- bsts(y~independence, data = dat[train, 1:2], family = "student", + state.specification = spec, niter = niter, seed = seed) + + # estimate with regressor and no intercept + ## mod2 <- bsts(y~independence-1, data = dat[train, 1:2], + ## state.specification = spec, niter = niter, seed = seed) + + # estimate with no regressor and no intercept + mod3 <- bsts(as.numeric(dat[train, 1]), family = "student", + state.specification = spec, niter = niter, seed = seed) + + ## forecast part + test <- (independence + 25):N + horizon <- length(test) + # the newdata has only zeros. + newdata <- dat[test, 2, drop = FALSE] + + p1 <- predict(mod1, newdata = newdata, seed = seed) +# p2 <- predict(mod2, newdata = newdata, seed = seed) + p3 <- predict(mod3, horizon = horizon, seed = seed) + + # Comparison of output. The predictions from models 1 and 2 (which have a + # regression component) are off by one when compared to the model sans + # regression component. + comparison <- data.frame( + day = weekdays(as.Date(rownames(dat[test, 2, drop = FALSE]))), + reg1 = colMeans(p1$distribution), +# reg2 = colMeans(p2$distribution), + ts = colMeans(p3$distribution), + actual = dat[test, 1]) + + ## > head(comparison) + ## day reg1 reg2 ts actual + ## 1 Saturday 3.034669 3.001731 2.038222 2.153567 + ## 2 Sunday 1.971062 1.922798 1.437442 1.074628 + ## 3 Monday 30.622732 30.597797 29.827300 30.089592 + ## 4 Tuesday 36.785562 36.711367 34.199968 36.537362 + ## 5 Wednesday 49.437153 49.367559 48.581829 49.432902 + ## 6 Thursday 56.878702 56.844302 55.889856 56.955300 + + # In the error state the first row showed reg1 and reg2 with very large values + # because they were using an off-by-1 seasonal effect + expect_true(all(abs(comparison$reg1 - comparison$ts) < 4)) + expect_true(cor(comparison$reg1, comparison$ts) > .98) +}) diff --git a/inst/tests/testthat/test-ar.R b/inst/tests/testthat/test-ar.R new file mode 100644 index 0000000..bb607d9 --- /dev/null +++ b/inst/tests/testthat/test-ar.R @@ -0,0 +1,21 @@ +library(bsts) +library(testthat) + +test_that("AddAr produces nonzero coefficients", { + sample.size <- 100 + residual.sd <- .001 + # Actual values of the AR coefficients + true.phi <- c(-.7, .3, .15) + ar <- arima.sim(model = list(ar = true.phi), n = sample.size, sd = 3) + ## Layer some noise on top of the AR process. + y <- ar + rnorm(sample.size, 0, residual.sd) + ss <- AddAr(list(), lags = 3, sigma.prior = SdPrior(3.0, 1.0)) + # Fit the model with knowledge with residual.sd essentially fixed at the true + # value. + model <- bsts(y, + state.specification = ss, + niter = 10, + prior = SdPrior(residual.sd, 100000)) + + expect_true(any(model$AR3.coefficients != 0)) +}) diff --git a/inst/tests/testthat/test-student.R b/inst/tests/testthat/test-student.R new file mode 100644 index 0000000..8e39355 --- /dev/null +++ b/inst/tests/testthat/test-student.R @@ -0,0 +1,153 @@ +library(bsts) +library(testthat) + +test_that("Student work without regression.", { + data(AirPassengers) + y <- log(AirPassengers) + ss <- AddLocalLinearTrend(list(), y) + ss <- AddSeasonal(ss, y, nseasons = 12) + model <- bsts(y, state.specification = ss, niter = 200, ping = -1, + family="student") + full.pred <- predict(model, horizon = 12, burn = 100) + + training <- window(y, end = c(1959, 12)) + small.model <- bsts(training, state.specification = ss, niter = 200, + ping = -1, family="student") + pred.holdout <- predict(small.model, horizon = 12) + expect_equal(length(pred.holdout$original.series), length(y) - 12) + + updated.pred <- predict(small.model, horizon = 12, olddata = y) + expect_equal(updated.pred$original.series, y) + expect_equal(length(updated.pred$original.series), length(full.pred$original.series)) +}) + + +test_that("Student predictions work with regression.", { + data(iclaims) + training <- initial.claims[1:402, ] + holdout1 <- initial.claims[403:450, ] + holdout2 <- initial.claims[451:456, ] + + ss <- AddLocalLinearTrend(list(), training$iclaimsNSA) + ss <- AddSeasonal(ss, training$iclaimsNSA, nseasons = 52) + model <- bsts(iclaimsNSA ~ ., state.specification = ss, data = + training, niter = 100, family = "student") + + ## Predict the holdout set given the training set. + ## This is really fast, because we can use saved state from the MCMC + ## algorithm. + pred.full <- predict(model, newdata = rbind(holdout1, holdout2)) + expect_equal(length(pred.full), 5) + expect_equal(names(pred.full), c("mean", "median", "interval", + "distribution", "original.series")) + expect_equal(length(pred.full$mean), 54) + expect_true(is.matrix(pred.full$distribution)) + expect_equal(ncol(pred.full$distribution), 54) + + ## Predict holdout 2, given training and holdout1. + ## This is much slower because we need to re-filter the 'olddata' before + ## simulating the predictions. + pred.update <- predict(model, newdata = holdout2, + olddata = rbind(training, holdout1)) + + expect_equal(length(pred.update), 5) + expect_equal(names(pred.update), c("mean", "median", "interval", + "distribution", "original.series")) + expect_equal(length(pred.update$mean), 6) + expect_true(is.matrix(pred.update$distribution)) + expect_equal(ncol(pred.update$distribution), 6) +}) + +test_that("Off by one error is solved", { + # Predictions for regression models experienced an off-by-one error. This + # test demonstrates that the error is solved. + + ## Simulate data. The pattern is a just a day-of-week pattern with + ## substantial day-to-day variation. + N = 7*52 + dateseq <- seq(as.Date("2000-01-02"), length.out = N, by = "days") + proportions <- c(0.005, 0.14, 0.17, 0.23, 0.265, 0.18, 0.01) + pMatrix <- matrix(0, ncol = 1, nrow = N) + for (i in 1:7) { + day.indicator <- which(weekdays(dateseq, FALSE) == weekday.names[i]) + pMatrix[day.indicator, 1] = proportions[i] + } + set.seed(102) + weekly.sim <- arima.sim(list(ma=-0.1), n = 52, sd = 0.01) + weekly.sim <- exp(diffinv(weekly.sim, differences = 1, xi = log(200)))[-1] +# plot(weekly.sim, type = "l") + + seqindx <- c(seq(1, N, by = 7), N + 1) + daily.sim.allocate <- do.call(rbind, lapply(1:(length(seqindx) - 1), function(i){ + cbind(pMatrix[seqindx[i]:(seqindx[i + 1] - 1)] * weekly.sim[i]) + })) + # add a holiday effect + independence <- which(dateseq == "2000-07-04") + daily.sim.allocate[independence] <- daily.sim.allocate[independence] * 0.1 + xreg <- rep(0, N) + xreg[independence] <- 1 + + # colnames(xreg) <- "independence" + dat <- data.frame(y = as.numeric(daily.sim.allocate), independence = xreg, + days = weekdays(dateseq), stringsAsFactors = FALSE) + + rownames(dat) <- dateseq + # dat <- zoo(dat, dateseq) + # plot(dat[,1], type = "l") + tail(dat[1:(independence+24),]) + + ## estimation part + spec <- AddLocalLevel(list(), y = as.numeric(dat[1:(independence+24),1])) + spec <- AddSeasonal(spec, y = as.numeric(dat[1:(independence+24),1]), + nseasons = 7) + + train <- 1:(independence + 24) + + # estimate with regressor and intercept + niter <- 250 + seed <- 8675309 + mod1 <- bsts(y~independence, data = dat[train, 1:2], family = "student", + state.specification = spec, niter = niter, seed = seed) + + # estimate with regressor and no intercept + ## mod2 <- bsts(y~independence-1, data = dat[train, 1:2], + ## state.specification = spec, niter = niter, seed = seed) + + # estimate with no regressor and no intercept + mod3 <- bsts(as.numeric(dat[train, 1]), family = "student", + state.specification = spec, niter = niter, seed = seed) + + ## forecast part + test <- (independence + 25):N + horizon <- length(test) + # the newdata has only zeros. + newdata <- dat[test, 2, drop = FALSE] + + p1 <- predict(mod1, newdata = newdata, seed = seed) +# p2 <- predict(mod2, newdata = newdata, seed = seed) + p3 <- predict(mod3, horizon = horizon, seed = seed) + + # Comparison of output. The predictions from models 1 and 2 (which have a + # regression component) are off by one when compared to the model sans + # regression component. + comparison <- data.frame( + day = weekdays(as.Date(rownames(dat[test, 2, drop = FALSE]))), + reg1 = colMeans(p1$distribution), +# reg2 = colMeans(p2$distribution), + ts = colMeans(p3$distribution), + actual = dat[test, 1]) + + ## > head(comparison) + ## day reg1 reg2 ts actual + ## 1 Saturday 3.034669 3.001731 2.038222 2.153567 + ## 2 Sunday 1.971062 1.922798 1.437442 1.074628 + ## 3 Monday 30.622732 30.597797 29.827300 30.089592 + ## 4 Tuesday 36.785562 36.711367 34.199968 36.537362 + ## 5 Wednesday 49.437153 49.367559 48.581829 49.432902 + ## 6 Thursday 56.878702 56.844302 55.889856 56.955300 + + # In the error state the first row showed reg1 and reg2 with very large values + # because they were using an off-by-1 seasonal effect + expect_true(all(abs(comparison$reg1 - comparison$ts) < 4)) + expect_true(cor(comparison$reg1, comparison$ts) > .98) +}) diff --git a/man/bsts.Rd b/man/bsts.Rd index 57c9779..632e2d6 100755 --- a/man/bsts.Rd +++ b/man/bsts.Rd @@ -217,7 +217,7 @@ bsts(formula, Durbin and Koopman (2001), "Time series analysis by state space methods", Oxford University Press. - Goerge and McCulloch (1997) + George and McCulloch (1997) "Approaches for Bayesian variable selection", Statistica Sinica pp 339--374. @@ -356,7 +356,7 @@ model <- bsts(shark$Attacks, ss.level, niter = 500, ## Poisson data can have an 'exposure' as the second column of a ## two-column matrix. model <- bsts(cbind(shark$Attacks, shark$Population / 1000), - state.specification = ss.level, niter = 500, + state.specification = ss.level, niter = 500, family = "poisson", ping = 250, seed = 8675309) } diff --git a/man/dirm.Rd b/man/dirm.Rd index 04395b6..40dd75f 100644 --- a/man/dirm.Rd +++ b/man/dirm.Rd @@ -155,7 +155,7 @@ dirm(formula, Durbin and Koopman (2001), "Time series analysis by state space methods", Oxford University Press. - Goerge and McCulloch (1997) + George and McCulloch (1997) "Approaches for Bayesian variable selection", Statistica Sinica pp 339--374. diff --git a/man/mbsts.Rd b/man/mbsts.Rd index 8edac8b..f251ffb 100644 --- a/man/mbsts.Rd +++ b/man/mbsts.Rd @@ -20,7 +20,7 @@ \Rdversion{1.1} \description{ Fit a multivariate Bayesian structural time series model, also known - as a "dynamic factor model." + as a "dynamic factor model." ** NOTE ** This code is experimental. Please feel free to experiment with it and report any bugs to the maintainer. Expect it to improve @@ -41,7 +41,7 @@ mbsts(formula, ping = niter / 10, data.format = c("long", "wide"), seed = NULL, - ...) + ...) } \arguments{ @@ -100,7 +100,7 @@ mbsts(formula, \item{series.id}{A factor (or object coercible to factor) indicating the series to which each observation in "long" format belongs. This argument is ignored for data in "wide" format. } - + \item{prior}{A list of \code{\link{SpikeSlabPrior}} objects, one for each time series. Or this argument can be NULL in which case a default prior will be used. Note that the prior is on both the @@ -108,7 +108,7 @@ mbsts(formula, \item{opts}{A list containing model options. This is currently only used for debugging, so leave this as \code{NULL}.} - + \item{contrasts}{An optional list containing the names of contrast functions to use when converting factors numeric variables in a regression formula. This argument works exactly as it does in @@ -140,7 +140,7 @@ mbsts(formula, in time) format. For \code{"long"} see \code{timestamps} and \code{series.id}. } - + \item{seed}{An integer to use as the random seed for the underlying C++ code. If \code{NULL} then the seed will be set using the clock.} @@ -185,10 +185,10 @@ mbsts(formula, Durbin and Koopman (2001), "Time series analysis by state space methods", Oxford University Press. - Goerge and McCulloch (1997) + George and McCulloch (1997) "Approaches for Bayesian variable selection", Statistica Sinica pp 339--374. - + } \author{ diff --git a/src/state_space_student_model_manager.cc b/src/state_space_student_model_manager.cc index 75e000f..b8e5479 100644 --- a/src/state_space_student_model_manager.cc +++ b/src/state_space_student_model_manager.cc @@ -219,12 +219,13 @@ namespace BOOM { int SSSMM::UnpackForecastData(SEXP r_prediction_data) { UnpackForecastTimestamps(r_prediction_data); - SEXP r_horizon = getListElement(r_prediction_data, "horizon"); - if (Rf_isNull(r_horizon)) { - forecast_predictors_ = ToBoomMatrix(getListElement( - r_prediction_data, "predictors")); + SEXP r_predictors = getListElement(r_prediction_data, "predictors"); + if (!Rf_isNull(r_predictors)) { + forecast_predictors_ = ToBoomMatrix(r_predictors); } else { - forecast_predictors_ = Matrix(Rf_asInteger(r_horizon), 1, 1.0); + int horizon = Rf_asInteger(getListElement( + r_prediction_data, "horizon")); + forecast_predictors_ = Matrix(horizon, 1, 1.0); } return forecast_predictors_.nrow(); } diff --git a/tests/testthat/test-ar.R b/tests/testthat/test-ar.R new file mode 100644 index 0000000..bb607d9 --- /dev/null +++ b/tests/testthat/test-ar.R @@ -0,0 +1,21 @@ +library(bsts) +library(testthat) + +test_that("AddAr produces nonzero coefficients", { + sample.size <- 100 + residual.sd <- .001 + # Actual values of the AR coefficients + true.phi <- c(-.7, .3, .15) + ar <- arima.sim(model = list(ar = true.phi), n = sample.size, sd = 3) + ## Layer some noise on top of the AR process. + y <- ar + rnorm(sample.size, 0, residual.sd) + ss <- AddAr(list(), lags = 3, sigma.prior = SdPrior(3.0, 1.0)) + # Fit the model with knowledge with residual.sd essentially fixed at the true + # value. + model <- bsts(y, + state.specification = ss, + niter = 10, + prior = SdPrior(residual.sd, 100000)) + + expect_true(any(model$AR3.coefficients != 0)) +}) diff --git a/tests/testthat/test-dirm.R b/tests/testthat/test-dirm.R index b0a3d55..c9bab22 100644 --- a/tests/testthat/test-dirm.R +++ b/tests/testthat/test-dirm.R @@ -1,7 +1,7 @@ set.seed(8675309) library(bsts) - +library(testthat) SimulateDirmData <- function(observation.sd = 1, trend.sd = .1, time.dimension = 100, nobs.per.period = 3, xdim = 4) { @@ -35,6 +35,6 @@ model2 <- dirm(response ~ predictors, ss, niter = 50, data = data, test_that("Models are identical", { expect_that(model, is_a("DynamicIntercept")) expect_that(model$coefficients, is_a("matrix")) - expect_true(all(model$coefficients == model2$coefficients)) - expect_true(all(model$sigma.obs == model2$sigma.obs)) - }) + expect_true(all(abs(model$coefficients - model2$coefficients) < 1e-8)) + expect_true(all(abs(model$sigma.obs - model2$sigma.obs) < 1e-8)) +}) diff --git a/tests/testthat/test-prediction-errors.R b/tests/testthat/test-prediction-errors.R index a60a855..27329f4 100644 --- a/tests/testthat/test-prediction-errors.R +++ b/tests/testthat/test-prediction-errors.R @@ -15,7 +15,7 @@ test_that("Scaled prediction errors are reasonable.", { ## The errors should be highly but not perfectly correlated. expect_gt(cor(se[[1]][30, ], errors[[1]][30, ]), .8) - expect_lt(cor(se[[1]][30, ], errors[[1]][30, ]), 1.0) + expect_lte(cor(se[[1]][30, ], errors[[1]][30, ]), 1.0) }) test_that("Prediction errors work for student family", { diff --git a/tests/testthat/test-regressionholiday.R b/tests/testthat/test-regressionholiday.R index 45bf74e..77c404f 100644 --- a/tests/testthat/test-regressionholiday.R +++ b/tests/testthat/test-regressionholiday.R @@ -60,7 +60,7 @@ holiday.list <- list(memorial.day, labor.day, presidents.day) number.of.holidays <- length(holiday.list) ## In a real example you'd want more than 100 MCMC iterations. -niter <- 200 +niter <- 500 test_that("regression holiday model works", { ss <- AddLocalLevel(list(), y) @@ -80,7 +80,7 @@ test_that("regression holiday model works", { test_that("hierarchical model runs", { ## Try again with some shrinkage. With only 3 holidays there won't be much - ## shrinkage. + ## shrinkage. ss2 <- AddLocalLevel(list(), y) ss2 <- AddHierarchicalRegressionHoliday(ss2, y, holiday.list = holiday.list) model2 <- bsts(y, state.specification = ss2, niter = niter, seed = 8675309, ping = niter) diff --git a/tests/testthat/test-student.R b/tests/testthat/test-student.R new file mode 100644 index 0000000..8e39355 --- /dev/null +++ b/tests/testthat/test-student.R @@ -0,0 +1,153 @@ +library(bsts) +library(testthat) + +test_that("Student work without regression.", { + data(AirPassengers) + y <- log(AirPassengers) + ss <- AddLocalLinearTrend(list(), y) + ss <- AddSeasonal(ss, y, nseasons = 12) + model <- bsts(y, state.specification = ss, niter = 200, ping = -1, + family="student") + full.pred <- predict(model, horizon = 12, burn = 100) + + training <- window(y, end = c(1959, 12)) + small.model <- bsts(training, state.specification = ss, niter = 200, + ping = -1, family="student") + pred.holdout <- predict(small.model, horizon = 12) + expect_equal(length(pred.holdout$original.series), length(y) - 12) + + updated.pred <- predict(small.model, horizon = 12, olddata = y) + expect_equal(updated.pred$original.series, y) + expect_equal(length(updated.pred$original.series), length(full.pred$original.series)) +}) + + +test_that("Student predictions work with regression.", { + data(iclaims) + training <- initial.claims[1:402, ] + holdout1 <- initial.claims[403:450, ] + holdout2 <- initial.claims[451:456, ] + + ss <- AddLocalLinearTrend(list(), training$iclaimsNSA) + ss <- AddSeasonal(ss, training$iclaimsNSA, nseasons = 52) + model <- bsts(iclaimsNSA ~ ., state.specification = ss, data = + training, niter = 100, family = "student") + + ## Predict the holdout set given the training set. + ## This is really fast, because we can use saved state from the MCMC + ## algorithm. + pred.full <- predict(model, newdata = rbind(holdout1, holdout2)) + expect_equal(length(pred.full), 5) + expect_equal(names(pred.full), c("mean", "median", "interval", + "distribution", "original.series")) + expect_equal(length(pred.full$mean), 54) + expect_true(is.matrix(pred.full$distribution)) + expect_equal(ncol(pred.full$distribution), 54) + + ## Predict holdout 2, given training and holdout1. + ## This is much slower because we need to re-filter the 'olddata' before + ## simulating the predictions. + pred.update <- predict(model, newdata = holdout2, + olddata = rbind(training, holdout1)) + + expect_equal(length(pred.update), 5) + expect_equal(names(pred.update), c("mean", "median", "interval", + "distribution", "original.series")) + expect_equal(length(pred.update$mean), 6) + expect_true(is.matrix(pred.update$distribution)) + expect_equal(ncol(pred.update$distribution), 6) +}) + +test_that("Off by one error is solved", { + # Predictions for regression models experienced an off-by-one error. This + # test demonstrates that the error is solved. + + ## Simulate data. The pattern is a just a day-of-week pattern with + ## substantial day-to-day variation. + N = 7*52 + dateseq <- seq(as.Date("2000-01-02"), length.out = N, by = "days") + proportions <- c(0.005, 0.14, 0.17, 0.23, 0.265, 0.18, 0.01) + pMatrix <- matrix(0, ncol = 1, nrow = N) + for (i in 1:7) { + day.indicator <- which(weekdays(dateseq, FALSE) == weekday.names[i]) + pMatrix[day.indicator, 1] = proportions[i] + } + set.seed(102) + weekly.sim <- arima.sim(list(ma=-0.1), n = 52, sd = 0.01) + weekly.sim <- exp(diffinv(weekly.sim, differences = 1, xi = log(200)))[-1] +# plot(weekly.sim, type = "l") + + seqindx <- c(seq(1, N, by = 7), N + 1) + daily.sim.allocate <- do.call(rbind, lapply(1:(length(seqindx) - 1), function(i){ + cbind(pMatrix[seqindx[i]:(seqindx[i + 1] - 1)] * weekly.sim[i]) + })) + # add a holiday effect + independence <- which(dateseq == "2000-07-04") + daily.sim.allocate[independence] <- daily.sim.allocate[independence] * 0.1 + xreg <- rep(0, N) + xreg[independence] <- 1 + + # colnames(xreg) <- "independence" + dat <- data.frame(y = as.numeric(daily.sim.allocate), independence = xreg, + days = weekdays(dateseq), stringsAsFactors = FALSE) + + rownames(dat) <- dateseq + # dat <- zoo(dat, dateseq) + # plot(dat[,1], type = "l") + tail(dat[1:(independence+24),]) + + ## estimation part + spec <- AddLocalLevel(list(), y = as.numeric(dat[1:(independence+24),1])) + spec <- AddSeasonal(spec, y = as.numeric(dat[1:(independence+24),1]), + nseasons = 7) + + train <- 1:(independence + 24) + + # estimate with regressor and intercept + niter <- 250 + seed <- 8675309 + mod1 <- bsts(y~independence, data = dat[train, 1:2], family = "student", + state.specification = spec, niter = niter, seed = seed) + + # estimate with regressor and no intercept + ## mod2 <- bsts(y~independence-1, data = dat[train, 1:2], + ## state.specification = spec, niter = niter, seed = seed) + + # estimate with no regressor and no intercept + mod3 <- bsts(as.numeric(dat[train, 1]), family = "student", + state.specification = spec, niter = niter, seed = seed) + + ## forecast part + test <- (independence + 25):N + horizon <- length(test) + # the newdata has only zeros. + newdata <- dat[test, 2, drop = FALSE] + + p1 <- predict(mod1, newdata = newdata, seed = seed) +# p2 <- predict(mod2, newdata = newdata, seed = seed) + p3 <- predict(mod3, horizon = horizon, seed = seed) + + # Comparison of output. The predictions from models 1 and 2 (which have a + # regression component) are off by one when compared to the model sans + # regression component. + comparison <- data.frame( + day = weekdays(as.Date(rownames(dat[test, 2, drop = FALSE]))), + reg1 = colMeans(p1$distribution), +# reg2 = colMeans(p2$distribution), + ts = colMeans(p3$distribution), + actual = dat[test, 1]) + + ## > head(comparison) + ## day reg1 reg2 ts actual + ## 1 Saturday 3.034669 3.001731 2.038222 2.153567 + ## 2 Sunday 1.971062 1.922798 1.437442 1.074628 + ## 3 Monday 30.622732 30.597797 29.827300 30.089592 + ## 4 Tuesday 36.785562 36.711367 34.199968 36.537362 + ## 5 Wednesday 49.437153 49.367559 48.581829 49.432902 + ## 6 Thursday 56.878702 56.844302 55.889856 56.955300 + + # In the error state the first row showed reg1 and reg2 with very large values + # because they were using an off-by-1 seasonal effect + expect_true(all(abs(comparison$reg1 - comparison$ts) < 4)) + expect_true(cor(comparison$reg1, comparison$ts) > .98) +})