diff --git a/DESCRIPTION b/DESCRIPTION index ddfc806..ad8c91f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,19 +1,19 @@ Package: actuar Type: Package Title: Actuarial functions -Version: 0.9-7 -Date: 2008-03-25 -Author: Vincent Goulet, Sébastien Auclair, Christophe Dutang, Tommy Ouellet, Louis-Philippe Pouliot, Mathieu Pigeon +Version: 1.0-0 +Date: 2008-09-15 +Author: Vincent Goulet, Sébastien Auclair, Christophe Dutang, Xavier Milhaud, Tommy Ouellet, Louis-Philippe Pouliot, Mathieu Pigeon Maintainer: Vincent Goulet URL: http://www.actuar-project.org -Description: Collection of functions and data sets related to - actuarial science applications, mostly loss distributions, risk - theory (including ruin theory), simulation of compound hierarchical - models and credibility theory, for the moment. +Description: Additional actuarial science functionality, mostly in the + fields of loss distributions, risk theory (including ruin theory), + simulation of compound hierarchical models and credibility theory, + for the moment. Depends: R (>= 2.6.0) License: GPL (>= 2) Encoding: latin1 LazyLoad: yes LazyData: yes ZipData: yes -Packaged: Tue Mar 25 15:43:40 2008; vincent +Packaged: Mon Sep 15 13:52:31 2008; vincent diff --git a/NAMESPACE b/NAMESPACE index 523de8c..b9ed5d6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,7 +3,7 @@ useDynLib(actuar, .registration = TRUE) ### Exports export(## Credibility theory - bstraub, cm, + cm, ## Simulation of compound models simul, simpf, severity, unroll, ## Risk theory @@ -82,7 +82,6 @@ S3method(plot, ogive) S3method(plot, ruin) S3method(predict, bstraub) -S3method(predict, bstraub.old) S3method(predict, cm) S3method(predict, hache) S3method(predict, hierarc) @@ -97,6 +96,7 @@ S3method(print, summary.aggregateDist) S3method(print, summary.cm) S3method(quantile, aggregateDist) +S3method(quantile, grouped.data) S3method(severity, default) S3method(severity, portfolio) diff --git a/R/aggregateDist.R b/R/aggregateDist.R index b522bae..d713176 100644 --- a/R/aggregateDist.R +++ b/R/aggregateDist.R @@ -38,7 +38,6 @@ aggregateDist <- FUN <- npower(moments[1], moments[2], moments[3]) comment(FUN) <- "Normal Power approximation" } - else if (method == "simulation") { if (missing(nb.simul)) @@ -48,7 +47,6 @@ aggregateDist <- FUN <- simS(nb.simul, model.freq = model.freq, model.sev = model.sev) comment(FUN) <- "Approximation by simulation" } - else { ## "recursive" and "convolution" cases. Both require a diff --git a/R/bstraub.R b/R/bstraub.R index b82baf8..6b4012f 100644 --- a/R/bstraub.R +++ b/R/bstraub.R @@ -1,13 +1,16 @@ ### ===== actuar: an R package for Actuarial Science ===== ### -### Bühlmann-Straub credibility model calculations +### Bühlmann-Straub credibility model calculations. +### +### Computation of the between variance estimators has been moved to +### external functions bvar.unbiased() and bvar.iterative() to share +### with hache(). ### ### AUTHORS: Vincent Goulet , -### Sébastien Auclair, and Louis-Philippe Pouliot +### Sébastien Auclair, Louis-Philippe Pouliot bstraub <- function(ratios, weights, method = c("unbiased", "iterative"), - tol = sqrt(.Machine$double.eps), maxit = 100, - echo = FALSE, old.format = TRUE) + tol = sqrt(.Machine$double.eps), maxit = 100, echo = FALSE) { ## If weights are not specified, use equal weights as in ## Bühlmann's model. 
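Since bstraub() is no longer exported (see the NAMESPACE change above), a Bühlmann-Straub fit is now obtained through the cm() interface. A minimal usage sketch with the hachemeister data set shipped with the package, mirroring demo/credibility.R further below:

library(actuar)
fit <- cm(~state, hachemeister, ratios = ratio.1:ratio.12,
          weights = weight.1:weight.12)
summary(fit)    # model summary
predict(fit)    # credibility premiums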
@@ -24,7 +27,7 @@ bstraub <- function(ratios, weights, method = c("unbiased", "iterative"), if (nrow(ratios) < 2) stop("there must be more than one node") if (!identical(which(is.na(ratios)), which(is.na(weights)))) - stop("missing values are not in the same positions in weights and in ratios") + stop("missing values are not in the same positions in 'weights' and in 'ratios'") if (all(!weights, na.rm = TRUE)) stop("no available data to fit model") @@ -44,102 +47,50 @@ bstraub <- function(ratios, weights, method = c("unbiased", "iterative"), ## Collective weighted average. weights.ss <- sum(weights.s) - ratios.ww <- sum(weights.s * ratios.w) / weights.ss - ## Estimation of s^2. + ## Estimation of s^2 s2 <- sum(weights * (ratios - ratios.w)^2, na.rm = TRUE) / (ntotal - ncontracts) ## First estimation of a. Always compute the unbiased estimator. - ac <- weights.ss * (sum(weights.s * (ratios.w - ratios.ww)^2) - (ncontracts - 1) * s2) / (weights.ss^2 - sum(weights.s^2)) + a <- bvar.unbiased(ratios.w, weights.s, s2, ncontracts) ## Iterative estimation of a. Compute only if ## 1. asked to in argument; - ## 2. the unbiased estimator is > 0; - ## 3. weights are not all equal (Bühlmann model). + ## 2. weights are not all equal (Bühlmann model). + ## 3. the unbiased estimator is > 0; method <- match.arg(method) - if (method == "iterative") + if (method == "iterative" && + diff(range(weights, na.rm = TRUE)) > .Machine$double.eps^0.5) { - if (ac > 0) - { - if (diff(range(weights, na.rm = TRUE)) > .Machine$double.eps^0.5) - { - if (echo) - { - cat("Iteration\tBetween variance estimator\n") - exp <- expression(cat(" ", count, "\t\t ", at1 <- at, - fill = TRUE)) - } - else - exp <- expression(at1 <- at) - - at <- ac - count <- 0 - repeat - { - eval(exp) - - if (maxit < (count <- count + 1)) - { - warning("maximum number of iterations reached before obtaining convergence") - break - } - - cred <- 1 / (1 + s2/(weights.s * at)) - ratios.zw <- sum(cred * ratios.w) / sum(cred) - at <- sum(cred * (ratios.w - ratios.zw)^2) / (ncontracts - 1) - - if (abs((at - at1)/at1) < tol) - break - } - } + a <- + if (a > 0) + bvar.iterative(ratios.w, weights.s, s2, ncontracts, start = a, + tol = tol, maxit = maxit, echo = echo) else - at <- ac - } - else - at <- 0 - a <- at - } - else - { - a <- ac - at <- NULL + 0 } ## Final credibility factors and estimator of the collective mean. 
if (a > 0)
    {
        cred <- 1 / (1 + s2/(weights.s * a))
-        ratios.zw <- sum(cred * ratios.w) / sum(cred)
+        ratios.zw <- drop(crossprod(cred, ratios.w)) / sum(cred)
    }
    else
    {
        cred <- numeric(length(weights.s))
-        ratios.zw <- ratios.ww
+        ratios.zw <- drop(crossprod(weights.s, ratios.w)) / sum(weights.s)
    }

-    if (old.format)
-    {
-        warning("this output format is deprecated")
-        structure(list(individual = ratios.w,
-                       collective = ratios.zw,
-                       weights = weights.s,
-                       s2 = s2,
-                       unbiased = ac,
-                       iterative = at,
-                       cred = cred),
-                  class = "bstraub.old",
-                  model = "Buhlmann-Straub")
-    }
-    else
-        structure(list(means = list(ratios.zw, ratios.w),
-                       weights = list(if (a > 0) sum(cred) else weights.ss, weights.s),
-                       unbiased = c(ac, s2),
-                       iterative = if (!is.null(at)) c(at, s2),
-                       cred = cred,
-                       nodes = list(nrow(weights))),
-                  class = "bstraub",
-                  model = "Buhlmann-Straub")
+    structure(list(means = list(ratios.zw, ratios.w),
+                   weights = list(if (a > 0) sum(cred) else weights.ss, weights.s),
+                   unbiased = if (method == "unbiased") c(a, s2),
+                   iterative = if (method == "iterative") c(a, s2),
+                   cred = cred,
+                   nodes = list(nrow(weights))),
+              class = "bstraub",
+              model = "Buhlmann-Straub")
 }

 predict.bstraub.old <- function(object, ...)
@@ -147,3 +98,46 @@ predict.bstraub.old <- function(object, ...)

 predict.bstraub <- function(object, levels = NULL, newdata, ...)
     object$means[[1]] + object$cred * (object$means[[2]] - object$means[[1]])
+
+bvar.unbiased <- function(x, w, within, n)
+{
+    w.s <- sum(w)
+    x.w <- drop(crossprod(w, x)) / w.s
+
+    w.s * (drop(crossprod(w, (x - x.w)^2)) - (n - 1) * within) / (w.s^2 - sum(w^2))
+}
+
+bvar.iterative <- function(x, w, within, n, start,
+                           tol = sqrt(.Machine$double.eps), maxit = 100,
+                           echo = FALSE)
+{
+    if (echo)
+    {
+        cat("Iteration\tBetween variance estimator\n")
+        exp <- expression(cat(" ", count, "\t\t ", a1 <- a, fill = TRUE))
+    }
+    else
+        exp <- expression(a1 <- a)
+
+    a <- start
+    count <- 0
+
+    repeat
+    {
+        eval(exp)
+
+        if (maxit < (count <- count + 1))
+        {
+            warning("maximum number of iterations reached before obtaining convergence")
+            break
+        }
+
+        cred <- 1 / (1 + within/(w * a))
+        x.z <- drop(crossprod(cred, x)) / sum(cred)
+        a <- drop(crossprod(cred, (x - x.z)^2)) / (n - 1)
+
+        if (abs((a - a1)/a1) < tol)
+            break
+    }
+    a
+}
diff --git a/R/cm.R b/R/cm.R
index 286a341..e43ec39 100644
--- a/R/cm.R
+++ b/R/cm.R
@@ -5,7 +5,8 @@
 ### AUTHORS: Louis-Philippe Pouliot, Tommy Ouellet,
 ### Vincent Goulet .

-cm <- function(formula, data, ratios, weights, subset, xreg = NULL,
+cm <- function(formula, data, ratios, weights, subset,
+               regformula = NULL, regdata, adj.intercept = FALSE,
                method = c("Buhlmann-Gisler", "Ohlsson", "iterative"),
                tol = sqrt(.Machine$double.eps), maxit = 100, echo = FALSE)
@@ -38,7 +39,7 @@ cm <- function(formula, data, ratios, weights, subset, xreg = NULL,
     ##
     if (any(duplicated(level.numbers)))
         stop("unsupported interactions in 'formula'")
-    if (nlevels > 1 && !is.null(xreg))
+    if (nlevels > 1 && !is.null(regformula))
         stop("hierarchical regression models not supported")
     if (missing(ratios) & !missing(weights))
         stop("ratios have to be supplied if weights are")
@@ -107,30 +108,44 @@ cm <- function(formula, data, ratios, weights, subset, xreg = NULL,
             as.matrix(eval(cl, parent.frame())) # weights as matrix
     }

-    ## Dispatch to appropriate calculation function
+    ## == DISPATCH TO APPROPRIATE CALCULATION FUNCTION ==
+    ##
+    ## Buhlmann-Straub models are handled by bstraub(), regression
+    ## models by hache() and hierarchical models by hierarc().
if (nlevels < 2)                  # one-dimensional model
     {
-        if (is.null(xreg))            # Buhlmann-Straub
-        {
-            ## bstraub() accepts only "unbiased" and "iterative" for
-            ## argument 'method'.
-            method <- match.arg(method)
-            if (method == "Buhlmann-Gisler" || method == "Ohlsson")
-                method <- "unbiased"
+        ## One-dimensional models accept only "unbiased" and
+        ## "iterative" for argument 'method'.
+        method <- match.arg(method)
+        if (method == "Buhlmann-Gisler" || method == "Ohlsson")
+            method <- "unbiased"

-            ## *** The 'old.format = FALSE' argument is necessary in
-            ## *** the deprecation phase of the output format of ***
-            ## bstraub(). To delete later.
+        if (is.null(regformula))      # Buhlmann-Straub
+        {
             res <- bstraub(ratios, weights, method = method,
-                           tol = tol, maxit = maxit, echo = echo,
-                           old.format = FALSE)
+                           tol = tol, maxit = maxit, echo = echo)
         }
         else                          # Hachemeister
-            res <- hache(ratios, weights, xreg, tol = tol,
-                         maxit = maxit, echo = echo)
+        {
+            ## If regression model is actually empty or has only an
+            ## intercept, call bstraub().
+            trf <- terms(regformula)
+            res <-
+                if (length(attr(trf, "factors")) == 0)
+                {
+                    warning("empty regression model; fitting with Buhlmann-Straub's model")
+                    bstraub(ratios, weights, method = method,
+                            tol = tol, maxit = maxit, echo = echo)
+                }
+                else
+                    hache(ratios, weights, regformula, regdata,
+                          adj.intercept = adj.intercept,
+                          method = method, tol = tol,
+                          maxit = maxit, echo = echo)
+        }

-        ## Add quantities not taken into account in calculation
-        ## functions to results list.
+        ## Add missing quantities to results.
         res$classification <- levs
         res$ordering <- list(seq_along(levs))
     }
diff --git a/R/coverage.R b/R/coverage.R
index 720d698..31b730b 100644
--- a/R/coverage.R
+++ b/R/coverage.R
@@ -15,6 +15,12 @@ coverage <- function(pdf, cdf, deductible = 0, franchise = FALSE,
 {
     Call <- match.call()

+    ## First determine if the cdf is needed or not. It is when there
+    ## is a deductible or a limit and, of course, if the output
+    ## function should compute the cdf.
+    is.cdf <- missing(pdf) || is.null(pdf)               # return cdf?
+    needs.cdf <- any(deductible > 0, limit < Inf, is.cdf) # cdf needed?
+
     ## Sanity check of arguments
     if (any(deductible < 0, limit < 0, coinsurance < 0, inflation < 0))
         stop("coverage modifications must be positive")
@@ -22,71 +28,83 @@ coverage <- function(pdf, cdf, deductible = 0, franchise = FALSE,
         stop("deductible must be smaller than the limit")
     if (coinsurance > 1)
         stop("coinsurance must be between 0 and 1")
-    if (missing(cdf))
+    if (missing(cdf) & needs.cdf)
         stop("'cdf' must be supplied")

-    ## Determine if the output function should compute the cdf or the
-    ## pdf of the modified random variable. It is the latter if 'pdf'
-    ## is missing or NULL. (It would make for sense to have a variable
-    ## 'has.pdf', but 'cdf' was a TRUE/FALSE at the time the rest of
-    ## the code was written.)
-    is.cdf <- missing(pdf) || is.null(pdf)
-
-    ## Arguments 'pdf' and 'cdf' can be character strings giving
-    ## function names or straight function objects. In the latter
-    ## case, 'pdf' and 'cdf' will contain function definitions and the
-    ## output function of coverage() will contain multiple function
-    ## definitions. Not nice. Instead, always treat 'pdf' and 'cdf' as
-    ## character strings.
-    ##
-    cdf <- as.character(Call$cdf)
-    if (!is.cdf)
-        pdf <- as.character(Call$pdf)
-
-    ## The cumulative distribution function is always needed. Get its
-    ## argument list to build function calls and, eventually, specify
-    ## arguments of the output function.
- formalsCDF <- formals(cdf) # arguments as list - argsCDF <- names(formalsCDF) # arguments names as strings - - ## Remember if argument 'lower.tail' is available, so we can use - ## it later. Then, drop unsupported arguments 'lower.tail' and - ## 'log.p'. - has.lower <- if ("lower.tail" %in% argsCDF) TRUE else FALSE - argsCDF <- setdiff(argsCDF, c("lower.tail", "log.p")) - - ## 1. Set arguments of the output function. Should be those of - ## 'cdf' or 'pdf' depending if 'pdf' is missing or not, - ## respectively. - ## - ## 2. Set the symbol representing the variable in function - ## calls. Should be the first argument of the output - ## function. - ## - ## 3. Drop unsupported argument 'log', if present, in 'pdf'. - ## - ## 4. Drop the first argument of 'cdf' and 'pdf' which are no - ## longer used after this block and prepare argument list for - ## use in do.call(). - if (is.cdf) # return cdf of the modified r.v. + ## Quantites often used + r <- 1 + inflation + d <- deductible/r + u <- limit/r + + ## Prepare the cdf object for cases needing the cdf: output + ## function is a cdf or there is a deductible or a limit. + if (needs.cdf) { - argsFUN <- formalsCDF[argsCDF] # arguments of output function - x <- as.name(argsCDF[1]) # symbol + ## Arguments 'cdf' can be a character string giving the + ## function name or a straight function object. In the latter + ## case, 'cdf' will contain function definitions and the + ## output function of coverage() will contain multiple + ## function definitions. Not nice. Instead, always treat 'cdf' + ## (and 'pdf', below) as character strings. + cdf <- as.character(Call$cdf) + + ## Get argument list to build function calls and, eventually, + ## to specify arguments of the output function. + formalsCDF <- formals(cdf) # arguments as list + argsCDF <- names(formalsCDF) # arguments names as strings + + ## Remember if argument 'lower.tail' is available, so we can + ## use it later. Then, drop unsupported arguments 'lower.tail' + ## and 'log.p'. + has.lower <- "lower.tail" %in% argsCDF + argsCDF <- setdiff(argsCDF, c("lower.tail", "log.p")) + + ## If output function is a cdf: + ## + ## 1. Set its arguments to those of 'cdf'. + ## 2. Set the symbol representing the variable in function + ## calls. Should be the first argument of the output + ## function. + ## 3. Drop the first argument of 'cdf' since it is no longer + ## used after this block. + if (is.cdf) + { + argsFUN <- formalsCDF[argsCDF] # arguments of output function + x <- as.name(argsCDF[1]) # symbol + } + + ## Prepare argument list for use in do.call + argsCDF <- sapply(argsCDF[-1], as.name) # for use in do.call() + + ## Definitions of 1 - F(d) and 1 - F(u), using 'lower.tail = + ## FALSE' if available in 'cdf'. + if (has.lower) + { + Sd <- substitute(do.call(F, a), + list(F = cdf, a = c(d, argsCDF, lower.tail = FALSE))) + Su <- substitute(do.call(F, a), + list(F = cdf, a = c(u, argsCDF, lower.tail = FALSE))) + } + else + { + Sd <- substitute(1 - do.call(F, a), + list(F = cdf, a = c(d, argsCDF))) + Su <- substitute(1 - do.call(F, a), + list(F = cdf, a = c(u, argsCDF))) + } } - else + + ## Repeat same steps as above for case needing the pdf: output + ## function is a pdf. 
+ if (!is.cdf) { + pdf <- as.character(Call$pdf) # same as 'cdf' above formalsPDF <- formals(pdf) # arguments as list argsPDF <- setdiff(names(formalsPDF), "log") # drop argument 'log' argsFUN <- formalsPDF[argsPDF] # arguments of output function x <- as.name(argsPDF[1]) # symbol argsPDF <- sapply(argsPDF[-1], as.name) # for use in do.call() } - argsCDF <- sapply(argsCDF[-1], as.name) # for use in do.call() - - ## Quantites often used - r <- 1 + inflation - d <- deductible/r - u <- limit/r ## Build the value at which the underlying pdf/cdf will be called ## for non special case values of 'x'. @@ -119,23 +137,6 @@ coverage <- function(pdf, cdf, deductible = 0, franchise = FALSE, cond2 <- substitute(0 < x & x < b, list(x = x, b = bound2)) } - ## Definitions of 1 - F(d) and 1 - F(u), using 'lower.tail = - ## FALSE' if available in 'cdf'. - if (has.lower) - { - Sd <- substitute(do.call(F, a), - list(F = cdf, a = c(d, argsCDF, lower.tail = FALSE))) - Su <- substitute(do.call(F, a), - list(F = cdf, a = c(u, argsCDF, lower.tail = FALSE))) - } - else - { - Sd <- substitute(1 - do.call(F, a), - list(F = cdf, a = c(d, argsCDF))) - Su <- substitute(1 - do.call(F, a), - list(F = cdf, a = c(u, argsCDF))) - } - ## Function definition for the first branch. f1 <- if (per.loss & deductible) substitute(do.call(F, a), list(F = cdf, a = c(d, argsCDF))) @@ -172,11 +173,11 @@ coverage <- function(pdf, cdf, deductible = 0, franchise = FALSE, ## Output function eval(substitute(FUN <- function() - ifelse(cond1, f1, - ifelse(cond2, f2, - ifelse(cond3, f3, 0))), - list(cond1 = cond1, cond2 = cond2, cond3 = cond3, - f1 = f1, f2 = f2, f3 = f3))) + ifelse(cond1, f1, + ifelse(cond2, f2, + ifelse(cond3, f3, 0))), + list(cond1 = cond1, cond2 = cond2, cond3 = cond3, + f1 = f1, f2 = f2, f3 = f3))) formals(FUN) <- argsFUN # set arguments environment(FUN) <- new.env() # new, empty environment FUN diff --git a/R/elev.R b/R/elev.R index eedd3c3..c5c52de 100644 --- a/R/elev.R +++ b/R/elev.R @@ -27,6 +27,8 @@ elev.default <- function(x, ...) FUN } +### This function assumes right-closed intervals, but the numerical +### values are identical for left-closed intervals. elev.grouped.data <- function(x, ...) { if (!exists("Call", inherits = FALSE)) @@ -40,7 +42,7 @@ elev.grouped.data <- function(x, ...) limit <- pmin(limit, cj[r + 1]) ## Class in which the limit is located. - cl <- cut(limit, cj, include.lowest = TRUE, labels = FALSE) + cl <- findInterval(limit, cj, all.inside = TRUE) ## Means for all classes below each limit. 
cjt <- head(cj, max(cl)) # upper bounds diff --git a/R/exact.R b/R/exact.R index 2f1a6fc..99dd59b 100644 --- a/R/exact.R +++ b/R/exact.R @@ -27,7 +27,7 @@ exact <- function(fx, pn, x.scale = 1) fs[pos] <- fs[pos] + fxc * pn[i + 1] } - FUN <- approxfun((0:(length(fs) - 1)) * x.scale, cumsum(fs), + FUN <- approxfun((0:(length(fs) - 1)) * x.scale, pmin(cumsum(fs), 1), method = "constant", yleft = 0, yright = 1, f = 0, ties = "ordered") class(FUN) <- c("ecdf", "stepfun", class(FUN)) diff --git a/R/grouped.data.R b/R/grouped.data.R index fd09f4c..b8cd973 100644 --- a/R/grouped.data.R +++ b/R/grouped.data.R @@ -7,8 +7,8 @@ ### AUTHORS: Vincent Goulet , ### Mathieu Pigeon, Louis-Philippe Pouliot -grouped.data <- function(..., row.names = NULL, check.rows = FALSE, - check.names = TRUE) +grouped.data <- function(..., right = TRUE, row.names = NULL, + check.rows = FALSE, check.names = TRUE) { ## Utility function numform <- function(x, w) @@ -40,7 +40,9 @@ grouped.data <- function(..., row.names = NULL, check.rows = FALSE, ## Return a data frame with formatted group boundaries in the ## first column. w <- max(nchar(x[-1, ])) # longest upper boundary - xfmt <- paste("(", numform(x[-nx, ], -1), ", ", numform(x[-1, ], w), "]", + xfmt <- paste(if (right) "(" else "[", + numform(x[-nx, ], -1), ", ", numform(x[-1, ], w), + if (right) "]" else ")", sep = "") res <- data.frame(xfmt, y, row.names = row.names, check.rows = check.rows, check.names = check.names) @@ -48,5 +50,6 @@ grouped.data <- function(..., row.names = NULL, check.rows = FALSE, class(res) <- c("grouped.data", "data.frame") environment(res) <- new.env() assign("cj", unlist(x, use.names = FALSE), environment(res)) + attr(res, "right") <- right res } diff --git a/R/hache.R b/R/hache.R index 31653ba..18a9671 100644 --- a/R/hache.R +++ b/R/hache.R @@ -1,146 +1,88 @@ ### ===== actuar: an R package for Actuarial Science ===== ### -### Credibility in the Regression Case +### Credibility in the regression case using the Hachemeister (1975) +### model with possibly an adjustment to put the intercept at the +### barycenter of time (see Buhlmann & Gisler, 2005). ### -### The Hachemeister Regression Model (1975). -### -### AUTHORS: Tommy Ouellet, Vincent Goulet +### AUTHORS: Xavier Milhaud, Tommy Ouellet, Vincent Goulet +### -hache <- function(ratios, weights, xreg, tol = sqrt(.Machine$double.eps), +hache <- function(ratios, weights, formula, data, adj.intercept = FALSE, + method = c("unbiased", "iterative"), + tol = sqrt(.Machine$double.eps), maxit = 100, echo = FALSE) { Call <- match.call() - ## Frequently used values - ncontracts <- NROW(ratios) # number of contracts - nyears <- NCOL(ratios) # number of years - p <- NCOL(xreg) + 1 # dimension of design matrix - s <- seq_len(ncontracts) # 1:ncontracts - - ## Fit linear model to each contract and make summary of each - ## model for later extraction of key quantities. - xreg <- cbind(xreg) # force dims and colnames - fo <- as.formula(paste("y ~ ", paste(colnames(xreg), collapse = "+"))) - f <- function(i) - { - DF <- data.frame(y = ratios[i, ], xreg, w = weights[i, ]) - lm(fo, data = DF, weights = w) - } - fits <- lapply(s, f) - sfits <- lapply(fits, summary) - - ## Regression coefficients, residuals and the analog of the inverse - ## of the total contract weights (to be used to compute the - ## credibility matrices). 
for each contract - ind <- sapply(fits, coef) - r <- sapply(fits, residuals) - sigma2 <- sapply(sfits, "[[", "sigma")^2 - weights.s <- lapply(sfits, "[[", "cov.unscaled") - - ## === ESTIMATION OF WITHIN VARIANCE === - s2 <- mean(sigma2) - - ## === ESTIMATION OF THE BETWEEN VARIANCE-COVARIANCE MATRIX === - ## - ## This is an iterative procedure similar to the Bischel-Straub - ## estimator. Following Goovaerts & Hoogstad, stopping criterion - ## is based in the collective regression coefficients estimates. - ## - ## Starting credibility matrices and collective regression - ## coefficients. The credibility matrices are stored in an array - ## of dimension p x p x ncontracts. - cred <- array(diag(p), dim = c(p, p, ncontracts)) # identity matrices - coll <- rowMeans(ind) # coherent with above cred. matrices - - ## If printing of iterations was asked for, start by printing a - ## header and the starting values. - if (echo) - { - cat("Iteration\tCollective regression coefficients\n") - exp <- expression(cat(" ", count, "\t\t ", coll1 <- coll, - fill = TRUE)) - } - else - exp <- expression(coll1 <- coll) - - ## Iterative procedure - count <- 0 - repeat + ## If weights are not specified, use equal weights as in + ## Buhlmann's model. + if (missing(weights)) { - eval(exp) - - ## Stop after 'maxit' iterations - if (maxit < (count <- count + 1)) - { - warning("maximum number of iterations reached before obtaining convergence") - break - } - - ## As calculated here, the between variance-covariance matrix - ## is actually a vector. It is turned into a matrix by adding - ## a 'dim' attribute. - A <- rowSums(sapply(s, - function(i) cred[, , i] %*% tcrossprod(ind[, i] - coll))) / (ncontracts - 1) - dim(A) <- c(p, p) - - ## Symmetrize A - A <- (A + t(A))/2 - - ## New credibility matrices - cred <- sapply(weights.s, function(w) A %*% solve(A + s2 * w)) - dim(cred) <- c(p, p, ncontracts) - - ## New collective regression coefficients - cred.s <- apply(cred, c(1, 2), sum) - coll <- solve(cred.s, - rowSums(sapply(s, function(i) cred[, , i] %*% ind[, i]))) - - ## Test for convergence - if (max(abs((coll - coll1)/coll1)) < tol) - break + if (any(is.na(ratios))) + stop("missing ratios not allowed when weights are not supplied") + weights <- array(1, dim(ratios)) } - ## Final calculation of the between variance-covariance matrix and - ## credibility matrices. - A <- rowSums(sapply(s, - function(i) cred[, , i] %*% tcrossprod(ind[, i] - coll))) / (ncontracts - 1) - dim(A) <- c(p, p) - A <- (A + t(A))/2 - cred <- sapply(weights.s, function(w) A %*% solve(A + s2 * w)) - dim(cred) <- c(p, p, ncontracts) - - ## Credibility adjusted coefficients. The coefficients of the - ## models are replaced with these values. That way, prediction - ## will be trivial using predict.lm(). - for (i in s) - fits[[i]]$coefficients <- coll + drop(cred[, , i] %*% (ind[, i] - coll)) - - ## Add names to the collective coefficients vector. - names(coll) <- rownames(ind) + ## Check other bad arguments. 
+ if (NCOL(ratios) < 2) + stop("there must be at least one node with more than one period of experience") + if (NROW(ratios) < 2) + stop("there must be more than one node") + if (!identical(which(is.na(ratios)), which(is.na(weights)))) + stop("missing values are not in the same positions in 'weights' and in 'ratios'") + if (all(!weights, na.rm = TRUE)) + stop("no available data to fit model") + + ## Build the design matrix + mf <- model.frame(formula, data, drop.unused.levels = TRUE) + mt <- attr(mf, "terms") + xreg <- model.matrix(mt, mf) + + ## Do computations in auxiliary functions. + res <- + if (adj.intercept) + hache.barycenter(ratios, weights, xreg, + method = match.arg(method), + tol = tol, maxit = maxit, echo = echo) + else + hache.origin(ratios, weights, xreg, + tol = tol, maxit = maxit, echo = echo) + + ## Add the terms object to the result for use in predict.hache() + ## [and thus predict.lm()]. + res$terms <- mt ## Results - structure(list(means = list(coll, ind), - weights = list(cred.s, lapply(weights.s, solve)), - unbiased = NULL, - iterative = list(A, s2), - cred = cred, - adj.models = fits, - nodes = list(ncontracts)), - class = "hache", - model = "regression") + attr(res, "class") <- "hache" + attr(res, "model") <- "regression" + res } predict.hache <- function(object, levels = NULL, newdata, ...) { - ## Predictors can be given as a simple vector for one dimensional - ## models. For use in predict.lm(), these must be converted into a - ## data frame. - if (is.null(dim(newdata))) - newdata <- data.frame(xreg = newdata) + ## If model was fitted at the barycenter of time (there is a + ## transition matrix in the object), then also convert the + ## regression coefficients in the base of the (original) design + ## matrix. + if (!is.null(R <- object$transition)) + { + for (i in seq_along(object$adj.models)) + { + b <- coefficients(object$adj.models[[i]]) + object$adj.models[[i]]$coefficients <- solve(R, b) + } + } ## Prediction (credibility premiums) using predict.lm() on each of - ## the adjusted individual models. - sapply(object$adj.models, predict, newdata = newdata) + ## the adjusted individual models. This first requires to add a + ## 'terms' component to each adjusted model. + f <- function(z, ...) + { + z$terms <- object$terms + unname(predict.lm(z, ...)) + } + + sapply(object$adj.models, f, newdata = newdata) } print.hache <- function(x, ...) diff --git a/R/hache.barycenter.R b/R/hache.barycenter.R new file mode 100644 index 0000000..267116c --- /dev/null +++ b/R/hache.barycenter.R @@ -0,0 +1,154 @@ +### ===== actuar: an R package for Actuarial Science ===== +### +### Auxiliary function to fit regression credibility model by +### positioning the intercept at the barycenter of time. +### +### AUTHORS: Xavier Milhaud, Vincent Goulet + +hache.barycenter <- function(ratios, weights, xreg, method, + tol, maxit, echo) +{ + ## Frequently used values + weights.s <- rowSums(weights, na.rm = TRUE) # contract total weights + has.data <- which(weights.s > 0) # contracts with data + ncontracts <- nrow(ratios) # number of contracts + eff.ncontracts <- length(has.data) # effective number of contracts + p <- ncol(xreg) # rank (>= 2) of design matrix + n <- nrow(xreg) # number of observations + + ## Putting the intercept at the barycenter of time amounts to use + ## a "weighted orthogonal" design matrix in the regression (that + ## is, X'WX = I for some diagonal weight matrix W). In theory, + ## there would be one orthogonal design matrix per contract. 
In + ## practice, we orthogonalize with a "collective" barycenter. We + ## use average weights per period across contracts since these + ## will be closest to the their individual analogues. + ## + ## We orthogonalize the original design matrix using QR + ## decomposition. We also keep matrix R as a transition matrix + ## between the original base and the orthogonal base. + w <- colSums(weights, na.rm = TRUE)/sum(weights.s) + Xqr <- qr(xreg * sqrt(w)) # QR decomposition + R <- qr.R(Xqr) # transition matrix + x <- qr.Q(Xqr) / sqrt(w) # weighted orthogonal matrix + + ## Fit linear model to each contract. For contracts without data, + ## fit some sort of empty model to ease use in predict.hache(). + f <- function(i) + { + z <- + if (i %in% has.data) # contract with data + { + y <- ratios[i, ] + not.na <- !is.na(y) + lm.wfit(x[not.na, , drop = FALSE], y[not.na], weights[i, not.na]) + } + else # contract without data + lm.fit(x, rep.int(0, n)) + z[c("coefficients", "residuals", "weights", "rank", "qr")] + } + fits <- lapply(seq_len(ncontracts), f) + + ## Individual regression coefficients + ind <- sapply(fits, coef) + ind[is.na(ind)] <- 0 + + ## Individual variance estimators. The contribution of contracts + ## without data is 0. + S <- function(z) # from stats:::summary.lm + { + nQr <- NROW(z$qr$qr) + rank <- z$rank + r <- z$residuals + w <- z$weights + sum(w * r^2) / (nQr - rank) + } + sigma2 <- sapply(fits[has.data], S) + sigma2[is.nan(sigma2)] <- 0 + + ## Initialization of a few containers: p x p x ncontracts arrays + ## for the weight and credibility matrices; p x p matrices for the + ## between variance-covariance matrix and total weight matrix; a + ## vector of length p for the collective regression coefficients. + cred <- W <- array(0, c(p, p, ncontracts)) + A <- W.s <- matrix(0, p, p) + coll <- numeric(p) + + ## Weight matrices: we need here only the diagonal elements of + ## X'WX, where W = diag(w_{ij}) (and not w_{ij}/w_{i.} as in the + ## orthogonalization to keep a w_{i.} lying around). The first + ## element is w_{i.} and the off-diagonal elements are zero by + ## construction. Note that array W is quite different from the one + ## in hache.origin(). + W[1, 1, ] <- weights.s + for (i in 2:p) + W[i, i, has.data] <- + colSums(t(weights[has.data, ]) * x[, i]^2, na.rm = TRUE) + + ## === ESTIMATION OF THE WITHIN VARIANCE === + s2 <- mean(sigma2) + + ## === ESTIMATION OF THE BETWEEN VARIANCE-COVARIANCE MATRIX === + ## + ## By construction, we only estimate the diagonal of the matrix. + ## Variance components are estimated just like in the + ## Buhlmann-Straub model (see bstraub.R for details). + ## + ## Should we compute the iterative estimators? + do.iter <- method == "iterative" && diff(range(weights, na.rm = TRUE)) > .Machine$double.eps^0.5 + + ## Do the computations one regression parameter at a time. + for (i in seq_len(p)) + { + ## Unbiased estimator + a <- A[i, i] <- bvar.unbiased(ind[i, has.data], W[i, i, has.data], + s2, eff.ncontracts) + + ## Iterative estimator + if (do.iter) + { + a <- A[i, i] <- + if (a > 0) + bvar.iterative(ind[i, has.data], W[i, i, has.data], + s2, eff.ncontracts, start = a, + tol = tol, maxit = maxit, echo = echo) + else + 0 + } + + ## Credibility factors and estimator of the collective + ## regression coefficients. + if (a > 0) + { + z <- cred[i, i, has.data] <- 1/(1 + s2/(W[i, i, has.data] * a)) + z. <- W.s[i, i] <- sum(z) + coll[i] <- drop(crossprod(z, ind[i, has.data])) / z. 
+ } + else + { + ## (credibility factors were already initialized to 0) + w <- W[i, i, has.data] + w. <- W.s[i, i] <- sum(w) + coll[i] <- drop(crossprod(w, ind[i, ])) / w. + } + } + + ## Credibility adjusted coefficients. The coefficients of the + ## models are replaced with these values. That way, prediction + ## will be trivial using predict.lm(). + for (i in seq_len(ncontracts)) + fits[[i]]$coefficients <- coll + drop(cred[, , i] %*% (ind[, i] - coll)) + + ## Add names to the collective coefficients vector. + names(coll) <- rownames(ind) + + ## Results + list(means = list(coll, ind), + weights = list(W.s, W), + unbiased = if (method == "unbiased") list(A, s2), + iterative = if (method == "iterative") list(A, s2), + cred = cred, + nodes = list(ncontracts), + adj.models = fits, + transition = R) +} diff --git a/R/hache.origin.R b/R/hache.origin.R new file mode 100644 index 0000000..0ebd0e0 --- /dev/null +++ b/R/hache.origin.R @@ -0,0 +1,158 @@ +### ===== actuar: an R package for Actuarial Science ===== +### +### Auxiliary function to fit regression credibility model using the +### original Hachemeister model. +### +### AUTHORS: Tommy Ouellet, Vincent Goulet + +hache.origin <- function(ratios, weights, xreg, tol, maxit, echo) +{ + ## Frequently used values + weights.s <- rowSums(weights, na.rm = TRUE) # contract total weights + has.data <- which(weights.s > 0) # contracts with data + ncontracts <- nrow(ratios) # number of contracts + eff.ncontracts <- length(has.data) # effective number of contracts + p <- ncol(xreg) # rank (>= 2) of design matrix + n <- nrow(xreg) # number of observations + + ## Fit linear model to each contract. For contracts without data, + ## fit some sort of empty model to ease use in predict.hache(). + f <- function(i) + { + z <- + if (i %in% has.data) # contract with data + { + y <- ratios[i, ] + not.na <- !is.na(y) + lm.wfit(xreg[not.na, , drop = FALSE], y[not.na], weights[i, not.na]) + } + else # contract without data + lm.fit(xreg, rep.int(0, n)) + z[c("coefficients", "residuals", "weights", "rank", "qr")] + } + fits <- lapply(seq_len(ncontracts), f) + + ## Individual regression coefficients + ind <- sapply(fits, coef) + ind[is.na(ind)] <- 0 + + ## Individual variance estimators. The contribution of contracts + ## without data is 0. + S <- function(z) # from stats:::summary.lm + { + nQr <- NROW(z$qr$qr) + r1 <- z$rank + r <- z$residuals + w <- z$weights + sum(w * r^2) / (nQr - r1) + } + sigma2 <- sapply(fits[has.data], S) + sigma2[is.nan(sigma2)] <- 0 + + ## Initialization of a few containers: p x p x ncontracts arrays + ## for the weight and credibility matrices; p x p matrices for the + ## between variance-covariance matrix and total credibility + ## matrix. + cred <- W <- array(0, c(p, p, ncontracts)) + A <- cred.s <- matrix(0, p, p) + + ## Weight matrices: we use directly (X'WX)^{-1}. This is quite + ## different from hache.barycenter(). + V <- function(z) # from stats:::summary.lm + { + r1 <- z$rank + if (r1 == 1) + diag(as.real(chol2inv(z$qr$qr[1, 1, drop = FALSE])), p) + else + chol2inv(z$qr$qr[1:r1, 1:r1, drop = FALSE]) + } + W[, , has.data] <- sapply(fits[has.data], V) + + ## Starting credibility matrices and collective regression + ## coefficients. 
+ cred[, , has.data] <- diag(p) # identity matrices + coll <- rowSums(ind) / eff.ncontracts # coherent with above + + ## === ESTIMATION OF WITHIN VARIANCE === + s2 <- mean(sigma2) + + ## === ESTIMATION OF THE BETWEEN VARIANCE-COVARIANCE MATRIX === + ## + ## This is an iterative procedure similar to the Bischel-Straub + ## estimator. Following Goovaerts & Hoogstad, stopping criterion + ## is based in the collective regression coefficients estimates. + ## + ## If printing of iterations was asked for, start by printing a + ## header and the starting values. + if (echo) + { + cat("Iteration\tCollective regression coefficients\n") + exp <- expression(cat(" ", count, "\t\t ", coll1 <- coll, + fill = TRUE)) + } + else + exp <- expression(coll1 <- coll) + + ## Iterative procedure + count <- 0 + repeat + { + eval(exp) + + ## Stop after 'maxit' iterations + if (maxit < (count <- count + 1)) + { + warning("maximum number of iterations reached before obtaining convergence") + break + } + + ## Calculation of the between variance-covariance matrix. + A[] <- rowSums(sapply(has.data, function(i) + cred[, , i] %*% tcrossprod(ind[, i] - coll))) / + (eff.ncontracts - 1) + + ## Symmetrize A + A <- (A + t(A))/2 + + ## New credibility matrices + cred[, , has.data] <- sapply(has.data, function(i) + A %*% solve(A + s2 * W[, , i])) + + ## New collective regression coefficients + cred.s <- apply(cred[, , has.data], c(1, 2), sum) + coll <- solve(cred.s, + rowSums(sapply(has.data, function(i) + cred[, , i] %*% ind[, i]))) + + ## Test for convergence + if (max(abs((coll - coll1)/coll1)) < tol) + break + } + + ## Final calculation of the between variance-covariance matrix and + ## credibility matrices. + A[] <- rowSums(sapply(has.data, function(i) + cred[, , i] %*% tcrossprod(ind[, i] - coll))) / + (eff.ncontracts - 1) + A <- (A + t(A))/2 + cred[, , has.data] <- sapply(has.data, function(i) + A %*% solve(A + s2 * W[, , i])) + + ## Credibility adjusted coefficients. The coefficients of the + ## models are replaced with these values. That way, prediction + ## will be trivial using predict.lm(). + for (i in seq_len(ncontracts)) + fits[[i]]$coefficients <- coll + drop(cred[, , i] %*% (ind[, i] - coll)) + + ## Add names to the collective coefficients vector. + names(coll) <- rownames(ind) + + ## Results + list(means = list(coll, ind), + weights = list(cred.s, W), + unbiased = NULL, + iterative = list(A, s2), + cred = cred, + nodes = list(ncontracts), + adj.models = fits) +} diff --git a/R/hierarc.R b/R/hierarc.R index ec92649..61a092a 100644 --- a/R/hierarc.R +++ b/R/hierarc.R @@ -32,7 +32,7 @@ hierarc <- function(ratios, weights, classification, ## Sanity check if weights and ratios correspond. if (!identical(which(is.na(ratios)), which(is.na(weights)))) - stop("missing values are not in the same positions in weights and in ratios") + stop("missing values are not in the same positions in 'weights' and in 'ratios'") ## === NUMBER OF NODES AND SPLITTING FACTORS === ## @@ -158,7 +158,7 @@ hierarc <- function(ratios, weights, classification, ## level, hence this list is one shorter than the others. tweights <- vector("list", nlevels1p) # total level weights wmeans <- vector("list", nlevels1p) # weighted averages - bu <- bi <- c(numeric(nlevels), s2) # variance estimators + b <- c(numeric(nlevels), s2) # variance estimators cred <- vector("list", nlevels) # credibility factors ## Values already computed at the entity level. 
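The Bühlmann-Gisler and Ohlsson estimators selected in the next hunk pool the per-node between-variance contributions in two different ways. A small sketch, with purely hypothetical per-node numerators 'bi' and denominators 'ci' (three nodes, the last one without data):

bi <- c(1.8, 0.6, 0)    # hypothetical per-node between-variance contributions
ci <- c(3.1, 2.4, 0)    # hypothetical per-node normalizing constants
mean(pmax(ifelse(ci != 0, bi/ci, 0), 0), na.rm = TRUE)  # Buhlmann-Gisler: truncated mean of the ratios
sum(bi, na.rm = TRUE) / sum(ci, na.rm = TRUE)           # Ohlsson: ratio of the sums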
@@ -185,9 +185,9 @@ hierarc <- function(ratios, weights, classification, method <- match.arg(method) if (method == "Buhlmann-Gisler") - bexp <- expression(bu[i] <- mean(pmax(ifelse(ci != 0, bui/ci, 0), 0), na.rm = TRUE)) + bexp <- expression(b[i] <- mean(pmax(ifelse(ci != 0, bi/ci, 0), 0), na.rm = TRUE)) else # Ohlsson - bexp <- expression(bu[i] <- sum(bui, na.rm = TRUE) / sum(ci, na.rm = TRUE)) + bexp <- expression(b[i] <- sum(bi, na.rm = TRUE) / sum(ci, na.rm = TRUE)) for (i in nlevels:1) { @@ -206,10 +206,10 @@ hierarc <- function(ratios, weights, classification, ## Latest non-zero between variance estimate -- the one used ## in the estimator and in the credibility factors. - between <- bu[bu != 0][1] + between <- b[b != 0][1] ## Calculation of the per node variance estimate. - bui <- as.vector(tapply(tweights[[i + 1]] * + bi <- as.vector(tapply(tweights[[i + 1]] * (wmeans[[i + 1]] - wmeans[[i]][fnodes[[i]]])^2, fnodes[[i]], sum)) - @@ -225,9 +225,9 @@ hierarc <- function(ratios, weights, classification, ## replaced by the sum of the credibility factors and the ## weighted averages are recomputed with these new weights. #if (max(bu[i], 0)) # don't compute negative factors! - if (bu[i]) + if (b[i]) { - cred[[i]] <- 1/(1 + between/(bu[i] * tweights[[i + 1]])) + cred[[i]] <- 1/(1 + between/(b[i] * tweights[[i + 1]])) tweights[[i]] <- as.vector(tapply(cred[[i]], fnodes[[i]], sum)) wmeans[[i]] <- ifelse(tweights[[i]] > 0, @@ -254,19 +254,17 @@ hierarc <- function(ratios, weights, classification, ## the current level. if (method == "iterative") { - bi <- pmax(bu, 0) # truncation for starting values - if (any(head(bi, -1))) # at least one non-zero starting value - .External("do_hierarc", cred, tweights, wmeans, fnodes, denoms, bi, tol, maxit, echo) + b <- pmax(b, 0) # truncation for starting values + if (any(head(b, -1) > 0)) # at least one non-zero starting value + .External("do_hierarc", cred, tweights, wmeans, fnodes, denoms, + b, tol, maxit, echo) } - else - bi <- NULL - ## Results structure(list(means = wmeans, weights = tweights, - unbiased = bu, - iterative = bi, + unbiased = if (method != "iterative") b, + iterative = if (method == "iterative") b, cred = cred, nodes = nnodes, classification = classification[, -1], diff --git a/R/panjer.R b/R/panjer.R index 64975a9..7a8876d 100644 --- a/R/panjer.R +++ b/R/panjer.R @@ -107,7 +107,7 @@ panjer <- function(fx, dist, p0 = NULL, x.scale = 1, ..., fs <- .External("do_panjer", p0, p1, fs0, fx, a, b, tol, maxit, echo) - FUN <- approxfun((0:(length(fs) - 1)) * x.scale, cumsum(fs), + FUN <- approxfun((0:(length(fs) - 1)) * x.scale, pmin(cumsum(fs), 1), method = "constant", yleft = 0, yright = 1, f = 0, ties = "ordered") class(FUN) <- c("ecdf", "stepfun", class(FUN)) diff --git a/R/quantile.aggregateDist.R b/R/quantile.aggregateDist.R index 64d779e..054cd98 100644 --- a/R/quantile.aggregateDist.R +++ b/R/quantile.aggregateDist.R @@ -14,7 +14,8 @@ quantile.aggregateDist <- ## The Normal and Normal Power approximations are the only ## continuous distributions of class 'aggregateDist'. They are ## therefore treated differently, using the 'base' quantile - ## function qnorm(). + ## function qnorm() or numerical optimization through the 'base' + ## optimize function. 
if (label == "Normal approximation") res <- qnorm(probs, get("mean", environment(x)), sqrt(get("variance", environment(x)))) @@ -33,18 +34,20 @@ quantile.aggregateDist <- ## An empirical and discrete approach is used for ## 'aggregateDist' objects obtained from methods other than ## Normal and Normal Power. - Fs <- get("y", environment(x)) + y <- get("y", environment(x)) x <- get("x", environment(x)) - ind <- sapply(probs, function(q) match(TRUE, Fs >= q)) - res <- - if (smooth) - { - h <- (Fs[ind] - probs) / (Fs[ind] - Fs[ind - 1]) - (1 - h) * x[ind - 1] + h * x[ind] - } - else - x[ind] + ## Create the inverse function of either the cdf or the ogive. + fun <- + if (smooth) # ogive + approxfun(y, x, yleft = 0, yright = max(x), + method = "linear", ties = "ordered") + else # cdf + fun <- approxfun(y, x, yleft = 0, yright = max(x), + method = "constant", ties = "ordered") + + ## Quantiles + res <- fun(probs) } if (names) diff --git a/R/quantile.grouped.data.R b/R/quantile.grouped.data.R new file mode 100644 index 0000000..7591c4f --- /dev/null +++ b/R/quantile.grouped.data.R @@ -0,0 +1,28 @@ +### ===== actuar: an R package for Actuarial Science ===== +### +### Quantiles (inverse of the ogive) for grouped data +### +### AUTHOR: Vincent Goulet + +quantile.grouped.data <- function(x, probs = seq(0, 1, 0.25), + names = TRUE, ...) +{ + y <- x[, 2] + x <- eval(expression(cj), env = environment(x)) + + ## Inverse of the ogive + fun <- approxfun(c(0, cumsum(y))/sum(y), x, + yleft = min(x), yright = max(x), + method = "linear", ties = "ordered") + + ## Quantiles + res <- fun(probs) + + if (names) + { + dig <- max(2, getOption("digits")) + names(res) <- formatC(paste(100 * probs, "%", sep = ""), + format = "fg", wid = 1, digits = dig) + } + res +} diff --git a/R/simS.R b/R/simS.R index 629ba6c..fb2ae57 100644 --- a/R/simS.R +++ b/R/simS.R @@ -24,7 +24,7 @@ simS <- function(n, model.freq, model.sev) x <- sort(x) vals <- unique(x) fs <- tabulate(match(x, vals))/length(x) - FUN <- approxfun(vals, cumsum(fs), method = "constant", + FUN <- approxfun(vals, pmin(cumsum(fs), 1), method = "constant", yleft = 0, yright = 1, f = 0, ties = "ordered") class(FUN) <- c("ecdf", "stepfun", class(FUN)) assign("fs", fs, envir = environment(FUN)) diff --git a/R/simul.R b/R/simul.R index 9ce1643..e6c678e 100644 --- a/R/simul.R +++ b/R/simul.R @@ -54,10 +54,10 @@ simul <- function(nodes, model.freq = NULL, model.sev = NULL, weights = NULL) for (i in 2:nlevels) # first node doesn't need recycling nodes[[i]] <- rep(nodes[[i]], length = sum(nodes[[i - 1]])) - ## Simulation of the frequency risk (or mixing) parameters for - ## each level (e.g. class, contract) and, at the last level, the - ## actual frequencies. If 'model.freq' is NULL, this is equivalent - ## to having one claim per node. + ## Simulation of the frequency mixing parameters for each level + ## (e.g. class, contract) and, at the last level, the actual + ## frequencies. If 'model.freq' is NULL, this is equivalent to + ## having one claim per node. if (has.freq) { ## Normally, only the immediately above mixing parameter will @@ -162,11 +162,11 @@ simul <- function(nodes, model.freq = NULL, model.sev = NULL, weights = NULL) ## We must now distribute the claim amounts in vector 'severities' ## to the appropriate nodes. This is complicated by the ## possibility to have different number of nodes (years of - ## observation) for each "entity". The result must be a matrix + ## observation) for each entity. 
The result must be a matrix ## with the number of columns equal to the maximum number of last ## level nodes. ## - ## The number of nodes (years of observation) per "entity" is + ## The number of nodes (years of observation) per entity is ## given by 'n.current' since we reached the last level in (either ## one of) the above loops. ## @@ -217,7 +217,7 @@ simul <- function(nodes, model.freq = NULL, model.sev = NULL, weights = NULL) } ## Finally, create a matrix where each row contains the series of - ## identifiers for an "entity" in the portfolio, e.g. if the data + ## identifiers for an entity in the portfolio, e.g. if the data ## is denoted X_{ijkt}, one line of the matrix will contain ## subscripts i, j and k. As we move from right to left in the ## columns of 'm', the subcripts are increasingly repeated. diff --git a/R/simul.summaries.R b/R/simul.summaries.R index 42e8fdc..ab7cf58 100644 --- a/R/simul.summaries.R +++ b/R/simul.summaries.R @@ -72,7 +72,7 @@ aggregate.portfolio <- function(x, by = names(x$nodes), FUN = sum, frequency.portfolio <- function(x, by = names(x$nodes), classification = TRUE, prefix = NULL, ...) { - freq <- function(x) if (identical(x, NA)) NA else length(x) + freq <- function(x) if (identical(x, NA)) NA else length(x[!is.na(x)]) aggregate(x, by, freq, classification, prefix) } diff --git a/demo/credibility.R b/demo/credibility.R index b3e16b9..ee05ce4 100644 --- a/demo/credibility.R +++ b/demo/credibility.R @@ -30,13 +30,6 @@ fit <- cm(~state, hachemeister, ratios = ratio.1:ratio.12, summary(fit) predict(fit) -## Fitting of a Hachemeister regression model. This requires to -## specify a vector or matrix of regressors with argument 'xreg'. -fit <- cm(~state, hachemeister, ratios = ratio.1:ratio.12, - weights = weight.1:weight.12, xreg = 12:1) -summary(fit, newdata = 0) # 'newdata' is future value of regressor -predict(fit, newdata = 0) - ## Simulation of a three level hierarchical portfolio. nodes <- list(sector = 2, unit = c(3, 4), contract = c(10, 5, 8, 5, 7, 11, 4), year = 6) @@ -64,3 +57,16 @@ predict(fit) # credibility premiums predict(fit, levels = "unit") # unit credibility premiums only summary(fit) # portfolio summary summary(fit, levels = "unit") # unit portfolio summary only + +## Fitting of Hachemeister regression model with intercept at time origin. +fit <- cm(~state, hachemeister, ratios = ratio.1:ratio.12, + weights = weight.1:weight.12, regformula = ~time, + regdata = data.frame(time = 1:12)) +summary(fit, newdata = data.frame(time = 13)) # 'newdata' is the future value of regressor +predict(fit, newdata = data.frame(time = 13)) + +## Position the intercept at the barycenter of time. +fit <- cm(~state, hachemeister, ratios = ratio.1:ratio.12, + weights = weight.1:weight.12, regformula = ~time, + regdata = data.frame(time = 1:12), adj.intercept = TRUE) +summary(fit, newdata = data.frame(time = 13)) diff --git a/demo/lossdist.R b/demo/lossdist.R index 4ef050c..cfa399e 100644 --- a/demo/lossdist.R +++ b/demo/lossdist.R @@ -222,6 +222,12 @@ knots(Fnt) # group boundaries Fnt(knots(Fnt)) # ogive at group boundaries plot(Fnt) # plot of the ogive +## A method of 'quantile' for grouped data objects computes linearly +## smoothed quantiles from a sample, that is the inverse of the ogive +## in various points. 
+quantile(x)
+Fnt(quantile(x))
+
 ###
 ### EMPIRICAL MOMENTS CALCULATION
diff --git a/inst/NEWS b/inst/NEWS
index 7a5a327..19fe842 100644
--- a/inst/NEWS
+++ b/inst/NEWS
@@ -1,5 +1,55 @@
 === actuar: an R package for Actuarial Science ===

+Version 1.0-0
+=============
+
+NEW FEATURES
+
+    o Improved support for regression credibility models. There is now
+      an option to make the computations with the intercept at the
+      barycenter of time. This assures that the credibility adjusted
+      regression line (or plane, or ...) lies between the individual and
+      collective ones. In addition, contracts without data are now
+      supported like in other credibility models.
+
+    o Argument 'right' for grouped.data() to allow intervals closed on
+      the right (default) or on the left.
+
+    o Method of quantile() for grouped data objects to compute the
+      inverse of the ogive.
+
+USER-VISIBLE CHANGES
+
+    o cm() no longer returns the values of the unbiased estimators when
+      method = "iterative".
+
+    o Specification of regression models in cm() has changed: one should
+      now provide the regression model as a formula and the regressors
+      in a separate matrix or data frame.
+
+    o Due to above change, predict.cm() now expects 'newdata' to be a
+      data frame as for stats:::predict.lm().
+
+    o Function bstraub() is no longer exported. Users are expected to
+      use cm() as interface instead.
+
+BUG FIXES
+
+    o Functions r() are now more consistent in warning when NA's
+      (specifically NaN's) are generated (as per the change in R 2.7.0).
+
+    o frequency.portfolio was wrongly counting NAs.
+
+    o Domain of pdfs returned by aggregateDist() now restricted to
+      [0, 1].
+
+    o Quantiles are now computed correctly (and more efficiently) in 0
+      and 1 by quantile.aggregateDist().
+
+    o coverage() no longer requires a cdf when it is not needed, namely
+      when there is no deductible and no limit.
+
+
 Version 0.9-7
 =============
diff --git a/inst/doc/.build.timestamp b/inst/doc/.build.timestamp
new file mode 100644
index 0000000..e69de29
diff --git a/inst/doc/Rplots.pdf b/inst/doc/Rplots.pdf
new file mode 100644
index 0000000..d0801f6
Binary files /dev/null and b/inst/doc/Rplots.pdf differ
diff --git a/inst/doc/actuar.Rnw b/inst/doc/actuar.Rnw
index de11e2f..7442619 100644
--- a/inst/doc/actuar.Rnw
+++ b/inst/doc/actuar.Rnw
@@ -1,7 +1,10 @@
 \documentclass{article}
+  \usepackage{amsmath,color,hyperref}
+  \usepackage[round]{natbib}
   \usepackage[T1]{fontenc}
   \usepackage[utf8]{inputenc}
   \usepackage[english]{babel}
+  \usepackage{lucidabr}
   \usepackage[noae]{Sweave}

 %\VignetteIndexEntry{Introduction to actuar}
@@ -16,6 +19,23 @@
   \newcommand{\proglang}[1]{\textsf{#1}}
   \newcommand{\pkg}[1]{\textbf{#1}}

+  \bibliographystyle{plainnat}
+
+  \definecolor{Red}{rgb}{0.7,0,0}
+  \definecolor{Blue}{rgb}{0,0,0.8}
+  \hypersetup{%
+    hyperindex = {true},
+    colorlinks = {true},
+    linktocpage = {true},
+    plainpages = {false},
+    linkcolor = {Blue},
+    citecolor = {Blue},
+    urlcolor = {Red},
+    pdfstartview = {Fit},
+    pdfpagemode = {UseOutlines},
+    pdfview = {XYZ null null null}
+  }
+
 \begin{document}

 \maketitle
@@ -23,16 +43,18 @@
 \section{Introduction}
 \label{sec:introduction}

-\pkg{actuar} is a package providing additional Actuarial Science
-functionality to the \proglang{R} statistical system. Although various
-packages on CRAN provide functions useful to actuaries, \pkg{actuar}
-aims to serve as a central location for more specifically actuarial
-functions and data sets. The project was officially launched in 2005
-and is under active development.
+\pkg{actuar} \citep{actuar} is a package providing additional +Actuarial Science functionality to the \proglang{R} statistical +system. Although various packages on the Comprehensive \proglang{R} +Archive Network (CRAN) provide functions useful to actuaries, +\pkg{actuar} aims to serve as a central location for more specifically +actuarial functions and data sets. The project was officially launched +in 2005 and is under active development. -The feature set of the package can be split in four main categories: -loss distributions modeling, risk theory (including ruin theory), -simulation of compound hierarchical models and credibility theory. +The current feature set of the package can be split in four main +categories: loss distributions modeling, risk theory (including ruin +theory), simulation of compound hierarchical models and credibility +theory. As much as possible, the developers have tried to keep the ``user interface'' of the various functions of the package consistent. @@ -85,14 +107,16 @@ for information on how to cite the software. \section*{Acknowledgments} The package would not be at this stage of development without the -stimulating contribution of Sébastien Auclair, Louis-Philippe Pouliot -and Tommy Ouellet. +stimulating contribution of Sébastien Auclair, Xavier Milhaud, +Tommy Ouellet and Louis-Philippe Pouliot. This research benefited from financial support from the Natural Sciences and Engineering Research Council of Canada and from the \emph{Chaire d'actuariat} (Actuarial Science Foundation) of Université Laval. +\bibliography{actuar} + \end{document} %%% Local Variables: diff --git a/inst/doc/actuar.bib b/inst/doc/actuar.bib index 8991bde..6068870 100644 --- a/inst/doc/actuar.bib +++ b/inst/doc/actuar.bib @@ -4,16 +4,36 @@ @string{MVSVM @string{NAAJ = {North American Actuarial Journal}} -@Article{Beekman_68, - author = {Beekman, J. A.}, - title = {Collective risk results}, - journal = {Transactions of the Society of Actuaries}, - year = 1968, - volume = 20, - pages = {182-199}, +@Article{AsmussenRolski_91, + author = {Asmussen, S. and Rolski, T.}, + title = {Computational methods in risk theory: a + matrix-algorithmic approach}, + journal = IME, + year = 1991, + volume = 10, + pages = {259-274}, language = {english} } +@Article{BJ_87, + author = {Bühlmann, H. and Jewell, W. S.}, + title = {Hierarchical credibility revisited}, + year = 1987, + journal = MVSVM, + volume = 87, + pages = {35-54}, + language = {english} +} + +@Article{BS_70, + author = {Bühlmann, H. and Straub, E.}, + title = {Glaubgwürdigkeit für {S}chadensätze}, + year = 1970, + journal = MVSVM, + volume = 70, + pages = {111-133} +} + @InCollection{BeekmanFormula_EAS, author = {Kass, R.}, title = {Beekman's convolution formula}, @@ -26,77 +46,65 @@ @InCollection{BeekmanFormula_EAS language = {english} } -@Book{Neuts_81, - author = {Neuts, M. F.}, - title = {Matrix-geometric solutions in stochastic models: an - algorithmic approach}, - publisher = {Dover Publications}, - year = 1981, - isbn = {978-0-4866834-2-3}, +@Article{Beekman_68, + author = {Beekman, J. A.}, + title = {Collective risk results}, + journal = {Transactions of the Society of Actuaries}, + year = 1968, + volume = 20, + pages = {182-199}, language = {english} } -@Book{Daykin_et_al, - author = {Daykin, C.D. and Pentik{\"a}inen, T. 
and Pesonen, M.}, - title = {Practical Risk Theory for Actuaries}, - publisher = {Chapman \& Hall}, - year = 1994, - address = {London}, - isbn = {0-4124285-0-4}, +@Article{Buhlmann:regression:1997, + author = {Bühlmann, H. and Gisler, A.}, + title = {Credibility in the regression case revisited}, + journal = AB, + year = 1997, + volume = 27, + pages = {83-98}, language = {english} } -@InProceedings{Hachemeister_75, - author = {Hachemeister, C. A.}, - title = {Credibility for regression models with application - to trend}, - year = 1975, - booktitle = {Credibility, theory and applications}, - series = {Proceedings of the Berkeley actuarial research - conference on credibility}, - pages = {129-163}, - publisher = {Academic Press}, - address = {New York}, +@Article{Buhlmann_69, + author = {Bühlmann, H.}, + title = {Experience rating and credibility}, + year = 1969, + journal = AB, + volume = 5, + pages = {157-165}, language = {english} } -@Book{LossModels2e, - author = {Klugman, S. A. and Panjer, H. H. and Willmot, G.}, - title = {Loss Models: From Data to Decisions}, - edition = {Second}, - publisher = {Wiley}, - year = 2004, - address = {New York}, - isbn = {0-4712157-7-5}, +@Book{Buhlmann_Gisler, + author = {Bühlmann, H. and Gisler, A.}, + title = {A course in credibility theory and its applications}, + publisher = {Springer}, + year = 2005, + isbn = {3-5402575-3-5}, language = {english} } -@Article{cm, - author = {Goulet, V. and Ouellet, T.}, - title = {On estimation of variance components in hierarchical - credibility}, - year = 2008, - note = {In preparation}, - language = {english} +@Article{Centeno_02, + author = {Centeno, M. {d.} L.}, + title = {Measuring the effects of reinsurance by the + adjustment coefficient in the Sparre-Anderson model}, + journal = IME, + year = 2002, + volume = 30, + pages = {37-49} } -@Book{HoggKlugman, - author = {Hogg, R. V. and Klugman, S. A.}, - title = {Loss Distributions}, - publisher = {Wiley}, - year = 1984, - address = {New York}, - isbn = {0-4718792-9-0}, +@Book{Daykin_et_al, + author = {Daykin, C.D. and Pentikäinen, T. and Pesonen, M.}, + title = {Practical Risk Theory for Actuaries}, + publisher = {Chapman \& Hall}, + year = 1994, + address = {London}, + isbn = {0-4124285-0-4}, language = {english} } -@Manual{Matrix, - title = {Matrix: A Matrix package for R}, - author = {Bates, D. and Maechler, M.}, - year = {2007}, - note = {R package version 0.999375-3}, -} - @Book{DenuitCharpentier1, author = {Denuit, M. and Charpentier, A.}, title = {Math\'ematiques de l'assurance non-vie}, @@ -108,79 +116,48 @@ @Book{DenuitCharpentier1 language = {francais} } -@Book{LivreVert, - author = {Goovaerts, M. J. and Hoogstad, W. J.}, - title = {Credibility theory}, - series = {Surveys of actuarial studies}, - number = 4, - year = 1987, - publisher = {Nationale-Nederlanden N.V.}, - address = {Netherlands}, - language = {english} -} - -@Book{MASS, - author = {Venables, W. N. and Ripley, B. D.}, - title = {Modern applied statistics with {S}}, - publisher = {Springer}, - year = 2002, - edition = 4, - address = {New York}, - isbn = {0-3879545-7-0}, - language = {english} -} - -@Book{MART, - author = {Kaas, R. and Goovaerts, M. and Dhaene, J. and Denuit, M.}, - title = {Modern actuarial risk theory}, - publisher = {Kluwer {A}cademic {P}ublishers}, - year = 2001, - address = {Dordrecht}, - isbn = {0-7923763-6-6}, +@Book{Gerber_MRT, + author = {Gerber, H. 
U.}, + title = {An Introduction to Mathematical Risk Theory}, + publisher = {Huebner Foundation}, + year = 1979, + address = {Philadelphia}, language = {english} } -@Article{Buhlmann_69, - author = {B{\"u}hlmann, H.}, - title = {Experience rating and credibility}, - year = 1969, - journal = AB, - volume = 5, - pages = {157-165}, +@Article{Goulet:lossdist:2008, + author = {Goulet, V. and Pigeon, M.}, + title = {Statistical Modeling of Loss Distributions Using + \pkg{actuar}}, + journal = {R News}, + year = 2008, + volume = 8, + number = 1, + pages = {34-40}, + month = {May}, + url = {http://cran.r-project.org/doc/Rnews/Rnews_2008-1.pdf}, language = {english} } -@Article{Panjer_81, - author = {Panjer, H. H.}, - title = {Recursive evaluation of a family of compound distributions}, - journal = AB, - year = 1981, - volume = 12, - pages = {22-26}, +@Article{Goulet:simpf:2008, + author = {Goulet, V. and Pouliot, L.-P.}, + title = {Simulation of Compound Hierarchical Models in {R}}, + journal = NAAJ, + year = 2008, + note = {To appear}, language = {english} } -@Article{Jewell_75, - author = {Jewell, W. S.}, - title = {The use of collateral data in credibility theory: a - hierarchical model}, - year = 1975, - journal = {Giornale dell'Istituto Italiano degli Attuari}, - volume = 38, - pages = {1-16}, +@Article{Goulet_JAP, + author = {Goulet, V.}, + title = {Principles and application of credibility theory}, + year = 1998, + journal = {Journal of Actuarial Practice}, + volume = 6, + pages = {5-62}, language = {english} } -@Article{Centeno_02, - author = {Centeno, M. {d.} L.}, - title = {Measuring the effects of reinsurance by the - adjustment coefficient in the Sparre-Anderson model}, - journal = IME, - year = 2002, - volume = 30, - pages = {37-49} -} - @Article{Goulet_cfs, author = {Forgues, A. and Goulet, V. and Lu, J.}, title = {Credibility for severity revisited}, @@ -192,40 +169,49 @@ @Article{Goulet_cfs language = {english} } -@Manual{SuppDists, - title = {SuppDists: Supplementary distributions}, - author = {Wheeler, B.}, - year = {2006}, - note = {R package version 1.1-0}, - url = {http://www.bobwheeler.com/stat}, +@InProceedings{Hachemeister_75, + author = {Hachemeister, C. A.}, + title = {Credibility for Regression Models with Application + to Trend}, + year = 1975, + booktitle = {Credibility, theory and applications}, + series = {Proceedings of the berkeley Actuarial Research + Conference on Credibility}, + pages = {129-163}, + publisher = {Academic Press}, + address = {New York}, + language = {english} } -@Book{Buhlmann_Gisler, - author = {B{\"u}hlmann, H. and Gisler, A.}, - title = {A course in credibility theory and its applications}, - publisher = {Springer}, - year = 2005, - isbn = {3-5402575-3-5}, +@Book{HoggKlugman, + author = {Hogg, R. V. and Klugman, S. A.}, + title = {Loss Distributions}, + publisher = {Wiley}, + year = 1984, + address = {New York}, + isbn = {0-4718792-9-0}, language = {english} } -@Article{BJ_87, - author = {B{\"u}hlmann, H. and Jewell, W. S.}, - title = {Hierarchical credibility revisited}, - year = 1987, - journal = MVSVM, - volume = 87, - pages = {35-54}, +@Article{Jewell_75, + author = {Jewell, W. 
S.}, + title = {The use of collateral data in credibility theory: a + hierarchical model}, + year = 1975, + journal = {Giornale dell'Istituto Italiano degli Attuari}, + volume = 38, + pages = {1-16}, language = {english} } -@Article{Goulet_JAP, - author = {Goulet, V.}, - title = {Principles and application of credibility theory}, - year = 1998, - journal = {Journal of Actuarial Practice}, - volume = 6, - pages = {5-62}, +@Book{LivreVert, + author = {Goovaerts, M. J. and Hoogstad, W. J.}, + title = {Credibility theory}, + series = {Surveys of actuarial studies}, + number = 4, + year = 1987, + publisher = {Nationale-Nederlanden N.V.}, + address = {Netherlands}, language = {english} } @@ -239,6 +225,56 @@ @Book{LossModels language = {english} } +@Book{LossModels2e, + author = {Klugman, S. A. and Panjer, H. H. and Willmot, G.}, + title = {Loss Models: From Data to Decisions}, + edition = {Second}, + publisher = {Wiley}, + year = 2004, + address = {New York}, + isbn = {0-4712157-7-5}, + language = {english} +} + +@Book{MART, + author = {Kaas, R. and Goovaerts, M. and Dhaene, J. and Denuit, M.}, + title = {Modern actuarial risk theory}, + publisher = {Kluwer {A}cademic {P}ublishers}, + year = 2001, + address = {Dordrecht}, + isbn = {0-7923763-6-6}, + language = {english} +} + +@Book{MASS, + author = {Venables, W. N. and Ripley, B. D.}, + title = {Modern applied statistics with {S}}, + publisher = {Springer}, + year = 2002, + edition = 4, + address = {New York}, + isbn = {0-3879545-7-0}, + language = {english} +} + +@Manual{Matrix, + title = {Matrix: A Matrix package for \proglang{R}}, + author = {Bates, D. and Maechler, M.}, + year = 2008, + note = {\proglang{R} package version 0.999375-7}, + url = {http://cran.r-project.org/package=Matrix}, +} + +@Book{Neuts_81, + author = {Neuts, M. F.}, + title = {Matrix-geometric solutions in stochastic models: an + algorithmic approach}, + publisher = {Dover Publications}, + year = 1981, + isbn = {978-0-4866834-2-3}, + language = {english} +} + @Unpublished{Ohlsson, author = {Ohlsson, E.}, title = {Simplified estimation of structure parameters in @@ -249,32 +285,13 @@ @Unpublished{Ohlsson language = {english} } -@Book{Gerber_MRT, - author = {Gerber, H. U.}, - title = {An Introduction to Mathematical Risk Theory}, - publisher = {Huebner Foundation}, - year = 1979, - address = {Philadelphia}, - language = {english} -} - -@Article{BS_70, - author = {B{\"u}hlmann, H. and Straub, E.}, - title = {Glaubgw{\"u}rdigkeit f{\"u}r {S}chadens{\"a}tze}, - year = 1970, - journal = MVSVM, - volume = 70, - pages = {111-133} -} - -@Article{AsmussenRolski_91, - author = {Asmussen, S. and Rolski, T.}, - title = {Computational methods in risk theory: a - matrix-algorithmic approach}, - journal = IME, - year = 1991, - volume = 10, - pages = {259-274}, +@Article{Panjer_81, + author = {Panjer, H. H.}, + title = {Recursive evaluation of a family of compound distributions}, + journal = AB, + year = 1981, + volume = 12, + pages = {22-26}, language = {english} } @@ -288,3 +305,31 @@ @Article{Scollnik:2001:MCMC pages = {96-124}, language = {english} } + +@Manual{SuppDists, + title = {SuppDists: Supplementary distributions}, + author = {Wheeler, B.}, + year = 2008, + note = {\proglang{R} package version 1.1-2}, + url = {http://cran.r-project.org/package=SuppDists} +} + +@Article{actuar, + author = {Dutang, C and Goulet, V. 
and Pigeon, M.}, + title = {\pkg{actuar}: An {R} Package for Actuarial Science}, + journal = {Journal of Statistical Software}, + year = 2008, + volume = 25, + number = 7, + url = {http://www.actuar-project.org}, + language = {english} +} + +@Article{cm, + author = {Goulet, V. and Ouellet, T.}, + title = {On parameter estimation in hierarchical credibility}, + journal = AB, + year = 2008, + note = {Submitted for publication}, + language = {english} +} diff --git a/inst/doc/actuar.dvi b/inst/doc/actuar.dvi deleted file mode 100644 index 5e42b41..0000000 Binary files a/inst/doc/actuar.dvi and /dev/null differ diff --git a/inst/doc/actuar.pdf b/inst/doc/actuar.pdf index cbf8b1e..68ddd26 100644 Binary files a/inst/doc/actuar.pdf and b/inst/doc/actuar.pdf differ diff --git a/inst/doc/coverage.pdf b/inst/doc/coverage.pdf index d2ff438..48567e0 100644 Binary files a/inst/doc/coverage.pdf and b/inst/doc/coverage.pdf differ diff --git a/inst/doc/credibility.Rnw b/inst/doc/credibility.Rnw index e28b819..67ae464 100644 --- a/inst/doc/credibility.Rnw +++ b/inst/doc/credibility.Rnw @@ -14,6 +14,7 @@ \title{Credibility theory features of \pkg{actuar}} \author{Christophe Dutang \\ ISFA, Université Claude Bernard Lyon 1 \\[3ex] Vincent Goulet \\ École d'actuariat, Université Laval \\[3ex] + Xavier Milhaud \\ ISFA, Université Claude Bernard Lyon 1 \\[3ex] Mathieu Pigeon \\ École d'actuariat, Université Laval} \date{} @@ -25,6 +26,12 @@ \newcommand{\code}[1]{\texttt{#1}} \newcommand{\pt}{{\scriptscriptstyle \Sigma}} + %% Redefine Sweave environments with smaller font + \RecustomVerbatimEnvironment{Sinput}{Verbatim}{% + fontshape=sl,fontsize=\small,xleftmargin=0pt} + \RecustomVerbatimEnvironment{Soutput}{Verbatim}{% + fontsize=\small,xleftmargin=0pt} + \bibliographystyle{plainnat} \definecolor{Red}{rgb}{0.7,0,0} @@ -44,7 +51,7 @@ <>= library(actuar) -options(width = 62, digits = 4) +options(width = 68, digits = 4) @ \begin{document} @@ -60,13 +67,13 @@ among a heterogeneous group of policyholders (henceforth called applicable in any setting where repeated measures are made for subjects with different risk levels. -The credibility theory facilities of \pkg{actuar} consist of matrix +The credibility theory features of \pkg{actuar} consist of matrix \code{hachemeister} containing the famous data set of -\cite{Hachemeister_75} and function \code{cm} to fit hierarchical and -regression credibility models. Furthermore, function \code{simul} can -simulate portfolios of data satisfying the assumptions of the -aforementioned credibility models; see the \code{"simulation"} -vignette for details. +\cite{Hachemeister_75} and function \code{cm} to fit hierarchical +(including Bühlmann, Bühlmann-Straub) and regression credibility +models. Furthermore, function \code{simul} can simulate portfolios of +data satisfying the assumptions of the aforementioned credibility +models; see the \code{"simulation"} vignette for details. \section{Hachemeister data set} @@ -97,9 +104,10 @@ Function \code{cm} acts as a unified interface for all credibility models supported by the package. Currently, these are the unidimensional models of \cite{Buhlmann_69} and \cite{BS_70}, the hierarchical model of \cite{Jewell_75} (of which the first two are -special cases) and the regression model of \cite{Hachemeister_75}. The -modular design of \code{cm} makes it easy to add new models if -desired. 
+special cases) and the regression model of \cite{Hachemeister_75},
+optionally with the intercept at the barycenter of time
+\citep[Section~8.4]{Buhlmann_Gisler}. The modular design of \code{cm}
+makes it easy to add new models if desired.
 
 This subsection concentrates on usage of \code{cm} for hierarchical
 models.
@@ -111,9 +119,16 @@ estimators of the between variance structure parameters: the unbiased
 estimators of \cite{Buhlmann_Gisler} (the default), the slightly
 different version of \cite{Ohlsson} and the iterative
 pseudo-estimators as found in \cite{LivreVert} or \cite{Goulet_JAP}.
-For instance, for a two-level hierarchical model like
-\eqref{eq:hierarchical-model:2}, the best linear prediction for year
-$n + 1$ based on ratios $X_{ijt} = S_{ijt}/w_{ijt}$ is
+
+Consider an insurance portfolio where contracts are classified into
+cohorts. In our terminology, this is a two-level hierarchical
+classification structure. The observations are claim amounts
+$S_{ijt}$, where index $i = 1, \dots, I$ identifies the cohort, index
+$j = 1, \dots, J_i$ identifies the contract within the cohort and
+index $t = 1, \dots, n_{ij}$ identifies the period (usually a year).
+To each data point corresponds a weight --- or volume --- $w_{ijt}$.
+Then, the best linear prediction for the next period outcome of a
+contract based on ratios $X_{ijt} = S_{ijt}/w_{ijt}$ is
 \begin{equation}
   \label{eq:hierarchical:premiums}
   \begin{split}
@@ -121,16 +136,21 @@ $n + 1$ based on ratios $X_{ijt} = S_{ijt}/w_{ijt}$ is
     \hat{\pi}_i &= z_i X_{izw} + (1 - z_i) m \\
   \end{split}
 \end{equation}
-with
+with the credibility factors
 \begin{align*}
   z_{ij}
-  &= \frac{w_{ij\pt}}{w_{ij\pt} + s^2/a}, &
-  X_{ijw}
-  &= \sum_{t = 1}^{n_{ij}} \frac{w_{ijt}}{w_{ij\pt}}\, X_{ijt} \\
-  z_i
+  &= \frac{w_{ij\pt}}{w_{ij\pt} + s^2/a}, &
+  w_{ij\pt}
+  &= \sum_{t = 1}^{n_{ij}} w_{ijt} \\
+  z_{i}
   &= \frac{z_{i\pt}}{z_{i\pt} + a/b}, &
-  X_{izw}
-  &= \sum_{j = 1}^{J_i} \frac{z_{ij}}{z_{i\pt}}\, X_{ijw}.
+  z_{i\pt}
+  &= \sum_{j = 1}^{J_i} z_{ij}
+\end{align*}
+and the weighted averages
+\begin{align*}
+  X_{ijw} &= \sum_{t = 1}^{n_{ij}} \frac{w_{ijt}}{w_{ij\pt}}\, X_{ijt} \\
+  X_{izw} &= \sum_{j = 1}^{J_i} \frac{z_{ij}}{z_{i\pt}}\, X_{ijw}.
 \end{align*}
 
 The estimator of $s^2$ is
@@ -209,7 +229,7 @@ One will recognize the output format of \code{simul} and its summary
 methods.
 
 Then, function \code{cm} works much the same as \code{lm}. It takes in
-argument a formula of the form \code{\~{} terms} describing the
+argument: a formula of the form \code{\~{} terms} describing the
 hierarchical interactions in a data set; the data set containing the
 variables referenced in the formula; the names of the columns where
 the ratios and the weights are to be found in the data set. The
@@ -304,25 +324,103 @@ weighted average of an entity's regression parameters and the group's
 parameters.
 
 In order to use \code{cm} to fit a credibility regression model to a
-data set, one has to specify a vector or matrix of regressors by means
-of argument \code{xreg}. For example, fitting the model
+data set, one simply has to supply as additional arguments
+\code{regformula} and \code{regdata}. The first one is a formula of
+the form \code{\~{} terms} describing the regression model and the
+second is a data frame of regressors.
That is, arguments
+\code{regformula} and \code{regdata} are in every respect equivalent
+to arguments \code{formula} and \code{data} of \code{lm}, with the
+minor difference that \code{regformula} does not need to have a left
+hand side (and is ignored if present). For example, fitting the model
 \begin{displaymath}
-  X_{it} = \beta_0 + \beta_1 (12 - t) + \varepsilon_t, \quad
+  X_{it} = \beta_0 + \beta_1 t + \varepsilon_t, \quad
   t = 1, \dots, 12
 \end{displaymath}
-to the original data set of \citeauthor{Hachemeister_75} is done with
+to the original data set of \cite{Hachemeister_75} is done with
 <>=
-fit <- cm(~state, hachemeister, xreg = 12:1,
+fit <- cm(~state, hachemeister,
+          regformula = ~ time, regdata = data.frame(time = 1:12),
          ratios = ratio.1:ratio.12, weights = weight.1:weight.12)
 fit
 @
 
 Computing the credibility premiums requires to give the ``future''
-values of the regressors as in \code{predict.lm}, although with a
-simplified syntax for the one regressor case:
+values of the regressors as in \code{predict.lm}:
+<>=
+predict(fit, newdata = data.frame(time = 13))
+@
+
+It is well known that the basic regression model has a major drawback:
+there is no guarantee that the credibility regression line will lie
+between the collective and individual ones. This may lead to grossly
+inadequate premiums, as Figure~\ref{fig:state4} shows.
+
+\begin{figure}[t]
+  \centering
+<>=
+plot(NA, xlim = c(1, 13), ylim = c(1000, 2000), xlab = "", ylab = "")
+x <- cbind(1, 1:12)
+lines(1:12, x %*% fit$means$portfolio,
+      col = "blue", lwd = 2)
+lines(1:12, x %*% fit$means$state[, 4],
+      col = "red", lwd = 2, lty = 2)
+lines(1:12, x %*% coefficients(fit$adj.models[[4]]),
+      col = "darkgreen", lwd = 2, lty = 3)
+points(13, predict(fit, newdata = data.frame(time = 13))[4],
+       pch = 8, col = "darkgreen")
+legend("bottomright",
+       legend = c("collective", "individual", "credibility"),
+       col = c("blue", "red", "darkgreen"), lty = 1:3)
+@
+  \caption{Collective, individual and credibility regression lines for
+    State 4 of the Hachemeister data set. The point indicates the
+    credibility premium.}
+  \label{fig:state4}
+\end{figure}
+
+The solution proposed by \cite{Buhlmann:regression:1997} is simply to
+position the intercept at the barycenter of time instead of at time
+origin \citep[see also][Section~8.4]{Buhlmann_Gisler}. In mathematical
+terms, this essentially amounts to using an orthogonal design matrix.
+By setting the argument \code{adj.intercept} to \code{TRUE} in the
+call, \code{cm} will automatically fit the credibility regression
+model with the intercept at the barycenter of time. The resulting
+regression coefficients have little meaning, but the predictions are
+sensible:
 <>=
-predict(fit, newdata = 0) # future value of regressor
+fit2 <- cm(~state, hachemeister,
+           regformula = ~ time, regdata = data.frame(time = 1:12),
+           adj.intercept = TRUE,
+           ratios = ratio.1:ratio.12, weights = weight.1:weight.12)
+summary(fit2, newdata = data.frame(time = 13))
+@ %
+Figure~\ref{fig:state4:2} shows the beneficial effect of the
+intercept adjustment on the premium of State 4.
+ +\begin{figure}[t] + \centering +<>= +plot(NA, xlim = c(1, 13), ylim = c(1000, 2000), xlab = "", ylab = "") +x <- cbind(1, 1:12) +R <- fit2$transition +lines(1:12, x %*% solve(R, fit2$means$portfolio), + col = "blue", lwd = 2) +lines(1:12, x %*% solve(R, fit2$means$state[, 4]), + col = "red", lwd = 2, lty = 2) +lines(1:12, x %*% solve(R, coefficients(fit2$adj.models[[4]])), + col = "darkgreen", lwd = 2, lty = 3) +points(13, predict(fit2, newdata = data.frame(time = 13))[4], + pch = 8, col = "darkgreen") +legend("bottomright", + legend = c("collective", "individual", "credibility"), + col = c("blue", "red", "darkgreen"), lty = 1:3) @ + \caption{Collective, individual and credibility regression lines for + State 4 of the Hachemeister data set when the intercept is + positioned at the barycenter of time. The point indicates the + credibility premium.} + \label{fig:state4:2} +\end{figure} \bibliography{actuar} diff --git a/inst/doc/credibility.pdf b/inst/doc/credibility.pdf index d370c7d..6a4d4dc 100644 Binary files a/inst/doc/credibility.pdf and b/inst/doc/credibility.pdf differ diff --git a/inst/doc/lossdist.Rnw b/inst/doc/lossdist.Rnw index d7db06c..01d0b3f 100644 --- a/inst/doc/lossdist.Rnw +++ b/inst/doc/lossdist.Rnw @@ -17,9 +17,8 @@ Mathieu Pigeon \\ École d'actuariat, Université Laval} \date{} - %% Some new commands. Commands \E and \VAR redefined to fit within - %% the author's writing habits... - \newcommand{\E}[1]{\mathrm{E}[ #1 ]} + %% Some new commands + \newcommand{\E}[1]{E[ #1 ]} \newcommand{\VAR}[1]{\mathrm{Var} [ #1 ]} \newcommand{\LAS}{\mathrm{LAS}} \newcommand{\mat}[1]{\mathbf{#1}} @@ -27,6 +26,12 @@ \newcommand{\pkg}[1]{\textbf{#1}} \newcommand{\code}[1]{\texttt{#1}} + %% Redefine Sweave environments with smaller font + \RecustomVerbatimEnvironment{Sinput}{Verbatim}{% + fontshape=sl,fontsize=\small,xleftmargin=0pt} + \RecustomVerbatimEnvironment{Soutput}{Verbatim}{% + fontsize=\small,xleftmargin=0pt} + \bibliographystyle{plainnat} \definecolor{Red}{rgb}{0.7,0,0} @@ -56,14 +61,15 @@ options(width = 62, digits = 4) \section{Introduction} \label{sec:introduction} -One important task of actuaries is the modeling of claim amounts +One important task of actuaries is the modeling of claim amount distributions for ratemaking, loss reserving or other risk evaluation -purposes. Package \pkg{actuar} offers many functions -related to loss distributions modeling. The -following subsections detail the following \pkg{actuar} features: +purposes. Package \pkg{actuar} \citep{actuar} offers many functions +for loss distributions modeling. The following subsections detail the +following \pkg{actuar} features: \begin{enumerate} -\item introduction of 18 additional probability laws and functions to - get raw moments, limited moments and the moment generating function; +\item introduction of 18 additional probability laws and utility + functions to get raw moments, limited moments and the moment + generating function; \item fairly extensive support of grouped data; \item calculation of the empirical raw and limited moments; \item minimum distance estimation using three different measures; @@ -78,14 +84,15 @@ following subsections detail the following \pkg{actuar} features: density function (pdf), the cumulative distribution function (cdf) and the quantile function of a fair number of probability laws, as well as functions to generate variates from these laws. 
For some root -\code{foo}, the functions are named \code{dfoo}, \code{pfoo}, +\code{foo}, the utility functions are named \code{dfoo}, \code{pfoo}, \code{qfoo} and \code{rfoo}, respectively. The \pkg{actuar} package provides \code{d}, \code{p}, \code{q} and \code{r} functions for all the probability laws useful for loss severity modeling found in Appendix A of \cite{LossModels2e} and not -already present in base R, excluding the inverse Gaussian and log-$t$ -but including the loggamma distribution \citep{HoggKlugman}. +already present in base \proglang{R}, excluding the inverse Gaussian +and log-$t$ but including the loggamma distribution +\citep{HoggKlugman}. Table \ref{tab:distributions} lists the supported distributions as named in \cite{LossModels2e} along with the root names of the @@ -100,9 +107,6 @@ gamma, exponential and Weibull distributions in base \proglang{R}. \begin{table} \centering - \caption{Probability laws supported by \pkg{actuar} classified by - family and root names of the \proglang{R} functions.} - \label{tab:distributions} \begin{tabular}{lll} \toprule Family & Distribution & Root \\ @@ -128,6 +132,9 @@ gamma, exponential and Weibull distributions in base \proglang{R}. & Generalized beta & \code{genbeta} \\ \bottomrule \end{tabular} + \caption{Probability laws supported by \pkg{actuar} classified by + family and root names of the \proglang{R} functions.} + \label{tab:distributions} \end{table} In addition to the \code{d}, \code{p}, \code{q} and \code{r} @@ -140,7 +147,7 @@ functions to compute, respectively, theoretical raw moments theoretical limited moments \begin{equation} \label{eq:def:limited-moment} - \E{(X \wedge x)^k} = \E{(\min{X, x})^k} + \E{(X \wedge x)^k} = \E{\min(X, x)^k} \end{equation} and the moment generating function \begin{equation} @@ -149,13 +156,13 @@ and the moment generating function \end{equation} when it exists. Every probability law of Table \ref{tab:distributions} is supported, plus the following ones: beta, exponential, chi-square, -gamma, lognormal, normal (except \code{lev}), uniform and Weibull of -base \proglang{R} and the inverse Gaussian distribution of package +gamma, lognormal, normal (no \code{lev}), uniform and Weibull of base +\proglang{R} and the inverse Gaussian distribution of package \pkg{SuppDists} \citep{SuppDists}. The \code{m} and \code{lev} functions are especially useful with estimation methods based on the -matching of raw or limited moments; see Subsection -\ref{sec:empirical-moments} for their empirical counterparts. The -\code{mgf} functions come in handy to compute the adjustment +matching of raw or limited moments; see +Section~\ref{sec:empirical-moments} for their empirical counterparts. +The \code{mgf} functions come in handy to compute the adjustment coefficient in ruin theory; see the \code{"risk"} vignette. In addition to the 17 distributions of Table \ref{tab:distributions}, @@ -183,8 +190,8 @@ random variable with parameters $\pmb{\pi}$ and $\mat{T}$ is \label{eq:cdf-phtype} F(x) = \begin{cases} - 1 - \pmb{\pi} e^{\mat{T} x} \mat{e}, & x > 0 \\ - \pi_{m + 1}, & x = 0, + \pi_{m + 1}, & x = 0, \\ + 1 - \pmb{\pi} e^{\mat{T} x} \mat{e}, & x > 0 \end{cases} \end{equation} where @@ -192,7 +199,7 @@ where \label{eq:matrix-exponential} e^{\mat{M}} = \sum_{n = 0}^\infty \frac{\mat{M}^n}{n!} \end{equation} -is the \emph{matrix exponential} of matrix $\mat{M}$. +is the matrix exponential of matrix $\mat{M}$. 
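As a quick illustration of the cdf \eqref{eq:cdf-phtype}, the following
sketch evaluates a two-state phase-type distribution; it is not part of
the original vignette text, and the root name \code{phtype} together
with the argument names \code{prob} and \code{rates} are assumptions
about the package interface rather than taken from the text above.
<<eval=FALSE>>=
## Sketch only: two transient states, initial probabilities (0.5, 0.3)
## (hence a mass of 0.2 at x = 0) and sub-intensity matrix T.
prob <- c(0.5, 0.3)
rates <- matrix(c(-2, 0, 1, -5), nrow = 2)
pphtype(c(0, 1, 5), prob = prob, rates = rates)
@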
The exponential, the Erlang (gamma with integer shape parameter) and discrete mixtures thereof are common special cases of phase-type @@ -217,8 +224,8 @@ Grouped data is data represented in an interval-frequency manner. Typically, a grouped data set will report that there were $n_j$ claims in the interval $(c_{j - 1}, c_j]$, $j = 1, \dots, r$ (with the possibility that $c_r = \infty$). This representation is much more -compact than an individual data set (where the value of each claim is -known), but it also carries far less information. Now that storage +compact than an individual data set --- where the value of each claim is +known --- but it also carries far less information. Now that storage space in computers has almost become a non issue, grouped data has somewhat fallen out of fashion. @@ -229,7 +236,7 @@ grouped data. A standard storage method is needed since there are many ways to represent grouped data in the computer: using a list or a matrix, aligning the $n_j$s with the $c_{j - 1}$s or with the $c_j$s, omitting $c_0$ or not, etc. Moreover, with appropriate extraction, -replacement and summary functions, manipulation of grouped data +replacement and summary methods, manipulation of grouped data becomes similar to that of individual data. First, function \code{grouped.data} creates a grouped data object @@ -308,8 +315,8 @@ x[c(3, 4), 1] <- c(55, 110, 160); x It is not possible to replace the boundaries and the frequencies simultaneously. -Finally, the package defines methods of a few existing summary -functions for grouped data objects. Computing the mean +The package defines methods of a few existing summary functions for +grouped data objects. Computing the mean \begin{equation} \sum_{j = 1}^r \left( \frac{c_{j - 1} + c_j}{2} \right) n_j \end{equation} @@ -318,7 +325,7 @@ is made simple with a method for the \code{mean} function: mean(x) @ Higher empirical moments can be computed with \code{emm}; see -Subsection \ref{sec:empirical-moments}. +Section~\ref{sec:empirical-moments}. The \proglang{R} function \code{hist} splits individual data into groups and draws an histogram of the frequency distribution. The @@ -381,6 +388,15 @@ plot(Fnt) \label{fig:ogive} \end{figure} +Finally, a method of function \code{quantile} for grouped data objects +returns linearly smoothed quantiles, that is the inverse of the ogive +evaluated at various points: +<>= +quantile(x) +Fnt(quantile(x)) +@ + + \section{Data sets} \label{sec:data-sets} @@ -389,8 +405,8 @@ but it remains useful for illustrations and examples: the package includes the individual dental claims and grouped dental claims data of \cite{LossModels2e}: <>= -data(dental); dental -data(gdental); gdental +data("dental"); dental +data("gdental"); gdental @ \section{Calculation of empirical moments} @@ -479,7 +495,7 @@ distance minimization methods. \end{equation} where $\LAS(x, y) = \E{X \wedge y} - \E{X \wedge x}$, % $\tilde{\LAS}_n(x, y) = \tilde{E}_n[X \wedge y] - \tilde{E}_n[X - \wedge x]$, and $\tilde{E}_n[X \wedge x]$ is the empirical limited + \wedge x]$ and $\tilde{E}_n[X \wedge x]$ is the empirical limited expected value for grouped data. \end{enumerate} @@ -548,55 +564,54 @@ terms]{LossModels2e}. Table \ref{tab:coverage} summarizes the definitions of $Y^L$ and $Y^P$. 
\begin{table} - \begin{center} - \caption{Coverage modifications for per-loss variable ($Y^L$) and - per-payment variable ($Y^P$) as defined in \cite{LossModels2e}.} - \label{tab:coverage} -\begin{tabular}{lll} - \toprule - Coverage modification & Per-loss variable ($Y^L$) & - Per-payment variable ($Y^P$)\\ - \midrule - Ordinary deductible ($d$) & - $\begin{cases} - 0, & X \leq d \\ - X - d, & X > d - \end{cases}$ & - $\begin{cases} - X - d, & X > d - \end{cases}$ \medskip \\ - Franchise deductible ($d$) & - $\begin{cases} - 0, & X \leq d \\ - X, & X > d - \end{cases}$ & - $\begin{cases} - X, & X > d - \end{cases} $ \medskip \\ - Limit ($u$) & - $\begin{cases} - X, & X \leq u \\ - u, & X > u - \end{cases}$ & - $\begin{cases} X, & X \leq u \\ - u, & X > u - \end{cases}$ \bigskip \\ - Coinsurance ($\alpha$) & $\alpha X$ & $\alpha X$ \medskip \\ - Inflation ($r$) & $(1 + r)X$ & $(1 + r)X$ \\ - \bottomrule - \end{tabular} - \end{center} + \centering + \begin{tabular}{lll} + \toprule + Coverage modification & Per-loss variable ($Y^L$) & + Per-payment variable ($Y^P$)\\ + \midrule + Ordinary deductible ($d$) & + $\begin{cases} + 0, & X \leq d \\ + X - d, & X > d + \end{cases}$ & + $\begin{cases} + X - d, & X > d + \end{cases}$ \medskip \\ + Franchise deductible ($d$) & + $\begin{cases} + 0, & X \leq d \\ + X, & X > d + \end{cases}$ & + $\begin{cases} + X, & X > d + \end{cases} $ \medskip \\ + Limit ($u$) & + $\begin{cases} + X, & X \leq u \\ + u, & X > u + \end{cases}$ & + $\begin{cases} X, & X \leq u \\ + u, & X > u + \end{cases}$ \bigskip \\ + Coinsurance ($\alpha$) & $\alpha X$ & $\alpha X$ \medskip \\ + Inflation ($r$) & $(1 + r)X$ & $(1 + r)X$ \\ + \bottomrule + \end{tabular} + \caption{Coverage modifications for per-loss variable ($Y^L$) and + per-payment variable ($Y^P$) as defined in \cite{LossModels2e}.} + \label{tab:coverage} \end{table} -Often, one will want to use data $Y^L_1, \dots, Y^L_n$ (or $Y^P_1, -\dots, Y^P_n$) from the random variable $Y^L$ (or $Y^P$) to fit a -model on the unobservable random variable $X$. This requires -expressing the pdf or cdf of $Y^L$ (or $Y^P$) in terms of the pdf or -cdf of $X$. Function \code{coverage} of \pkg{actuar} does just that: -given a pdf or cdf and any combination of the coverage modifications -mentioned above, \code{coverage} returns a function object to compute -the pdf or cdf of the modified random variable. The function can then -be used in modeling like any other \code{dfoo} or \code{pfoo} -function. + +Often, one will want to use data $Y^P_1, \dots, Y^P_n$ (or $Y^L_1, +\dots, Y^L_n$) from the random variable $Y^P$ ($Y^L$) to fit a model +on the unobservable random variable $X$. This requires expressing the +pdf or cdf of $Y^P$ ($Y^L$) in terms of the pdf or cdf of $X$. +Function \code{coverage} of \pkg{actuar} does just that: given a pdf +or cdf and any combination of the coverage modifications mentioned +above, \code{coverage} returns a function object to compute the pdf or +cdf of the modified random variable. The function can then be used in +modeling like any other \code{dfoo} or \code{pfoo} function. For example, let $Y^P$ represent the amount paid by an insurer for a policy with an ordinary deductible $d$ and a limit $u - d$ (or maximum @@ -623,7 +638,7 @@ and its pdf is Assume $X$ has a gamma distribution. 
Then an \proglang{R} function to compute the pdf \eqref{eq:pdf-YP} in any $y$ for a deductible $d = 1$ and a limit $u = 10$ is obtained with \code{coverage} as follows: -<>= +<>= f <- coverage(pdf = dgamma, cdf = pgamma, deductible = 1, limit = 10) f f(0, shape = 5, rate = 1) @@ -633,11 +648,24 @@ f(12, shape = 5, rate = 1) @ Note how function \code{f} is built specifically for the coverage -modifications submitted and contains no useless code. For comparison -purpose, the following function contains no deductible and no limit: +modifications submitted and contains as little useless code as +possible. + +Let object \code{y} contain a sample of claims amounts from policies +with the above deductible and limit. Then one can fit a gamma +distribution by maximum likelihood to the claim severity process as +follows: +<>= +x <- rgamma(100, 2, 0.5) +y <- pmin(x[x > 1], 9) +op <- options(warn = -1) # hide warnings from fitdistr() +@ <>= -g <- coverage(dgamma, pgamma) -g +library(MASS) +fitdistr(y, f, start = list(shape = 2, rate = 0.5)) +@ +<>= +options(op) # restore warnings @ The vignette \code{"coverage"} contains more detailed pdf and cdf diff --git a/inst/doc/lossdist.dvi b/inst/doc/lossdist.dvi deleted file mode 100644 index c0a8389..0000000 Binary files a/inst/doc/lossdist.dvi and /dev/null differ diff --git a/inst/doc/lossdist.pdf b/inst/doc/lossdist.pdf index ea24aa5..54891d4 100644 Binary files a/inst/doc/lossdist.pdf and b/inst/doc/lossdist.pdf differ diff --git a/inst/doc/risk.Rnw b/inst/doc/risk.Rnw index 7f71ce5..2df7a7d 100644 --- a/inst/doc/risk.Rnw +++ b/inst/doc/risk.Rnw @@ -19,7 +19,7 @@ \date{} %% Some new commands - \newcommand{\E}[1]{\mathrm{E}[ #1 ]} + \newcommand{\E}[1]{E[ #1 ]} \newcommand{\VAR}[1]{\mathrm{Var} [ #1 ]} \newcommand{\VaR}{\mathrm{VaR}} \newcommand{\CTE}{\mathrm{CTE}} @@ -28,6 +28,12 @@ \newcommand{\pkg}[1]{\textbf{#1}} \newcommand{\code}[1]{\texttt{#1}} + %% Redefine Sweave environments with smaller font + \RecustomVerbatimEnvironment{Sinput}{Verbatim}{% + fontshape=sl,fontsize=\small,xleftmargin=0pt} + \RecustomVerbatimEnvironment{Soutput}{Verbatim}{% + fontsize=\small,xleftmargin=0pt} + \bibliographystyle{plainnat} \definecolor{Red}{rgb}{0.7,0,0} @@ -70,17 +76,17 @@ insurance company occurs. The interested reader can read more on these subjects in \cite{LossModels2e,Gerber_MRT,DenuitCharpentier1,MART}, among others. -The current version of \pkg{actuar} contains four visible functions -related to the above problems: two for the calculation of the -aggregate claim amount distribution and two for ruin probability -calculations. +The current version of \pkg{actuar} \citep{actuar} contains four +visible functions related to the above problems: two for the +calculation of the aggregate claim amount distribution and two for +ruin probability calculations. \section{The collective risk model} \label{sec:collective-risk-model} -Let the random variable $S$ represent the aggregate claim amount (or +Let random variable $S$ represent the aggregate claim amount (or total amount of claims) of a portfolio of independent risks over a fixed period of time, random variable $N$ represent the number of claims (or frequency) in the portfolio over that period, and random @@ -91,7 +97,7 @@ we have the random sum S = C_1 + \dots + C_N, \end{equation} where we assume that $C_1, C_2, \dots$ are mutually independent and -identically distributed random variables each independent from $N$. +identically distributed random variables each independent of $N$. 
The task at hand consists in evaluating numerically the cdf of $S$, given by \begin{align} @@ -121,7 +127,7 @@ is the $n$-fold convolution of $F_C(\cdot)$. If $C$ is discrete on $0, \label{sec:discretization} Some numerical techniques to compute the aggregate claim amount -distribution (see Subsection \ref{sec:aggregate}) require a +distribution (see Section~\ref{sec:aggregate}) require a discrete arithmetic claim amount distribution; that is, a distribution defined on $0, h, 2h, \dots$ for some step (or span, or lag) $h$. The package provides function \code{discretize} to discretize a @@ -250,9 +256,9 @@ supported. $0, 1, 2, \dots, m$ for some monetary unit and the frequency distribution to be a member of either the $(a, b, 0)$ or $(a, b, 1)$ family of distributions \citep{LossModels2e}. (These families - contain the Poisson, binomial and negative binomial distributions - and their extensions with an arbitrary mass at $x = 0$.) The general - recursive formula is: + contain the Poisson, binomial, negative binomial and logarithmic + distributions and their extensions with an arbitrary mass at $x = + 0$.) The general recursive formula is: \begin{displaymath} f_S(x) = \frac{(p_1 - (a + b)p_0)f_C(x) + \sum_{y=1}^{\min(x, m)}(a + by/x)f_C(y)f_S(x - y)}{1 - a f_C(0)}, @@ -268,11 +274,12 @@ supported. this amount each time the allocated space gets full. \item Exact calculation by numerical convolutions using - \eqref{eq:cdf-S} and \eqref{eq:convolution-formula}. Hence, this + \eqref{eq:cdf-S} and \eqref{eq:convolution-formula}. This also requires a discrete severity distribution. However, there is no restriction on the shape of the frequency distribution. The package merely implements the sum \eqref{eq:cdf-S}, the convolutions being - computed with \proglang{R}'s function \code{convolve}. This approach + computed with \proglang{R}'s function \code{convolve}, which in turn + uses the Fast Fourier Transform. This approach is practical for small problems only, even on today's fast computers. \item Normal approximation of the cdf, that is @@ -300,10 +307,10 @@ supported. approximation is valid for $x > \mu_S$ only and performs reasonably well when $\gamma_S < 1$. See \cite{Daykin_et_al} for details. \item Simulation of a random sample from $S$ and approximation of - $F_S(x)$ by the empirical cdf. The simulation itself is done with - function \code{simul} (see the \code{"simulation"} vignette)). - This function admits very general hierarchical models for both the - frequency and the severity components. + $F_S(x)$ by the empirical cdf \eqref{eq:ecdf}. The simulation itself + is done with function \code{simul} (see the \code{"simulation"} + vignette)). This function admits very general hierarchical models + for both the frequency and the severity components. \end{enumerate} Here also, adding other methods to \code{aggregateDist} is simple due @@ -364,8 +371,8 @@ quantile(Fs, 0.999) # quantiles @ Second, the package introduces the generic functions \code{VaR} and -\code{CTE} with methods for \code{"aggregateDist"} objects. The -former computes the value-at-risk $\VaR_\alpha$ such that +\code{CTE} with methods for objects of class \code{"aggregateDist"}. +The former computes the value-at-risk $\VaR_\alpha$ such that \begin{equation} \label{eq:VaR} \Pr[S \leq \VaR_\alpha] = \alpha, @@ -377,7 +384,7 @@ computes the conditional tail expectation \label{eq:CTE} \CTE_\alpha = \E{S|S > \VaR_\alpha}. 
\end{equation} -Here are examples: +Here are examples using object \code{Fs} obtained above: <>= VaR(Fs) CTE(Fs) @@ -442,7 +449,7 @@ the infinite time probability of ruin is \psi(u) = \Pr[U(t) < 0 \text{ for some } t \geq 0]. \end{equation} -We define some other quantities of interest in the sequel. Let $N(t)$ +We define some other quantities needed in the sequel. Let $N(t)$ denote the number of claims up to time $t \geq 0$ and $C_j$ denote the amount of claim $j$. Then the definition of $S(t)$ is analogous to \eqref{eq:definition-S}: @@ -459,12 +466,12 @@ time between claim $j - 1$ and claim $j$ is defined as $W_1 = T_1$ and W_j = T_j - T_{j - 1}, \quad j \geq 2. \end{equation} -For the rest of this discussion, we make the following assumptions +For the rest of this discussion, we make the following assumptions: \begin{enumerate} \item premiums are collected at a constant rate $c$, hence $c(t) = ct$; \item the sequence $\{T_j\}_{j \geq 1}$ forms an ordinary renewal - process, with the consequence that the random variables $W_1, W_2, + process, with the consequence that random variables $W_1, W_2, \dots$ are independent and identically distributed; \item claim amounts $C_1, C_2, \dots$ are independent and identically distributed. @@ -475,11 +482,11 @@ For the rest of this discussion, we make the following assumptions \section{Adjustment coefficient} \label{sec:adjustment-coefficient} -The quantity known as the adjustment coefficient $R$ hardly has any +The quantity known as the adjustment coefficient $\rho$ hardly has any physical interpretation, but it is useful as an approximation to the probability of ruin since we have the inequality \begin{displaymath} - \psi(u) \leq e^{-R u}, \quad u \geq 0. + \psi(u) \leq e^{-\rho u}, \quad u \geq 0. \end{displaymath} The adjustment coefficient is defined as the smallest strictly positive solution (if it exists) of the Lundberg equation @@ -496,17 +503,18 @@ most common models, then the equation can be rewritten as \end{equation} Function \code{adjCoef} of \pkg{actuar} computes the adjustment -coefficient $R$ from the following arguments: either the two moment +coefficient $\rho$ from the following arguments: either the two moment generating functions $M_C(t)$ and $M_W(t)$ (thereby assuming independence) or else function $h(t)$; the premium rate $c$; the upper -bound of the support of $M_C(t)$ or any other upper bound for $R$. +bound of the support of $M_C(t)$ or any other upper bound for $\rho$. 
For example, if $W$ and $C$ are independent, $W \sim \text{Exponential}(2)$, $C \sim \text{Exponential}(1)$ and the premium rate is $c = 2.4$ (for a safety loading of 20\% using the expected value premium principle), then the adjustment coefficient is <>= -adjCoef(mgf.claim = mgfexp(x), mgf.wait = mgfexp(x, 2), p = 2.4, upper = 1) +adjCoef(mgf.claim = mgfexp(x), mgf.wait = mgfexp(x, 2), + premium.rate = 2.4, upper = 1) @ The function also supports models with proportional or excess-of-loss @@ -539,16 +547,16 @@ Figure \ref{fig:adjcoef} for the graph): <>= mgfx <- function(x, y) mgfexp(x * y) p <- function(x) 2.6 * x - 0.2 -R <- adjCoef(mgfx, mgfexp(x, 2), premium = p, upper = 1, reins = "prop", +rho <- adjCoef(mgfx, mgfexp(x, 2), premium = p, upper = 1, reins = "prop", from = 0, to = 1) -R(c(0.75, 0.8, 0.9, 1)) -plot(R) +rho(c(0.75, 0.8, 0.9, 1)) +plot(rho) @ \begin{figure}[t] \centering <>= -plot(R) +plot(rho) @ \caption{Adjustment coefficient as a function of the retention rate} \label{fig:adjcoef} @@ -673,7 +681,7 @@ ruin(claims = "p", par.claims = list(prob = prob, rates = rates), @ To ease plotting of the probability of ruin function, the package -provides a method of \code{plot} for objects returned by \code{ruin}. +provides a method of \code{plot} for objects returned by \code{ruin} that is a simple wrapper for \code{curve} (see Figure \ref{fig:prob-ruin}): <>= diff --git a/inst/doc/risk.pdf b/inst/doc/risk.pdf index 5850ac3..a3ce44f 100644 Binary files a/inst/doc/risk.pdf and b/inst/doc/risk.pdf differ diff --git a/inst/doc/simulation.Rnw b/inst/doc/simulation.Rnw index b29ab6b..9137b7f 100644 --- a/inst/doc/simulation.Rnw +++ b/inst/doc/simulation.Rnw @@ -23,6 +23,12 @@ \newcommand{\pkg}[1]{\textbf{#1}} \newcommand{\code}[1]{\texttt{#1}} + %% Redefine Sweave environments with smaller font + \RecustomVerbatimEnvironment{Sinput}{Verbatim}{% + fontshape=sl,fontsize=\small,xleftmargin=0pt} + \RecustomVerbatimEnvironment{Soutput}{Verbatim}{% + fontsize=\small,xleftmargin=0pt} + \bibliographystyle{plainnat} \definecolor{Red}{rgb}{0.7,0,0} @@ -99,7 +105,10 @@ distributed random variables each independent of $N$. The package provides function \code{simul} to simulate data from compound models like \eqref{eq:definition-S} where both the frequency and the severity components can have a hierarchical structure. The -function also supports weights (or volumes) in the model. +function also supports weights (or volumes) in the model. We give here +a brief description of the function and its usage; see +\cite{Goulet:simpf:2008} for details about the supported models and +more thorough examples. \section{Description of hierarchical models} @@ -391,9 +400,8 @@ frequency(simul(list(entity = 3, year = 5), \end{example} One will find more examples of \code{simul} usage in the -\code{simulation} demo file. -Function \code{simul} was used to simulate the data in -\cite{Goulet_cfs}. +\code{simulation} demo file. Function \code{simul} was used to +simulate the data in \cite{Goulet_cfs}. 
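As a self-contained starting point, here is one minimal call; it is a
sketch rather than an excerpt from the vignette, and the
expression-per-level form of \code{model.freq} and \code{model.sev}
(with the number of variates supplied internally by \code{simul}) is
assumed from the partial call shown above.
<<eval=FALSE>>=
## Sketch only: 3 entities observed over 5 years; the entity level
## draws a random effect that parametrizes the year-level distribution.
pf <- simul(nodes = list(entity = 3, year = 5),
            model.freq = expression(entity = rgamma(2, 2),
                                    year = rpois(entity)),
            model.sev = expression(entity = rnorm(5, 1),
                                   year = rlnorm(entity, 1)))
aggregate(pf)   # aggregate claim amounts by entity and year
@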
\bibliography{actuar} diff --git a/inst/doc/simulation.dvi b/inst/doc/simulation.dvi deleted file mode 100644 index d4ce8b3..0000000 Binary files a/inst/doc/simulation.dvi and /dev/null differ diff --git a/inst/doc/simulation.pdf b/inst/doc/simulation.pdf index 05b4030..fdef536 100644 Binary files a/inst/doc/simulation.pdf and b/inst/doc/simulation.pdf differ diff --git a/inst/po/fr/LC_MESSAGES/R-actuar.mo b/inst/po/fr/LC_MESSAGES/R-actuar.mo index c6d046d..a950f70 100644 Binary files a/inst/po/fr/LC_MESSAGES/R-actuar.mo and b/inst/po/fr/LC_MESSAGES/R-actuar.mo differ diff --git a/inst/po/fr/LC_MESSAGES/actuar.mo b/inst/po/fr/LC_MESSAGES/actuar.mo index 9e0d576..3a1be24 100644 Binary files a/inst/po/fr/LC_MESSAGES/actuar.mo and b/inst/po/fr/LC_MESSAGES/actuar.mo differ diff --git a/man/adjCoef.Rd b/man/adjCoef.Rd index 6e4da22..7cf1a75 100644 --- a/man/adjCoef.Rd +++ b/man/adjCoef.Rd @@ -1,7 +1,7 @@ \name{adjCoef} \alias{adjCoef} \alias{plot.adjCoef} -\title{Adjustment coefficient} +\title{Adjustment Coefficient} \description{ Compute the adjustment coefficient in ruin theory, or return a function to compute the adjustment coefficient for various reinsurance @@ -100,7 +100,7 @@ adjCoef(mgf.claim, mgf.wait = mgfexp(x), premium.rate, upper.bound, \emph{Loss Models, From Data to Decisions, Second Edition}, Wiley. } \author{ - Christophe Dutang, Vincent Goulet \email{vincent.goulet@act.ulaval.ca} + Christophe Dutang, Vincent Goulet \email{vincent.goulet@act.ulaval.ca} } \examples{ ## Basic example: no reinsurance, exponential claim severity and wait @@ -125,7 +125,7 @@ R1 <- adjCoef(mgfx, premium = p, upper = 1, reins = "proportional", from = 0, to = 1, n = 11) R2 <- adjCoef(h = h, upper = 1, reins = "p", from = 0, to = 1, n = 101) -R1(seq(0, 1, length = 10)) # evaluation for various retention rates +R1(seq(0, 1, length = 10)) # evaluation for various retention rates R2(seq(0, 1, length = 10)) # same plot(R1) # graphical representation plot(R2, col = "green", add = TRUE) # smoother function diff --git a/man/aggregateDist.Rd b/man/aggregateDist.Rd index 1741e63..10ffdd9 100644 --- a/man/aggregateDist.Rd +++ b/man/aggregateDist.Rd @@ -48,8 +48,8 @@ aggregateDist(method = c("recursive", "convolution", "normal", unit). Used only with \code{"recursive"} and \code{"convolution"} methods.} \item{moments}{vector of the true moments of the aggregate claim - amount distribution; required only by the \code{"normal"} - or \code{"npower"} methods.} + amount distribution; required only by the \code{"normal"} or + \code{"npower"} methods.} \item{nb.simul}{number of simulations for the \code{"simulation"} method.} \item{\dots}{parameters of the frequency distribution for the \code{"recursive"} method; further arguments to be passed to or diff --git a/man/bstraub.Rd b/man/bstraub.Rd deleted file mode 100644 index 1194535..0000000 --- a/man/bstraub.Rd +++ /dev/null @@ -1,136 +0,0 @@ -\name{bstraub} -\alias{bstraub} -\alias{predict.bstraub} -\alias{predict.bstraub.old} -\title{Buhlmann-Straub Credibility Model} -\description{ - \code{bstraub} computes structure parameters estimators in the - Bühlmann-Straub credibility model and \code{predict.bstraub} computes - the credibility premiums. 
-} -\usage{ -bstraub(ratios, weights, - method = c("unbiased", "iterative"), - tol = sqrt(.Machine$double.eps), maxit = 100, - echo = FALSE, old.format = TRUE) - -\method{predict}{bstraub}(object, levels = NULL, newdata, \dots) -\method{predict}{bstraub.old}(object, \dots) -} -\arguments{ - \item{ratios}{a matrix of ratios (contracts in lines, years in columns).} - \item{weights}{a matrix of weights corresponding to ratios.} - \item{method}{estimator of the between contract heterogeneity - parameter used in premium calculation; \code{"unbiased"} for the - usual Bühlmann-Straub estimator, \code{"iterative"} for the - Bischel-Straub estimator (see below).} - \item{tol}{maximum relative error in the iterative procedure.} - \item{maxit}{maximum number of iterations.} - \item{echo}{logical; whether to echo iterative procedure or not.} - \item{old.format}{logical; if \code{TRUE}, return results in the - deprecated pre-0.9-4 format.} - \item{object}{an object of class \code{"bstraub"}.} - \item{levels, newdata, \dots}{unused arguments.} -} -\details{ - Direct usage of this function is deprecated. The function will not be - exported in future versions of the package. Use - \code{\link[actuar]{cm}} instead. - - The credibility premium of contract \eqn{i} is given by - \deqn{z_i X_{iw} + (1 - z_i) X_{zw},}{z[i] X[iw] + (1 - z[i]) X[zw],} - where - \deqn{z_{i} = \frac{w_{i\cdot} \hat{a}}{w_{i\cdot} \hat{a} + \hat{s}^2},}{% - z[i] = (w[i.] a)/(w[i.] a + s^2),} - \eqn{X_{iw}}{X[iw]} is the weighted average of the ratios of contract - \eqn{i}, \eqn{X_{zw}}{X[zw]} is the weighted average of the matrix of - ratios using credibility factors and \eqn{w_{i\cdot}}{w[i.]} is - the total weight of a contract. \eqn{\hat{s}^2}{s^2} is the estimator - of the within contract heterogeneity and \eqn{\hat{a}}{a} is the - estimator of the between contract heterogeneity. - - Missing data are represented by \code{NA} in both the matrix of ratios - and the matrix of weights. The function can cope with complete lines - of \code{NA} in case a contract has no experience. - - \code{bstraub} computes the structure parameters estimators and - returns an object of class \code{"bstraub"}. The methods of - \code{\link[stats]{predict}} compute the credibility premiums. -} -\section{Estimation of a}{ - The Bühlmann-Straub unbiased estimator (\code{heterogeneity = - "unbiased"}) of the between contracts heterogeneity parameter is - \deqn{% - \hat{a} = c \left( \sum_{i = 1}^I w_{i\cdot} (X_{iw} - X_{ww})^2 - - (I - 1)\hat{s}^2 \right),}{% - a = c sum(w[i.] * (X[iw] - X[ww])^2 - (I - 1) * s^2),} - where \eqn{c = w_{\cdot\cdot}/(w_{\cdot\cdot}^2 - \sum_{i = 1}^I - w_{i\cdot}^2)}{c = w[..]/(w[..]^2 - sum(w[i.]^2))} and \eqn{I} is the - number of contracts. - - The Bishel-Straub pseudo-estimator (\code{heterogeneity = - "iterative"}) is obtained recursively as the solution of - \deqn{% - \hat{a} = \frac{1}{I - 1} \sum_{i=1}^I z_i (X_{iw} - X_{zw})^2.}{% - a = 1/(I - 1) sum(z[i] * (X[iw] - X[zw])^2).} - The fixed point algorithm is used with a relative error of \code{tol} - as stopping criteria. -} -\value{ - For \code{bstraub} with \code{old.format = TRUE} , an object of - \code{\link[base]{class}} \code{"bstraub.old"}. This format is - deprecated. 
An object of class \code{"bstraub.old"} is a list with the - following components: - \item{individual}{vector of contract weighted averages;} - \item{collective}{collective premium estimator;} - \item{weights}{vector of contracts total weights, as used in - credibility factors;} - \item{s2}{estimator of the within contract heterogeneity parameter;} - \item{unbiased}{unbiased estimator of the between contract - heterogeneity parameter;} - \item{iterative}{iterative estimator of the between contract - heterogeneity parameter;} - \item{cred}{vector of credibility factors;} - - For \code{bstraub} with \code{old.format = FALSE}, an object of - \code{\link[base]{class}} \code{"bstraub"}. An object of class - \code{"bstraub"} is a list with the following components: - \item{means}{a list containing the collective premium estimator and - vector of contract weighted averages.} - \item{weights}{a list containing the total portfolio weight and the - vector of contracts total weights, as used in credibility factors;} - \item{unbiased}{a vector containing the unbiased variance components - estimators.} - \item{iterative}{a vector containing the iterative variance components - estimators, or \code{NULL}.} - \item{cred}{vector of credibility factors.} - \item{nodes}{a list containing the number of contracts in the - portfolio.} - - For \code{predict.bstraub}, a vector of credibility premiums. -} -\references{ - Goulet, V. (1998), \emph{Principles and Application of Credibility - Theory}, Journal of Actuarial Practice \bold{6}, 5--62. - - Goovaerts, M. J. and Kaas, R. and van Heerwaarden, A. E. and - Bauwelinckx, T. (1990), \emph{Effective actuarial methods}, - North-Holland. -} -\author{ - Vincent Goulet \email{vincent.goulet@act.ulaval.ca}, - Sébastien Auclair and Louis-Philippe Pouliot -} -\seealso{ - \code{\link{cm}} -} -\examples{ -data(hachemeister) - -## Credibility premiums calculated with the iterative estimator -fit <- bstraub(hachemeister[, 2:13], hachemeister[, 14:25], - old.format = FALSE) -fit # a list -predict(fit) # credibility premiums -} -\keyword{models} diff --git a/man/cm.Rd b/man/cm.Rd index 27977cd..0d3a762 100644 --- a/man/cm.Rd +++ b/man/cm.Rd @@ -6,12 +6,13 @@ \alias{print.summary.cm} \title{Credibility Models} \description{ - Fit any of the following credibility models: \enc{Bühlmann}{Buhlmann}, + Fit the following credibility models: \enc{Bühlmann}{Buhlmann}, \enc{Bühlmann}{Buhlmann}-Straub, hierarchical or regression (Hachemeister). } \usage{ -cm(formula, data, ratios, weights, subset, xreg = NULL, +cm(formula, data, ratios, weights, subset, + regformula = NULL, regdata, adj.intercept = FALSE, method = c("Buhlmann-Gisler", "Ohlsson", "iterative"), tol = sqrt(.Machine$double.eps), maxit = 100, echo = FALSE) @@ -24,8 +25,9 @@ cm(formula, data, ratios, weights, subset, xreg = NULL, \method{print}{summary.cm}(x, \dots) } \arguments{ - \item{formula}{a symbolic description of the model to be fit. - The details of model specification are given below.} + \item{formula}{an object of class \code{"\link{formula}"}: a symbolic + description of the model to be fit. The details of model + specification are given below.} \item{data}{a matrix or a data frame containing the portfolio structure, the ratios or claim amounts and their associated weights, if any.} @@ -36,7 +38,16 @@ cm(formula, data, ratios, weights, subset, xreg = NULL, \item{subset}{an optional logical expression indicating a subset of observations to be used in the modeling process. 
All observations are included by default.} - \item{xreg}{an optional vector or matrix of regressors.} + \item{regformula}{an object of class \code{"\link{formula}"}: symbolic + description of the regression component (see \code{"\link{lm}"} for + details). No left hand side is needed in the formula; if present it + is ignored. If \code{NULL}, no regression is done on the data.} + \item{regdata}{an optional data frame, list or environment (or object + coercible by \code{\link{as.data.frame}} to a data frame) containing + the variables in the regression model.} + \item{adj.intercept}{if \code{TRUE}, the intercept of the regression + model is located at the barycenter of the regressor instead of the + origin.} \item{method}{estimation method for the variance components of the model; see details below.} \item{tol}{tolerance level for the stopping criteria for iterative @@ -47,8 +58,8 @@ cm(formula, data, ratios, weights, subset, xreg = NULL, \item{x, object}{an object of class \code{"cm"}} \item{levels}{character vector indicating the levels to predict or to include in the summary; if \code{NULL} all levels are included.} - \item{newdata}{vector or data frame containing the variables used to - predict credibility regression models.} + \item{newdata}{data frame containing the variables used to predict + credibility regression models.} \item{\dots}{additional attributes to attach to the result for the \code{predict} and \code{summary} methods; further arguments to \code{\link[base]{format}} for the \code{print.summary} method; @@ -73,11 +84,13 @@ cm(formula, data, ratios, weights, subset, xreg = NULL, \code{data}. In general, the formula should be of the form \eqn{~ a + a:b + a:b:c + a:b:c:d + ...}. - If argument \code{xreg} is not \code{NULL}, the regression model of - Hachemeister will be fit to data. Hierarchical classification - structures are not supported with this model. Regression models - fitting is currently less robust than hierarchical models fitting. In - particular, one should avoid nodes with no data. + If argument \code{regformula} is not \code{NULL}, the regression model + of Hachemeister is fit to the data. The response is usually time. By + default, the intercept of the model is located at time origin. If + argument \code{adj.intercept} is \code{TRUE}, the intercept is moved + to the (collective) barycenter of time, by orthogonalization of the + design matrix. Note that the regression coefficients may be difficult + to interpret in this case. Arguments \code{ratios}, \code{weights} and \code{subset} are used like arguments \code{select}, \code{select} and \code{subset}, @@ -113,18 +126,19 @@ cm(formula, data, ratios, weights, subset, xreg = NULL, regression coefficients, \eqn{Z_i}{Z[i]} is the credibility matrix and \eqn{I} is the identity matrix. The credibility matrix of node \eqn{i} is equal to - \deqn{\frac{W_i}{W_i + s^2/A},}{W[i]/(W[i] + s2/A),} - where \eqn{W_i}{W[i]} is the unscaled regression covariance matrix of + \deqn{A^{-1} (A + s^2 S_i),}{A^(-1) (A + s2 S[i]),} + where \eqn{S_i}{S[i]} is the unscaled regression covariance matrix of the node, \eqn{s^2}{s2} is the average within node variance and \eqn{A} is the within node covariance matrix. - If argument \code{xreg} is a matrix, it is strongly recommended to - name the columns. + If the intercept is positioned at the barycenter of time, matrices + \eqn{S_i}{S[i]} and \eqn{A} (and hence \eqn{Z_i}{Z[i]}) are diagonal. + This amounts to use \enc{Bühlmann}{Buhlmann}-Straub models for each + regression coefficient. 
Argument \code{newdata} provides the \dQuote{future} value of the - regressors for prediction purposes. It is either as defined in - \code{\link[stats]{predict.lm}} or else a vector of length one for - regression models with a single regressor. + regressors for prediction purposes. It should be given as specified in + \code{\link[stats]{predict.lm}}. } \section{Variance components estimation}{ For hierarchical models, two sets of estimators of the variance @@ -174,8 +188,11 @@ cm(formula, data, ratios, weights, subset, xreg = NULL, level minus the effective number of nodes of the level above. The Ohlsson estimators are used as starting values. - For regression models, only iterative estimators are available, hence - argument \code{method} is not taken into account. + For regression models, with the intercept at time origin, only + iterative estimators are available. If \code{method} is different from + \code{"iterative"}, a warning is issued. With the intercept at the + barycenter of time, the choice of estimators is the same as in the + \enc{Bühlmann}{Buhlmann}-Straub model. } \value{ Function \code{cm} computes the structure parameters estimators of the @@ -202,9 +219,14 @@ cm(formula, data, ratios, weights, subset, xreg = NULL, \item{ordering}{a list containing, for each level, the affiliation of a node to the node of the level above.} - Regression fits have in addition the following component: + Regression fits have in addition the following components: \item{adj.models}{a list containing, for each node, the credibility - adjusted regression model as obtained with \code{\link[stats]{lm}}.} + adjusted regression model as obtained with + \code{\link[stats]{lm.fit}} or \code{\link[stats]{lm.wfit}}.} + \item{transition}{if \code{adj.intercept} is \code{TRUE}, a transition + matrix from the basis of the orthogonal design matrix to the basis + of the original design matrix.} + \item{terms}{the \code{\link[stats]{terms}} object used.} The method of \code{predict} for objects of class \code{"cm"} computes the credibility premiums for the nodes of every level included in @@ -225,7 +247,7 @@ cm(formula, data, ratios, weights, subset, xreg = NULL, } \author{ Vincent Goulet \email{vincent.goulet@act.ulaval.ca}, - Tommy Ouellet, Louis-Philippe Pouliot + Xavier Milhaud, Tommy Ouellet, Louis-Philippe Pouliot } \seealso{ \code{\link[base]{subset}}, \code{\link[base]{formula}}, @@ -250,12 +272,20 @@ predict(fit, levels = "unit") # unit credibility premiums only summary(fit) summary(fit, levels = "unit") # unit summaries only -## Regression model -fit <- cm(~state, hachemeister, xreg = 12:1, +## Regression model with intercept at time origin +fit <- cm(~state, hachemeister, + regformula = ~time, regdata = data.frame(time = 12:1), ratios = ratio.1:ratio.12, weights = weight.1:weight.12) fit -predict(fit, newdata = 0) # future value of regressor -summary(fit, newdata = 0) +predict(fit, newdata = data.frame(time = 0)) +summary(fit, newdata = data.frame(time = 0)) +## Same regression model, with intercept at barycenter of time +fit <- cm(~state, hachemeister, adj.intercept = TRUE, + regformula = ~time, regdata = data.frame(time = 12:1), + ratios = ratio.1:ratio.12, weights = weight.1:weight.12) +fit +predict(fit, newdata = data.frame(time = 0)) +summary(fit, newdata = data.frame(time = 0)) } \keyword{models} diff --git a/man/coverage.Rd b/man/coverage.Rd index b9ae5a8..e0f03ec 100644 --- a/man/coverage.Rd +++ b/man/coverage.Rd @@ -30,19 +30,15 @@ coverage(pdf, cdf, deductible = 0, franchise = FALSE, 
\code{FALSE} (default) for the per payment distribution.} } \details{ - \code{coverage()} returns a function to compute the probability + \code{coverage} returns a function to compute the probability density function (pdf) or the cumulative distribution function (cdf) of the distribution of losses under coverage modifications. The pdf and cdf of unmodified losses are \code{pdf} and \code{cdf}, respectively. - If both \code{pdf} and \code{cdf} are specified, the pdf is returned; - if \code{pdf} is missing or \code{NULL}, the cdf is returned. Hence, - \code{cdf} must always be specified. - - See \code{vignette("coverage")} for the exact definitions of the - per payment and per loss random variables under an ordinary or - franchise deductible. + If \code{pdf} is specified, the pdf is returned; if \code{pdf} is + missing or \code{NULL}, the cdf is returned. Note that \code{cdf} is + needed if there is a deductible or a limit. } \value{ An object of mode \code{"function"} with the same arguments as @@ -56,6 +52,11 @@ coverage(pdf, cdf, deductible = 0, franchise = FALSE, Vincent Goulet \email{vincent.goulet@act.ulaval.ca} and Mathieu Pigeon } +\seealso{ + \code{vignette("coverage")} for the exact definitions of the + per payment and per loss random variables under an ordinary or + franchise deductible. +} \examples{ ## Default case: pdf of the per payment random variable with ## an ordinary deductible @@ -88,5 +89,8 @@ coverage(dweibull, pweibull, per.loss = TRUE, limit = 5) coverage(dweibull, pweibull, franchise = TRUE, limit = 5) coverage(dweibull, pweibull, per.loss = TRUE, franchise = TRUE, limit = 5) + +## Coinsurance alone; only case that does not require the cdf +coverage(dgamma, coinsurance = 0.8) } \keyword{models} diff --git a/man/dental.Rd b/man/dental.Rd index 02d3ed5..4e58ba1 100644 --- a/man/dental.Rd +++ b/man/dental.Rd @@ -1,7 +1,7 @@ \name{dental} \docType{data} \alias{dental} -\title{Individual dental claims data set} +\title{Individual Dental Claims Data Set} \description{ Basic dental claims on a policy with a deductible of 50. } diff --git a/man/elev.Rd b/man/elev.Rd index 71f839c..ffefc98 100644 --- a/man/elev.Rd +++ b/man/elev.Rd @@ -75,7 +75,7 @@ lev summary(lev) knots(lev) # the group boundaries -lev(knots(lev)) # empirical lev at boudaries +lev(knots(lev)) # empirical lev at boundaries lev(c(80, 200, 2000)) # and at other limits plot(lev, type = "o", pch = 16) diff --git a/man/gdental.Rd b/man/gdental.Rd index a3a5810..6f924ea 100644 --- a/man/gdental.Rd +++ b/man/gdental.Rd @@ -1,7 +1,7 @@ \name{gdental} \docType{data} \alias{gdental} -\title{Grouped dental claims data set} +\title{Grouped Dental Claims Data Set} \description{ Grouped dental claims, that is presented in a number of claims per claim amount group form. diff --git a/man/grouped.data.Rd b/man/grouped.data.Rd index 72cdf4c..d243546 100644 --- a/man/grouped.data.Rd +++ b/man/grouped.data.Rd @@ -7,12 +7,14 @@ group form. } \usage{ -grouped.data(\dots, row.names = NULL, check.rows = FALSE, +grouped.data(\dots, right = TRUE, row.names = NULL, check.rows = FALSE, check.names = TRUE) } \arguments{ \item{\dots}{these arguments are either of the form \code{value} or \code{tag = value}. 
See Details.} + \item{right}{logical, indicating if the intervals should be closed on + the right (and open on the left) or vice versa.} \item{row.names, check.rows, check.names}{arguments identical to those of \code{\link{data.frame}}.} } @@ -24,16 +26,16 @@ grouped.data(\dots, row.names = NULL, check.rows = FALSE, } The first argument will be taken as the vector of group - boundaries. This vector must be exactly one element longer than + boundaries. This vector must be exactly one element longer than the other arguments, which will be taken as vectors of group frequencies. All arguments are coerced to data frames. Missing (\code{NA}) frequencies are replaced by zeros, with a warning. - + Extraction and replacement methods exist for \code{grouped.data} objects, but working on non adjacent groups will most likely yield - useless results. + useless results. } \value{ An object of \code{class} \code{c("grouped.data", "data.frame")} with diff --git a/man/hachemeister.Rd b/man/hachemeister.Rd index fc7ca83..127da75 100644 --- a/man/hachemeister.Rd +++ b/man/hachemeister.Rd @@ -1,7 +1,7 @@ \name{hachemeister} \docType{data} \alias{hachemeister} -\title{Hachemeister data set} +\title{Hachemeister Data Set} \description{ Hachemeister (1975) data set giving average claim amounts in private passenger bodily injury insurance in five U.S. states over 12 quarters @@ -15,7 +15,7 @@ \item{\code{state}}{the state number;} \item{\code{ratio.1}, \dots, \code{ratio.12}}{the average claim amounts;} - \item{\code{weight.1}, \dots, \code{weight.12}}{the + \item{\code{weight.1}, \dots, \code{weight.12}}{the corresponding number of claims.} } } diff --git a/man/hist.grouped.data.Rd b/man/hist.grouped.data.Rd index 987fb68..13b231c 100644 --- a/man/hist.grouped.data.Rd +++ b/man/hist.grouped.data.Rd @@ -1,6 +1,6 @@ \name{hist.grouped.data} \alias{hist.grouped.data} -\title{Histogram for grouped data} +\title{Histogram for Grouped Data} \description{ This method for the generic function \code{\link{hist}} is mainly useful to plot the histogram of grouped data. If \code{plot = FALSE}, diff --git a/man/ogive.Rd b/man/ogive.Rd index ebe455a..5d6b395 100644 --- a/man/ogive.Rd +++ b/man/ogive.Rd @@ -4,7 +4,7 @@ \alias{summary.ogive} \alias{knots.ogive} \alias{plot.ogive} -\title{Ogive for grouped data} +\title{Ogive for Grouped Data} \description{ Compute a smoothed empirical distribution function for grouped data. } @@ -33,18 +33,18 @@ ogive(x, y = NULL, \dots) \item{\dots}{arguments to be passed to subsequent methods.} } \details{ - The ogive of a grouped data set links the values of the empirical - cumulative distribution known at group boundaries by straight line - segments, resulting in an approximation of the empirical cdf. - + The ogive is a linear interpolation of the empirical cumulative + distribution function. + The equation of the ogive is - \deqn{F_n(x) = \frac{(c_j - x) F_n(c_{j-1}) + - (x - c_{j-1}) F_n(c_j)}{c_j - c_{j - 1}}}{% - Fn(x) = ((c[j] - x) Fn(c[j-1]) + + \deqn{G_n(x) = \frac{(c_j - x) F_n(c_{j - 1}) + + (x - c_{j - 1}) F_n(c_j)}{c_j - c_{j - 1}}}{% + Gn(x) = ((c[j] - x) Fn(c[j-1]) + (x - c[j-1]) Fn(c[j]))/(c[j] - c[j-1])} for \eqn{c_{j-1} < x \leq c_j}{c[j-1] < x <= c[j]} and where \eqn{c_0, \dots, c_r}{c[0], \dots, c[r]} are the \eqn{r + 1} group - boundaries. + boundaries and \eqn{F_n}{Fn} is the empirical distribution function of + the sample. 
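As a quick illustration of the interpolation defined by the equation above, here is a minimal R sketch using approxfun(), which the man page's \seealso notes is what is used to compute the ogive; the boundaries and frequencies are hypothetical, and this is not the package's implementation.

cj  <- c(0, 25, 50, 100, 250)        # group boundaries c[0], ..., c[r]
nj  <- c(30, 31, 57, 42)             # hypothetical group frequencies
Fnj <- c(0, cumsum(nj) / sum(nj))    # empirical cdf at the boundaries
Gn  <- approxfun(cj, Fnj, yleft = 0, yright = 1, method = "linear")
Gn(c(25, 60, 200))                   # values between boundaries are interpolated
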
 }
 \value{
   For \code{ogive}, a function of class \code{"ogive"}, inheriting from the
@@ -56,6 +56,7 @@ ogive(x, y = NULL, \dots)
 }
 \seealso{
   \code{\link{grouped.data}} to create grouped data objects;
+  \code{\link{quantile.grouped.data}} for the inverse function;
   \code{\link{approxfun}}, which is used to compute the ogive;
   \code{\link{stepfun}} for related documentation (even though the ogive
   is not a step function).
diff --git a/man/quantile.grouped.data.Rd b/man/quantile.grouped.data.Rd
new file mode 100644
index 0000000..cf0cbe5
--- /dev/null
+++ b/man/quantile.grouped.data.Rd
@@ -0,0 +1,51 @@
+\name{quantile.grouped.data}
+\alias{quantile.grouped.data}
+\title{Quantiles of Grouped Data}
+\description{
+  Sample quantiles corresponding to the given probabilities for objects
+  of class \code{"grouped.data"}.
+}
+\usage{
+\method{quantile}{grouped.data}(x, probs = seq(0, 1, 0.25),
+        names = TRUE, \dots)
+}
+\arguments{
+  \item{x}{an object of class \code{"grouped.data"}.}
+  \item{probs}{numeric vector of probabilities with values
+    in \eqn{[0, 1]}.}
+  \item{names}{logical; if true, the result has a \code{names}
+    attribute. Set to \code{FALSE} for speedup with many \code{probs}.}
+  \item{\dots}{further arguments passed to or from other methods.}
+}
+\details{
+  The quantile function is the inverse of the ogive, that is, a linear
+  interpolation of the empirical quantile function.
+
+  The equation of the quantile function is
+  \deqn{x = \frac{c_j (q - F_n(c_{j - 1})) +
+    c_{j - 1} (F_n(c_j) - q)}{F_n(c_j) - F_n(c_{j - 1})}}{%
+    x = (c[j] (q - Fn(c[j-1])) +
+    c[j-1] (Fn(c[j]) - q))/(Fn(c[j]) - Fn(c[j-1]))}
+  for \eqn{F_n(c_{j-1}) \leq q \leq F_n(c_j)}{Fn(c[j-1]) <= q <= Fn(c[j])}
+  and where \eqn{c_0, \dots, c_r}{c[0], \dots, c[r]} are the \eqn{r + 1}
+  group boundaries and \eqn{F_n}{Fn} is the empirical distribution
+  function of the sample.
+}
+\value{
+  A numeric vector, named if \code{names} is \code{TRUE}.
+}
+\seealso{
+  \code{\link{ogive}} for the smoothed empirical distribution of which
+  \code{quantile.grouped.data} is an inverse;
+  \code{\link{grouped.data}} to create grouped data objects.
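The quantile equation above can be illustrated by interpolating with the roles of the boundaries and the empirical cdf values exchanged; the numbers below are hypothetical and this sketch is not the method's actual implementation.

cj  <- c(0, 25, 50, 100, 250)                 # group boundaries
Fnj <- c(0, 0.1875, 0.38125, 0.7375, 1)       # empirical cdf at the boundaries
Qn  <- approxfun(Fnj, cj, method = "linear")  # inverse of the ogive on [0, 1]
Qn(c(0.25, 0.5, 0.9))                         # sample quantiles
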
+} +\author{ + Vincent Goulet \email{vincent.goulet@act.ulaval.ca} +} +\examples{ +data(gdental) +quantile(gdental) +Fn <- ogive(gdental) +Fn(quantile(gdental)) # inverse function +} +\keyword{univar} diff --git a/man/simul.Rd b/man/simul.Rd index 908251f..cdf83a7 100644 --- a/man/simul.Rd +++ b/man/simul.Rd @@ -2,7 +2,7 @@ \alias{simul} \alias{simpf} \alias{print.portfolio} -\title{Simulation of a portfolio of data} +\title{Simulation from Compound Hierarchical Models} \description{ \code{simul} simulates data for insurance applications allowing hierarchical structures and separate models for the frequency and diff --git a/po/R-actuar.pot b/po/R-actuar.pot index 5f8f1a6..9adaf7b 100644 --- a/po/R-actuar.pot +++ b/po/R-actuar.pot @@ -2,7 +2,7 @@ msgid "" msgstr "" "Project-Id-Version: R 2.3.0\n" "Report-Msgid-Bugs-To: bugs@r-project.org\n" -"POT-Creation-Date: 2008-02-17 18:13\n" +"POT-Creation-Date: 2008-09-12 09:56\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -80,7 +80,7 @@ msgstr "" msgid "there must be more than one node" msgstr "" -msgid "missing values are not in the same positions in weights and in ratios" +msgid "missing values are not in the same positions in 'weights' and in 'ratios'" msgstr "" msgid "no available data to fit model" @@ -89,9 +89,6 @@ msgstr "" msgid "maximum number of iterations reached before obtaining convergence" msgstr "" -msgid "this output format is deprecated" -msgstr "" - msgid "unsupported interactions in 'formula'" msgstr "" @@ -101,6 +98,9 @@ msgstr "" msgid "ratios have to be supplied if weights are" msgstr "" +msgid "empty regression model; fitting with Buhlmann-Straub's model" +msgstr "" + msgid "invalid level name" msgstr "" diff --git a/po/R-fr.po b/po/R-fr.po index 53d075f..7ef90a1 100644 --- a/po/R-fr.po +++ b/po/R-fr.po @@ -9,8 +9,8 @@ msgid "" msgstr "" "Project-Id-Version: actuar 0.9-4\n" "Report-Msgid-Bugs-To: bugs@r-project.org\n" -"POT-Creation-Date: 2007-11-04 23:30\n" -"PO-Revision-Date: 2008-02-17 18:24-0500\n" +"POT-Creation-Date: 2008-09-12 09:56\n" +"PO-Revision-Date: 2008-09-12 09:59-0400\n" "Last-Translator: Vincent Goulet \n" "Language-Team: Vincent Goulet \n" "MIME-Version: 1.0\n" @@ -93,7 +93,8 @@ msgstr "" msgid "there must be more than one node" msgstr "il doit y avoir plus d'un noeud" -msgid "missing values are not in the same positions in weights and in ratios" +msgid "" +"missing values are not in the same positions in 'weights' and in 'ratios'" msgstr "" "les données manquantes ne sont pas aux mêmes positions dans les poids et " "dans les ratios" @@ -104,9 +105,6 @@ msgstr "aucune donn msgid "maximum number of iterations reached before obtaining convergence" msgstr "nombre d'itérations maximal atteint avant obtention de la convergence" -msgid "this output format is deprecated" -msgstr "cette présentation des résultats est obsolète" - msgid "unsupported interactions in 'formula'" msgstr "interactions non supportées dans 'formula'" @@ -116,6 +114,9 @@ msgstr "mod msgid "ratios have to be supplied if weights are" msgstr "ratios requis s'il y a des poids" +msgid "empty regression model; fitting with Buhlmann-Straub's model" +msgstr "modèle de régression vide; utilisation du modèle de Bühlmann-Straub" + msgid "invalid level name" msgstr "nom de niveau incorrect" @@ -129,7 +130,7 @@ msgid "coinsurance must be between 0 and 1" msgstr "le facteur de coassurance doit être entre 0 et 1" msgid "'cdf' must be supplied" -msgstr "'cdf doit être fourni" +msgstr "'cdf' doit 
être fourni" msgid "'cdf' must be a function or an expression containing 'x'" msgstr "'cdf' doit être une fonction ou une expression contenant 'x'" @@ -282,3 +283,6 @@ msgstr "valeur de 'by' incorrecte" msgid "'x' must be a vector or a matrix" msgstr "'x' doit être un vecteur ou une matrice" + +#~ msgid "this output format is deprecated" +#~ msgstr "cette présentation des résultats est obsolète" diff --git a/po/actuar.pot b/po/actuar.pot index d49baa4..5496dd7 100644 --- a/po/actuar.pot +++ b/po/actuar.pot @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2007-11-04 22:30-0500\n" +"POT-Creation-Date: 2008-09-12 10:02-0400\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -17,8 +17,8 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" #: dpq.c:73 dpq.c:198 dpq.c:383 dpq.c:567 dpq.c:738 dpqphtype.c:49 random.c:61 -#: random.c:68 random.c:142 random.c:149 random.c:236 random.c:243 -#: random.c:330 random.c:337 randomphtype.c:66 randomphtype.c:73 +#: random.c:68 random.c:143 random.c:150 random.c:237 random.c:244 +#: random.c:331 random.c:338 randomphtype.c:66 randomphtype.c:73 msgid "invalid arguments" msgstr "" @@ -56,23 +56,27 @@ msgid "" "complete" msgstr "" -#: random.c:98 +#: random.c:87 random.c:170 random.c:265 random.c:360 +msgid "NAs produced" +msgstr "" + +#: random.c:99 msgid "internal error in do_random1" msgstr "" -#: random.c:189 +#: random.c:191 msgid "internal error in do_random2" msgstr "" -#: random.c:281 +#: random.c:283 msgid "internal error in do_random3" msgstr "" -#: random.c:374 +#: random.c:376 msgid "internal error in do_random4" msgstr "" -#: random.c:405 +#: random.c:407 msgid "internal error in do_random" msgstr "" diff --git a/po/fr.po b/po/fr.po index ac43ef7..bdab401 100644 --- a/po/fr.po +++ b/po/fr.po @@ -8,8 +8,8 @@ msgid "" msgstr "" "Project-Id-Version: actuar 0.9-4\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2007-11-04 22:30-0500\n" -"PO-Revision-Date: 2007-11-04 23:34-0500\n" +"POT-Creation-Date: 2008-09-12 10:02-0400\n" +"PO-Revision-Date: 2008-09-12 10:04-0400\n" "Last-Translator: Vincent Goulet \n" "Language-Team: Vincent Goulet \n" "MIME-Version: 1.0\n" @@ -18,8 +18,8 @@ msgstr "" "Plural-Forms: nplurals=2; plural=(n > 1);\n" #: dpq.c:73 dpq.c:198 dpq.c:383 dpq.c:567 dpq.c:738 dpqphtype.c:49 random.c:61 -#: random.c:68 random.c:142 random.c:149 random.c:236 random.c:243 -#: random.c:330 random.c:337 randomphtype.c:66 randomphtype.c:73 +#: random.c:68 random.c:143 random.c:150 random.c:237 random.c:244 +#: random.c:331 random.c:338 randomphtype.c:66 randomphtype.c:73 msgid "invalid arguments" msgstr "arguments incorrects" @@ -57,23 +57,27 @@ msgid "" "complete" msgstr "nombre de récursions maximal atteint avant obtention de la convergence" -#: random.c:98 +#: random.c:87 random.c:170 random.c:265 random.c:360 +msgid "NAs produced" +msgstr "production de NA" + +#: random.c:99 msgid "internal error in do_random1" msgstr "erreur interne dans do_random1" -#: random.c:189 +#: random.c:191 msgid "internal error in do_random2" msgstr "erreur interne dans do_random2" -#: random.c:281 +#: random.c:283 msgid "internal error in do_random3" msgstr "erreur interne dans do_random3" -#: random.c:374 +#: random.c:376 msgid "internal error in do_random4" msgstr "erreur interne dans do_random4" -#: random.c:405 +#: random.c:407 msgid "internal error in do_random" msgstr "erreur interne dans do_random" @@ -96,12 +100,16 @@ msgstr 
"erreur interne dans do_randomphtype" #: util.c:102 #, c-format msgid "LAPACK routine dgebal returned info code %d when permuting" -msgstr "la procédure LAPACK dgebal a produit le code d'erreur %d lors de la permutation" +msgstr "" +"la procédure LAPACK dgebal a produit le code d'erreur %d lors de la " +"permutation" #: util.c:106 #, c-format msgid "LAPACK routine dgebal returned info code %d when scaling" -msgstr "la procédure LAPACK dgebal a produit le code d'erreur %d lors de la mis à l'échelle" +msgstr "" +"la procédure LAPACK dgebal a produit le code d'erreur %d lors de la mis à " +"l'échelle" #: util.c:153 #, c-format @@ -129,6 +137,3 @@ msgstr "valeur incorrecte pour l'argument %d du sous-programme Lapack %s" #: util.c:278 msgid "Lapack routine dgesv: system is exactly singular" msgstr "sous-programme Lapack dgesv: le système est exactement singulier" - -#~ msgid "NaNs produced" -#~ msgstr "production de NaN" diff --git a/src/random.c b/src/random.c index fa28dc6..154934d 100644 --- a/src/random.c +++ b/src/random.c @@ -40,21 +40,21 @@ static Rboolean random1(double (*f)(), double *a, int na, double *x, int n) { ai = a[i % na]; x[i] = f(ai); - if (!R_FINITE(x[i])) naflag = TRUE; + if (ISNAN(x[i])) naflag = TRUE; } return(naflag); } + #define RAND1(num, fun) \ case num: \ - random1(fun, REAL(a), na, REAL(x), n); \ + naflag = random1(fun, REAL(a), na, REAL(x), n); \ break SEXP do_random1(int code, SEXP args) { SEXP x, a; int i, n, na; - Rboolean naflag = FALSE; /* Check validity of arguments */ if (!isVector(CAR(args)) || !isNumeric(CADR(args))) @@ -84,12 +84,13 @@ SEXP do_random1(int code, SEXP args) { for (i = 0; i < n; i++) REAL(x)[i] = NA_REAL; + warning(_("NAs produced")); } /* Otherwise, dispatch to appropriate r* function */ else { + Rboolean naflag = FALSE; PROTECT(a = coerceVector(CADR(args), REALSXP)); - naflag = FALSE; GetRNGstate(); switch (code) { @@ -119,21 +120,21 @@ static Rboolean random2(double (*f)(), double *a, int na, ai = a[i % na]; bi = b[i % nb]; x[i] = f(ai, bi); - if (!R_FINITE(x[i])) naflag = TRUE; + if (ISNAN(x[i])) naflag = TRUE; } return(naflag); } #define RAND2(num, fun) \ case num: \ - random2(fun, REAL(a), na, REAL(b), nb, REAL(x), n); \ + naflag = random2(fun, REAL(a), na, REAL(b), nb, REAL(x), n); \ break + SEXP do_random2(int code, SEXP args) { SEXP x, a, b; int i, n, na, nb; - Rboolean naflag = FALSE; /* Check validity of arguments */ if (!isVector(CAR(args)) || @@ -166,13 +167,14 @@ SEXP do_random2(int code, SEXP args) { for (i = 0; i < n; i++) REAL(x)[i] = NA_REAL; + warning(_("NAs produced")); } /* Otherwise, dispatch to appropriate r* function */ else { + Rboolean naflag = FALSE; PROTECT(a = coerceVector(CADR(args), REALSXP)); PROTECT(b = coerceVector(CADDR(args), REALSXP)); - naflag = FALSE; GetRNGstate(); switch (code) { @@ -197,7 +199,6 @@ SEXP do_random2(int code, SEXP args) return x; } - /* Functions for three parameter distributions */ static Rboolean random3(double (*f) (), double *a, int na, double *b, int nb, double *c, int nc, @@ -212,21 +213,21 @@ static Rboolean random3(double (*f) (), double *a, int na, bi = b[i % nb]; ci = c[i % nc]; x[i] = f(ai, bi, ci); - if (!R_FINITE(x[i])) naflag = TRUE; + if (!ISNAN(x[i])) naflag = TRUE; } return(naflag); } #define RAND3(num, fun) \ case num: \ - random3(fun, REAL(a), na, REAL(b), nb, REAL(c), nc, REAL(x), n); \ + naflag = random3(fun, REAL(a), na, REAL(b), nb, REAL(c), nc, REAL(x), n); \ break + SEXP do_random3(int code, SEXP args) { SEXP x, a, b, c; int i, n, na, nb, nc; - Rboolean naflag 
= FALSE; /* Check validity of arguments */ if (!isVector(CAR(args)) || @@ -261,14 +262,15 @@ SEXP do_random3(int code, SEXP args) { for (i = 0; i < n; i++) REAL(x)[i] = NA_REAL; + warning(_("NAs produced")); } /* Otherwise, dispatch to appropriate r* function */ else { + Rboolean naflag = FALSE; PROTECT(a = coerceVector(CADR(args), REALSXP)); PROTECT(b = coerceVector(CADDR(args), REALSXP)); PROTECT(c = coerceVector(CADDDR(args), REALSXP)); - naflag = FALSE; GetRNGstate(); switch (code) { @@ -305,21 +307,20 @@ static Rboolean random4(double (*f) (), double *a, int na, ci = c[i % nc]; di = d[i % nd]; x[i] = f(ai, bi, ci, di); - if (!R_FINITE(x[i])) naflag = TRUE; + if (!ISNAN(x[i])) naflag = TRUE; } return(naflag); } #define RAND4(num, fun) \ case num: \ - random4(fun, REAL(a), na, REAL(b), nb, REAL(c), nc, REAL(d), nd, REAL(x), n); \ + naflag = random4(fun, REAL(a), na, REAL(b), nb, REAL(c), nc, REAL(d), nd, REAL(x), n); \ break SEXP do_random4(int code, SEXP args) { SEXP x, a, b, c, d; int i, n, na, nb, nc, nd; - Rboolean naflag = FALSE; /* Check validity of arguments */ if (!isVector(CAR(args)) || @@ -355,16 +356,17 @@ SEXP do_random4(int code, SEXP args) if (na < 1 || nb < 1 || nc < 1 || nd < 1) { for (i = 0; i < n; i++) - REAL(x)[i] = NA_REAL; + REAL(x)[i] = NA_REAL; + warning(_("NAs produced")); } /* Otherwise, dispatch to appropriate r* function */ else { + Rboolean naflag = FALSE; PROTECT(a = coerceVector(CADR(args), REALSXP)); PROTECT(b = coerceVector(CADDR(args), REALSXP)); PROTECT(c = coerceVector(CADDDR(args), REALSXP)); PROTECT(d = coerceVector(CAD4R(args), REALSXP)); - naflag = FALSE; GetRNGstate(); switch (code) {