Skip to content

Commit

Permalink
Merge pull request #46 from mayer79/importfrom
Browse files Browse the repository at this point in the history
remove importFrom
  • Loading branch information
mayer79 committed Apr 28, 2023
2 parents 8e5ee5b + 0210874 commit 300e9fd
Show file tree
Hide file tree
Showing 20 changed files with 208 additions and 113 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ Meta
/doc/
/Meta/
inst/doc
revdep

6 changes: 3 additions & 3 deletions CRAN-SUBMISSION
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Version: 2.2.0
Date: 2023-03-24 19:48:56 UTC
SHA: 7213048cfb0727fc7f705715bf603ab86550dc61
Version: 2.2.1
Date: 2023-04-28 12:35:48 UTC
SHA: d9029a1a099cc255088439b2e623f7fa3b4aecf0
10 changes: 0 additions & 10 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,3 @@ export(generateNA)
export(imputeUnivariate)
export(missRanger)
export(pmm)
importFrom(FNN,knnx.index)
importFrom(ranger,ranger)
importFrom(stats,predict)
importFrom(stats,reformulate)
importFrom(stats,rmultinom)
importFrom(stats,setNames)
importFrom(stats,terms.formula)
importFrom(stats,var)
importFrom(utils,setTxtProgressBar)
importFrom(utils,txtProgressBar)
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# missRanger 2.2.1

- Maintenance: Remove "documentation" workflow
- Switch from `importFrom` to `::` code style
- Documentation improved

# missRanger 2.2.0

Expand Down
107 changes: 57 additions & 50 deletions R/missRanger.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' Fast Imputation of Missing Values by Chained Random Forests
#'
#' Uses the "ranger" package (Wright & Ziegler) to do fast missing value imputation by
#' Uses the {ranger} package (Wright & Ziegler) to do fast missing value imputation by
#' chained random forests, see Stekhoven & Buehlmann and Van Buuren & Groothuis-Oudshoorn.
#' Between the iterative model fitting, it offers the option of predictive mean matching.
#' This firstly avoids imputation with values not present in the original data
Expand All @@ -21,14 +21,11 @@
#' single-argument function of the number of available covariables,
#' e.g. \code{mtry = function(m) max(1, m %/% 3)}.
#'
#' @importFrom stats var reformulate terms.formula predict setNames
#' @importFrom ranger ranger
#' @importFrom utils setTxtProgressBar txtProgressBar
#' @param data A \code{data.frame} or \code{tibble} with missing values to impute.
#' @param formula A two-sided formula specifying variables to be imputed
#' (left hand side) and variables used to impute (right hand side).
#' Defaults to \code{. ~ .}, i.e. use all variables to impute all variables.
#' If e.g. all variables (with missings) should be imputed by all variables
#' Defaults to \code{. ~ .}, i.e., use all variables to impute all variables.
#' For instance, if all variables (with missings) should be imputed by all variables
#' except variable "ID", use \code{. ~ . - ID}. Note that a "." is evaluated
#' separately for each side of the formula. Further note that variables with missings
#' must appear in the left hand side if they should be used on the right hand side.
Expand All @@ -47,10 +44,10 @@
#' is added to the output as attribute "oob". This does not work in the special case
#' when the variables are imputed univariately.
#' @param case.weights Vector with non-negative case weights.
#' @param ... Arguments passed to \code{ranger()}. If the data set is large,
#' @param ... Arguments passed to \code{ranger::ranger()}. If the data set is large,
#' better use less trees (e.g. \code{num.trees = 20}) and/or a low value of
#' \code{sample.fraction}.
#' The following arguments are e.g. incompatible:
#' The following arguments are incompatible, amongst others:
#' \code{write.forest}, \code{probability}, \code{split.select.weights},
#' \code{dependent.variable.name}, and \code{classification}.
#'
Expand All @@ -77,8 +74,13 @@ missRanger <- function(data, formula = . ~ ., pmm.k = 0L, maxiter = 10L,
}

# 1) INITIAL CHECKS
bad_args <- c("write.forest", "probability", "split.select.weights",
"dependent.variable.name", "classification")
bad_args <- c(
"write.forest",
"probability",
"split.select.weights",
"dependent.variable.name",
"classification"
)
stopifnot(
"'data' should be a data.frame!" = is.data.frame(data),
"'data' should have at least one row and column!" = dim(data) >= 1L,
Expand Down Expand Up @@ -106,11 +108,11 @@ missRanger <- function(data, formula = . ~ ., pmm.k = 0L, maxiter = 10L,
# 2) SELECT AND CONVERT VARIABLES TO IMPUTE

# Extract lhs and rhs from formula
relevantVars <- lapply(formula[2:3], function(z) attr(terms.formula(
reformulate(z), data = data[1, ]), "term.labels"))
relevantVars <- lapply(formula[2:3], function(z) attr(stats::terms.formula(
stats::reformulate(z), data = data[1L, ]), "term.labels"))

# Pick variables from lhs with some but not all missings
toImpute <- relevantVars[[1]][vapply(data[, relevantVars[[1]], drop = FALSE],
toImpute <- relevantVars[[1L]][vapply(data[, relevantVars[[1L]], drop = FALSE],
FUN.VALUE = TRUE, function(z) anyNA(z) && !all(is.na(z)))]

# Try to convert special variables to numeric/factor
Expand Down Expand Up @@ -141,8 +143,8 @@ missRanger <- function(data, formula = . ~ ., pmm.k = 0L, maxiter = 10L,

# Variables on the rhs should either appear in "visitSeq"
# or do not contain any missings
imputeBy <- relevantVars[[2]][relevantVars[[2]] %in% visitSeq |
!vapply(data[, relevantVars[[2]], drop = FALSE], anyNA, TRUE)]
imputeBy <- relevantVars[[2L]][relevantVars[[2L]] %in% visitSeq |
!vapply(data[, relevantVars[[2L]], drop = FALSE], anyNA, TRUE)]
completed <- setdiff(imputeBy, visitSeq)

if (verbose) {
Expand All @@ -157,7 +159,7 @@ missRanger <- function(data, formula = . ~ ., pmm.k = 0L, maxiter = 10L,
j <- 1L
crit <- TRUE
verboseDigits <- 4L
predError <- setNames(rep(1, length(visitSeq)), visitSeq)
predError <- stats::setNames(rep(1, length(visitSeq)), visitSeq)

if (verbose >= 2) {
cat("\n", abbreviate(visitSeq, minlength = verboseDigits + 2L), sep = "\t")
Expand All @@ -167,11 +169,11 @@ missRanger <- function(data, formula = . ~ ., pmm.k = 0L, maxiter = 10L,
while (crit && j <= maxiter) {
if (verbose) {
if (verbose == 1) {
i <- 1
i <- 1L
cat("\n")
cat(paste("iter", j))
cat("\n")
pb <- txtProgressBar(0, length(visitSeq), style = 3)
pb <- utils::txtProgressBar(0, length(visitSeq), style = 3)
} else if (verbose >= 2) {
cat("\niter ", j, ":\t", sep = "")
}
Expand All @@ -186,16 +188,18 @@ missRanger <- function(data, formula = . ~ ., pmm.k = 0L, maxiter = 10L,
if (length(completed) == 0L) {
data[[v]] <- imputeUnivariate(data[[v]])
} else {
fit <- ranger(formula = reformulate(completed, response = v),
data = data[!v.na, union(v, completed), drop = FALSE],
case.weights = case.weights[!v.na], ...)
pred <- predict(fit, data[v.na, completed, drop = FALSE])$predictions
data[v.na, v] <- if (pmm.k) pmm(xtrain = fit$predictions,
xtest = pred,
ytrain = data[[v]][!v.na],
k = pmm.k) else pred
fit <- ranger::ranger(
formula = stats::reformulate(completed, response = v),
data = data[!v.na, union(v, completed), drop = FALSE],
case.weights = case.weights[!v.na],
...
)
pred <- stats::predict(fit, data[v.na, completed, drop = FALSE])$predictions
data[v.na, v] <- if (pmm.k) pmm(
xtrain = fit$predictions, xtest = pred, ytrain = data[[v]][!v.na], k = pmm.k
) else pred
predError[[v]] <- fit$prediction.error / (
if (fit$treetype == "Regression") var(data[[v]][!v.na]) else 1
if (fit$treetype == "Regression") stats::var(data[[v]][!v.na]) else 1
)

if (is.nan(predError[[v]])) {
Expand All @@ -209,8 +213,8 @@ missRanger <- function(data, formula = . ~ ., pmm.k = 0L, maxiter = 10L,

if (verbose) {
if (verbose == 1) {
setTxtProgressBar(pb, i)
i <- i + 1
utils::setTxtProgressBar(pb, i)
i <- i + 1L
} else if (verbose >= 2) {
cat(format(round(predError[[v]], verboseDigits),
nsmall = verboseDigits), "\t")
Expand Down Expand Up @@ -239,15 +243,14 @@ missRanger <- function(data, formula = . ~ ., pmm.k = 0L, maxiter = 10L,
revert(converted, X = dataLast)
}

#' A version of \code{typeof} internally used by \code{missRanger}.
#' A version of \code{typeof()} internally used by \code{missRanger()}.
#'
#' @description Returns either "numeric" (double or integer), "factor", "character", "logical", "special" (mode numeric, but neither double nor integer) or "" (otherwise).
#' \code{missRanger} requires this information to deal with response types not natively supported by \code{ranger}.
#'
#' @author Michael Mayer
#' Returns either "numeric" (double or integer), "factor", "character", "logical",
#' "special" (mode numeric, but neither double nor integer) or "" (otherwise).
#' \code{missRanger} requires this information to deal with response types not natively
#' supported by \code{ranger::ranger()}.
#'
#' @param object Any object.
#'
#' @return A string.
typeof2 <- function(object) {
if (is.numeric(object)) "numeric" else
Expand All @@ -259,14 +262,16 @@ typeof2 <- function(object) {

#' Conversion of non-factor/non-numeric variables.
#'
#' @description Converts non-factor/non-numeric variables in a data frame to factor/numeric. Stores information to revert back.
#'
#' @author Michael Mayer
#' Converts non-factor/non-numeric variables in a data frame to factor/numeric.
#' Stores information to revert back.
#'
#' @param X A data frame.
#' @param check If \code{TRUE}, the function checks if the converted columns can be reverted without changes.
#'
#' @return A list with the following elements: \code{X} is the converted data frame, \code{vars}, \code{types}, \code{classes} are the names, types and classes of the converted variables. Finally, \code{bad} names variables in \code{X} that should have been converted but could not.
#' @param check If \code{TRUE}, the function checks if the converted columns can be
#' reverted without changes.
#' @return A list with the following elements: \code{X} is the converted data frame,
#' \code{vars}, \code{types}, \code{classes} are the names, types and classes of the
#' converted variables. Finally, \code{bad} names variables in \code{X} that should
#' have been converted but could not.
convert <- function(X, check = FALSE) {
stopifnot(is.data.frame(X))

Expand All @@ -289,14 +294,11 @@ convert <- function(X, check = FALSE) {
}

#' Revert conversion.
#'
#' @description Reverts conversions done by \code{convert}.
#'
#' @author Michael Mayer
#'
#' @param con A list returned by \code{convert}.
#' @param X A data frame with some columns to be converted back according to the information stored in \code{converted}.
#'
#' Reverts conversions done by \code{convert()}.
#' @param con A list returned by \code{convert()}.
#' @param X A data frame with some columns to be converted back according to the
#' information stored in \code{converted}.
#' @return A data frame.
revert <- function(con, X = con$X) {
stopifnot(c("vars", "types", "classes") %in% names(con), is.data.frame(X))
Expand All @@ -306,8 +308,13 @@ revert <- function(con, X = con$X) {
}

f <- function(v, ty, cl) {
switch(ty, logical = as.logical(v), character = as.character(v),
special = {class(v) <- cl; v}, v)
switch(
ty,
logical = as.logical(v),
character = as.character(v),
special = {class(v) <- cl; v},
v
)
}
X[, con$vars] <- Map(f, X[, con$vars, drop = FALSE], con$types, con$classes)
X
Expand Down
7 changes: 2 additions & 5 deletions R/pmm.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
#' values in the prediction vector \code{xtrain} is randomly chosen and its observed
#' value in \code{ytrain} is returned.
#'
#' @importFrom stats rmultinom
#' @importFrom FNN knnx.index
#'
#' @param xtrain Vector with predicted values in the training data.
#' Can be of type logical, numeric, character, or factor.
#' @param xtest Vector as \code{xtrain} with predicted values in the test data.
Expand Down Expand Up @@ -70,7 +67,7 @@ pmm <- function(xtrain, xtest, ytrain, k = 1L, seed = NULL) {

# STEP 2: PMM based on k-nearest neightbour.
k <- min(k, length(xtrain))
nn <- knnx.index(xtrain, xtest, k)
take <- t(rmultinom(nt, 1L, rep(1L, k)))
nn <- FNN::knnx.index(xtrain, xtest, k)
take <- t(stats::rmultinom(nt, 1L, rep(1L, k)))
ytrain[rowSums(nn * take)]
}
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# missRanger <a href='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/mayer79/missRanger'><img src='man/figures/logo.png' align="right" height="139" /></a>
# {missRanger} <a href='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/mayer79/missRanger'><img src='man/figures/logo.png' align="right" height="139" /></a>

<!-- badges: start -->

Expand Down
32 changes: 17 additions & 15 deletions cran-comments.md
Original file line number Diff line number Diff line change
@@ -1,25 +1,27 @@
# missRanger 2.2.0
# missRanger 2.2.1

- removed suggested dependencies dplyr, mice, survival
- improved documentation
Dear CRAN team

## R CMD check

checking for unstated dependencies in examples ... OK
This is a small update, mainly aiming at replacing "importFrom" by "::" logic, plus
some documentation improvement.

WARNING
'qpdf' is needed for checks on size reduction of PDFs
## R CMD check

checking for future file timestamps ... NOTE
unable to verify current time
- WARNING: 'qpdf' is needed for checks on size reduction of PDFs
- NOTE: unable to verify current time
- NOTE: no command 'tidy' found

## RHub

Note: lastMiKTeXException

## Winbuilder

Status: OK

## REVDEP

- OK: 7
- BROKEN: 0

## Reverse dependency check of 7 packages

- hdImpute 0.1.1 -- E: 0 | W: 0 | N: 0 - marginaleffects 0.11.0 -- E: 0 | W: 0 | N: 0 - mlim 0.3.0 -- E: 0 | W: 0 | N: 0 - NADIA 0.4.2 -- E: 0 | W: 0 | N: 1 - outForest 0.1.2 -- E: 0 | W: 0 | N: 0 - wiseR 1.0.1 -- E: 0 | W: 0 | N: 3
- worcs 0.1.10 -- E: 0 | W: 0 | N: 0
OK: 7
BROKEN: 0
14 changes: 8 additions & 6 deletions man/convert.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions man/missRanger.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 300e9fd

Please sign in to comment.