From 61cc1d00d7dfb75bf70aaa9e255210645307e6cf Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Thu, 17 Aug 2017 09:57:05 -0300 Subject: [PATCH 1/7] Implementing ML functions --- R/sits_machine_learning.R | 156 ++++++++++++++++++++++++++++++++++---- R/sits_table.R | 1 + 2 files changed, 142 insertions(+), 15 deletions(-) diff --git a/R/sits_machine_learning.R b/R/sits_machine_learning.R index 135275345..6bae309e4 100644 --- a/R/sits_machine_learning.R +++ b/R/sits_machine_learning.R @@ -26,7 +26,7 @@ #' In the context of time series classification, the only relevant types are "C-classification" and "nu-classification". #' Please refer to the documentation in that package for more details. #' -#' @param data.tb a SITS tibble time series with an alignment column +#' @param data_tb a SITS tibble time series with an alignment column #' @param kernel the kernel used in training and predicting (options = linear, polynomial, radial basis, sigmoid) #' @param degree exponential of polynomial type kernel #' @param coef0 parameter needed for kernels of type polynomial and sigmoid (default: 0) @@ -36,20 +36,20 @@ #' @return result.svm an svm model fit for the input data #' @export #' -sits_train_svm <- function(data.tb = NULL, kernel = "linear", +sits_train_svm <- function(data_tb = NULL, kernel = "linear", degree = 3, coef0 = 0, cost = 100, tolerance = 0.001, epsilon = 0.1){ # is the input data the result of a TWDTW matching function? - ensurer::ensure_that(data.tb, "matches" %in% names (.), err_desc = "sits_train_svm: input data does not contain TWDTW matches") + ensurer::ensure_that(data_tb, "matches" %in% names (.), err_desc = "sits_train_svm: input data does not contain TWDTW matches") # Spread TWDTW matches - spread.tb <- .sits_spread_matches(data.tb) + spread_tb <- .sits_spread_matches(data_tb) - categories <- names(spread.tb)[-2:0] + categories <- names(spread_tb)[-2:0] lognomes <- paste0('log(', categories, ')') formula1 <- stats::as.formula(paste("factor(reference) ~ ", paste(lognomes, collapse = " + "))) - result.svm <- e1071::svm(formula1, data = spread.tb, + result.svm <- e1071::svm(formula1, data = spread_tb, type = "C-classification", kernel = kernel, degree = degree, epsilon = epsilon, cost = cost) } @@ -80,7 +80,7 @@ sits_train_svm <- function(data.tb = NULL, kernel = "linear", #' @export #' sits_train <- function(data_tb, training_method = sits_svm(formula = sits_formula_logref(predictors_index = -2:0), kernel = "linear", - degree = 3, coef0 = 0, tolerance = 0.001, epsilon = 0.1, ...)){ + degree = 3, coef0 = 0, tolerance = 0.001, epsilon = 0.1)){ # is the input data the result of a TWDTW matching function? ensurer::ensure_that(data_tb, "matches" %in% names (.), err_desc = "sits_train: input data does not contain TWDTW matches") @@ -89,7 +89,7 @@ sits_train <- function(data_tb, training_method = sits_svm(formula = sits_formul ensurer::ensure_that(training_method, class(.) == "function", err_desc = "sits_train: train_method is not a valid function") # Spread TWDTW matches - spread_tb <- .sits_spread_matches(data.tb) + spread_tb <- .sits_spread_matches(data_tb) # compute the training method by the given data result <- training_method(spread_tb) @@ -109,7 +109,7 @@ sits_train <- function(data_tb, training_method = sits_svm(formula = sits_formul #' #' @param data_tb a SITS tibble time series with an alignment column. If data_tb is NULL, the function returns #' a function to be called further to compute svm training model according to given parameters -#' @param formula a symbolic description of the model to be fit. SITS offers a set of such formulas (default: sits_svm) +#' @param formula a symbolic description of the model to be fit. SITS offers a set of such formulas (default: sits_formula_logref) #' @param kernel the kernel used in training and predicting (options = linear, polynomial, radial basis, sigmoid) #' @param degree exponential of polynomial type kernel #' @param coef0 parameter needed for kernels of type polynomial and sigmoid (default: 0) @@ -144,6 +144,132 @@ sits_svm <- function(data_tb = NULL, formula = sits_formula_logref(), kernel = " return(result) } +#' @title Train SITS classifiction models +#' @name sits_lda +#' +#' @author Alexandre Xavier Ywata de Carvalho, \email{alexandre.ywata@@ipea.gov.br} +#' @author Victor Maus, \email{vwmaus1@@gmail.com} +#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br} +#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br} +#' +#' @description Performs a linear discriminant analysis (lda) to classify data. +#' This function is a front-end to the "lda" method in the "MASS" package. +#' Please refer to the documentation in that package for more details. +#' +#' @param data_tb a SITS tibble time series with an `matches` column. If `data_tb` is NULL, the function returns +#' a pre-parameterized function to be called further in order to compute `lda` training model +#' @param formula a symbolic description of the model to be fit. SITS offers a set of such formulas (default: sits_formula_logref) +#' @param ... other parameters to be passed to MASS::lda function +#' @return result either an MASS::lda class or an function prepared that can be called further to compute lda training model +#' @export +#' +sits_lda <- function(data_tb = NULL, formula = sits_formula_logref(), ...) { + + # function that returns MASS::lda model based on a sits sample tibble + result_fun <- function(tb){ + + # if parameter formula is a function call it passing as argument the input data sample. The function must return a valid formula. + if (class(formula) == "function") + formula <- formula(tb) + + # call MASS::lda method and return the trained lda model + result_lda <- MASS::lda(formula = formula, data = tb, ...) + return(result_lda) + } + + # if no data is given, we prepare a function to compute lda as model + if (is.null(data_tb)) + result <- result_fun + # ...otherwise compute lda model and return it + else + result <- result_fun(data_tb) + return(result) +} + +#' @title Train SITS classifiction models +#' @name sits_mlr +#' +#' @author Alexandre Xavier Ywata de Carvalho, \email{alexandre.ywata@@ipea.gov.br} +#' @author Victor Maus, \email{vwmaus1@@gmail.com} +#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br} +#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br} +#' +#' @description Use multinomial log-linear (mlr) fitting model via neural networks to classify data. +#' This function is a front-end to the "multinom" method in the "nnet" package. +#' Please refer to the documentation in that package for more details. +#' +#' @param data_tb a SITS tibble time series with an `matches` column. If `data_tb` is NULL, the function returns +#' a pre-parameterized function to be called further in order to compute `multinom` training model +#' @param formula a symbolic description of the model to be fit. SITS offers a set of such formulas (default: sits_formula_logref) +#' @param ... other parameters to be passed to nnet::multinom function +#' @return result either an nnet::multinom class or an function prepared that can be called further to compute multinom training model +#' @export +#' +sits_mlr <- function(data_tb = NULL, formula = sits_formula_logref(), ...) { + + # function that returns nnet::multinom model based on a sits sample tibble + result_fun <- function(tb){ + + # if parameter formula is a function call it passing as argument the input data sample. The function must return a valid formula. + if (class(formula) == "function") + formula <- formula(tb) + + # call nnet::multinom method and return the trained multinom model + result_mlr <- nnet::multinom(formula = formula, data = tb, ...) + return(result_mlr) + } + + # if no data is given, we prepare a function to compute multinom as model + if (is.null(data_tb)) + result <- result_fun + # ...otherwise compute multinom model and return it + else + result <- result_fun(data_tb) + return(result) +} + +#' @title Train SITS classifiction models +#' @name sits_glm +#' +#' @author Alexandre Xavier Ywata de Carvalho, \email{alexandre.ywata@@ipea.gov.br} +#' @author Victor Maus, \email{vwmaus1@@gmail.com} +#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br} +#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br} +#' +#' @description Use generalized liner model (glm) via penalized maximim likelihood to classify data. +#' This function is a front-end to the "glmnet" method in the "glmnet" package. +#' Please refer to the documentation in that package for more details. +#' +#' @param data_tb a SITS tibble time series with an `matches` column. If `data_tb` is NULL, the function returns +#' a pre-parameterized function to be called further in order to compute `multinom` training model +#' @param formula a symbolic description of the model to be fit. SITS offers a set of such formulas (default: sits_formula_logref) +#' @param ... other parameters to be passed to nnet::multinom function +#' @return result either an nnet::multinom class or an function prepared that can be called further to compute multinom training model +#' @export +#' +sits_glm <- function(data_tb = NULL, formula = sits_formula_logref(), ...) { + + # function that returns nnet::multinom model based on a sits sample tibble + result_fun <- function(tb){ + + # if parameter formula is a function call it passing as argument the input data sample. The function must return a valid formula. + if (class(formula) == "function") + formula <- formula(tb) + + # call nnet::multinom method and return the trained multinom model + result_mlr <- nnet::multinom(formula = formula, data = tb, ...) + return(result_mlr) + } + + # if no data is given, we prepare a function to compute multinom as model + if (is.null(data_tb)) + result <- result_fun + # ...otherwise compute multinom model and return it + else + result <- result_fun(data_tb) + return(result) +} + #' @title Train SITS classifiction models #' @name sits_formula_logref #' @@ -192,21 +318,21 @@ sits_formula_logref <- function(predictors_index = NULL){ #' @description Given a SITS tibble time series and a model trained by \code{\link[sits]{sits_train_svm}}, #' returns a SITS tibble with the classification. #' -#' @param data.tb a SITS tibble time series +#' @param data_tb a SITS tibble time series #' @param model a model trained by \code{\link[sits]{sits_train_svm}} #' @param ... other parameters to pass to \code{\link[sits]{sits_patterns}} and #' \code{\link[sits]{sits_TWDTW_matches}} #' #' @export -sits_predict <- function(data.tb = NULL, model){ +sits_predict <- function(data_tb = NULL, model){ # is the input data the result of a TWDTW matching function? - ensurer::ensure_that(data.tb, "matches" %in% names (.), err_desc = "sits_train_svm: input data does not contain TWDTW matches") + ensurer::ensure_that(data_tb, "matches" %in% names (.), err_desc = "sits_train_svm: input data does not contain TWDTW matches") # Spread TWDTW matches - spread.tb <- .sits_spread_matches(data.tb) + spread_tb <- .sits_spread_matches(data_tb) - data.tb$predicted <- as.character(stats::predict(model, newdata = spread.tb)) + data_tb$predicted <- as.character(stats::predict(model, newdata = spread_tb)) - return(data.tb) + return(data_tb) } diff --git a/R/sits_table.R b/R/sits_table.R index 3ea347d76..5581b2406 100644 --- a/R/sits_table.R +++ b/R/sits_table.R @@ -749,6 +749,7 @@ sits_relabel_conv <- function (file = NULL){ #' @author Victor Maus, \email{vwmaus1@@gmail.com} #' @author Gilberto Camara, \email{gilberto.camara@@inpe.br} #' @author Alexandre Xavier Ywata de Carvalho, \email{alexandre.ywata@@ipea.gov.br} +#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br} #' #' @description Given a SITS tibble with a matches, returns a tibble whose columns have #' the reference label and the TWDTW distances for each temporal pattern. From f4fa2441849287b614576d4f397ca1acf086e671 Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Fri, 18 Aug 2017 12:45:17 -0300 Subject: [PATCH 2/7] Update ML functions --- R/sits_machine_learning.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/sits_machine_learning.R b/R/sits_machine_learning.R index be7d11a04..724f98092 100644 --- a/R/sits_machine_learning.R +++ b/R/sits_machine_learning.R @@ -180,21 +180,21 @@ sits_mlr <- function(data.tb = NULL, formula = sits_formula_logref(), ...) { #' @param data.tb a SITS tibble time series with an `matches` column. If `data.tb` is NULL, the function returns #' a pre-parameterized function to be called further in order to compute `multinom` training model #' @param formula a symbolic description of the model to be fit. SITS offers a set of such formulas (default: sits_formula_logref) -#' @param ... other parameters to be passed to nnet::multinom function -#' @return result either an nnet::multinom class or an function prepared that can be called further to compute multinom training model +#' @param ... other parameters to be passed to glmnet::multinom function +#' @return result either an glmnet::multinom class or an function prepared that can be called further to compute multinom training model #' @export #' sits_glm <- function(data.tb = NULL, formula = sits_formula_logref(), ...) { - # function that returns nnet::multinom model based on a sits sample tibble + # function that returns glmnet::multinom model based on a sits sample tibble result_fun <- function(tb){ # if parameter formula is a function call it passing as argument the input data sample. The function must return a valid formula. if (class(formula) == "function") formula <- formula(tb) - # call nnet::multinom method and return the trained multinom model - result_mlr <- nnet::multinom(formula = formula, data = tb, ...) + # call glmnet::multinom method and return the trained multinom model + result_mlr <- glmnet::glmnet(formula = formula, data = tb, ...) return(result_mlr) } From ec8752edb0bfb5775cc9d40d8ff84674f8116180 Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Fri, 18 Aug 2017 15:33:28 -0300 Subject: [PATCH 3/7] New ML functions --- DESCRIPTION | 4 ++-- NAMESPACE | 3 +++ R/sits_assessment.R | 13 ++++++++----- R/sits_init.R | 4 +++- R/sits_machine_learning.R | 2 +- conf_matrix.json | 1 - man/sits_glm.Rd | 33 +++++++++++++++++++++++++++++++++ man/sits_lda.Rd | 33 +++++++++++++++++++++++++++++++++ man/sits_mlr.Rd | 33 +++++++++++++++++++++++++++++++++ man/sits_spread_matches.Rd | 2 ++ man/sits_svm.Rd | 4 ++-- 11 files changed, 120 insertions(+), 12 deletions(-) delete mode 100644 conf_matrix.json create mode 100644 man/sits_glm.Rd create mode 100644 man/sits_lda.Rd create mode 100644 man/sits_mlr.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 85c8663fc..764066d84 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: sits Type: Package Title: Satellite Image Time Series Analysis -Version: 0.8.16 -Date: 2017-08-16 +Version: 0.8.18 +Date: 2017-08-18 Authors@R: c(person('Gilberto', 'Camara', role = c('aut', 'cre'), email = 'gilberto.camara@inpe.br'), person('Rolf', 'Simoes', role = c('aut'), email = 'rolf.simoes@inpe.br'), person('Victor', 'Maus', role = c('aut'), email = 'vwmaus1@gmail.com'), diff --git a/NAMESPACE b/NAMESPACE index ad0663d9a..335c88316 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,6 +22,7 @@ export(sits_formula_logref) export(sits_gam) export(sits_getcovWTSS) export(sits_getdata) +export(sits_glm) export(sits_group_bylatlong) export(sits_infoWTSS) export(sits_interp) @@ -31,10 +32,12 @@ export(sits_kohonen) export(sits_labels) export(sits_labels_sample) export(sits_lag_diff) +export(sits_lda) export(sits_linear_interp) export(sits_max_colors) export(sits_merge) export(sits_missing_values) +export(sits_mlr) export(sits_mutate) export(sits_patterns) export(sits_plot) diff --git a/R/sits_assessment.R b/R/sits_assessment.R index d89b645f3..975bed69d 100644 --- a/R/sits_assessment.R +++ b/R/sits_assessment.R @@ -40,11 +40,14 @@ sits_accuracy <- function(pred.vec, ref.vec, pred_sans_ext = FALSE, conv.lst = N } if (NCOL(conf.mtx) != NROW (conf.mtx)) { - missing_names = colnames (conf.mtx) [!(colnames(conf.mtx) %in% row.names(conf.mtx))] - for (i in 1:length (missing_names)) { - vz <- rep (0, NCOL(conf.mtx)) - conf.mtx <- rbind (conf.mtx, missing_names[i] = vz) - } + new.mtx <- matrix(rep(0, NROW(conf.mtx) * NCOL(conf.mtx)), nrow = NROW(conf.mtx), ncol = NCOL(conf.mtx)) + new.mtx[colnames(conf.mtx) %in% row.names(conf.mtx), ] <- conf.mtx + # missing_names = colnames (conf.mtx) [!(colnames(conf.mtx) %in% row.names(conf.mtx))] + # for (i in 1:length (missing_names)) { + # vz <- rep (0, NCOL(conf.mtx)) + # conf.mtx <- rbind (conf.mtx, missing_names[i] = vz) + # } + conf.mtx <- new.mtx } # ensures that the confusion matrix is square ensurer::ensure_that(conf.mtx, NCOL(.) == NROW(.), diff --git a/R/sits_init.R b/R/sits_init.R index f17747604..eca8450fe 100644 --- a/R/sits_init.R +++ b/R/sits_init.R @@ -1,8 +1,10 @@ +# On load .onAttach = function(lib, pkg){ packageStartupMessage( sprintf("Loaded sits v%s. See ?sits for help, citation(\"sits\") for use in publication.\n", utils::packageDescription("sits")$Version) ) } + # Include the following global variables in the SITS package # utils::globalVariables(c(".", "%>%", "Index", "value", "variable", "cond", @@ -18,6 +20,6 @@ utils::globalVariables(c(".", "%>%", "Index", "value", "variable", "cond", #' @import dtwSat #' @importFrom magrittr %>% #' @usage lhs \%>\% rhs -#' #' @useDynLib sits, .registration = TRUE +#' NULL diff --git a/R/sits_machine_learning.R b/R/sits_machine_learning.R index 3cbec9ac5..e573f40fa 100644 --- a/R/sits_machine_learning.R +++ b/R/sits_machine_learning.R @@ -263,7 +263,7 @@ sits_predict <- function(data.tb = NULL, model){ # Spread TWDTW matches spread.tb <- sits_spread_matches(data.tb) - data.tb$predicted <- as.character(stats::predict(model, newdata = spread_tb)) + data.tb$predicted <- as.character(stats::predict(model, newdata = spread.tb)) return(data.tb) } diff --git a/conf_matrix.json b/conf_matrix.json deleted file mode 100644 index 811595c70..000000000 --- a/conf_matrix.json +++ /dev/null @@ -1 +0,0 @@ -["Pasture", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Cerrado", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Cerrado", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Cerrado", "Pasture", "Pasture", "Cerrado", "Pasture", "Pasture", "Pasture", "Pasture", "Cerrado", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Cerrado", "Pasture", "Cerrado", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Cerrado", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Cerrado", "Cerrado", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Pasture", "Pasture", "Cerrado", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Cerrado", "Cerrado", "Pasture", "Cerrado", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Cerrado", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Cerrado", "Pasture", "Cerrado", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Cerrado", "Pasture", "Pasture", "Cerrado", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Cerrado", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture", "Pasture"] diff --git a/man/sits_glm.Rd b/man/sits_glm.Rd new file mode 100644 index 000000000..74fd72f42 --- /dev/null +++ b/man/sits_glm.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sits_machine_learning.R +\name{sits_glm} +\alias{sits_glm} +\title{Train SITS classifiction models} +\usage{ +sits_glm(data.tb = NULL, formula = sits_formula_logref(), ...) +} +\arguments{ +\item{data.tb}{a SITS tibble time series with an `matches` column. If `data.tb` is NULL, the function returns +a pre-parameterized function to be called further in order to compute `multinom` training model} + +\item{formula}{a symbolic description of the model to be fit. SITS offers a set of such formulas (default: sits_formula_logref)} + +\item{...}{other parameters to be passed to glmnet::multinom function} +} +\value{ +result either an glmnet::multinom class or an function prepared that can be called further to compute multinom training model +} +\description{ +Use generalized liner model (glm) via penalized maximim likelihood to classify data. +This function is a front-end to the "glmnet" method in the "glmnet" package. +Please refer to the documentation in that package for more details. +} +\author{ +Alexandre Xavier Ywata de Carvalho, \email{alexandre.ywata@ipea.gov.br} + +Victor Maus, \email{vwmaus1@gmail.com} + +Gilberto Camara, \email{gilberto.camara@inpe.br} + +Rolf Simoes, \email{rolf.simoes@inpe.br} +} diff --git a/man/sits_lda.Rd b/man/sits_lda.Rd new file mode 100644 index 000000000..27a404ce2 --- /dev/null +++ b/man/sits_lda.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sits_machine_learning.R +\name{sits_lda} +\alias{sits_lda} +\title{Train SITS classifiction models} +\usage{ +sits_lda(data.tb = NULL, formula = sits_formula_logref(), ...) +} +\arguments{ +\item{data.tb}{a SITS tibble time series with an `matches` column. If `data.tb` is NULL, the function returns +a pre-parameterized function to be called further in order to compute `lda` training model} + +\item{formula}{a symbolic description of the model to be fit. SITS offers a set of such formulas (default: sits_formula_logref)} + +\item{...}{other parameters to be passed to MASS::lda function} +} +\value{ +result either an MASS::lda class or an function prepared that can be called further to compute lda training model +} +\description{ +Performs a linear discriminant analysis (lda) to classify data. +This function is a front-end to the "lda" method in the "MASS" package. +Please refer to the documentation in that package for more details. +} +\author{ +Alexandre Xavier Ywata de Carvalho, \email{alexandre.ywata@ipea.gov.br} + +Victor Maus, \email{vwmaus1@gmail.com} + +Gilberto Camara, \email{gilberto.camara@inpe.br} + +Rolf Simoes, \email{rolf.simoes@inpe.br} +} diff --git a/man/sits_mlr.Rd b/man/sits_mlr.Rd new file mode 100644 index 000000000..3586a65c8 --- /dev/null +++ b/man/sits_mlr.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sits_machine_learning.R +\name{sits_mlr} +\alias{sits_mlr} +\title{Train SITS classifiction models} +\usage{ +sits_mlr(data.tb = NULL, formula = sits_formula_logref(), ...) +} +\arguments{ +\item{data.tb}{a SITS tibble time series with an `matches` column. If `data.tb` is NULL, the function returns +a pre-parameterized function to be called further in order to compute `multinom` training model} + +\item{formula}{a symbolic description of the model to be fit. SITS offers a set of such formulas (default: sits_formula_logref)} + +\item{...}{other parameters to be passed to nnet::multinom function} +} +\value{ +result either an nnet::multinom class or an function prepared that can be called further to compute multinom training model +} +\description{ +Use multinomial log-linear (mlr) fitting model via neural networks to classify data. +This function is a front-end to the "multinom" method in the "nnet" package. +Please refer to the documentation in that package for more details. +} +\author{ +Alexandre Xavier Ywata de Carvalho, \email{alexandre.ywata@ipea.gov.br} + +Victor Maus, \email{vwmaus1@gmail.com} + +Gilberto Camara, \email{gilberto.camara@inpe.br} + +Rolf Simoes, \email{rolf.simoes@inpe.br} +} diff --git a/man/sits_spread_matches.Rd b/man/sits_spread_matches.Rd index f52a47478..b4e3e6be5 100644 --- a/man/sits_spread_matches.Rd +++ b/man/sits_spread_matches.Rd @@ -22,4 +22,6 @@ Victor Maus, \email{vwmaus1@gmail.com} Gilberto Camara, \email{gilberto.camara@inpe.br} Alexandre Xavier Ywata de Carvalho, \email{alexandre.ywata@ipea.gov.br} + +Rolf Simoes, \email{rolf.simoes@inpe.br} } diff --git a/man/sits_svm.Rd b/man/sits_svm.Rd index bc4601077..328c1563e 100644 --- a/man/sits_svm.Rd +++ b/man/sits_svm.Rd @@ -9,10 +9,10 @@ sits_svm(data.tb = NULL, formula = sits_formula_logref(), tolerance = 0.001, epsilon = 0.1, ...) } \arguments{ -\item{data.tb}{a SITS tibble time series with an alignment column. If data_tb is NULL, the function returns +\item{data.tb}{a SITS tibble time series with an alignment column. If data.tb is NULL, the function returns a function to be called further to compute svm training model according to given parameters} -\item{formula}{a symbolic description of the model to be fit. SITS offers a set of such formulas (default: sits_svm)} +\item{formula}{a symbolic description of the model to be fit. SITS offers a set of such formulas (default: sits_formula_logref)} \item{kernel}{the kernel used in training and predicting (options = linear, polynomial, radial basis, sigmoid)} From cb5111c307a421bacf8390e1f6fa67fa6db799a7 Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Fri, 18 Aug 2017 19:13:15 -0300 Subject: [PATCH 4/7] Improving package cpompilation --- inst/extdata/examples/sits_test_machine_learning.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/extdata/examples/sits_test_machine_learning.R b/inst/extdata/examples/sits_test_machine_learning.R index d2e251da8..bd7075dc5 100644 --- a/inst/extdata/examples/sits_test_machine_learning.R +++ b/inst/extdata/examples/sits_test_machine_learning.R @@ -16,4 +16,4 @@ predict.tb <- sits_predict(matches.tb, obj.svm) sits_accuracy(ref.vec = predict.tb$label, pred.vec = predict.tb$predicted) -sits_kfold_validate(cerrado.tb, folds = 3) +sits_kfold_validate(cerrado.tb, folds = 2) From 8030649260cb91300f2746fc13bdba3d069eccf9 Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Mon, 21 Aug 2017 15:39:25 -0300 Subject: [PATCH 5/7] New sits cluster functions --- DESCRIPTION | 4 +- NAMESPACE | 4 ++ R/sits_TWDTW.R | 2 +- R/sits_cluster.R | 125 +++++++++++++++++++++++++++++++++- man/sits_cluster.Rd | 4 +- man/sits_cluster_cleaner.Rd | 23 +++++++ man/sits_cluster_frequency.Rd | 23 +++++++ man/sits_cluster_relabel.Rd | 24 +++++++ man/sits_cluster_validity.Rd | 27 ++++++++ 9 files changed, 227 insertions(+), 9 deletions(-) create mode 100644 man/sits_cluster_cleaner.Rd create mode 100644 man/sits_cluster_frequency.Rd create mode 100644 man/sits_cluster_relabel.Rd create mode 100644 man/sits_cluster_validity.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 764066d84..f80d8473c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: sits Type: Package Title: Satellite Image Time Series Analysis -Version: 0.8.18 -Date: 2017-08-18 +Version: 0.8.21 +Date: 2017-08-21 Authors@R: c(person('Gilberto', 'Camara', role = c('aut', 'cre'), email = 'gilberto.camara@inpe.br'), person('Rolf', 'Simoes', role = c('aut'), email = 'rolf.simoes@inpe.br'), person('Victor', 'Maus', role = c('aut'), email = 'vwmaus1@gmail.com'), diff --git a/NAMESPACE b/NAMESPACE index 17fad51fc..3dfe7a0e9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,10 @@ export(sits_align) export(sits_apply) export(sits_bands) export(sits_cluster) +export(sits_cluster_cleaner) +export(sits_cluster_frequency) +export(sits_cluster_relabel) +export(sits_cluster_validity) export(sits_color_name) export(sits_coverageWTSS) export(sits_createColRowSequence) diff --git a/R/sits_TWDTW.R b/R/sits_TWDTW.R index a3048c80d..901adff1a 100644 --- a/R/sits_TWDTW.R +++ b/R/sits_TWDTW.R @@ -48,7 +48,7 @@ sits_TWDTW_matches <- function (data.tb = NULL, patterns.tb = NULL, bands = NULL .sits_test_table (patterns.tb) # handle the case of null bands - if (purrr::is_null (bands)) bands <- sits_bands(tb) + if (purrr::is_null (bands)) bands <- sits_bands(data.tb) # create a tibble to store the results of the TWDTW matches matches.tb <- sits_table() diff --git a/R/sits_cluster.R b/R/sits_cluster.R index 9d1bbe1ca..108bf37f5 100644 --- a/R/sits_cluster.R +++ b/R/sits_cluster.R @@ -10,17 +10,136 @@ #' @param data.tb a tibble with input data of dtwclust. #' @param clusters a cluster structure returned from dtwclust. #' @param k the desired number of clusters -#' @param height the height of dendrogram cutting #' @return result.tb a SITS table with the clusters or clusters' members #' @export -sits_cluster <- function (data.tb, clusters, k = NULL, height = NULL) { +sits_cluster <- function (data.tb, clusters, k) { # create a table to store the results result.tb <- data.tb # cut the tree - result.tb$cluster <- stats::cutree(clusters, k, height) + result.tb$cluster <- stats::cutree(clusters, k) return (result.tb) } +#' @title Cluster validity indices +#' @name sits_cluster_validity +#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br} +#' +#' @description Compute different cluster validity indices. This function needs +#' as input a SITS tibble with `cluster` column. It is a front-end to +#' `dtwclust::cvi` function. Please refer to the documentation in that package for more details. +#' @references "dtwclust" package (https://CRAN.R-project.org/package=dtwclust) +#' +#' @param data.tb a SITS tibble with `cluster` column. +#' @param type character vector indicating which indices are to be computed. (Default "valid") +#' @return result.vec vectors with chosen CVIs +#' @export +sits_cluster_validity <- function (data.tb, type = "valid") { + + # is the input data the result of a cluster function? + ensurer::ensure_that(data.tb, "cluster" %in% names (.), err_desc = "sits_cluster_cleaner: input data does not contain cluster column") + + # compute CVIs and return + result.vec <- dtwclust::cvi(a = data.tb$cluster, b = factor(data.tb$label), type = type, log.base = 10) + + return (result.vec) +} + +#' @title Cluster frequency +#' @name sits_cluster_frequency +#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br} +#' +#' @description Computes the frequency of labels in each cluster. +#' This function needs as input a SITS tibble with `cluster` column. +#' +#' @param data.tb a SITS tibble with `cluster` column. +#' @param relative (boolean) return relative frequency? +#' @return result.mtx matrix containing all frequencies of labels in clusters +#' @export +sits_cluster_frequency <- function (data.tb, relative = FALSE) { + + # is the input data the result of a cluster function? + ensurer::ensure_that(data.tb, "cluster" %in% names (.), err_desc = "sits_cluster_cleaner: input data does not contain cluster column") + + # compute frequency table + result.mtx <- table(data.tb$label, data.tb$cluster) + + # compute relative frequency + if (relative) + result.mtx <- prop.table(result.mtx, margin = 2) + + # compute total row and col + result.mtx <- stats::addmargins(result.mtx, FUN = list(Total = sum), quiet = TRUE) + return (result.mtx) +} + +#' @title Cluster cleaner +#' @name sits_cluster_cleaner +#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br} +#' +#' @description Removes SITS tibble samples of labels that are minority in each cluster. +#' This function needs as input a SITS tibble with `cluster` column. +#' +#' @param data.tb a SITS tibble with `cluster` column. +#' @param min_clu_perc minimum percentage of representativeness inside a cluster to remain in cluster. +#' @return result.tb a SITS tibble with all selected samples +#' @export +sits_cluster_cleaner <- function (data.tb, min_clu_perc) { + + # is the input data the result of a cluster function? + ensurer::ensure_that(data.tb, "cluster" %in% names (.), err_desc = "sits_cluster_cleaner: input data does not contain cluster column") + + # compute frequency in each cluster + freq.mtx <- sits_cluster_frequency(data.tb, relative = TRUE) + + # get those indexes whose labels represents more than `min_clu_perc` + index.mtx <- which(freq.mtx[1:NROW(freq.mtx) - 1,1:NCOL(freq.mtx) - 1] > min_clu_perc, arr.ind = TRUE, useNames = TRUE) + + # return only those samples that satisfies the `min_clu_perc` condition + filter_condition <- paste0(purrr::map2(rownames(index.mtx), index.mtx[,2], + function(lbl, clu) paste0("label=='", lbl, "' & cluster==", clu)), + collapse = " | ") + + # if no index selescted, return none + filter_condition <- ifelse(filter_condition != "", filter_condition, "FALSE") + + # filter result and return + result.tb <- dplyr::filter_(data.tb, filter_condition) + return (result.tb) +} + +#' @title Cluster cleaner +#' @name sits_cluster_relabel +#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br} +#' +#' @description Renames the labels of SITS tibble samples according to its respective cluster. +#' This function needs as input a SITS tibble with `cluster` column. +#' +#' @param data.tb a SITS tibble with `cluster` column. +#' @param cluster_names character vector informing all cluster names. If unnamed vector is informed, +#' the index of each name will be treated as cluster code +#' @return result.tb SITS tibble with relabeled samples +#' @export +sits_cluster_relabel <- function (data.tb, cluster_names) { + + # is the input data the result of a cluster function? + ensurer::ensure_that(data.tb, "cluster" %in% names (.), err_desc = "sits_cluster_cleaner: input data does not contain cluster column") + + # compute new clusters names + # if an unnamed vector is given + if (is.null(names(cluster_names))) + data_cluster_names.vec <- cluster_names[data.tb$cluster] + # if a named vector is given + else + data_cluster_names.vec <- cluster_names[as.character(data.tb$cluster)] + + # fill not renamed entries with original value + data_cluster_names.vec <- ifelse(is.na(data_cluster_names.vec), data.tb$cluster, data_cluster_names.vec) + names(data_cluster_names.vec) <- NULL + + # relabel result and return + result.tb$label <- data_cluster_names.vec + return (result.tb) +} diff --git a/man/sits_cluster.Rd b/man/sits_cluster.Rd index ba07647c2..d7cca9f3b 100644 --- a/man/sits_cluster.Rd +++ b/man/sits_cluster.Rd @@ -4,7 +4,7 @@ \alias{sits_cluster} \title{Cuts a cluster tree produced by sits_dendrogram} \usage{ -sits_cluster(data.tb, clusters, k = NULL, height = NULL) +sits_cluster(data.tb, clusters, k) } \arguments{ \item{data.tb}{a tibble with input data of dtwclust.} @@ -12,8 +12,6 @@ sits_cluster(data.tb, clusters, k = NULL, height = NULL) \item{clusters}{a cluster structure returned from dtwclust.} \item{k}{the desired number of clusters} - -\item{height}{the height of dendrogram cutting} } \value{ result.tb a SITS table with the clusters or clusters' members diff --git a/man/sits_cluster_cleaner.Rd b/man/sits_cluster_cleaner.Rd new file mode 100644 index 000000000..708fa0f8a --- /dev/null +++ b/man/sits_cluster_cleaner.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sits_cluster.R +\name{sits_cluster_cleaner} +\alias{sits_cluster_cleaner} +\title{Cluster cleaner} +\usage{ +sits_cluster_cleaner(data.tb, min_clu_perc) +} +\arguments{ +\item{data.tb}{a SITS tibble with `cluster` column.} + +\item{min_clu_perc}{minimum percentage of representativeness inside a cluster to remain in cluster.} +} +\value{ +result.tb a SITS tibble with all selected samples +} +\description{ +Removes SITS tibble samples of labels that are minority in each cluster. +This function needs as input a SITS tibble with `cluster` column. +} +\author{ +Rolf Simoes, \email{rolf.simoes@inpe.br} +} diff --git a/man/sits_cluster_frequency.Rd b/man/sits_cluster_frequency.Rd new file mode 100644 index 000000000..542edf8de --- /dev/null +++ b/man/sits_cluster_frequency.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sits_cluster.R +\name{sits_cluster_frequency} +\alias{sits_cluster_frequency} +\title{Cluster frequency} +\usage{ +sits_cluster_frequency(data.tb, relative = FALSE) +} +\arguments{ +\item{data.tb}{a SITS tibble with `cluster` column.} + +\item{relative}{(boolean) return relative frequency?} +} +\value{ +result.mtx matrix containing all frequencies of labels in clusters +} +\description{ +Computes the frequency of labels in each cluster. +This function needs as input a SITS tibble with `cluster` column. +} +\author{ +Rolf Simoes, \email{rolf.simoes@inpe.br} +} diff --git a/man/sits_cluster_relabel.Rd b/man/sits_cluster_relabel.Rd new file mode 100644 index 000000000..15b2278d0 --- /dev/null +++ b/man/sits_cluster_relabel.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sits_cluster.R +\name{sits_cluster_relabel} +\alias{sits_cluster_relabel} +\title{Cluster cleaner} +\usage{ +sits_cluster_relabel(data.tb, cluster_names) +} +\arguments{ +\item{data.tb}{a SITS tibble with `cluster` column.} + +\item{cluster_names}{character vector informing all cluster names. If unnamed vector is informed, +the index of each name will be treated as cluster code} +} +\value{ +result.tb SITS tibble with relabeled samples +} +\description{ +Renames the labels of SITS tibble samples according to its respective cluster. +This function needs as input a SITS tibble with `cluster` column. +} +\author{ +Rolf Simoes, \email{rolf.simoes@inpe.br} +} diff --git a/man/sits_cluster_validity.Rd b/man/sits_cluster_validity.Rd new file mode 100644 index 000000000..e78ea87dc --- /dev/null +++ b/man/sits_cluster_validity.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sits_cluster.R +\name{sits_cluster_validity} +\alias{sits_cluster_validity} +\title{Cluster validity indices} +\usage{ +sits_cluster_validity(data.tb, type = "valid") +} +\arguments{ +\item{data.tb}{a SITS tibble with `cluster` column.} + +\item{type}{character vector indicating which indices are to be computed. (Default "valid")} +} +\value{ +result.vec vectors with chosen CVIs +} +\description{ +Compute different cluster validity indices. This function needs +as input a SITS tibble with `cluster` column. It is a front-end to +`dtwclust::cvi` function. Please refer to the documentation in that package for more details. +} +\references{ +"dtwclust" package (https://CRAN.R-project.org/package=dtwclust) +} +\author{ +Rolf Simoes, \email{rolf.simoes@inpe.br} +} From 9c6d17e668e4e39d25f6bae8f247e25c6bd7259b Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Mon, 21 Aug 2017 17:44:12 -0300 Subject: [PATCH 6/7] Fix sits_cluster_relabel bug --- R/sits_cluster.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/sits_cluster.R b/R/sits_cluster.R index 108bf37f5..907bca7b0 100644 --- a/R/sits_cluster.R +++ b/R/sits_cluster.R @@ -140,6 +140,7 @@ sits_cluster_relabel <- function (data.tb, cluster_names) { names(data_cluster_names.vec) <- NULL # relabel result and return + result.tb <- data.tb result.tb$label <- data_cluster_names.vec return (result.tb) } From 0359c42fc154337a648e1d47a297ba778220a361 Mon Sep 17 00:00:00 2001 From: Rolf Simoes Date: Tue, 22 Aug 2017 18:28:59 -0300 Subject: [PATCH 7/7] Add new Machine Learn functions Change sits_predict function Change model data structure returned by sits_train --- R/sits_machine_learning.R | 204 +++++++++++++- .../Modelagem_classificacao_dados_dtw_v3.R | 250 +++++++++--------- 2 files changed, 320 insertions(+), 134 deletions(-) diff --git a/R/sits_machine_learning.R b/R/sits_machine_learning.R index 4a9c11490..8437a7ede 100644 --- a/R/sits_machine_learning.R +++ b/R/sits_machine_learning.R @@ -60,7 +60,7 @@ sits_train <- function(distances.tb, tr_method = sits_svm()){ #' @param tolerance tolerance of termination criterion (default: 0.001) #' @param epsilon epsilon in the insensitive-loss function (default: 0.1) #' @param ... other parameters to be passed to e1071::svm function -#' @return result either an e1071::svm class or an function prepared that can be called further to compute svm training model +#' @return result either an model function to be passed in sits_predict or an function prepared that can be called further to compute multinom training model #' @export #' sits_svm <- function(distances.tb = NULL, formula = sits_formula_logref(), kernel = "linear", @@ -79,10 +79,194 @@ sits_svm <- function(distances.tb = NULL, formula = sits_formula_logref(), kerne # call e1071::svm method and return the trained svm model result_svm <- e1071::svm(formula = formula, data = tb, kernel = kernel, degree = degree, cost = cost, coef0 = coef0, tolerance = tolerance, epsilon = epsilon, ...) - return(result_svm) + + # construct model predict enclosure function and returns + model_predict <- function(test_distances.tb){ + return(stats::predict(result_lda, newdata = test_distances.tb)) + } + return(model_predict) + } + + result <- .sits_factory_function (distances.tb, result_fun) +} +#' @title Train SITS classifiction models +#' @name sits_lda +#' +#' @author Alexandre Xavier Ywata de Carvalho, \email{alexandre.ywata@@ipea.gov.br} +#' @author Victor Maus, \email{vwmaus1@@gmail.com} +#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br} +#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br} +#' +#' @description Performs a linear discriminant analysis (lda) to classify data. +#' This function is a front-end to the "lda" method in the "MASS" package. +#' Please refer to the documentation in that package for more details. +#' +#' @param distances.tb a time series with a set of distance measures for each training sample +#' @param formula a symbolic description of the model to be fit. SITS offers a set of such formulas (default: sits_formula_logref) +#' @param ... other parameters to be passed to MASS::lda function +#' @return result either an model function to be passed in sits_predict or an function prepared that can be called further to compute multinom training model +#' @export +#' +sits_lda <- function(distances.tb = NULL, formula = sits_formula_logref(), ...) { + + # function that returns MASS::lda model based on a sits sample tibble + result_fun <- function(train_distances.tb){ + + # is the input data the result of a TWDTW matching function? + ensurer::ensure_that(train_distances.tb, "reference" %in% names (.), err_desc = "sits_lda: input data does not contain distance") + + # if parameter formula is a function call it passing as argument the input data sample. The function must return a valid formula. + if (class(formula) == "function") + formula <- formula(train_distances.tb) + + # call MASS::lda method and return the trained lda model + result_lda <- MASS::lda(formula = formula, data = train_distances.tb, ...) + + # construct model predict enclosure function and returns + model_predict <- function(test_distances.tb){ + return(stats::predict(result_lda, newdata = test_distances.tb)) + } + return(model_predict) + } + + result <- .sits_factory_function (distances.tb, result_fun) + return(result) +} + +#' @title Train SITS classifiction models +#' @name sits_mlr +#' +#' @author Alexandre Xavier Ywata de Carvalho, \email{alexandre.ywata@@ipea.gov.br} +#' @author Victor Maus, \email{vwmaus1@@gmail.com} +#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br} +#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br} +#' +#' @description Use multinomial log-linear (mlr) fitting model via neural networks to classify data. +#' This function is a front-end to the "multinom" method in the "nnet" package. +#' Please refer to the documentation in that package for more details. +#' +#' @param distances.tb a time series with a set of distance measures for each training sample +#' @param formula a symbolic description of the model to be fit. SITS offers a set of such formulas (default: sits_formula_logref) +#' @param ... other parameters to be passed to nnet::multinom function +#' @return result either an model function to be passed in sits_predict or an function prepared that can be called further to compute multinom training model +#' @export +#' +sits_mlr <- function(distances.tb = NULL, formula = sits_formula_logref(), ...) { + + # function that returns nnet::multinom model based on a sits sample tibble + result_fun <- function(train_distances.tb){ + + # is the input data the result of a TWDTW matching function? + ensurer::ensure_that(train_distances.tb, "reference" %in% names (.), err_desc = "sits_mlr: input data does not contain distance") + + # if parameter formula is a function call it passing as argument the input data sample. The function must return a valid formula. + if (class(formula) == "function") + formula <- formula(train_distances.tb) + + # call nnet::multinom method and return the trained multinom model + result_mlr <- nnet::multinom(formula = formula, data = train_distances.tb, ...) + + # construct model predict enclosure function and returns + model_predict <- function(test_distances.tb){ + return(stats::predict(result_mlr, newdata = test_distances.tb)) + } + return(model_predict) + } + + result <- .sits_factory_function (distances.tb, result_fun) + return(result) +} + +#' @title Train SITS classifiction models +#' @name sits_glm +#' +#' @author Alexandre Xavier Ywata de Carvalho, \email{alexandre.ywata@@ipea.gov.br} +#' @author Victor Maus, \email{vwmaus1@@gmail.com} +#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br} +#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br} +#' +#' @description Use generalized liner model (glm) via penalized maximim likelihood to classify data. +#' This function is a front-end to the "cv.glmnet" method in the "glmnet" package. +#' Please refer to the documentation in that package for more details. +#' +#' @param distances.tb a time series with a set of distance measures for each training sample +#' @param family Response type. Can be either "gaussian", "binomial", "poisson", "multinomial", "cox", or "mgaussian". (default: "multinomial") +#' @param alpha the elasticnet mixing parameter, with 0<=alpha<=1. Set alpha = 0.0 to obtain ridge model, and alpha = 1.0 to obtain lasso model). +#' (refer to `glmnet::cv.glmnet` function for more details) +#' @param lambda_kfolds number of folds to find best lambda parameter (default: 10) +#' @param ... other parameters to be passed to `glmnet::cv.glmnet` function +#' @return result either an model function to be passed in sits_predict or an function prepared that can be called further to compute multinom training model +#' @export +#' +sits_glm <- function(distances.tb = NULL, family = "multinomial", alpha = 1.0, lambda_kfolds = 10, ...) { + + # function that returns glmnet::multinom model based on a sits sample tibble + result_fun <- function(train_distances.tb){ + + # is the input data the result of a TWDTW matching function? + ensurer::ensure_that(train_distances.tb, "reference" %in% names (.), err_desc = "sits_glm: input data does not contain distance") + + # call glmnet::multinom method and return the trained multinom model + result_glm <- glmnet::cv.glmnet(y = factor(data.matrix(train_distances.tb$reference)), + x = log(data.matrix(train_distances.tb[,3:NCOL(train_distances.tb)])), + family = family, alpha = alpha, k = lambda_kfolds, ...) + + # construct model predict enclosure function and returns + model_predict <- function(test_distances.tb){ + return(stats::predict(result_glm, + s = result_glm$lambda.min, + newx = log(data.matrix(test_distances.tb[,3:NCOL(test_distances.tb)])), type = "class")) + } + return(model_predict) + } + + result <- .sits_factory_function (distances.tb, result_fun) + return(result) +} + +#' @title Train SITS classifiction models +#' @name sits_rfor +#' +#' @author Alexandre Xavier Ywata de Carvalho, \email{alexandre.ywata@@ipea.gov.br} +#' @author Victor Maus, \email{vwmaus1@@gmail.com} +#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br} +#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br} +#' +#' @description Use Random Forest algorithm to classify data. +#' This function is a front-end to the "randomForest" method in the "randomForest" package. +#' Please refer to the documentation in that package for more details. +#' +#' @param distances.tb a time series with a set of distance measures for each training sample +#' @param n_tree Number of trees to grow. This should not be set to too small a number, +#' to ensure that every input row gets predicted at least a few times. (default: 500) +#' @param node_size +#' @param ... other parameters to be passed to `randomForest::randomForest` function +#' @return result either an model function to be passed in sits_predict or an function prepared that can be called further to compute multinom training model +#' @export +#' +sits_rfor <- function(distances.tb = NULL, n_tree = 500, ...) { + + # function that returns `randomForest::randomForest` model based on a sits sample tibble + result_fun <- function(train_distances.tb){ + + # is the input data the result of a TWDTW matching function? + ensurer::ensure_that(train_distances.tb, "reference" %in% names (.), err_desc = "sits_mlr: input data does not contain distance") + + # call `randomForest::randomForest` method and return the trained multinom model + result_rfor <- randomForest::randomForest(y = data.matrix(train_distances.tb$reference), + x = log(data.matrix(train_distances.tb[,2:NCOL(train_distances.tb)])), + data = NULL, ntree = n_tree, nodesize = 1, + norm.votes = FALSE, train_distances.tb, ...) + + # construct model predict enclosure function and returns + model_predict <- function(test_distances.tb){ + return(stats::predict(result_rfor, newdata = test_distances.tb, type = "response")) + } + return(model_predict) } result <- .sits_factory_function (distances.tb, result_fun) + return(result) } #' @title Train SITS classifiction models @@ -121,7 +305,6 @@ sits_formula_logref <- function(predictors_index = -2:0){ return(result_fun) } - #' @title Predict class based on the trained models #' @name sits_predict #' @@ -133,18 +316,21 @@ sits_formula_logref <- function(predictors_index = -2:0){ #' @description Given a SITS tibble time series and a model trained by \code{\link[sits]{sits_train}}, #' returns a SITS tibble with the classification. #' -#' @param data.tb a SITS tibble time series -#' @param distances.tb a tibble with a set of distance metrics to each of the classes -#' @param model a model trained by \code{\link[sits]{sits_train}} -#' @return data.tb - a SITS tibble with the predicted label +#' @param data.tb a SITS tibble time series +#' @param distances.tb a tibble with a set of distance metrics to each of the classes +#' @param model a model trained by \code{\link[sits]{sits_train}} +#' @return data.tb a SITS tibble with the predicted label #' #' @export -sits_predict <- function(data.tb = NULL, distances.tb = NULL, model){ +sits_predict <- function(data.tb = NULL, distances.tb = NULL, model, ...){ # is the input data the result of a TWDTW matching function? ensurer::ensure_that(distances.tb, "reference" %in% names (.), err_desc = "sits_train_svm: input data does not contain TWDTW matches") - data.tb$predicted <- as.character(stats::predict(model, newdata = distances.tb)) + # is the input model a model function? + ensurer::ensure_that(model, class (.) == "function", err_desc = "sits_predict: model parameter is not a function model returned by sits_train.") + + data.tb$predicted <- as.character(model(distances.tb)) return(data.tb) } diff --git a/inst/extdata/ML/ProgramasR/Modelagem_classificacao_dados_dtw_v3.R b/inst/extdata/ML/ProgramasR/Modelagem_classificacao_dados_dtw_v3.R index 4ef7d1301..14e9feb4a 100644 --- a/inst/extdata/ML/ProgramasR/Modelagem_classificacao_dados_dtw_v3.R +++ b/inst/extdata/ML/ProgramasR/Modelagem_classificacao_dados_dtw_v3.R @@ -25,13 +25,13 @@ rm(list=ls()); #--- data used for estimations ---# #---------------------------------------------# -dir_base <- "D:\\Alex\\Pesquisa\\LandUseChange\\IIASA_GLOBIOM\\DTWMachineLearning" +dir_base <- "~/sits/inst/extdata/ML" #dir_base <- "//storage3/usuarios/AlexandreYwata/Pesquisa/LandUseChange/IIASA_GLOBIOM/SITS/inst/extdata/examples/PosProcessamentoDTW" -dir_dados <- paste0(dir_base, "\\Dados"); -dir_prog <- paste0(dir_base, "\\ProgramasR") +dir_dados <- paste0(dir_base, "/Dados"); +dir_prog <- paste0(dir_base, "/ProgramasR") -file_dados <- paste0(dir_dados, "\\mt_twdtw_distances.csv") +file_dados <- paste0(dir_dados, "/mt_twdtw_distances.csv") dados_originais <- read.csv(file_dados, stringsAsFactors=FALSE, header=TRUE, sep = ","); str(dados_originais) @@ -57,7 +57,7 @@ for (i in 1:nrow(dados)) for (j in 1:length(list_refs)) { variavel <- list_refs[j] - + if (dados[i,variavel] < min_dist) { min_dist <- dados[i,variavel] @@ -76,8 +76,8 @@ for (i in 1:nrow(dados)) # "Soybean_Comerc1" = "Double_Cropping", # "Soybean_Comerc2" = "Double_Cropping", # "Soybean_Cotton" = "Soybean_Cotton", -# "Soybean_Fallow1" = "Single_Cropping", -# "Soybean_Fallow2" = "Single_Cropping", +# "Soybean_Fallow1" = "Single_Cropping", +# "Soybean_Fallow2" = "Single_Cropping", # "Soybean_NonComerc1" = "Double_Cropping", # "Soybean_NonComerc2" = "Double_Cropping", # "Soybean_Pasture" = "Pasture", @@ -92,8 +92,8 @@ conv.lst <- c("Fallow_Cotton" = "Cotton", "Soybean_Comerc1" = "Double_Cropping", "Soybean_Comerc2" = "Double_Cropping", "Soybean_Cotton" = "Double_Cropping", - "Soybean_Fallow1" = "Single_Cropping", - "Soybean_Fallow2" = "Single_Cropping", + "Soybean_Fallow1" = "Single_Cropping", + "Soybean_Fallow2" = "Single_Cropping", "Soybean_NonComerc1" = "Double_Cropping", "Soybean_NonComerc2" = "Double_Cropping", "Soybean_Pasture" = "Pasture", @@ -146,10 +146,10 @@ table(dados$class, dados$classnum) #-----------------------------------------------------------# nomes <- names(dados); nomes -lognomes <- paste0('log(', nomes[nomes %in% c(list_refs)], ')'); +lognomes <- paste0('log(', nomes[nomes %in% c(list_refs)], ')'); paste(lognomes, collapse = " + ") -orinomes <- paste0(nomes[nomes %in% c(list_refs)]); +orinomes <- paste0(nomes[nomes %in% c(list_refs)]); paste(orinomes, collapse = " + ") formula1 <- as.formula(paste("factor(class) ~ ", paste(lognomes, collapse = " + "))); @@ -172,10 +172,10 @@ for (k in 1:kfolds) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - + categorias.lda <- lda(formula1, data=dadosTrain) summary(categorias.lda) - + categorias.lda.pred <- as.character(predict(categorias.lda, newdata = dadosTest)$class) dados[dados$folds == k, 'pred_lda'] <- categorias.lda.pred } @@ -193,10 +193,10 @@ for (k in 1:kfolds) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - + categorias.mlr <- multinom(formula1, data=dadosTrain) summary(categorias.mlr) - + categorias.mlr.pred <- as.character(predict(categorias.mlr, newdata = dadosTest)) dados[dados$folds == k, 'pred_mlr'] <- categorias.mlr.pred } @@ -222,17 +222,17 @@ for (costsvm in list_costs) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - - categorias.svm.linear <- svm(formula1, data=dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")], - kernel = "linear", - type="C-classification", epsilon = epsilonsvm, cost = costsvm) + + categorias.svm.linear <- svm(formula1, data=dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")], + kernel = "linear", + type="C-classification", epsilon = epsilonsvm, cost = costsvm) summary(categorias.svm.linear) - - categorias.svm.linear.pred <- as.character(predict(categorias.svm.linear, + + categorias.svm.linear.pred <- as.character(predict(categorias.svm.linear, newdata = dadosTest[,colnames(dadosTest) %in% c(list_refs, "class")])) dados[dados$folds == k, 'pred_svm_linear'] <- categorias.svm.linear.pred } - + cv_svm_linear[counter, 1] <- costsvm cv_svm_linear[counter, 2] <- epsilonsvm cv_svm_linear[counter, 3] <- mean(dados$class == dados$pred_svm_linear) @@ -249,13 +249,13 @@ for (k in 1:kfolds) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - - categorias.svm.linear <- svm(formula1, data=dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")], - kernel = "linear", - type="C-classification", epsilon = epsilon_opt, cost = cost_opt) + + categorias.svm.linear <- svm(formula1, data=dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")], + kernel = "linear", + type="C-classification", epsilon = epsilon_opt, cost = cost_opt) summary(categorias.svm.linear) - - categorias.svm.linear.pred <- as.character(predict(categorias.svm.linear, + + categorias.svm.linear.pred <- as.character(predict(categorias.svm.linear, newdata = dadosTest[,colnames(dadosTest) %in% c(list_refs, "class")])) dados[dados$folds == k, 'pred_svm_linear'] <- categorias.svm.linear.pred } @@ -281,17 +281,17 @@ for (costsvm in list_costs) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - - categorias.svm.radial <- svm(formula1, data=dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")], - kernel = "radial", - type="C-classification", epsilon = epsilonsvm, cost = costsvm) + + categorias.svm.radial <- svm(formula1, data=dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")], + kernel = "radial", + type="C-classification", epsilon = epsilonsvm, cost = costsvm) summary(categorias.svm.radial) - - categorias.svm.radial.pred <- as.character(predict(categorias.svm.radial, + + categorias.svm.radial.pred <- as.character(predict(categorias.svm.radial, newdata = dadosTest[,colnames(dadosTest) %in% c(list_refs, "class")])) dados[dados$folds == k, 'pred_svm_radial'] <- categorias.svm.radial.pred } - + cv_svm_radial[counter, 1] <- costsvm cv_svm_radial[counter, 2] <- epsilonsvm cv_svm_radial[counter, 3] <- mean(dados$class == dados$pred_svm_radial) @@ -308,13 +308,13 @@ for (k in 1:kfolds) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - - categorias.svm.radial <- svm(formula1, data=dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")], - kernel = "radial", - type="C-classification", epsilon = epsilon_opt, cost = cost_opt) + + categorias.svm.radial <- svm(formula1, data=dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")], + kernel = "radial", + type="C-classification", epsilon = epsilon_opt, cost = cost_opt) summary(categorias.svm.radial) - - categorias.svm.radial.pred <- as.character(predict(categorias.svm.radial, + + categorias.svm.radial.pred <- as.character(predict(categorias.svm.radial, newdata = dadosTest[,colnames(dadosTest) %in% c(list_refs, "class")])) dados[dados$folds == k, 'pred_svm_radial'] <- categorias.svm.radial.pred } @@ -340,17 +340,17 @@ for (costsvm in list_costs) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - - categorias.svm.polynomial <- svm(formula1, data=dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")], - kernel = "polynomial", - type="C-classification", epsilon = epsilonsvm, cost = costsvm) + + categorias.svm.polynomial <- svm(formula1, data=dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")], + kernel = "polynomial", + type="C-classification", epsilon = epsilonsvm, cost = costsvm) summary(categorias.svm.polynomial) - - categorias.svm.polynomial.pred <- as.character(predict(categorias.svm.polynomial, + + categorias.svm.polynomial.pred <- as.character(predict(categorias.svm.polynomial, newdata = dadosTest[,colnames(dadosTest) %in% c(list_refs, "class")])) dados[dados$folds == k, 'pred_svm_polynomial'] <- categorias.svm.polynomial.pred } - + cv_svm_polynomial[counter, 1] <- costsvm cv_svm_polynomial[counter, 2] <- epsilonsvm cv_svm_polynomial[counter, 3] <- mean(dados$class == dados$pred_svm_polynomial) @@ -367,13 +367,13 @@ for (k in 1:kfolds) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - - categorias.svm.polynomial <- svm(formula1, data=dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")], - kernel = "polynomial", - type="C-classification", epsilon = epsilon_opt, cost = cost_opt) + + categorias.svm.polynomial <- svm(formula1, data=dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")], + kernel = "polynomial", + type="C-classification", epsilon = epsilon_opt, cost = cost_opt) summary(categorias.svm.polynomial) - - categorias.svm.polynomial.pred <- as.character(predict(categorias.svm.polynomial, + + categorias.svm.polynomial.pred <- as.character(predict(categorias.svm.polynomial, newdata = dadosTest[,colnames(dadosTest) %in% c(list_refs, "class")])) dados[dados$folds == k, 'pred_svm_polynomial'] <- categorias.svm.polynomial.pred } @@ -399,17 +399,17 @@ for (costsvm in list_costs) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - - categorias.svm.sigmoid <- svm(formula1, data=dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")], - kernel = "sigmoid", - type="C-classification", epsilon = epsilonsvm, cost = costsvm) + + categorias.svm.sigmoid <- svm(formula1, data=dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")], + kernel = "sigmoid", + type="C-classification", epsilon = epsilonsvm, cost = costsvm) summary(categorias.svm.sigmoid) - - categorias.svm.sigmoid.pred <- as.character(predict(categorias.svm.sigmoid, + + categorias.svm.sigmoid.pred <- as.character(predict(categorias.svm.sigmoid, newdata = dadosTest[,colnames(dadosTest) %in% c(list_refs, "class")])) dados[dados$folds == k, 'pred_svm_sigmoid'] <- categorias.svm.sigmoid.pred } - + cv_svm_sigmoid[counter, 1] <- costsvm cv_svm_sigmoid[counter, 2] <- epsilonsvm cv_svm_sigmoid[counter, 3] <- mean(dados$class == dados$pred_svm_sigmoid) @@ -426,13 +426,13 @@ for (k in 1:kfolds) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - - categorias.svm.sigmoid <- svm(formula1, data=dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")], - kernel = "sigmoid", - type="C-classification", epsilon = epsilon_opt, cost = cost_opt) + + categorias.svm.sigmoid <- svm(formula1, data=dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")], + kernel = "sigmoid", + type="C-classification", epsilon = epsilon_opt, cost = cost_opt) summary(categorias.svm.sigmoid) - - categorias.svm.sigmoid.pred <- as.character(predict(categorias.svm.sigmoid, + + categorias.svm.sigmoid.pred <- as.character(predict(categorias.svm.sigmoid, newdata = dadosTest[,colnames(dadosTest) %in% c(list_refs, "class")])) dados[dados$folds == k, 'pred_svm_sigmoid'] <- categorias.svm.sigmoid.pred } @@ -444,7 +444,7 @@ mean(dados$class == dados$pred_svm_sigmoid) #--- ridge multinomial regression ---# #-----------------------------------------------------------# -yDados <- data.matrix(dados[,length(list_refs)+4]); head(yDados) +yDados <- data.matrix(dados[,"class"]); head(yDados) xDados <- log(data.matrix(dados[,c(2:(length(list_refs)+1))])); head(xDados) set.seed(2104) @@ -457,13 +457,13 @@ for (k in 1:kfolds) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - - yTrain <- data.matrix(dadosTrain[,length(list_refs)+4]) - yTest <- data.matrix(dadosTest[,length(list_refs)+4]) - + + yTrain <- data.matrix(dadosTrain[,"class"]) + yTest <- data.matrix(dadosTest[,"class"]) + xTrain <- log(data.matrix(dadosTrain[,c(2:(length(list_refs)+1))])) xTest <- log(data.matrix(dadosTest[,c(2:(length(list_refs)+1))])) - + categorias.ridge.pred <- predict(categorias.ridge, s=categorias.ridge$lambda.min, newx=xTest, type='class') dados[dados$folds == k, 'pred_ridge'] <- categorias.ridge.pred } @@ -490,13 +490,13 @@ for (k in 1:kfolds) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - + yTrain <- data.matrix(dadosTrain[,length(list_refs)+4]) yTest <- data.matrix(dadosTest[,length(list_refs)+4]) - + xTrain <- log(data.matrix(dadosTrain[,c(2:(length(list_refs)+1))])) xTest <- log(data.matrix(dadosTest[,c(2:(length(list_refs)+1))])) - + categorias.lasso.pred <- predict(categorias.lasso, s=categorias.lasso$lambda.min, newx=xTest, type='class') dados[dados$folds == k, 'pred_lasso'] <- categorias.lasso.pred } @@ -521,11 +521,11 @@ for (alphaelnet in list_alpha) categorias.elnet <- cv.glmnet(y = factor(yDados), x = xDados, family="multinomial", alpha=alphaelnet, k = kfolds) summary(categorias.elnet) elnet_lambda_opt <- categorias.elnet$lambda.min; elnet_lambda_opt - + cv_elnet_alpha[contador, 1] <- alphaelnet cv_elnet_alpha[contador, 2] <- elnet_lambda_opt cv_elnet_alpha[contador, 3] <- min(categorias.elnet$cvm) - + contador <- contador + 1 } @@ -542,13 +542,13 @@ for (k in 1:kfolds) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - + yTrain <- data.matrix(dadosTrain[,length(list_refs)+4]) yTest <- data.matrix(dadosTest[,length(list_refs)+4]) - + xTrain <- log(data.matrix(dadosTrain[,c(2:(length(list_refs)+1))])) xTest <- log(data.matrix(dadosTest[,c(2:(length(list_refs)+1))])) - + categorias.elnet.pred <- predict(categorias.elnet, s=categorias.elnet$lambda.min, newx=xTest, type='class') dados[dados$folds == k, 'pred_elnet'] <- categorias.elnet.pred } @@ -574,21 +574,21 @@ for (ntreesrf in list_ntrees) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - + yTrain <- data.matrix(dadosTrain[,length(list_refs)+4]) yTest <- data.matrix(dadosTest[,length(list_refs)+4]) - + xTrain <- log(data.matrix(dadosTrain[,c(2:(length(list_refs)+1))])) xTest <- log(data.matrix(dadosTest[,c(2:(length(list_refs)+1))])) - - categorias.rfore <- randomForest(y = factor(yTrain), x = xTrain, data=NULL, - ntree=ntreesrf, nodesize = nodesizerf, norm.votes=FALSE) + + categorias.rfore <- randomForest(y = factor(yTrain), x = xTrain, data=NULL, + ntree=ntreesrf, nodesize = nodesizerf, norm.votes=FALSE) summary(categorias.rfore) - + categorias.rfore.pred <- as.character(predict(categorias.rfore, newdata = xTest, type = 'response')) dados[dados$folds == k, 'pred_rfore'] <- categorias.rfore.pred } - + cv_rfore[counter, 1] <- ntreesrf cv_rfore[counter, 2] <- nodesizerf cv_rfore[counter, 3] <- mean(dados$class == dados$pred_rfore) @@ -605,17 +605,17 @@ for (k in 1:kfolds) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - + yTrain <- data.matrix(dadosTrain[,length(list_refs)+4]) yTest <- data.matrix(dadosTest[,length(list_refs)+4]) - + xTrain <- log(data.matrix(dadosTrain[,c(2:(length(list_refs)+1))])) xTest <- log(data.matrix(dadosTest[,c(2:(length(list_refs)+1))])) - - categorias.rfore <- randomForest(y = factor(yTrain), x = xTrain, data=NULL, - ntree=ntrees_opt, nodesize = nodesize_opt, norm.votes=FALSE) + + categorias.rfore <- randomForest(y = factor(yTrain), x = xTrain, data=NULL, + ntree=ntrees_opt, nodesize = nodesize_opt, norm.votes=FALSE) summary(categorias.rfore) - + categorias.rfore.pred <- as.character(predict(categorias.rfore, newdata = xTest, type = 'response')) dados[dados$folds == k, 'pred_rfore'] <- categorias.rfore.pred } @@ -644,20 +644,20 @@ for (ntreesgbm in list_ntrees) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - + h2o_dadosTrain <- as.h2o(dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")]) h2o_dadosTrain$class <- as.factor(h2o_dadosTrain$class) h2o_dadosTest <- as.h2o(dadosTest[,colnames(dadosTest) %in% list_refs]) - - categorias.h2ogbm <- h2o.gbm(y = "class", x = list_refs, + + categorias.h2ogbm <- h2o.gbm(y = "class", x = list_refs, training_frame = h2o_dadosTrain, distribution = "multinomial", ntrees = ntreesgbm, max_depth = depthgbm); - + categorias.h2ogbm.pred <- h2o.predict(categorias.h2ogbm, newdata = h2o_dadosTest)$predict; - + dados[dados$folds == k, 'pred_h2ogbm'] <- as.character(as.vector(categorias.h2ogbm.pred)); } - + cv_h2ogbm[counter, 1] <- ntreesgbm cv_h2ogbm[counter, 2] <- depthgbm cv_h2ogbm[counter, 3] <- mean(dados$class == dados$pred_h2ogbm) @@ -676,17 +676,17 @@ for (k in 1:kfolds) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - + h2o_dadosTrain <- as.h2o(dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")]) h2o_dadosTrain$class <- as.factor(h2o_dadosTrain$class) h2o_dadosTest <- as.h2o(dadosTest[,colnames(dadosTest) %in% list_refs]) - - categorias.h2ogbm <- h2o.gbm(y = "class", x = list_refs, + + categorias.h2ogbm <- h2o.gbm(y = "class", x = list_refs, training_frame = h2o_dadosTrain, distribution = "multinomial", ntrees = ntrees_opt, max_depth = depth_opt); - + categorias.h2ogbm.pred <- h2o.predict(categorias.h2ogbm, newdata = h2o_dadosTest)$predict; - + dados[dados$folds == k, 'pred_h2ogbm'] <- as.character(as.vector(categorias.h2ogbm.pred)); } @@ -716,20 +716,20 @@ for (ntreesrfore in list_ntrees) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - + h2o_dadosTrain <- as.h2o(dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")]) h2o_dadosTrain$class <- as.factor(h2o_dadosTrain$class) h2o_dadosTest <- as.h2o(dadosTest[,colnames(dadosTest) %in% list_refs]) - - categorias.h20rfore <- h2o.randomForest(y = "class", x = list_refs, - training_frame = h2o_dadosTrain, - ntrees = ntreesrfore, max_depth = depthrfore) - + + categorias.h20rfore <- h2o.randomForest(y = "class", x = list_refs, + training_frame = h2o_dadosTrain, + ntrees = ntreesrfore, max_depth = depthrfore) + categorias.h20rfore.pred <- h2o.predict(categorias.h20rfore, newdata = h2o_dadosTest)$predict; - + dados[dados$folds == k, 'pred_h2orfore'] <- as.character(as.vector(categorias.h20rfore.pred)); } - + cv_h2orfore[counter, 1] <- ntreesrfore cv_h2orfore[counter, 2] <- depthrfore cv_h2orfore[counter, 3] <- mean(dados$class == dados$pred_h2orfore) @@ -748,17 +748,17 @@ for (k in 1:kfolds) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - + h2o_dadosTrain <- as.h2o(dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")]) h2o_dadosTrain$class <- as.factor(h2o_dadosTrain$class) h2o_dadosTest <- as.h2o(dadosTest[,colnames(dadosTest) %in% list_refs]) - - categorias.h20rfore <- h2o.randomForest(y = "class", x = list_refs, - training_frame = h2o_dadosTrain, - ntrees = ntrees_opt, max_depth = depth_opt) - + + categorias.h20rfore <- h2o.randomForest(y = "class", x = list_refs, + training_frame = h2o_dadosTrain, + ntrees = ntrees_opt, max_depth = depth_opt) + categorias.h20rfore.pred <- h2o.predict(categorias.h20rfore, newdata = h2o_dadosTest)$predict; - + dados[dados$folds == k, 'pred_h2orfore'] <- as.character(as.vector(categorias.h20rfore.pred)); } @@ -778,16 +778,16 @@ for (k in 1:kfolds) { dadosTrain <- dados[dados$folds != k,] dadosTest <- dados[dados$folds == k,] - + h2o_dadosTrain <- as.h2o(dadosTrain[,colnames(dadosTrain) %in% c(list_refs, "class")]) h2o_dadosTrain$class <- as.factor(h2o_dadosTrain$class) h2o_dadosTest <- as.h2o(dadosTest[,colnames(dadosTest) %in% list_refs]) - - categorias.h2odpl <- h2o.deeplearning(y = "class", x = list_refs, + + categorias.h2odpl <- h2o.deeplearning(y = "class", x = list_refs, training_frame = h2o_dadosTrain, distribution = "multinomial"); - + categorias.h2odpl.pred <- h2o.predict(categorias.h2odpl, newdata = h2o_dadosTest)$predict; - + dados[dados$folds == k, 'pred_h2odpl'] <- as.character(as.vector(categorias.h2odpl.pred)); }