e-sensing · gilbertocamara · Sep 6, 2017 · Aug 29, 2017 · Aug 30, 2017 · Aug 30, 2017
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -78,8 +78,8 @@ Imports:
 LinkingTo: 
  Rcpp
 Remotes:
-  e-sensing/wtss,
-  e-sensing/wtsps,
+ e-sensing/wtss,
+ e-sensing/wtsps,
  rolfsimoes/dtwSat
 RoxygenNote: 6.0.1
 Collate: 
@@ -100,11 +100,10 @@ Collate:
  'sits_getdata.R'
  'sits_init.R'
  'sits_kohonen.R'
- 'sits_labels.R'
  'sits_machine_learning.R'
  'sits_patterns.R'
  'sits_pipe.R'
  'sits_plot.R'
- 'sits_smooth.R'
  'sits_stack_proc.R'
  'sits_table.R'
+ 'sits_time_series.R'
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,9 @@
 # Generated by roxygen2: do not edit by hand
 
 export("%>%")
+export("sits_bands<-")
+export("sits_cluster_names<-")
+export("sits_labels<-")
 export(sits_TS_distances)
 export(sits_TWDTW_classify)
 export(sits_TWDTW_distances)
@@ -18,9 +21,10 @@ export(sits_cloud_filter)
 export(sits_cluster)
 export(sits_cluster_cleaner)
 export(sits_cluster_frequency)
-export(sits_cluster_relabel)
+export(sits_cluster_names)
 export(sits_cluster_validity)
 export(sits_color_name)
+export(sits_colors)
 export(sits_conf_fromGZ)
 export(sits_conf_fromJSON)
 export(sits_coverageWTSS)
@@ -34,6 +38,8 @@ export(sits_distance_table_from_data)
 export(sits_distances)
 export(sits_envelope)
 export(sits_exitConnection)
+export(sits_filter)
+export(sits_foreach)
 export(sits_formula_linear)
 export(sits_formula_logref)
 export(sits_formula_smooth)
@@ -59,8 +65,6 @@ export(sits_kfold_fast_validate)
 export(sits_kfold_validate)
 export(sits_kohonen)
 export(sits_labels)
-export(sits_labels_list)
-export(sits_labels_sample)
 export(sits_lda)
 export(sits_linear_interp)
 export(sits_max_colors)
@@ -75,17 +79,16 @@ export(sits_plot_dendrogram)
 export(sits_predict)
 export(sits_prune)
 export(sits_qda)
-export(sits_relabel)
-export(sits_rename)
 export(sits_rfor)
 export(sits_runProcess_WTSPS)
+export(sits_sample)
 export(sits_select)
 export(sits_sgolay)
-export(sits_smooth)
 export(sits_spread_matches)
 export(sits_stack_relabel)
 export(sits_stack_transition_relabel)
 export(sits_statusProcess_WTSPS)
+export(sits_summary)
 export(sits_svm)
 export(sits_table)
 export(sits_test_patterns)

diff --git a/R/sits_assessment.R b/R/sits_assessment.R
@@ -217,8 +217,9 @@ sits_kfold_validate <- function (data.tb, folds = 5,
 
  # does the input data exist?
  .sits_test_table (data.tb)
+
  # is the data labelled?
- ensurer::ensure_that (data.tb, !("NoClass" %in% sits_labels(.)$label),
+ ensurer::ensure_that (data.tb, !("NoClass" %in% sits_labels(.)),
  err_desc = "sits_cross_validate: please provide a labelled set of time series")
 
  #is the bands are not provided, deduced them from the data
@@ -306,7 +307,7 @@ sits_kfold_fast_validate <- function (data.tb, folds = 5,
  # does the input data exist?
  .sits_test_table (data.tb)
  # is the data labelled?
- ensurer::ensure_that (data.tb, !("NoClass" %in% sits_labels(.)$label),
+ ensurer::ensure_that (data.tb, !("NoClass" %in% sits_labels(.)),
  err_desc = "sits_cross_validate: please provide a labelled set of time series")
 
  # what are the bands of the data?
@@ -485,7 +486,7 @@ sits_test_patterns <- function (data.tb, patterns.tb, bands,
  .sits_test_table (patterns.tb)
  ensurer::ensure_that (bands, !purrr::is_null(.),
  err_desc = "sits_test_patterns: please provide the bands to be used")
- ensurer::ensure_that (data.tb, !("NoClass" %in% sits_labels(.)$label),
+ ensurer::ensure_that (data.tb, !("NoClass" %in% sits_labels(.)),
  err_desc = "sits_test_patterns: please provide a labelled set of time series")
 
 

diff --git a/R/sits_cluster.R b/R/sits_cluster.R
@@ -10,15 +10,20 @@
 #' @param data.tb a tibble with input data of dtwclust.
 #' @param clusters a cluster structure returned from dtwclust.
 #' @param k the desired number of clusters
+#' @param height the desired height to cut the dendrogram. At least one of k or height must be specified, k overrides height if both are given.
 #' @return result.tb a SITS table with the clusters or clusters' members
 #' @export
-sits_cluster <- function (data.tb, clusters, k) {
+sits_cluster <- function (data.tb, clusters, k = NULL, height = NULL) {
+
+ #verifies if either k or height were informed
+ ensurer::ensure_that(k, !(is.null(.) & is.null(height)),
+ err_desc = "sits_cluster: you must provide at least k or height")
 
  # create a table to store the results
  result.tb <- data.tb
 
  # cut the tree
- result.tb$cluster <- stats::cutree(clusters, k)
+ result.tb$cluster <- stats::cutree(clusters, k, height)
 
  return (result.tb)
 }
@@ -56,9 +61,10 @@ sits_cluster_validity <- function (data.tb, type = "valid") {
 #'
 #' @param data.tb a SITS tibble with `cluster` column.
 #' @param relative (boolean) return relative frequency?
+#' @param margin number indicating how to compute relative frequency (1 regarding labels, 2 regarding clusters) (default 2)
 #' @return result.mtx matrix containing all frequencies of labels in clusters
 #' @export
-sits_cluster_frequency <- function (data.tb, relative = FALSE) {
+sits_cluster_frequency <- function (data.tb, relative = FALSE, margin = 2) {
 
  # is the input data the result of a cluster function?
  ensurer::ensure_that(data.tb, "cluster" %in% names (.), err_desc = "sits_cluster_frequency: input data does not contain cluster column")
@@ -68,7 +74,7 @@ sits_cluster_frequency <- function (data.tb, relative = FALSE) {
 
  # compute relative frequency
  if (relative)
- result.mtx <- prop.table(result.mtx, margin = 2)
+ result.mtx <- prop.table(result.mtx, margin = margin)
 
  # compute total row and col
  result.mtx <- stats::addmargins(result.mtx, FUN = list(Total = sum), quiet = TRUE)
@@ -83,64 +89,97 @@ sits_cluster_frequency <- function (data.tb, relative = FALSE) {
 #' This function needs as input a SITS tibble with `cluster` column.
 #'
 #' @param data.tb a SITS tibble with `cluster` column.
-#' @param min_clu_perc minimum percentage of representativeness inside a cluster to remain in cluster.
+#' @param min_clu_perc minimum percentage of labels inside a cluster to remain in cluster.
+#' @param min_lab_perc minimum percentage of labels regarding its total to be keeped in cluster.
 #' @return result.tb a SITS tibble with all selected samples
 #' @export
-sits_cluster_cleaner <- function (data.tb, min_clu_perc) {
+sits_cluster_cleaner <- function (data.tb, min_clu_perc = 0.0, min_lab_perc = 0.0) {
+
+ # verify if data.tb has data
+ .sits_test_table(data.tb)
 
  # is the input data the result of a cluster function?
  ensurer::ensure_that(data.tb, "cluster" %in% names (.), err_desc = "sits_cluster_cleaner: input data does not contain cluster column")
 
  # compute frequency in each cluster
- freq.mtx <- sits_cluster_frequency(data.tb, relative = TRUE)
+ freq.mtx <- sits_cluster_frequency(data.tb, relative = TRUE, margin = 2)
 
  # get those indexes whose labels represents more than `min_clu_perc`
  index.mtx <- which(freq.mtx[1:NROW(freq.mtx) - 1,1:NCOL(freq.mtx) - 1] > min_clu_perc, arr.ind = TRUE, useNames = TRUE)
 
  # return only those samples that satisfies the `min_clu_perc` condition
- filter_condition <- paste0(purrr::map2(rownames(index.mtx), index.mtx[,2],
- function(lb, clu) paste0("label=='", lb, "' & cluster==", clu)),
- collapse = " | ")
+ filter_condition_clu <- paste0(purrr::map2(rownames(index.mtx), index.mtx[,2],
+ function(lb, clu) paste0("label=='", lb, "' & cluster==", clu)),
+ collapse = " | ")
+
+ # compute frequency in each label
+ freq.mtx <- sits_cluster_frequency(data.tb, relative = TRUE, margin = 1)
 
- # if no index selescted, return none
- filter_condition <- ifelse(filter_condition != "", filter_condition, "FALSE")
+ # get those indexes whose labels represents more than `min_lab_perc`
+ index.mtx <- which(freq.mtx[1:NROW(freq.mtx) - 1,1:NCOL(freq.mtx) - 1] > min_lab_perc, arr.ind = TRUE, useNames = TRUE)
+
+ # return only those samples that satisfies the `min_lab_perc` condition
+ filter_condition_lab <- paste0(purrr::map2(rownames(index.mtx), index.mtx[,2],
+ function(lb, clu) paste0("label=='", lb, "' & cluster==", clu)),
+ collapse = " | ")
+
+ # if no index selected, return none
+ filter_condition <- ifelse(filter_condition_clu != "",
+ ifelse(filter_condition_lab != "", paste0("(", filter_condition_clu, ") & (", filter_condition_lab, ")"), "FALSE"),
+ "FALSE")
 
  # filter result and return
  result.tb <- dplyr::filter_(data.tb, filter_condition)
  return (result.tb)
 }
 
-#' @title Cluster cleaner
-#' @name sits_cluster_relabel
+#' @title Cluster label
+#' @name sits_cluster_names
 #' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
 #'
 #' @description Renames the labels of SITS tibble samples according to its respective cluster.
 #' This function needs as input a SITS tibble with `cluster` column.
 #'
 #' @param data.tb a SITS tibble with `cluster` column.
-#' @param cluster_names  character vector informing all cluster names. If unnamed vector is informed,
+#' @return cluster_names character vector informing all cluster names. If unnamed vector is informed,
 #' the index of each name will be treated as cluster code
-#' @return result.tb SITS tibble with relabeled samples
 #' @export
-sits_cluster_relabel <-  function (data.tb, cluster_names) {
+sits_cluster_names <- function (data.tb) {
 
  # is the input data the result of a cluster function?
- ensurer::ensure_that(data.tb, "cluster" %in% names (.), err_desc = "sits_cluster_relabel: input data does not contain cluster column")
+ ensurer::ensure_that(data.tb, "cluster" %in% names (.), err_desc = "sits_cluster_names: input data does not contain cluster column")
 
- # compute new clusters names
- # if an unnamed vector is given
- if (is.null(names(cluster_names)))
- data_cluster_names.vec <- cluster_names[data.tb$cluster] %>% unlist()
- # if a named vector is given
- else
- data_cluster_names.vec <- as.character(cluster_names[as.character(data.tb$cluster)] %>% unlist())
+ # compute clusters names and return
+ cluster_names <- sort(base::unique(data.tb$cluster))
+ return (cluster_names)
+}
 
- # fill not renamed entries with original value
- data_cluster_names.vec <- ifelse(is.na(data_cluster_names.vec), data.tb$cluster, data_cluster_names.vec)
- names(data_cluster_names.vec) <- NULL
+#' @title Cluster names
+#' @name `sits_cluster_names<-`
+#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
+#'
+#' @description Renames the labels of SITS tibble samples according to its respective cluster.
+#' This function needs as input a SITS tibble with `cluster` column.
+#'
+#' @param data.tb a SITS tibble with `cluster` column.
+#' @param value character vector informing all cluster names. If unnamed vector is informed,
+#' the index of each name will be treated as cluster code
+#' @return data.tb SITS tibble with relabeled samples
+#' @export
+`sits_cluster_names<-` <- function (data.tb, value) {
+
+ # is the input data the result of a cluster function?
+ ensurer::ensure_that(data.tb, "cluster" %in% names (.),
+ err_desc = "sits_cluster_names: input data does not contain cluster column")
+
+ # verify if the informed cluster names has the same length of clusters names
+ ensurer::ensure_that(data.tb, length(sits_cluster_names(.)) == length(value),
+ err_desc = "sits_cluster_name: informed names has length different of the number of clusters")
+
+ # compute new clusters names
+ data_cluster_names.vec <- value[data.tb$cluster] %>% unlist()
 
  # relabel result and return
- result.tb <- data.tb
- result.tb$label <- data_cluster_names.vec
- return (result.tb)
+ data.tb$label <- data_cluster_names.vec
+ return (data.tb)
 }
diff --git a/R/sits_colors.R b/R/sits_colors.R
@@ -132,21 +132,34 @@ sits_color_name <- function(name = NULL){
 #'
 #' @description Number of colors available in brewer color schemes
 #'
-#' @param name  name of the brewer color set
+#' @param brewer name of the brewer color set
 #' @return numeric number of available colors
 #' @export
-sits_max_colors <- function(name = NULL){
- if (is.null(name))
+sits_max_colors <- function(brewer = NULL){
+ if (is.null(brewer))
  return(.sits_brewerRGB %>% purrr::map(function(sch){
  sum((sch %>%
  purrr::map(function(n) length(n)) %>%
  as.numeric()) > 1)
  }))
- return(sum((.sits_brewerRGB[[sits_color_name(name)]] %>%
+ return(sum((.sits_brewerRGB[[sits_color_name(brewer)]] %>%
  purrr::map(function(n) length(n)) %>%
  as.numeric()) > 1))
 }
 
+#' @title Brewer color schemes
+#' @name sits_colors
+#'
+#' @description Number of colors available in brewer color schemes
+#'
+#' @param brewer name of the brewer color set
+#' @param n number of desirable colors (default 3)
+#' @return list list of rgb colors
+#' @export
+sits_colors <- function(brewer, n = 3){
+ return(.sits_brewerRGB[[sits_color_name(name)]][[as.character(n)]])
+}
+
 # Brewer color set constant
 # based on https://colorbrewer2.org colors' schemes
 .sits_brewerRGB = tibble::lst(

diff --git a/R/sits_dendrogram.R b/R/sits_dendrogram.R
@@ -40,7 +40,6 @@ sits_dendrogram <- function (data.tb, bands = NULL,
  distance = dist_method,
  control = dtwclust::hierarchical_control(method = grouping_method), ...)
 
-
  # return the clusters
  return (clusters)
 }