Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New functions and improvements #18

Merged
merged 13 commits into from
Sep 6, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ Imports:
LinkingTo:
Rcpp
Remotes:
e-sensing/wtss,
e-sensing/wtsps,
e-sensing/wtss,
e-sensing/wtsps,
rolfsimoes/dtwSat
RoxygenNote: 6.0.1
Collate:
Expand All @@ -100,11 +100,10 @@ Collate:
'sits_getdata.R'
'sits_init.R'
'sits_kohonen.R'
'sits_labels.R'
'sits_machine_learning.R'
'sits_patterns.R'
'sits_pipe.R'
'sits_plot.R'
'sits_smooth.R'
'sits_stack_proc.R'
'sits_table.R'
'sits_time_series.R'
15 changes: 9 additions & 6 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export("sits_bands<-")
export("sits_cluster_names<-")
export("sits_labels<-")
export(sits_TS_distances)
export(sits_TWDTW_classify)
export(sits_TWDTW_distances)
Expand All @@ -18,9 +21,10 @@ export(sits_cloud_filter)
export(sits_cluster)
export(sits_cluster_cleaner)
export(sits_cluster_frequency)
export(sits_cluster_relabel)
export(sits_cluster_names)
export(sits_cluster_validity)
export(sits_color_name)
export(sits_colors)
export(sits_conf_fromGZ)
export(sits_conf_fromJSON)
export(sits_coverageWTSS)
Expand All @@ -34,6 +38,8 @@ export(sits_distance_table_from_data)
export(sits_distances)
export(sits_envelope)
export(sits_exitConnection)
export(sits_filter)
export(sits_foreach)
export(sits_formula_linear)
export(sits_formula_logref)
export(sits_formula_smooth)
Expand All @@ -59,8 +65,6 @@ export(sits_kfold_fast_validate)
export(sits_kfold_validate)
export(sits_kohonen)
export(sits_labels)
export(sits_labels_list)
export(sits_labels_sample)
export(sits_lda)
export(sits_linear_interp)
export(sits_max_colors)
Expand All @@ -75,17 +79,16 @@ export(sits_plot_dendrogram)
export(sits_predict)
export(sits_prune)
export(sits_qda)
export(sits_relabel)
export(sits_rename)
export(sits_rfor)
export(sits_runProcess_WTSPS)
export(sits_sample)
export(sits_select)
export(sits_sgolay)
export(sits_smooth)
export(sits_spread_matches)
export(sits_stack_relabel)
export(sits_stack_transition_relabel)
export(sits_statusProcess_WTSPS)
export(sits_summary)
export(sits_svm)
export(sits_table)
export(sits_test_patterns)
Expand Down
7 changes: 4 additions & 3 deletions R/sits_assessment.R
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,9 @@ sits_kfold_validate <- function (data.tb, folds = 5,

# does the input data exist?
.sits_test_table (data.tb)

# is the data labelled?
ensurer::ensure_that (data.tb, !("NoClass" %in% sits_labels(.)$label),
ensurer::ensure_that (data.tb, !("NoClass" %in% sits_labels(.)),
err_desc = "sits_cross_validate: please provide a labelled set of time series")

#is the bands are not provided, deduced them from the data
Expand Down Expand Up @@ -306,7 +307,7 @@ sits_kfold_fast_validate <- function (data.tb, folds = 5,
# does the input data exist?
.sits_test_table (data.tb)
# is the data labelled?
ensurer::ensure_that (data.tb, !("NoClass" %in% sits_labels(.)$label),
ensurer::ensure_that (data.tb, !("NoClass" %in% sits_labels(.)),
err_desc = "sits_cross_validate: please provide a labelled set of time series")

# what are the bands of the data?
Expand Down Expand Up @@ -485,7 +486,7 @@ sits_test_patterns <- function (data.tb, patterns.tb, bands,
.sits_test_table (patterns.tb)
ensurer::ensure_that (bands, !purrr::is_null(.),
err_desc = "sits_test_patterns: please provide the bands to be used")
ensurer::ensure_that (data.tb, !("NoClass" %in% sits_labels(.)$label),
ensurer::ensure_that (data.tb, !("NoClass" %in% sits_labels(.)),
err_desc = "sits_test_patterns: please provide a labelled set of time series")


Expand Down
101 changes: 70 additions & 31 deletions R/sits_cluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,20 @@
#' @param data.tb a tibble with input data of dtwclust.
#' @param clusters a cluster structure returned from dtwclust.
#' @param k the desired number of clusters
#' @param height the desired height to cut the dendrogram. At least one of k or height must be specified, k overrides height if both are given.
#' @return result.tb a SITS table with the clusters or clusters' members
#' @export
sits_cluster <- function (data.tb, clusters, k) {
sits_cluster <- function (data.tb, clusters, k = NULL, height = NULL) {

#verifies if either k or height were informed
ensurer::ensure_that(k, !(is.null(.) & is.null(height)),
err_desc = "sits_cluster: you must provide at least k or height")

# create a table to store the results
result.tb <- data.tb

# cut the tree
result.tb$cluster <- stats::cutree(clusters, k)
result.tb$cluster <- stats::cutree(clusters, k, height)

return (result.tb)
}
Expand Down Expand Up @@ -56,9 +61,10 @@ sits_cluster_validity <- function (data.tb, type = "valid") {
#'
#' @param data.tb a SITS tibble with `cluster` column.
#' @param relative (boolean) return relative frequency?
#' @param margin number indicating how to compute relative frequency (1 regarding labels, 2 regarding clusters) (default 2)
#' @return result.mtx matrix containing all frequencies of labels in clusters
#' @export
sits_cluster_frequency <- function (data.tb, relative = FALSE) {
sits_cluster_frequency <- function (data.tb, relative = FALSE, margin = 2) {

# is the input data the result of a cluster function?
ensurer::ensure_that(data.tb, "cluster" %in% names (.), err_desc = "sits_cluster_frequency: input data does not contain cluster column")
Expand All @@ -68,7 +74,7 @@ sits_cluster_frequency <- function (data.tb, relative = FALSE) {

# compute relative frequency
if (relative)
result.mtx <- prop.table(result.mtx, margin = 2)
result.mtx <- prop.table(result.mtx, margin = margin)

# compute total row and col
result.mtx <- stats::addmargins(result.mtx, FUN = list(Total = sum), quiet = TRUE)
Expand All @@ -83,64 +89,97 @@ sits_cluster_frequency <- function (data.tb, relative = FALSE) {
#' This function needs as input a SITS tibble with `cluster` column.
#'
#' @param data.tb a SITS tibble with `cluster` column.
#' @param min_clu_perc minimum percentage of representativeness inside a cluster to remain in cluster.
#' @param min_clu_perc minimum percentage of labels inside a cluster to remain in cluster.
#' @param min_lab_perc minimum percentage of labels regarding its total to be keeped in cluster.
#' @return result.tb a SITS tibble with all selected samples
#' @export
sits_cluster_cleaner <- function (data.tb, min_clu_perc) {
sits_cluster_cleaner <- function (data.tb, min_clu_perc = 0.0, min_lab_perc = 0.0) {

# verify if data.tb has data
.sits_test_table(data.tb)

# is the input data the result of a cluster function?
ensurer::ensure_that(data.tb, "cluster" %in% names (.), err_desc = "sits_cluster_cleaner: input data does not contain cluster column")

# compute frequency in each cluster
freq.mtx <- sits_cluster_frequency(data.tb, relative = TRUE)
freq.mtx <- sits_cluster_frequency(data.tb, relative = TRUE, margin = 2)

# get those indexes whose labels represents more than `min_clu_perc`
index.mtx <- which(freq.mtx[1:NROW(freq.mtx) - 1,1:NCOL(freq.mtx) - 1] > min_clu_perc, arr.ind = TRUE, useNames = TRUE)

# return only those samples that satisfies the `min_clu_perc` condition
filter_condition <- paste0(purrr::map2(rownames(index.mtx), index.mtx[,2],
function(lb, clu) paste0("label=='", lb, "' & cluster==", clu)),
collapse = " | ")
filter_condition_clu <- paste0(purrr::map2(rownames(index.mtx), index.mtx[,2],
function(lb, clu) paste0("label=='", lb, "' & cluster==", clu)),
collapse = " | ")

# compute frequency in each label
freq.mtx <- sits_cluster_frequency(data.tb, relative = TRUE, margin = 1)

# if no index selescted, return none
filter_condition <- ifelse(filter_condition != "", filter_condition, "FALSE")
# get those indexes whose labels represents more than `min_lab_perc`
index.mtx <- which(freq.mtx[1:NROW(freq.mtx) - 1,1:NCOL(freq.mtx) - 1] > min_lab_perc, arr.ind = TRUE, useNames = TRUE)

# return only those samples that satisfies the `min_lab_perc` condition
filter_condition_lab <- paste0(purrr::map2(rownames(index.mtx), index.mtx[,2],
function(lb, clu) paste0("label=='", lb, "' & cluster==", clu)),
collapse = " | ")

# if no index selected, return none
filter_condition <- ifelse(filter_condition_clu != "",
ifelse(filter_condition_lab != "", paste0("(", filter_condition_clu, ") & (", filter_condition_lab, ")"), "FALSE"),
"FALSE")

# filter result and return
result.tb <- dplyr::filter_(data.tb, filter_condition)
return (result.tb)
}

#' @title Cluster cleaner
#' @name sits_cluster_relabel
#' @title Cluster label
#' @name sits_cluster_names
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#'
#' @description Renames the labels of SITS tibble samples according to its respective cluster.
#' This function needs as input a SITS tibble with `cluster` column.
#'
#' @param data.tb a SITS tibble with `cluster` column.
#' @param cluster_names character vector informing all cluster names. If unnamed vector is informed,
#' @return cluster_names character vector informing all cluster names. If unnamed vector is informed,
#' the index of each name will be treated as cluster code
#' @return result.tb SITS tibble with relabeled samples
#' @export
sits_cluster_relabel <- function (data.tb, cluster_names) {
sits_cluster_names <- function (data.tb) {

# is the input data the result of a cluster function?
ensurer::ensure_that(data.tb, "cluster" %in% names (.), err_desc = "sits_cluster_relabel: input data does not contain cluster column")
ensurer::ensure_that(data.tb, "cluster" %in% names (.), err_desc = "sits_cluster_names: input data does not contain cluster column")

# compute new clusters names
# if an unnamed vector is given
if (is.null(names(cluster_names)))
data_cluster_names.vec <- cluster_names[data.tb$cluster] %>% unlist()
# if a named vector is given
else
data_cluster_names.vec <- as.character(cluster_names[as.character(data.tb$cluster)] %>% unlist())
# compute clusters names and return
cluster_names <- sort(base::unique(data.tb$cluster))
return (cluster_names)
}

# fill not renamed entries with original value
data_cluster_names.vec <- ifelse(is.na(data_cluster_names.vec), data.tb$cluster, data_cluster_names.vec)
names(data_cluster_names.vec) <- NULL
#' @title Cluster names
#' @name `sits_cluster_names<-`
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#'
#' @description Renames the labels of SITS tibble samples according to its respective cluster.
#' This function needs as input a SITS tibble with `cluster` column.
#'
#' @param data.tb a SITS tibble with `cluster` column.
#' @param value character vector informing all cluster names. If unnamed vector is informed,
#' the index of each name will be treated as cluster code
#' @return data.tb SITS tibble with relabeled samples
#' @export
`sits_cluster_names<-` <- function (data.tb, value) {

# is the input data the result of a cluster function?
ensurer::ensure_that(data.tb, "cluster" %in% names (.),
err_desc = "sits_cluster_names: input data does not contain cluster column")

# verify if the informed cluster names has the same length of clusters names
ensurer::ensure_that(data.tb, length(sits_cluster_names(.)) == length(value),
err_desc = "sits_cluster_name: informed names has length different of the number of clusters")

# compute new clusters names
data_cluster_names.vec <- value[data.tb$cluster] %>% unlist()

# relabel result and return
result.tb <- data.tb
result.tb$label <- data_cluster_names.vec
return (result.tb)
data.tb$label <- data_cluster_names.vec
return (data.tb)
}
21 changes: 17 additions & 4 deletions R/sits_colors.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,21 +132,34 @@ sits_color_name <- function(name = NULL){
#'
#' @description Number of colors available in brewer color schemes
#'
#' @param name name of the brewer color set
#' @param brewer name of the brewer color set
#' @return numeric number of available colors
#' @export
sits_max_colors <- function(name = NULL){
if (is.null(name))
sits_max_colors <- function(brewer = NULL){
if (is.null(brewer))
return(.sits_brewerRGB %>% purrr::map(function(sch){
sum((sch %>%
purrr::map(function(n) length(n)) %>%
as.numeric()) > 1)
}))
return(sum((.sits_brewerRGB[[sits_color_name(name)]] %>%
return(sum((.sits_brewerRGB[[sits_color_name(brewer)]] %>%
purrr::map(function(n) length(n)) %>%
as.numeric()) > 1))
}

#' @title Brewer color schemes
#' @name sits_colors
#'
#' @description Number of colors available in brewer color schemes
#'
#' @param brewer name of the brewer color set
#' @param n number of desirable colors (default 3)
#' @return list list of rgb colors
#' @export
sits_colors <- function(brewer, n = 3){
return(.sits_brewerRGB[[sits_color_name(name)]][[as.character(n)]])
}

# Brewer color set constant
# based on https://colorbrewer2.org colors' schemes
.sits_brewerRGB = tibble::lst(
Expand Down
1 change: 0 additions & 1 deletion R/sits_dendrogram.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ sits_dendrogram <- function (data.tb, bands = NULL,
distance = dist_method,
control = dtwclust::hierarchical_control(method = grouping_method), ...)


# return the clusters
return (clusters)
}
Loading