diff --git a/NAMESPACE b/NAMESPACE index 77420af6..bfdf7a25 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -129,6 +129,8 @@ S3method(.raster_yres,terra) S3method(.reg_s2tile_convert,dem_cube) S3method(.reg_s2tile_convert,grd_cube) S3method(.reg_s2tile_convert,rtc_cube) +S3method(.samples_alloc_strata,class_cube) +S3method(.samples_alloc_strata,class_vector_cube) S3method(.slice_dfr,numeric) S3method(.source_collection_access_test,"mpc_cube_sentinel-1-grd") S3method(.source_collection_access_test,cdse_cube) @@ -198,6 +200,7 @@ S3method(.source_tile_get_bbox,stac_cube) S3method(.tile,default) S3method(.tile,raster_cube) S3method(.tile_area_freq,class_cube) +S3method(.tile_area_freq,class_vector_cube) S3method(.tile_area_freq,default) S3method(.tile_area_freq,raster_cube) S3method(.tile_as_sf,default) diff --git a/R/api_cube.R b/R/api_cube.R index afa69361..840181ff 100644 --- a/R/api_cube.R +++ b/R/api_cube.R @@ -143,23 +143,10 @@ NULL #' #' @return A \code{vector} with the areas of the cube labels. .cube_class_areas <- function(cube) { - .check_is_class_cube(cube) - labels_cube <- sits_labels(cube) - # Get area for each class for each row of the cube freq_lst <- slider::slide(cube, function(tile) { # Get the frequency count and value for each labelled image freq <- .tile_area_freq(tile) - # pixel area - # convert the area to hectares - # assumption: spatial resolution unit is meters - area <- freq[["count"]] * .tile_xres(tile) * .tile_yres(tile) / 10000 - # Include class names - freq <- dplyr::mutate( - freq, - area = area, - class = labels_cube[as.character(freq[["value"]])] - ) return(freq) }) # Get a tibble by binding the row (duplicated labels with different counts) diff --git a/R/api_samples.R b/R/api_samples.R index 4bcaa023..69f08358 100644 --- a/R/api_samples.R +++ b/R/api_samples.R @@ -203,3 +203,82 @@ }) }) } +#' @title Allocate points for stratified sampling for accuracy estimation +#' @name .samples_alloc_strata +#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br} +#' @param cube Classified data cube (raster or vector) +#' @param samples_class Matrix with sampling design to be allocated +#' @param dots Other params for the function +#' @param multicores Number of cores to work in parallel +#' @param progress Show progress bar? +#' +#' @return Points resulting from stratified sampling +#' @keywords internal +#' @noRd +.samples_alloc_strata <- function(cube, + samples_class, ..., + multicores = 2, + progress = TRUE){ + UseMethod(".samples_alloc_strata", cube) +} +#' @export +.samples_alloc_strata.class_cube <- function(cube, + samples_class, ..., + multicores = 2, + progress = TRUE){ + # estimate size + size <- ceiling(max(samples_class) / nrow(cube)) + # get labels + labels <- names(samples_class) + n_labels <- length(labels) + # Prepare parallel processing + .parallel_start(workers = multicores) + on.exit(.parallel_stop(), add = TRUE) + # Create assets as jobs + cube_assets <- .cube_split_assets(cube) + # Process each asset in parallel + samples <- .jobs_map_parallel_dfr(cube_assets, function(tile) { + robj <- .raster_open_rast(.tile_path(tile)) + cls <- data.frame(id = 1:n_labels, + cover = labels) + levels(robj) <- cls + samples_sv <- terra::spatSample( + x = robj, + size = size, + method = "stratified", + as.points = TRUE + ) + samples_sf <- sf::st_as_sf(samples_sv) + samples_sf <- dplyr::mutate(samples_sf, + label = labels[.data[["cover"]]]) + samples_sf <- sf::st_transform(samples_sf, crs = "EPSG:4326") + }, progress = progress) + + samples <- purrr::map_dfr(labels, function(lab) { + samples_class <- samples |> + dplyr::filter(.data[["label"]] == lab) |> + dplyr::slice_sample(n = samples_class[lab]) + }) + + return(samples) +} +#' @export +.samples_alloc_strata.class_vector_cube <- function(cube, + samples_class, ..., + multicores = 2, + progress = TRUE){ + + segments_cube <- slider::slide_dfr(cube, function(tile){ + # Open segments and transform them to tibble + segments <- .segments_read_vec(tile) + }) + # Retrieve the required number of segments per class + samples_lst <- segments_cube |> + dplyr::group_by(.data[["class"]]) |> + dplyr::group_map(function(cl, class){ + samples_class <- sf::st_sample(cl, samples_class[class[["class"]]]) + sf_samples <- sf::st_sf(class, geometry = samples_class) + }) + samples <- dplyr::bind_rows(samples_lst) + return(samples) +} diff --git a/R/api_tile.R b/R/api_tile.R index b653f8e4..17c604cd 100644 --- a/R/api_tile.R +++ b/R/api_tile.R @@ -1314,6 +1314,33 @@ NULL r_obj <- .raster_open_rast(.tile_path(tile)) # Retrieve the frequency freq <- tibble::as_tibble(.raster_freq(r_obj)) + # get labels + labels <- .tile_labels(tile) + # pixel area + # convert the area to hectares + # assumption: spatial resolution unit is meters + area <- freq[["count"]] * .tile_xres(tile) * .tile_yres(tile) / 10000 + # Include class names + freq <- dplyr::mutate( + freq, + area = area, + class = labels[as.character(freq[["value"]])] + ) + # Return frequencies + freq +} +#' @export +.tile_area_freq.class_vector_cube <- function(tile) { + # Open segments + segments <- .segments_read_vec(tile) + segments[["area"]] <- sf::st_area(segments) + segments <- sf::st_drop_geometry(segments) + segments <- units::drop_units(segments) + # Retrieve the area + freq <- segments |> + dplyr::group_by(class) |> + dplyr::summarise(area = sum(.data[["area"]])) |> + dplyr::select(c(dplyr::all_of("area"), dplyr::all_of("class"))) # Return frequencies freq } diff --git a/R/sits_classify.R b/R/sits_classify.R index 9a2c05fb..e472f3b1 100644 --- a/R/sits_classify.R +++ b/R/sits_classify.R @@ -253,6 +253,8 @@ sits_classify.raster_cube <- function(data, .check_filter_fn(filter_fn) # Retrieve the samples from the model samples <- .ml_samples(ml_model) + # Retrieve the bands from the model + bands <- .ml_bands(ml_model) # Do the samples and tile match their timeline length? .check_samples_tile_match_timeline(samples = samples, tile = data) # Do the samples and tile match their bands? @@ -263,7 +265,7 @@ sits_classify.raster_cube <- function(data, # Check minimum memory needed to process one block job_memsize <- .jobs_memsize( job_size = .block_size(block = block, overlap = 0), - npaths = length(.tile_paths(data)) + length(.ml_labels(ml_model)), + npaths = length(.tile_paths(data, bands)) + length(.ml_labels(ml_model)), nbytes = 8, proc_bloat = proc_bloat ) diff --git a/R/sits_plot.R b/R/sits_plot.R index c8cb8fec..c86f6d12 100644 --- a/R/sits_plot.R +++ b/R/sits_plot.R @@ -524,7 +524,7 @@ plot.vector_cube <- function(x, ..., tile = x[["tile"]][[1]], dates = NULL, seg_color = "black", - line_width = 1, + line_width = 0.3, palette = "RdYlGn", rev = FALSE, scale = 1.0, diff --git a/R/sits_sample_functions.R b/R/sits_sample_functions.R index 7c499932..57bb00c1 100644 --- a/R/sits_sample_functions.R +++ b/R/sits_sample_functions.R @@ -284,15 +284,14 @@ sits_reduce_imbalance <- function(samples, #' sampling_design <- sits_sampling_design(label_cube, expected_ua) #' } #' @export -sits_sampling_design <- function(cube, +sits_sampling_design <- function(cube, ..., expected_ua = 0.75, std_err = 0.01, rare_class_prop = 0.1) { .check_set_caller("sits_sampling_design") # check the cube is valid - .check_raster_cube_files(cube) - # check cube is class cube - .check_is_class_cube(cube) + .check_that(inherits(cube, "class_cube") || + inherits(cube, "class_vector_cube")) # get the labels labels <- .cube_labels(cube) n_labels <- length(labels) @@ -304,10 +303,13 @@ sits_sampling_design <- function(cube, .check_that(length(expected_ua) == n_labels) # check names of labels .check_that(all(labels %in% names(expected_ua))) - # adjust names to match cube labels - expected_ua <- expected_ua[labels] # get cube class areas class_areas <- .cube_class_areas(cube) + # check that names of class areas are contained in the labels + .check_that(all(names(class_areas) %in% labels), + msg = .conf("messages", "sits_sampling_design_labels")) + # adjust names to match cube labels + expected_ua <- expected_ua[names(class_areas)] # calculate proportion of class areas prop <- class_areas / sum(class_areas) # standard deviation of the stratum @@ -315,8 +317,9 @@ sits_sampling_design <- function(cube, # calculate sample size sample_size <- round((sum(prop * std_dev) / std_err) ^ 2) # determine "Equal" allocation - equal <- rep(round(sample_size / n_labels), n_labels) - names(equal) <- labels + n_classes <- length(class_areas) + equal <- rep(round(sample_size / n_classes), n_classes) + names(equal) <- names(class_areas) # find out the classes which are rare rare_classes <- prop[prop <= rare_class_prop] # Determine allocation possibilities @@ -418,66 +421,44 @@ sits_stratified_sampling <- function(cube, .check_set_caller("sits_stratified_sampling") # check the cube is valid .check_raster_cube_files(cube) - # check cube is class cube - .check_is_class_cube(cube) + # check the cube is valid + .check_that(inherits(cube, "class_cube") || + inherits(cube, "class_vector_cube")) # get the labels labels <- .cube_labels(cube) n_labels <- length(labels) # check number of labels - .check_that(nrow(sampling_design) == n_labels) + .check_that(nrow(sampling_design) <= n_labels) # check names of labels .check_that(all(rownames(sampling_design) %in% labels)) # check allocation method .check_that(alloc %in% colnames(sampling_design), - msg = .conf("messages", "sits_sampling_design_alloc")) + msg = .conf("messages", "sits_stratified_sampling_alloc")) # retrieve samples class samples_class <- unlist(sampling_design[, alloc]) # check samples class .check_int_parameter(samples_class, is_named = TRUE, - msg = .conf("messages", "sits_sampling_design_samples") + msg = .conf("messages", "sits_stratified_sampling_samples") ) .check_int_parameter(multicores, min = 1, max = 2048) .check_progress(progress) # name samples class - names(samples_class) <- rownames(sampling_design) + # names(samples_class) <- rownames(sampling_design) # include overhead samples_class <- ceiling(samples_class * overhead) - # estimate size - size <- ceiling(max(samples_class) / nrow(cube)) - # Prepare parallel processing - .parallel_start(workers = multicores) - on.exit(.parallel_stop(), add = TRUE) - # Create assets as jobs - cube_assets <- .cube_split_assets(cube) - # Process each asset in parallel - samples <- .jobs_map_parallel_dfr(cube_assets, function(tile) { - robj <- .raster_open_rast(.tile_path(tile)) - cls <- data.frame(id = 1:n_labels, - cover = labels) - levels(robj) <- cls - samples_sv <- terra::spatSample( - x = robj, - size = size, - method = "stratified", - as.points = TRUE - ) - samples_sf <- sf::st_as_sf(samples_sv) - samples_sf <- dplyr::mutate(samples_sf, - label = labels[.data[["cover"]]]) - samples_sf <- sf::st_transform(samples_sf, crs = "EPSG:4326") - }, progress = progress) - samples <- purrr::map_dfr(labels, function(lab) { - samples_class <- samples |> - dplyr::filter(.data[["label"]] == lab) |> - dplyr::slice_sample(n = samples_class[lab]) - }) + # call function to allocate sample per strata + samples <- .samples_alloc_strata( + cube = cube, + samples_class = samples_class, + multicores = multicores) + if (.has(shp_file)) { .check_that(tools::file_ext(shp_file) == "shp", - msg = .conf("messages", "sits_sampling_design_shp") + msg = .conf("messages", "sits_stratified_sampling_shp") ) sf::st_write(samples, shp_file, append = FALSE) - message(.conf("messages", "sits_sampling_design_shp_save"), shp_file) + message(.conf("messages", "sits_stratified_sampling_shp_save"), shp_file) } return(samples) } diff --git a/inst/extdata/config_messages.yml b/inst/extdata/config_messages.yml index 3b64cc5b..c7c1c360 100644 --- a/inst/extdata/config_messages.yml +++ b/inst/extdata/config_messages.yml @@ -410,11 +410,8 @@ sits_reduce_imbalance_samples: "number of samples to undersample for large class sits_resnet: "wrong input parameters - see example in documentation" sits_rfor: "wrong input parameters - see example in documentation" sits_sample: "invalid frac parameter - values should be btw 0.0 and 2.0" -sits_sampling_design: "expected values of user's accuracy should contain names of labels" -sits_sampling_design_alloc: "allocation method is not included in sampling design" -sits_sampling_design_samples: "number of samples per allocation method should be integer" -sits_sampling_design_shp: "invalid shapefile name" -sits_sampling_design_shp_save: "saved allocation in shapefile" +sits_sampling_design: "sampling design only runs in classified cubes" +sits_sampling_design_labels: "names of classes in cube do not match labels in expected_ua" sits_select: "input should be a valid set of training samples or a non-classified data cube" sits_segment: "wrong input parameters - see example in documentation" sits_slic: "wrong input parameters - see example in documentation" @@ -426,6 +423,10 @@ sits_som_evaluate_cluster: "wrong input data; please run sits_som_map first" sits_som_map: "number of samples should be greater than number of neurons" sits_som_map_grid_size: "recommended values for grid_xdim and grid_ydim are " sits_stratified_sampling: "labels in sampling design do not match labels in cube" +sits_stratified_sampling_alloc: "allocation method is not included in sampling design" +sits_stratified_sampling_samples: "number of samples per allocation method should be integer" +sits_stratified_sampling_shp: "invalid shapefile name" +sits_stratified_sampling_shp_save: "saved allocation in shapefile" sits_svm: "wrong input parameters - see example in documentation" sits_tae: "wrong input parameters - see example in documentation" sits_tempcnn: "wrong input parameters - see example in documentation" diff --git a/man/plot.vector_cube.Rd b/man/plot.vector_cube.Rd index c57d1e82..b4dd3e3c 100644 --- a/man/plot.vector_cube.Rd +++ b/man/plot.vector_cube.Rd @@ -14,7 +14,7 @@ tile = x[["tile"]][[1]], dates = NULL, seg_color = "black", - line_width = 1, + line_width = 0.3, palette = "RdYlGn", rev = FALSE, scale = 1, diff --git a/man/sits_sampling_design.Rd b/man/sits_sampling_design.Rd index f04fc4c7..ad56310b 100644 --- a/man/sits_sampling_design.Rd +++ b/man/sits_sampling_design.Rd @@ -6,6 +6,7 @@ \usage{ sits_sampling_design( cube, + ..., expected_ua = 0.75, std_err = 0.01, rare_class_prop = 0.1