closes #1158

e-sensing · Jun 17, 2024 · 87a94c3 · 87a94c3
1 parent 63bb426
commit 87a94c3
Show file tree

Hide file tree

Showing 10 changed files with 147 additions and 66 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -129,6 +129,8 @@ S3method(.raster_yres,terra)
 S3method(.reg_s2tile_convert,dem_cube)
 S3method(.reg_s2tile_convert,grd_cube)
 S3method(.reg_s2tile_convert,rtc_cube)
+S3method(.samples_alloc_strata,class_cube)
+S3method(.samples_alloc_strata,class_vector_cube)
 S3method(.slice_dfr,numeric)
 S3method(.source_collection_access_test,"mpc_cube_sentinel-1-grd")
 S3method(.source_collection_access_test,cdse_cube)
@@ -198,6 +200,7 @@ S3method(.source_tile_get_bbox,stac_cube)
 S3method(.tile,default)
 S3method(.tile,raster_cube)
 S3method(.tile_area_freq,class_cube)
+S3method(.tile_area_freq,class_vector_cube)
 S3method(.tile_area_freq,default)
 S3method(.tile_area_freq,raster_cube)
 S3method(.tile_as_sf,default)

diff --git a/R/api_cube.R b/R/api_cube.R
@@ -143,23 +143,10 @@ NULL
 #'
 #' @return A \code{vector} with the areas of the cube labels.
 .cube_class_areas <- function(cube) {
- .check_is_class_cube(cube)
- labels_cube <- sits_labels(cube)
-
  # Get area for each class for each row of the cube
  freq_lst <- slider::slide(cube, function(tile) {
  # Get the frequency count and value for each labelled image
  freq <- .tile_area_freq(tile)
- # pixel area
- # convert the area to hectares
- # assumption: spatial resolution unit is meters
- area <- freq[["count"]] * .tile_xres(tile) * .tile_yres(tile) / 10000
- # Include class names
- freq <- dplyr::mutate(
- freq,
- area = area,
- class = labels_cube[as.character(freq[["value"]])]
- )
  return(freq)
  })
  # Get a tibble by binding the row (duplicated labels with different counts)

diff --git a/R/api_samples.R b/R/api_samples.R
@@ -203,3 +203,82 @@
  })
  })
 }
+#' @title Allocate points for stratified sampling for accuracy estimation
+#' @name .samples_alloc_strata
+#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
+#' @param cube Classified data cube (raster or vector)
+#' @param samples_class Matrix with sampling design to be allocated
+#' @param dots Other params for the function
+#' @param multicores Number of cores to work in parallel
+#' @param progress Show progress bar?
+#'
+#' @return Points resulting from stratified sampling
+#' @keywords internal
+#' @noRd
+.samples_alloc_strata <- function(cube,
+ samples_class, ...,
+ multicores = 2,
+ progress = TRUE){
+ UseMethod(".samples_alloc_strata", cube)
+}
+#' @export
+.samples_alloc_strata.class_cube <- function(cube,
+ samples_class, ...,
+ multicores = 2,
+ progress = TRUE){
+ # estimate size
+ size <- ceiling(max(samples_class) / nrow(cube))
+ # get labels
+ labels <- names(samples_class)
+ n_labels <- length(labels)
+ # Prepare parallel processing
+ .parallel_start(workers = multicores)
+ on.exit(.parallel_stop(), add = TRUE)
+ # Create assets as jobs
+ cube_assets <- .cube_split_assets(cube)
+ # Process each asset in parallel
+ samples <- .jobs_map_parallel_dfr(cube_assets, function(tile) {
+ robj <- .raster_open_rast(.tile_path(tile))
+ cls <- data.frame(id = 1:n_labels,
+ cover = labels)
+ levels(robj) <- cls
+ samples_sv <- terra::spatSample(
+ x = robj,
+ size = size,
+ method = "stratified",
+ as.points = TRUE
+ )
+ samples_sf <- sf::st_as_sf(samples_sv)
+ samples_sf <- dplyr::mutate(samples_sf,
+ label = labels[.data[["cover"]]])
+ samples_sf <- sf::st_transform(samples_sf, crs = "EPSG:4326")
+ }, progress = progress)
+
+ samples <- purrr::map_dfr(labels, function(lab) {
+ samples_class <- samples |>
+ dplyr::filter(.data[["label"]] == lab) |>
+ dplyr::slice_sample(n = samples_class[lab])
+ })
+
+ return(samples)
+}
+#' @export
+.samples_alloc_strata.class_vector_cube <- function(cube,
+ samples_class, ...,
+ multicores = 2,
+ progress = TRUE){
+
+ segments_cube <- slider::slide_dfr(cube, function(tile){
+ # Open segments and transform them to tibble
+ segments <- .segments_read_vec(tile)
+ })
+ # Retrieve the required number of segments per class
+ samples_lst <- segments_cube |>
+ dplyr::group_by(.data[["class"]]) |>
+ dplyr::group_map(function(cl, class){
+ samples_class <- sf::st_sample(cl, samples_class[class[["class"]]])
+ sf_samples <- sf::st_sf(class, geometry = samples_class)
+ })
+ samples <- dplyr::bind_rows(samples_lst)
+ return(samples)
+}
diff --git a/R/api_tile.R b/R/api_tile.R
@@ -1314,6 +1314,33 @@ NULL
  r_obj <- .raster_open_rast(.tile_path(tile))
  # Retrieve the frequency
  freq <- tibble::as_tibble(.raster_freq(r_obj))
+ # get labels
+ labels <- .tile_labels(tile)
+ # pixel area
+ # convert the area to hectares
+ # assumption: spatial resolution unit is meters
+ area <- freq[["count"]] * .tile_xres(tile) * .tile_yres(tile) / 10000
+ # Include class names
+ freq <- dplyr::mutate(
+ freq,
+ area = area,
+ class = labels[as.character(freq[["value"]])]
+ )
+ # Return frequencies
+ freq
+}
+#' @export
+.tile_area_freq.class_vector_cube <- function(tile) {
+ # Open segments
+ segments <- .segments_read_vec(tile)
+ segments[["area"]] <- sf::st_area(segments)
+ segments <- sf::st_drop_geometry(segments)
+ segments <- units::drop_units(segments)
+ # Retrieve the area
+ freq <- segments |>
+ dplyr::group_by(class) |>
+ dplyr::summarise(area = sum(.data[["area"]])) |>
+ dplyr::select(c(dplyr::all_of("area"), dplyr::all_of("class")))
  # Return frequencies
  freq
 }

diff --git a/R/sits_classify.R b/R/sits_classify.R
@@ -253,6 +253,8 @@ sits_classify.raster_cube <- function(data,
  .check_filter_fn(filter_fn)
  # Retrieve the samples from the model
  samples <- .ml_samples(ml_model)
+ # Retrieve the bands from the model
+ bands <- .ml_bands(ml_model)
  # Do the samples and tile match their timeline length?
  .check_samples_tile_match_timeline(samples = samples, tile = data)
  # Do the samples and tile match their bands?
@@ -263,7 +265,7 @@ sits_classify.raster_cube <- function(data,
  # Check minimum memory needed to process one block
  job_memsize <- .jobs_memsize(
  job_size = .block_size(block = block, overlap = 0),
- npaths = length(.tile_paths(data)) + length(.ml_labels(ml_model)),
+ npaths = length(.tile_paths(data, bands)) + length(.ml_labels(ml_model)),
  nbytes = 8,
  proc_bloat = proc_bloat
  )

diff --git a/R/sits_plot.R b/R/sits_plot.R
@@ -524,7 +524,7 @@ plot.vector_cube <- function(x, ...,
  tile = x[["tile"]][[1]],
  dates = NULL,
  seg_color = "black",
- line_width = 1,
+ line_width = 0.3,
  palette = "RdYlGn",
  rev = FALSE,
  scale = 1.0,

diff --git a/R/sits_sample_functions.R b/R/sits_sample_functions.R
@@ -284,15 +284,14 @@ sits_reduce_imbalance <- function(samples,
 #' sampling_design <- sits_sampling_design(label_cube, expected_ua)
 #' }
 #' @export
-sits_sampling_design <- function(cube,
+sits_sampling_design <- function(cube, ...,
  expected_ua = 0.75,
  std_err = 0.01,
  rare_class_prop = 0.1) {
  .check_set_caller("sits_sampling_design")
  # check the cube is valid
- .check_raster_cube_files(cube)
- # check cube is class cube
- .check_is_class_cube(cube)
+ .check_that(inherits(cube, "class_cube") ||
+ inherits(cube, "class_vector_cube"))
  # get the labels
  labels <- .cube_labels(cube)
  n_labels <- length(labels)
@@ -304,19 +303,23 @@ sits_sampling_design <- function(cube,
  .check_that(length(expected_ua) == n_labels)
  # check names of labels
  .check_that(all(labels %in% names(expected_ua)))
- # adjust names to match cube labels
- expected_ua <- expected_ua[labels]
  # get cube class areas
  class_areas <- .cube_class_areas(cube)
+ # check that names of class areas are contained in the labels
+ .check_that(all(names(class_areas) %in% labels),
+ msg = .conf("messages", "sits_sampling_design_labels"))
+ # adjust names to match cube labels
+ expected_ua <- expected_ua[names(class_areas)]
  # calculate proportion of class areas
  prop <- class_areas / sum(class_areas)
  # standard deviation of the stratum
  std_dev <- signif(sqrt(expected_ua * (1 - expected_ua)), 3)
  # calculate sample size
  sample_size <- round((sum(prop * std_dev) / std_err) ^ 2)
  # determine "Equal" allocation
- equal <- rep(round(sample_size / n_labels), n_labels)
- names(equal) <- labels
+ n_classes <- length(class_areas)
+ equal <- rep(round(sample_size / n_classes), n_classes)
+ names(equal) <- names(class_areas)
  # find out the classes which are rare
  rare_classes <- prop[prop <= rare_class_prop]
  # Determine allocation possibilities
@@ -418,66 +421,44 @@ sits_stratified_sampling <- function(cube,
  .check_set_caller("sits_stratified_sampling")
  # check the cube is valid
  .check_raster_cube_files(cube)
- # check cube is class cube
- .check_is_class_cube(cube)
+ # check the cube is valid
+ .check_that(inherits(cube, "class_cube") ||
+ inherits(cube, "class_vector_cube"))
  # get the labels
  labels <- .cube_labels(cube)
  n_labels <- length(labels)
  # check number of labels
- .check_that(nrow(sampling_design) == n_labels)
+ .check_that(nrow(sampling_design) <= n_labels)
  # check names of labels
  .check_that(all(rownames(sampling_design) %in% labels))
  # check allocation method
  .check_that(alloc %in% colnames(sampling_design),
- msg = .conf("messages", "sits_sampling_design_alloc"))
+ msg = .conf("messages", "sits_stratified_sampling_alloc"))
  # retrieve samples class
  samples_class <- unlist(sampling_design[, alloc])
  # check samples class
  .check_int_parameter(samples_class, is_named = TRUE,
- msg = .conf("messages", "sits_sampling_design_samples")
+ msg = .conf("messages", "sits_stratified_sampling_samples")
  )
  .check_int_parameter(multicores, min = 1, max = 2048)
  .check_progress(progress)
  # name samples class
- names(samples_class) <- rownames(sampling_design)
+ # names(samples_class) <- rownames(sampling_design)
  # include overhead
  samples_class <- ceiling(samples_class * overhead)
- # estimate size
- size <- ceiling(max(samples_class) / nrow(cube))
- # Prepare parallel processing
- .parallel_start(workers = multicores)
- on.exit(.parallel_stop(), add = TRUE)
- # Create assets as jobs
- cube_assets <- .cube_split_assets(cube)
- # Process each asset in parallel
- samples <- .jobs_map_parallel_dfr(cube_assets, function(tile) {
- robj <- .raster_open_rast(.tile_path(tile))
- cls <- data.frame(id = 1:n_labels,
- cover = labels)
- levels(robj) <- cls
- samples_sv <- terra::spatSample(
- x = robj,
- size = size,
- method = "stratified",
- as.points = TRUE
- )
- samples_sf <- sf::st_as_sf(samples_sv)
- samples_sf <- dplyr::mutate(samples_sf,
- label = labels[.data[["cover"]]])
- samples_sf <- sf::st_transform(samples_sf, crs = "EPSG:4326")
- }, progress = progress)
 
- samples <- purrr::map_dfr(labels, function(lab) {
- samples_class <- samples |>
- dplyr::filter(.data[["label"]] == lab) |>
- dplyr::slice_sample(n = samples_class[lab])
- })
+ # call function to allocate sample per strata
+ samples <- .samples_alloc_strata(
+ cube = cube,
+ samples_class = samples_class,
+ multicores = multicores)
+
  if (.has(shp_file)) {
  .check_that(tools::file_ext(shp_file) == "shp",
- msg = .conf("messages", "sits_sampling_design_shp")
+ msg = .conf("messages", "sits_stratified_sampling_shp")
  )
  sf::st_write(samples, shp_file, append = FALSE)
- message(.conf("messages", "sits_sampling_design_shp_save"), shp_file)
+ message(.conf("messages", "sits_stratified_sampling_shp_save"), shp_file)
  }
  return(samples)
 }
diff --git a/inst/extdata/config_messages.yml b/inst/extdata/config_messages.yml
@@ -410,11 +410,8 @@ sits_reduce_imbalance_samples: "number of samples to undersample for large class
 sits_resnet: "wrong input parameters - see example in documentation"
 sits_rfor: "wrong input parameters - see example in documentation"
 sits_sample: "invalid frac parameter - values should be btw 0.0 and 2.0"
-sits_sampling_design: "expected values of user's accuracy should contain names of labels"
-sits_sampling_design_alloc: "allocation method is not included in sampling design"
-sits_sampling_design_samples: "number of samples per allocation method should be integer"
-sits_sampling_design_shp: "invalid shapefile name"
-sits_sampling_design_shp_save: "saved allocation in shapefile"
+sits_sampling_design: "sampling design only runs in classified cubes"
+sits_sampling_design_labels: "names of classes in cube do not match labels in expected_ua"
 sits_select: "input should be a valid set of training samples or a non-classified data cube"
 sits_segment: "wrong input parameters - see example in documentation"
 sits_slic: "wrong input parameters - see example in documentation"
@@ -426,6 +423,10 @@ sits_som_evaluate_cluster: "wrong input data; please run sits_som_map first"
 sits_som_map: "number of samples should be greater than number of neurons"
 sits_som_map_grid_size: "recommended values for grid_xdim and grid_ydim are "
 sits_stratified_sampling: "labels in sampling design do not match labels in cube"
+sits_stratified_sampling_alloc: "allocation method is not included in sampling design"
+sits_stratified_sampling_samples: "number of samples per allocation method should be integer"
+sits_stratified_sampling_shp: "invalid shapefile name"
+sits_stratified_sampling_shp_save: "saved allocation in shapefile"
 sits_svm: "wrong input parameters - see example in documentation"
 sits_tae: "wrong input parameters - see example in documentation"
 sits_tempcnn: "wrong input parameters - see example in documentation"

diff --git a/man/plot.vector_cube.Rd b/man/plot.vector_cube.Rd
diff --git a/man/sits_sampling_design.Rd b/man/sits_sampling_design.Rd