Skip to content

Commit

Permalink
closes #1158
Browse files Browse the repository at this point in the history
  • Loading branch information
gilbertocamara committed Jun 17, 2024
1 parent 63bb426 commit 87a94c3
Show file tree
Hide file tree
Showing 10 changed files with 147 additions and 66 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ S3method(.raster_yres,terra)
S3method(.reg_s2tile_convert,dem_cube)
S3method(.reg_s2tile_convert,grd_cube)
S3method(.reg_s2tile_convert,rtc_cube)
S3method(.samples_alloc_strata,class_cube)
S3method(.samples_alloc_strata,class_vector_cube)
S3method(.slice_dfr,numeric)
S3method(.source_collection_access_test,"mpc_cube_sentinel-1-grd")
S3method(.source_collection_access_test,cdse_cube)
Expand Down Expand Up @@ -198,6 +200,7 @@ S3method(.source_tile_get_bbox,stac_cube)
S3method(.tile,default)
S3method(.tile,raster_cube)
S3method(.tile_area_freq,class_cube)
S3method(.tile_area_freq,class_vector_cube)
S3method(.tile_area_freq,default)
S3method(.tile_area_freq,raster_cube)
S3method(.tile_as_sf,default)
Expand Down
13 changes: 0 additions & 13 deletions R/api_cube.R
Original file line number Diff line number Diff line change
Expand Up @@ -143,23 +143,10 @@ NULL
#'
#' @return A \code{vector} with the areas of the cube labels.
.cube_class_areas <- function(cube) {
.check_is_class_cube(cube)
labels_cube <- sits_labels(cube)

# Get area for each class for each row of the cube
freq_lst <- slider::slide(cube, function(tile) {
# Get the frequency count and value for each labelled image
freq <- .tile_area_freq(tile)
# pixel area
# convert the area to hectares
# assumption: spatial resolution unit is meters
area <- freq[["count"]] * .tile_xres(tile) * .tile_yres(tile) / 10000
# Include class names
freq <- dplyr::mutate(
freq,
area = area,
class = labels_cube[as.character(freq[["value"]])]
)
return(freq)
})
# Get a tibble by binding the row (duplicated labels with different counts)
Expand Down
79 changes: 79 additions & 0 deletions R/api_samples.R
Original file line number Diff line number Diff line change
Expand Up @@ -203,3 +203,82 @@
})
})
}
#' @title Allocate points for stratified sampling for accuracy estimation
#' @name .samples_alloc_strata
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#' @param cube Classified data cube (raster or vector)
#' @param samples_class Matrix with sampling design to be allocated
#' @param dots Other params for the function
#' @param multicores Number of cores to work in parallel
#' @param progress Show progress bar?
#'
#' @return Points resulting from stratified sampling
#' @keywords internal
#' @noRd
.samples_alloc_strata <- function(cube,
samples_class, ...,
multicores = 2,
progress = TRUE){
UseMethod(".samples_alloc_strata", cube)
}
#' @export
.samples_alloc_strata.class_cube <- function(cube,
samples_class, ...,
multicores = 2,
progress = TRUE){
# estimate size
size <- ceiling(max(samples_class) / nrow(cube))
# get labels
labels <- names(samples_class)
n_labels <- length(labels)
# Prepare parallel processing
.parallel_start(workers = multicores)
on.exit(.parallel_stop(), add = TRUE)
# Create assets as jobs
cube_assets <- .cube_split_assets(cube)
# Process each asset in parallel
samples <- .jobs_map_parallel_dfr(cube_assets, function(tile) {
robj <- .raster_open_rast(.tile_path(tile))
cls <- data.frame(id = 1:n_labels,
cover = labels)
levels(robj) <- cls
samples_sv <- terra::spatSample(
x = robj,
size = size,
method = "stratified",
as.points = TRUE
)
samples_sf <- sf::st_as_sf(samples_sv)
samples_sf <- dplyr::mutate(samples_sf,
label = labels[.data[["cover"]]])
samples_sf <- sf::st_transform(samples_sf, crs = "EPSG:4326")
}, progress = progress)

samples <- purrr::map_dfr(labels, function(lab) {
samples_class <- samples |>
dplyr::filter(.data[["label"]] == lab) |>
dplyr::slice_sample(n = samples_class[lab])
})

return(samples)
}
#' @export
.samples_alloc_strata.class_vector_cube <- function(cube,
samples_class, ...,
multicores = 2,
progress = TRUE){

segments_cube <- slider::slide_dfr(cube, function(tile){
# Open segments and transform them to tibble
segments <- .segments_read_vec(tile)
})
# Retrieve the required number of segments per class
samples_lst <- segments_cube |>
dplyr::group_by(.data[["class"]]) |>
dplyr::group_map(function(cl, class){
samples_class <- sf::st_sample(cl, samples_class[class[["class"]]])
sf_samples <- sf::st_sf(class, geometry = samples_class)
})
samples <- dplyr::bind_rows(samples_lst)
return(samples)
}
27 changes: 27 additions & 0 deletions R/api_tile.R
Original file line number Diff line number Diff line change
Expand Up @@ -1314,6 +1314,33 @@ NULL
r_obj <- .raster_open_rast(.tile_path(tile))
# Retrieve the frequency
freq <- tibble::as_tibble(.raster_freq(r_obj))
# get labels
labels <- .tile_labels(tile)
# pixel area
# convert the area to hectares
# assumption: spatial resolution unit is meters
area <- freq[["count"]] * .tile_xres(tile) * .tile_yres(tile) / 10000
# Include class names
freq <- dplyr::mutate(
freq,
area = area,
class = labels[as.character(freq[["value"]])]
)
# Return frequencies
freq
}
#' @export
.tile_area_freq.class_vector_cube <- function(tile) {
# Open segments
segments <- .segments_read_vec(tile)
segments[["area"]] <- sf::st_area(segments)
segments <- sf::st_drop_geometry(segments)
segments <- units::drop_units(segments)
# Retrieve the area
freq <- segments |>
dplyr::group_by(class) |>
dplyr::summarise(area = sum(.data[["area"]])) |>
dplyr::select(c(dplyr::all_of("area"), dplyr::all_of("class")))
# Return frequencies
freq
}
Expand Down
4 changes: 3 additions & 1 deletion R/sits_classify.R
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,8 @@ sits_classify.raster_cube <- function(data,
.check_filter_fn(filter_fn)
# Retrieve the samples from the model
samples <- .ml_samples(ml_model)
# Retrieve the bands from the model
bands <- .ml_bands(ml_model)
# Do the samples and tile match their timeline length?
.check_samples_tile_match_timeline(samples = samples, tile = data)
# Do the samples and tile match their bands?
Expand All @@ -263,7 +265,7 @@ sits_classify.raster_cube <- function(data,
# Check minimum memory needed to process one block
job_memsize <- .jobs_memsize(
job_size = .block_size(block = block, overlap = 0),
npaths = length(.tile_paths(data)) + length(.ml_labels(ml_model)),
npaths = length(.tile_paths(data, bands)) + length(.ml_labels(ml_model)),
nbytes = 8,
proc_bloat = proc_bloat
)
Expand Down
2 changes: 1 addition & 1 deletion R/sits_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ plot.vector_cube <- function(x, ...,
tile = x[["tile"]][[1]],
dates = NULL,
seg_color = "black",
line_width = 1,
line_width = 0.3,
palette = "RdYlGn",
rev = FALSE,
scale = 1.0,
Expand Down
71 changes: 26 additions & 45 deletions R/sits_sample_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -284,15 +284,14 @@ sits_reduce_imbalance <- function(samples,
#' sampling_design <- sits_sampling_design(label_cube, expected_ua)
#' }
#' @export
sits_sampling_design <- function(cube,
sits_sampling_design <- function(cube, ...,
expected_ua = 0.75,
std_err = 0.01,
rare_class_prop = 0.1) {
.check_set_caller("sits_sampling_design")
# check the cube is valid
.check_raster_cube_files(cube)
# check cube is class cube
.check_is_class_cube(cube)
.check_that(inherits(cube, "class_cube") ||
inherits(cube, "class_vector_cube"))
# get the labels
labels <- .cube_labels(cube)
n_labels <- length(labels)
Expand All @@ -304,19 +303,23 @@ sits_sampling_design <- function(cube,
.check_that(length(expected_ua) == n_labels)
# check names of labels
.check_that(all(labels %in% names(expected_ua)))
# adjust names to match cube labels
expected_ua <- expected_ua[labels]
# get cube class areas
class_areas <- .cube_class_areas(cube)
# check that names of class areas are contained in the labels
.check_that(all(names(class_areas) %in% labels),
msg = .conf("messages", "sits_sampling_design_labels"))
# adjust names to match cube labels
expected_ua <- expected_ua[names(class_areas)]
# calculate proportion of class areas
prop <- class_areas / sum(class_areas)
# standard deviation of the stratum
std_dev <- signif(sqrt(expected_ua * (1 - expected_ua)), 3)
# calculate sample size
sample_size <- round((sum(prop * std_dev) / std_err) ^ 2)
# determine "Equal" allocation
equal <- rep(round(sample_size / n_labels), n_labels)
names(equal) <- labels
n_classes <- length(class_areas)
equal <- rep(round(sample_size / n_classes), n_classes)
names(equal) <- names(class_areas)
# find out the classes which are rare
rare_classes <- prop[prop <= rare_class_prop]
# Determine allocation possibilities
Expand Down Expand Up @@ -418,66 +421,44 @@ sits_stratified_sampling <- function(cube,
.check_set_caller("sits_stratified_sampling")
# check the cube is valid
.check_raster_cube_files(cube)
# check cube is class cube
.check_is_class_cube(cube)
# check the cube is valid
.check_that(inherits(cube, "class_cube") ||
inherits(cube, "class_vector_cube"))
# get the labels
labels <- .cube_labels(cube)
n_labels <- length(labels)
# check number of labels
.check_that(nrow(sampling_design) == n_labels)
.check_that(nrow(sampling_design) <= n_labels)
# check names of labels
.check_that(all(rownames(sampling_design) %in% labels))
# check allocation method
.check_that(alloc %in% colnames(sampling_design),
msg = .conf("messages", "sits_sampling_design_alloc"))
msg = .conf("messages", "sits_stratified_sampling_alloc"))
# retrieve samples class
samples_class <- unlist(sampling_design[, alloc])
# check samples class
.check_int_parameter(samples_class, is_named = TRUE,
msg = .conf("messages", "sits_sampling_design_samples")
msg = .conf("messages", "sits_stratified_sampling_samples")
)
.check_int_parameter(multicores, min = 1, max = 2048)
.check_progress(progress)
# name samples class
names(samples_class) <- rownames(sampling_design)
# names(samples_class) <- rownames(sampling_design)
# include overhead
samples_class <- ceiling(samples_class * overhead)
# estimate size
size <- ceiling(max(samples_class) / nrow(cube))
# Prepare parallel processing
.parallel_start(workers = multicores)
on.exit(.parallel_stop(), add = TRUE)
# Create assets as jobs
cube_assets <- .cube_split_assets(cube)
# Process each asset in parallel
samples <- .jobs_map_parallel_dfr(cube_assets, function(tile) {
robj <- .raster_open_rast(.tile_path(tile))
cls <- data.frame(id = 1:n_labels,
cover = labels)
levels(robj) <- cls
samples_sv <- terra::spatSample(
x = robj,
size = size,
method = "stratified",
as.points = TRUE
)
samples_sf <- sf::st_as_sf(samples_sv)
samples_sf <- dplyr::mutate(samples_sf,
label = labels[.data[["cover"]]])
samples_sf <- sf::st_transform(samples_sf, crs = "EPSG:4326")
}, progress = progress)

samples <- purrr::map_dfr(labels, function(lab) {
samples_class <- samples |>
dplyr::filter(.data[["label"]] == lab) |>
dplyr::slice_sample(n = samples_class[lab])
})
# call function to allocate sample per strata
samples <- .samples_alloc_strata(
cube = cube,
samples_class = samples_class,
multicores = multicores)

if (.has(shp_file)) {
.check_that(tools::file_ext(shp_file) == "shp",
msg = .conf("messages", "sits_sampling_design_shp")
msg = .conf("messages", "sits_stratified_sampling_shp")
)
sf::st_write(samples, shp_file, append = FALSE)
message(.conf("messages", "sits_sampling_design_shp_save"), shp_file)
message(.conf("messages", "sits_stratified_sampling_shp_save"), shp_file)
}
return(samples)
}
11 changes: 6 additions & 5 deletions inst/extdata/config_messages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -410,11 +410,8 @@ sits_reduce_imbalance_samples: "number of samples to undersample for large class
sits_resnet: "wrong input parameters - see example in documentation"
sits_rfor: "wrong input parameters - see example in documentation"
sits_sample: "invalid frac parameter - values should be btw 0.0 and 2.0"
sits_sampling_design: "expected values of user's accuracy should contain names of labels"
sits_sampling_design_alloc: "allocation method is not included in sampling design"
sits_sampling_design_samples: "number of samples per allocation method should be integer"
sits_sampling_design_shp: "invalid shapefile name"
sits_sampling_design_shp_save: "saved allocation in shapefile"
sits_sampling_design: "sampling design only runs in classified cubes"
sits_sampling_design_labels: "names of classes in cube do not match labels in expected_ua"
sits_select: "input should be a valid set of training samples or a non-classified data cube"
sits_segment: "wrong input parameters - see example in documentation"
sits_slic: "wrong input parameters - see example in documentation"
Expand All @@ -426,6 +423,10 @@ sits_som_evaluate_cluster: "wrong input data; please run sits_som_map first"
sits_som_map: "number of samples should be greater than number of neurons"
sits_som_map_grid_size: "recommended values for grid_xdim and grid_ydim are "
sits_stratified_sampling: "labels in sampling design do not match labels in cube"
sits_stratified_sampling_alloc: "allocation method is not included in sampling design"
sits_stratified_sampling_samples: "number of samples per allocation method should be integer"
sits_stratified_sampling_shp: "invalid shapefile name"
sits_stratified_sampling_shp_save: "saved allocation in shapefile"
sits_svm: "wrong input parameters - see example in documentation"
sits_tae: "wrong input parameters - see example in documentation"
sits_tempcnn: "wrong input parameters - see example in documentation"
Expand Down
2 changes: 1 addition & 1 deletion man/plot.vector_cube.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/sits_sampling_design.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 87a94c3

Please sign in to comment.