Merge pull request #1130 from M3nin0/feature/detections-api-raster

detections api: include raster cube support
e-sensing · May 18, 2024 · 61af7d9 · 61af7d9
2 parents 3a5aa79 + ee6ecb4
commit 61af7d9
Show file tree

Hide file tree

Showing 12 changed files with 575 additions and 122 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -148,6 +148,7 @@ Collate:
  'api_mosaic.R'
  'api_opensearch.R'
  'api_parallel.R'
+ 'api_patterns.R'
  'api_period.R'
  'api_plot_time_series.R'
  'api_plot_raster.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -339,6 +339,7 @@ S3method(sits_cube,local_cube)
 S3method(sits_cube,sar_cube)
 S3method(sits_cube,stac_cube)
 S3method(sits_detect_change,default)
+S3method(sits_detect_change,raster_cube)
 S3method(sits_detect_change,sits)
 S3method(sits_get_data,csv)
 S3method(sits_get_data,data.frame)

diff --git a/R/api_detect_changes.R b/R/api_detect_changes.R
@@ -25,20 +25,211 @@
  # Divide samples in chunks to parallel processing
  parts <- .pred_create_partition(pred = samples, partitions = multicores)
  # Detect changes!
- detections <- .jobs_map_parallel_dfr(parts, function(part) {
+ .jobs_map_parallel_dfr(parts, function(part) {
  # Get samples
  values <- .pred_part(part)
  # Detect changes! For detection, models can be time-aware. So, the
  # complete data, including dates, must be passed as argument.
- detections <- cd_method(values[["time_series"]])
+ detections <- cd_method(values[["time_series"]], "ts")
  detections <- tibble::tibble(detections)
  # Prepare result
  result <- tibble::tibble(data.frame(values, detections = detections))
  class(result) <- class(values)
  # return
  result
-
  }, progress = progress)
+}
 
- return(detections)
+#' @title Detect changes from a chunk of raster data using multicores
+#' @name .detect_change_tile
+#' @keywords internal
+#' @noRd
+#' @param tile Single tile of a data cube.
+#' @param band Band to be produced.
+#' @param cd_method Change Detection Model.
+#' @param block Optimized block to be read into memory.
+#' @param roi Region of interest.
+#' @param filter_fn Smoothing filter function to be applied to the data.
+#' @param impute_fn Imputation function.
+#' @param output_dir Output directory.
+#' @param version Version of result.
+#' @param verbose Print processing information?
+#' @param progress Show progress bar?
+#' @return List of the classified raster layers.
+.detect_change_tile <- function(tile,
+ band,
+ cd_method,
+ block,
+ roi,
+ filter_fn,
+ impute_fn,
+ output_dir,
+ version,
+ verbose,
+ progress) {
+ # Output file
+ out_file <- .file_derived_name(
+ tile = tile,
+ band = band,
+ version = version,
+ output_dir = output_dir
+ )
+ # Resume feature
+ if (file.exists(out_file)) {
+ if (.check_messages()) {
+ .check_recovery(out_file)
+ }
+ detected_changes_tile <- .tile_derived_from_file(
+ file = out_file,
+ band = band,
+ base_tile = tile,
+ labels = .ml_labels_code(cd_method),
+ derived_class = "detections_cube",
+ update_bbox = TRUE
+ )
+ return(detected_changes_tile)
+ }
+ # Show initial time for tile classification
+ tile_start_time <- .tile_classif_start(
+ tile = tile,
+ verbose = verbose
+ )
+ # Tile timeline
+ tile_timeline <- .tile_timeline(tile)
+ # Create chunks as jobs
+ chunks <- .tile_chunks_create(
+ tile = tile,
+ overlap = 0,
+ block = block
+ )
+ # By default, update_bbox is FALSE
+ update_bbox <- FALSE
+ if (.has(roi)) {
+ # How many chunks there are in tile?
+ nchunks <- nrow(chunks)
+ # Intersecting chunks with ROI
+ chunks <- .chunks_filter_spatial(
+ chunks = chunks,
+ roi = roi
+ )
+ # Should bbox of resulting tile be updated?
+ update_bbox <- nrow(chunks) != nchunks
+ }
+ # Process jobs in parallel
+ block_files <- .jobs_map_parallel_chr(chunks, function(chunk) {
+ # Job block
+ block <- .block(chunk)
+ # Block file name
+ block_file <- .file_block_name(
+ pattern = .file_pattern(out_file),
+ block = block,
+ output_dir = output_dir
+ )
+ # Resume processing in case of failure
+ if (.raster_is_valid(block_file)) {
+ return(block_file)
+ }
+ # Read and preprocess values
+ values <- .classify_data_read(
+ tile = tile,
+ block = block,
+ bands = .ml_bands(cd_method),
+ ml_model = cd_method,
+ impute_fn = impute_fn,
+ filter_fn = filter_fn
+ )
+ # Get mask of NA pixels
+ na_mask <- C_mask_na(values)
+ # Fill with zeros remaining NA pixels
+ values <- C_fill_na(values, 0)
+ # Used to check values (below)
+ n_input_pixels <- nrow(values)
+ # Include names in cube predictors
+ colnames(values) <- .pred_features_name(
+ .ml_bands(cd_method), tile_timeline
+ )
+ # Prepare values
+ values <- .pred_as_ts(values, .ml_bands(cd_method), tile_timeline) |>
+ tidyr::nest(.by = "sample_id", .key = "time_series")
+ # Log here
+ .debug_log(
+ event = "start_block_data_detection",
+ key = "model",
+ value = .ml_class(cd_method)
+ )
+ # Detect changes!
+ values <- cd_method(values[["time_series"]], "cube") |>
+ dplyr::as_tibble()
+ # Are the results consistent with the data input?
+ .check_processed_values(
+ values = values,
+ n_input_pixels = n_input_pixels
+ )
+ # Log here
+ .debug_log(
+ event = "end_block_data_detection",
+ key = "model",
+ value = .ml_class(cd_method)
+ )
+ # Prepare probability to be saved
+ band_conf <- .conf_derived_band(
+ derived_class = "detections_cube",
+ band = band
+ )
+ offset <- .offset(band_conf)
+ if (.has(offset) && offset != 0) {
+ values <- values - offset
+ }
+ scale <- .scale(band_conf)
+ if (.has(scale) && scale != 1) {
+ values <- values / scale
+ }
+ # Mask NA pixels with same probabilities for all classes
+ values[na_mask, ] <- 0 # event detection = 1, no event = 0
+ # Log
+ .debug_log(
+ event = "start_block_data_save",
+ key = "file",
+ value = block_file
+ )
+ # Prepare and save results as raster
+ .raster_write_block(
+ files = block_file,
+ block = block,
+ bbox = .bbox(chunk),
+ values = values,
+ data_type = .data_type(band_conf),
+ missing_value = .miss_value(band_conf),
+ crop_block = NULL
+ )
+ # Log
+ .debug_log(
+ event = "end_block_data_save",
+ key = "file",
+ value = block_file
+ )
+ # Free memory
+ gc()
+ # Returned block file
+ block_file
+ }, progress = progress)
+ # Merge blocks into a new detections_cube tile
+ detections_tile <- .tile_derived_merge_blocks(
+ file = out_file,
+ band = band,
+ labels = .ml_labels_code(cd_method),
+ base_tile = tile,
+ block_files = block_files,
+ derived_class = "detections_cube",
+ multicores = .jobs_multicores(),
+ update_bbox = update_bbox
+ )
+ # show final time for detection
+ .tile_classif_end(
+ tile = tile,
+ start_time = tile_start_time,
+ verbose = verbose
+ )
+ # Return detections tile
+ detections_tile
 }
diff --git a/R/api_dtw.R b/R/api_dtw.R
@@ -1,38 +1,37 @@
-
-#' @title Extract temporal pattern of samples using temporal median.
-#' @name .pattern_temporal_median
+# ---- Distances ----
+#' @title Calculate the DTW distance between temporal patterns and time-series.
+#' @name .dtw_distance
+#' @description This function calculates the DTW distance between label patterns
+#' and real data (e.g., sample data, data cube data). The distance is calculated
+#' without a window. It's use is recommended for big datasets.
 #' @keywords internal
 #' @noRd
-.pattern_temporal_median <- function(samples) {
- samples |>
-  dplyr::group_by(.data[["label"]]) |>
-  dplyr::group_map(function(data, name) {
-  ts_median <- dplyr::bind_rows(data[["time_series"]]) |>
-  dplyr::group_by(.data[["Index"]]) |>
-  dplyr::summarize(dplyr::across(dplyr::everything(),
-  stats::median, na.rm = TRUE)) |>
-  dplyr::select(-.data[["Index"]])
-
- ts_median["label"] <- name
-  ts_median
-  })
+.dtw_distance <- function(data, patterns) {
+ # Prepare input data
+ data <- as.matrix(.ts_values(data))
+ # Calculate the DTW distance between `data` and `patterns`
+ purrr::map_dfc(patterns, function(pattern) {
+ # Prepare pattern data
+ pattern_ts <- as.matrix(.ts_values(pattern))
+ # Calculate distance
+ stats::setNames(
+ data.frame(distance = dtw_distance(data, pattern_ts)),
+ pattern[["label"]][[1]]
+ )
+ })
 }
-
-#' @title Calculate the DTW distance between label patterns and sample data.
-#' @name .pattern_distance_dtw
+#' @title Calculate the DTW distance between temporal patterns and time-series.
+#' @name .dtw_distance_windowed
 #' @description This function calculates the DTW distance between label patterns
-#' and sample data in a given temporal window.
+#' and real data (e.g., sample data, data cube data). The distance is calculated
+#' using windows.
 #' @keywords internal
 #' @noRd
-.pattern_distance_dtw <- function(data, patterns, windows) {
+.dtw_distance_windowed <- function(data, patterns, windows) {
  # Calculate the DTW distance between `data` and `patterns`
- purrr::map_dfc(1:length(patterns), function(pattern_index) {
- # Get pattern metadata
- pattern <- patterns[pattern_index][[1]]
- pattern_label <- unique(pattern[["label"]])
+ purrr::map_dfc(patterns, function(pattern) {
  # Get pattern data
- pattern_ts <- dplyr::select(pattern, -.data[["label"]])
- pattern_ts <- as.matrix(pattern_ts)
+ pattern_ts <- as.matrix(.ts_values(pattern))
  # Windowed search
  distances <- purrr::map_df(windows, function(window) {
  # Get time-series in the window
@@ -41,16 +40,89 @@
  .data[["Index"]] >= window[["start"]],
  .data[["Index"]] <= window[["end"]])
  # Remove the time reference column
- data_in_window <- dplyr::select(data_in_window, -.data[["Index"]])
- # Transform values in matrix (as expected in the cpp code)
- data_in_window <- as.matrix(data_in_window)
- data_in_window <- data_in_window
+ data_in_window <- as.matrix(.ts_values(data_in_window))
  # Calculate distance
- distance_from_pattern <- dtw_distance(data_in_window, pattern_ts)
- # Prepare result and return it
- data.frame(distance = distance_from_pattern)
+ data.frame(distance = dtw_distance(data_in_window, pattern_ts))
  })
  # Associate the pattern name with the distances
- stats::setNames(distances, pattern_label)
+ stats::setNames(distances, pattern[["label"]][[1]])
+ })
+}
+# ---- Operation mode ----
+#' @title Search for events in time series using complete data (no windowing).
+#' @name .dtw_complete_ts
+#' @description This function searches for events in time series without
+#' windowing.
+#' @keywords internal
+#' @noRd
+.dtw_complete_ts <- function(values, patterns, threshold, ...) {
+ # Do the change detection for each time-series
+ purrr::map_vec(values, function(value_row) {
+ # Search for the patterns
+ patterns_distances <- .dtw_distance(
+ data = value_row,
+ patterns = patterns
+ )
+ # Remove distances out the user-defined threshold
+ as.numeric(any(patterns_distances <= threshold))
+ })
+}
+#' @title Search for events in time series using windowing.
+#' @name .dtw_windowed_ts
+#' @description This function searches for events in time series with windowing.
+#' @keywords internal
+#' @noRd
+.dtw_windowed_ts <- function(values, patterns, window, threshold) {
+ # Extract dates
+ dates_min <- .ts_min_date(values[[1]])
+ dates_max <- .ts_max_date(values[[1]])
+ # Assume time-series are regularized, then use the period
+ # as the step of the moving window. As a result, we have
+ # one step per iteration.
+ dates_step <- lubridate::as.period(
+ lubridate::int_diff(.ts_index(values[[1]]))
+ )
+ dates_step <- dates_step[[1]]
+ # Create comparison windows
+ comparison_windows <- .period_windows(
+ period = window,
+ step = dates_step,
+ start_date = dates_min,
+ end_date = dates_max
+ )
+ # Do the change detection for each time-series
+ purrr::map(values, function(value_row) {
+ # Search for the patterns
+ patterns_distances <- .dtw_distance_windowed(
+ data = value_row,
+ patterns = patterns,
+ windows = comparison_windows
+ )
+ # Remove distances out the user-defined threshold
+ patterns_distances[patterns_distances > threshold] <- NA
+ # Define where each label was detected. For this, first
+ # get from each label the minimal distance
+ detections_idx <- apply(patterns_distances, 2, which.min)
+ detections_name <- names(detections_idx)
+ # For each label, extract the metadata where they had
+ # minimal distance
+ purrr::map_df(seq_len(length(detections_idx)), function(idx) {
+ # Extract detection name and inced
+ detection_name <- detections_name[idx]
+ detection_idx <- detections_idx[idx]
+ # Extract detection distance (min one defined above)
+ detection_distance <- patterns_distances[detection_idx,]
+ detection_distance <- detection_distance[detection_name]
+ detection_distance <- as.numeric(detection_distance)
+ # Extract detection dates
+ detection_dates <- comparison_windows[[detection_idx]]
+ # Prepare result and return it!
+ tibble::tibble(
+ change = detection_name,
+ distance = detection_distance,
+ from = detection_dates[["start"]],
+ to = detection_dates[["end"]]
+ )
+ })
  })
 }