Skip to content

Commit

Permalink
Merge pull request #1130 from M3nin0/feature/detections-api-raster
Browse files Browse the repository at this point in the history
detections api: include raster cube support
  • Loading branch information
gilbertocamara committed May 18, 2024
2 parents 3a5aa79 + ee6ecb4 commit 61af7d9
Show file tree
Hide file tree
Showing 12 changed files with 575 additions and 122 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ Collate:
'api_mosaic.R'
'api_opensearch.R'
'api_parallel.R'
'api_patterns.R'
'api_period.R'
'api_plot_time_series.R'
'api_plot_raster.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ S3method(sits_cube,local_cube)
S3method(sits_cube,sar_cube)
S3method(sits_cube,stac_cube)
S3method(sits_detect_change,default)
S3method(sits_detect_change,raster_cube)
S3method(sits_detect_change,sits)
S3method(sits_get_data,csv)
S3method(sits_get_data,data.frame)
Expand Down
199 changes: 195 additions & 4 deletions R/api_detect_changes.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,211 @@
# Divide samples in chunks to parallel processing
parts <- .pred_create_partition(pred = samples, partitions = multicores)
# Detect changes!
detections <- .jobs_map_parallel_dfr(parts, function(part) {
.jobs_map_parallel_dfr(parts, function(part) {
# Get samples
values <- .pred_part(part)
# Detect changes! For detection, models can be time-aware. So, the
# complete data, including dates, must be passed as argument.
detections <- cd_method(values[["time_series"]])
detections <- cd_method(values[["time_series"]], "ts")
detections <- tibble::tibble(detections)
# Prepare result
result <- tibble::tibble(data.frame(values, detections = detections))
class(result) <- class(values)
# return
result

}, progress = progress)
}

return(detections)
#' @title Detect changes from a chunk of raster data using multicores
#' @name .detect_change_tile
#' @keywords internal
#' @noRd
#' @param tile Single tile of a data cube.
#' @param band Band to be produced.
#' @param cd_method Change Detection Model.
#' @param block Optimized block to be read into memory.
#' @param roi Region of interest.
#' @param filter_fn Smoothing filter function to be applied to the data.
#' @param impute_fn Imputation function.
#' @param output_dir Output directory.
#' @param version Version of result.
#' @param verbose Print processing information?
#' @param progress Show progress bar?
#' @return List of the classified raster layers.
.detect_change_tile <- function(tile,
band,
cd_method,
block,
roi,
filter_fn,
impute_fn,
output_dir,
version,
verbose,
progress) {
# Output file
out_file <- .file_derived_name(
tile = tile,
band = band,
version = version,
output_dir = output_dir
)
# Resume feature
if (file.exists(out_file)) {
if (.check_messages()) {
.check_recovery(out_file)
}
detected_changes_tile <- .tile_derived_from_file(
file = out_file,
band = band,
base_tile = tile,
labels = .ml_labels_code(cd_method),
derived_class = "detections_cube",
update_bbox = TRUE
)
return(detected_changes_tile)
}
# Show initial time for tile classification
tile_start_time <- .tile_classif_start(
tile = tile,
verbose = verbose
)
# Tile timeline
tile_timeline <- .tile_timeline(tile)
# Create chunks as jobs
chunks <- .tile_chunks_create(
tile = tile,
overlap = 0,
block = block
)
# By default, update_bbox is FALSE
update_bbox <- FALSE
if (.has(roi)) {
# How many chunks there are in tile?
nchunks <- nrow(chunks)
# Intersecting chunks with ROI
chunks <- .chunks_filter_spatial(
chunks = chunks,
roi = roi
)
# Should bbox of resulting tile be updated?
update_bbox <- nrow(chunks) != nchunks
}
# Process jobs in parallel
block_files <- .jobs_map_parallel_chr(chunks, function(chunk) {
# Job block
block <- .block(chunk)
# Block file name
block_file <- .file_block_name(
pattern = .file_pattern(out_file),
block = block,
output_dir = output_dir
)
# Resume processing in case of failure
if (.raster_is_valid(block_file)) {
return(block_file)
}
# Read and preprocess values
values <- .classify_data_read(
tile = tile,
block = block,
bands = .ml_bands(cd_method),
ml_model = cd_method,
impute_fn = impute_fn,
filter_fn = filter_fn
)
# Get mask of NA pixels
na_mask <- C_mask_na(values)
# Fill with zeros remaining NA pixels
values <- C_fill_na(values, 0)
# Used to check values (below)
n_input_pixels <- nrow(values)
# Include names in cube predictors
colnames(values) <- .pred_features_name(
.ml_bands(cd_method), tile_timeline
)
# Prepare values
values <- .pred_as_ts(values, .ml_bands(cd_method), tile_timeline) |>
tidyr::nest(.by = "sample_id", .key = "time_series")
# Log here
.debug_log(
event = "start_block_data_detection",
key = "model",
value = .ml_class(cd_method)
)
# Detect changes!
values <- cd_method(values[["time_series"]], "cube") |>
dplyr::as_tibble()
# Are the results consistent with the data input?
.check_processed_values(
values = values,
n_input_pixels = n_input_pixels
)
# Log here
.debug_log(
event = "end_block_data_detection",
key = "model",
value = .ml_class(cd_method)
)
# Prepare probability to be saved
band_conf <- .conf_derived_band(
derived_class = "detections_cube",
band = band
)
offset <- .offset(band_conf)
if (.has(offset) && offset != 0) {
values <- values - offset
}
scale <- .scale(band_conf)
if (.has(scale) && scale != 1) {
values <- values / scale
}
# Mask NA pixels with same probabilities for all classes
values[na_mask, ] <- 0 # event detection = 1, no event = 0
# Log
.debug_log(
event = "start_block_data_save",
key = "file",
value = block_file
)
# Prepare and save results as raster
.raster_write_block(
files = block_file,
block = block,
bbox = .bbox(chunk),
values = values,
data_type = .data_type(band_conf),
missing_value = .miss_value(band_conf),
crop_block = NULL
)
# Log
.debug_log(
event = "end_block_data_save",
key = "file",
value = block_file
)
# Free memory
gc()
# Returned block file
block_file
}, progress = progress)
# Merge blocks into a new detections_cube tile
detections_tile <- .tile_derived_merge_blocks(
file = out_file,
band = band,
labels = .ml_labels_code(cd_method),
base_tile = tile,
block_files = block_files,
derived_class = "detections_cube",
multicores = .jobs_multicores(),
update_bbox = update_bbox
)
# show final time for detection
.tile_classif_end(
tile = tile,
start_time = tile_start_time,
verbose = verbose
)
# Return detections tile
detections_tile
}
142 changes: 107 additions & 35 deletions R/api_dtw.R
Original file line number Diff line number Diff line change
@@ -1,38 +1,37 @@

#' @title Extract temporal pattern of samples using temporal median.
#' @name .pattern_temporal_median
# ---- Distances ----
#' @title Calculate the DTW distance between temporal patterns and time-series.
#' @name .dtw_distance
#' @description This function calculates the DTW distance between label patterns
#' and real data (e.g., sample data, data cube data). The distance is calculated
#' without a window. It's use is recommended for big datasets.
#' @keywords internal
#' @noRd
.pattern_temporal_median <- function(samples) {
samples |>
dplyr::group_by(.data[["label"]]) |>
dplyr::group_map(function(data, name) {
ts_median <- dplyr::bind_rows(data[["time_series"]]) |>
dplyr::group_by(.data[["Index"]]) |>
dplyr::summarize(dplyr::across(dplyr::everything(),
stats::median, na.rm = TRUE)) |>
dplyr::select(-.data[["Index"]])

ts_median["label"] <- name
ts_median
})
.dtw_distance <- function(data, patterns) {
# Prepare input data
data <- as.matrix(.ts_values(data))
# Calculate the DTW distance between `data` and `patterns`
purrr::map_dfc(patterns, function(pattern) {
# Prepare pattern data
pattern_ts <- as.matrix(.ts_values(pattern))
# Calculate distance
stats::setNames(
data.frame(distance = dtw_distance(data, pattern_ts)),
pattern[["label"]][[1]]
)
})
}

#' @title Calculate the DTW distance between label patterns and sample data.
#' @name .pattern_distance_dtw
#' @title Calculate the DTW distance between temporal patterns and time-series.
#' @name .dtw_distance_windowed
#' @description This function calculates the DTW distance between label patterns
#' and sample data in a given temporal window.
#' and real data (e.g., sample data, data cube data). The distance is calculated
#' using windows.
#' @keywords internal
#' @noRd
.pattern_distance_dtw <- function(data, patterns, windows) {
.dtw_distance_windowed <- function(data, patterns, windows) {
# Calculate the DTW distance between `data` and `patterns`
purrr::map_dfc(1:length(patterns), function(pattern_index) {
# Get pattern metadata
pattern <- patterns[pattern_index][[1]]
pattern_label <- unique(pattern[["label"]])
purrr::map_dfc(patterns, function(pattern) {
# Get pattern data
pattern_ts <- dplyr::select(pattern, -.data[["label"]])
pattern_ts <- as.matrix(pattern_ts)
pattern_ts <- as.matrix(.ts_values(pattern))
# Windowed search
distances <- purrr::map_df(windows, function(window) {
# Get time-series in the window
Expand All @@ -41,16 +40,89 @@
.data[["Index"]] >= window[["start"]],
.data[["Index"]] <= window[["end"]])
# Remove the time reference column
data_in_window <- dplyr::select(data_in_window, -.data[["Index"]])
# Transform values in matrix (as expected in the cpp code)
data_in_window <- as.matrix(data_in_window)
data_in_window <- data_in_window
data_in_window <- as.matrix(.ts_values(data_in_window))
# Calculate distance
distance_from_pattern <- dtw_distance(data_in_window, pattern_ts)
# Prepare result and return it
data.frame(distance = distance_from_pattern)
data.frame(distance = dtw_distance(data_in_window, pattern_ts))
})
# Associate the pattern name with the distances
stats::setNames(distances, pattern_label)
stats::setNames(distances, pattern[["label"]][[1]])
})
}
# ---- Operation mode ----
#' @title Search for events in time series using complete data (no windowing).
#' @name .dtw_complete_ts
#' @description This function searches for events in time series without
#' windowing.
#' @keywords internal
#' @noRd
.dtw_complete_ts <- function(values, patterns, threshold, ...) {
# Do the change detection for each time-series
purrr::map_vec(values, function(value_row) {
# Search for the patterns
patterns_distances <- .dtw_distance(
data = value_row,
patterns = patterns
)
# Remove distances out the user-defined threshold
as.numeric(any(patterns_distances <= threshold))
})
}
#' @title Search for events in time series using windowing.
#' @name .dtw_windowed_ts
#' @description This function searches for events in time series with windowing.
#' @keywords internal
#' @noRd
.dtw_windowed_ts <- function(values, patterns, window, threshold) {
# Extract dates
dates_min <- .ts_min_date(values[[1]])
dates_max <- .ts_max_date(values[[1]])
# Assume time-series are regularized, then use the period
# as the step of the moving window. As a result, we have
# one step per iteration.
dates_step <- lubridate::as.period(
lubridate::int_diff(.ts_index(values[[1]]))
)
dates_step <- dates_step[[1]]
# Create comparison windows
comparison_windows <- .period_windows(
period = window,
step = dates_step,
start_date = dates_min,
end_date = dates_max
)
# Do the change detection for each time-series
purrr::map(values, function(value_row) {
# Search for the patterns
patterns_distances <- .dtw_distance_windowed(
data = value_row,
patterns = patterns,
windows = comparison_windows
)
# Remove distances out the user-defined threshold
patterns_distances[patterns_distances > threshold] <- NA
# Define where each label was detected. For this, first
# get from each label the minimal distance
detections_idx <- apply(patterns_distances, 2, which.min)
detections_name <- names(detections_idx)
# For each label, extract the metadata where they had
# minimal distance
purrr::map_df(seq_len(length(detections_idx)), function(idx) {
# Extract detection name and inced
detection_name <- detections_name[idx]
detection_idx <- detections_idx[idx]
# Extract detection distance (min one defined above)
detection_distance <- patterns_distances[detection_idx,]
detection_distance <- detection_distance[detection_name]
detection_distance <- as.numeric(detection_distance)
# Extract detection dates
detection_dates <- comparison_windows[[detection_idx]]
# Prepare result and return it!
tibble::tibble(
change = detection_name,
distance = detection_distance,
from = detection_dates[["start"]],
to = detection_dates[["end"]]
)
})
})
}
Loading

0 comments on commit 61af7d9

Please sign in to comment.