From cf1be369bd1beb79ffb0c5a836be60ad5019932d Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 3 May 2024 16:39:21 -0600 Subject: [PATCH 1/3] Re-using fill_and_smooth for QDM --- sup3r/bias/bias_calc.py | 175 +++++++++++++++++++++------------------- 1 file changed, 93 insertions(+), 82 deletions(-) diff --git a/sup3r/bias/bias_calc.py b/sup3r/bias/bias_calc.py index 098c8005c9..369264a72b 100644 --- a/sup3r/bias/bias_calc.py +++ b/sup3r/bias/bias_calc.py @@ -721,7 +721,91 @@ def _reduce_base_data(base_ti, base_data, base_cs_ghi, base_dset, return base_data, daily_ti -class LinearCorrection(DataRetrievalBase): +class FillAndSmoothMixin(): + """Fill and extend parameters for calibration on missing positions""" + def fill_and_smooth(self, + out, + fill_extend=True, + smooth_extend=0, + smooth_interior=0): + """For a given set of parameters, fill and extend missing positions + + Fill data extending beyond the base meta data extent by doing a + nearest neighbor gap fill. Smooth interior and extended region with + given smoothing values. + Interior smoothing can reduce the effect of extreme values + within aggregations over large number of pixels. + The interior is assumed to be defined by the region without nan values. + The extended region is assumed to be the region with nan values. + + Parameters + ---------- + out : dict + Dictionary of values defining the mean/std of the bias + base + data and the scalar + adder factors to correct the biased data + like: bias_data * scalar + adder. Each value is of shape + (lat, lon, time). + fill_extend : bool + Whether to fill data extending beyond the base meta data with + nearest neighbor values. + smooth_extend : float + Option to smooth the scalar/adder data outside of the spatial + domain set by the threshold input. This alleviates the weird seams + far from the domain of interest. 
This value is the standard + deviation for the gaussian_filter kernel + smooth_interior : float + Value to use to smooth the scalar/adder data inside of the spatial + domain set by the threshold input. This can reduce the effect of + extreme values within aggregations over large number of pixels. + This value is the standard deviation for the gaussian_filter + kernel. + + Returns + ------- + out : dict + Dictionary of values defining the mean/std of the bias + base + data and the scalar + adder factors to correct the biased data + like: bias_data * scalar + adder. Each value is of shape + (lat, lon, time). + """ + if len(self.bad_bias_gids) > 0: + logger.info('Found {} bias gids that are out of bounds: {}' + .format(len(self.bad_bias_gids), self.bad_bias_gids)) + + for key, arr in out.items(): + nan_mask = np.isnan(arr[..., 0]) + for idt in range(arr.shape[-1]): + + arr_smooth = arr[..., idt] + + needs_fill = (np.isnan(arr_smooth).any() + and fill_extend) or smooth_interior > 0 + + if needs_fill: + logger.info('Filling NaN values outside of valid spatial ' + 'extent for dataset "{}" for timestep {}' + .format(key, idt)) + arr_smooth = nn_fill_array(arr_smooth) + + arr_smooth_int = arr_smooth_ext = arr_smooth + + if smooth_extend > 0: + arr_smooth_ext = gaussian_filter(arr_smooth_ext, + smooth_extend, + mode='nearest') + + if smooth_interior > 0: + arr_smooth_int = gaussian_filter(arr_smooth_int, + smooth_interior, + mode='nearest') + + out[key][nan_mask, idt] = arr_smooth_ext[nan_mask] + out[key][~nan_mask, idt] = arr_smooth_int[~nan_mask] + + return out + + +class LinearCorrection(FillAndSmoothMixin, DataRetrievalBase): """Calculate linear correction *scalar +adder factors to bias correct data This calculation operates on single bias sites for the full time series of @@ -820,85 +904,6 @@ def _run_single(cls, base_dset) return out - def fill_and_smooth(self, - out, - fill_extend=True, - smooth_extend=0, - smooth_interior=0): - """Fill data extending beyond the base 
meta data extent by doing a - nearest neighbor gap fill. Smooth interior and extended region with - given smoothing values. - Interior smoothing can reduce the affect of extreme values - within aggregations over large number of pixels. - The interior is assumed to be defined by the region without nan values. - The extended region is assumed to be the region with nan values. - - Parameters - ---------- - out : dict - Dictionary of values defining the mean/std of the bias + base - data and the scalar + adder factors to correct the biased data - like: bias_data * scalar + adder. Each value is of shape - (lat, lon, time). - fill_extend : bool - Whether to fill data extending beyond the base meta data with - nearest neighbor values. - smooth_extend : float - Option to smooth the scalar/adder data outside of the spatial - domain set by the threshold input. This alleviates the weird seams - far from the domain of interest. This value is the standard - deviation for the gaussian_filter kernel - smooth_interior : float - Value to use to smooth the scalar/adder data inside of the spatial - domain set by the threshold input. This can reduce the effect of - extreme values within aggregations over large number of pixels. - This value is the standard deviation for the gaussian_filter - kernel. - - Returns - ------- - out : dict - Dictionary of values defining the mean/std of the bias + base - data and the scalar + adder factors to correct the biased data - like: bias_data * scalar + adder. Each value is of shape - (lat, lon, time). 
- """ - if len(self.bad_bias_gids) > 0: - logger.info('Found {} bias gids that are out of bounds: {}' - .format(len(self.bad_bias_gids), self.bad_bias_gids)) - - for key, arr in out.items(): - nan_mask = np.isnan(arr[..., 0]) - for idt in range(arr.shape[-1]): - - arr_smooth = arr[..., idt] - - needs_fill = (np.isnan(arr_smooth).any() - and fill_extend) or smooth_interior > 0 - - if needs_fill: - logger.info('Filling NaN values outside of valid spatial ' - 'extent for dataset "{}" for timestep {}' - .format(key, idt)) - arr_smooth = nn_fill_array(arr_smooth) - - arr_smooth_int = arr_smooth_ext = arr_smooth - - if smooth_extend > 0: - arr_smooth_ext = gaussian_filter(arr_smooth_ext, - smooth_extend, - mode='nearest') - - if smooth_interior > 0: - arr_smooth_int = gaussian_filter(arr_smooth_int, - smooth_interior, - mode='nearest') - - out[key][nan_mask, idt] = arr_smooth_ext[nan_mask] - out[key][~nan_mask, idt] = arr_smooth_int[~nan_mask] - - return out - def write_outputs(self, fp_out, out): """Write outputs to an .h5 file. 
@@ -1176,7 +1181,7 @@ def get_linear_correction(bias_data, base_data, bias_feature, base_dset): return out -class QuantileDeltaMappingCorrection(DataRetrievalBase): +class QuantileDeltaMappingCorrection(FillAndSmoothMixin, DataRetrievalBase): """Estimate probability distributions required by Quantile Delta Mapping The main purpose of this class is to estimate the probability @@ -1457,7 +1462,10 @@ def write_outputs(self, fp_out, out=None): def run(self, fp_out=None, max_workers=None, - daily_reduction='avg'): + daily_reduction='avg', + fill_extend=True, + smooth_extend=0, + smooth_interior=0): """Estimate the statistical distributions for each location Parameters @@ -1572,6 +1580,9 @@ def run(self, logger.info('Finished calculating bias correction factors.') + self.out = self.fill_and_smooth(self.out, fill_extend, smooth_extend, + smooth_interior) + self.write_outputs(fp_out, self.out) return copy.deepcopy(self.out) From d4578799e0c0d5dff345b0edfd927683cac3e67e Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 3 May 2024 16:39:53 -0600 Subject: [PATCH 2/3] Checking with ruff on 'E' rules --- pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 707e96a338..5307cd3848 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,6 +73,11 @@ indent-width = 4 target-version = "py38" [tool.ruff.lint] +fixable = ["ALL"] +preview = true +select = [ + "E", # pycodestyle + ] ignore = [ "B008", # function-call-in-default-argument "B024", # abstract-base-class-without-abstract-method From 4c188efbb2159651e461d9aac649ce3ac5b123c9 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 3 May 2024 16:42:39 -0600 Subject: [PATCH 3/3] Testing fill_extend with QDM --- tests/bias/test_qdm_bias_correction.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/bias/test_qdm_bias_correction.py b/tests/bias/test_qdm_bias_correction.py index 49d3faf717..37cfad0c14 100644 --- 
a/tests/bias/test_qdm_bias_correction.py +++ b/tests/bias/test_qdm_bias_correction.py @@ -141,6 +141,27 @@ def test_parallel(fp_fut_cc): ), f"Different results for {k}" +def test_fill_nan(fp_fut_cc): + """No NaN when running with fill_extend""" + + c = QuantileDeltaMappingCorrection(FP_NSRDB, FP_CC, fp_fut_cc, + 'ghi', 'rsds', + target=TARGET, shape=SHAPE, + distance_upper_bound=0.7, + bias_handler='DataHandlerNCforCC') + + # Without filling, at least one NaN or this test is useless. + out = c.run(fill_extend=False) + assert np.all([np.isnan(v).any() for v in out.values()]), ( + "Assume at least one NaN value for each param" + ) + + out = c.run() + assert not np.any([np.isnan(v).any() for v in out.values()]), ( + "All NaN values were supposed to be filled" + ) + + def test_save_file(tmp_path, fp_fut_cc): """Save valid output