diff --git a/sup3r/bias/bias_calc.py b/sup3r/bias/bias_calc.py index 2780aacf4..8498afe43 100644 --- a/sup3r/bias/bias_calc.py +++ b/sup3r/bias/bias_calc.py @@ -37,7 +37,7 @@ def __init__(self, bias_fps, base_dset, bias_feature, - distance_upper_bound, + distance_upper_bound=None, target=None, shape=None, base_handler='Resource', @@ -66,7 +66,8 @@ def __init__(self, distance_upper_bound : float Upper bound on the nearest neighbor distance in decimal degrees. This should be the approximate resolution of the low-resolution - bias data. + bias data. None (default) will calculate this based on the median + distance between points in bias_fps target : tuple (lat, lon) lower left corner of raster to retrieve from bias_fps. If None then the lower left corner of the full domain will be used. @@ -108,6 +109,7 @@ class to be retrieved from the rex/sup3r library. If a self.base_handler_kwargs = base_handler_kwargs or {} self.bias_handler_kwargs = bias_handler_kwargs or {} self.bad_bias_gids = [] + self._distance_upper_bound = distance_upper_bound if isinstance(self.base_fps, str): self.base_fps = sorted(glob(self.base_fps)) @@ -153,10 +155,9 @@ class to be retrieved from the rex/sup3r library. 
If a self.bias_gid_raster = np.arange(lats.size) self.bias_gid_raster = self.bias_gid_raster.reshape(raster_shape) - out = self.bias_tree.query(self.base_meta[['latitude', 'longitude']], - k=1, - distance_upper_bound=distance_upper_bound) - self.nn_dist, self.nn_ind = out + self.nn_dist, self.nn_ind = self.bias_tree.query( + self.base_meta[['latitude', 'longitude']], k=1, + distance_upper_bound=self.distance_upper_bound) self.out = None self._init_out() @@ -180,6 +181,19 @@ def meta(self): 'version_record': VERSION_RECORD} return meta + @property + def distance_upper_bound(self): + """Upper bound (float) on the nearest neighbor distance in decimal + degrees used to map base data to the low-resolution bias data.""" + if self._distance_upper_bound is None: + diff = np.diff(self.bias_meta[['latitude', 'longitude']].values, + axis=0) + diff = np.max(np.median(diff, axis=0)) + self._distance_upper_bound = diff + logger.info('Set distance upper bound to {:.4f}' + .format(self._distance_upper_bound)) + return self._distance_upper_bound + @staticmethod def compare_dists(base_data, bias_data, adder=0, scalar=1): """Compare two distributions using the two-sample Kolmogorov-Smirnov. diff --git a/sup3r/preprocessing/data_handling/exo_extraction.py b/sup3r/preprocessing/data_handling/exo_extraction.py index 1e5970f4d..b0700c017 100644 --- a/sup3r/preprocessing/data_handling/exo_extraction.py +++ b/sup3r/preprocessing/data_handling/exo_extraction.py @@ -56,9 +56,14 @@ def __init__(self, typically low-res WRF output or GCM netcdf data files that is source low-resolution data intended to be sup3r resolved. exo_source : str - Filepath to source wtk or nsrdb file to get hi-res (2km or 4km) - elevation data from which will be mapped to the enhanced grid of - the file_paths input + Filepath to source data file to get hi-res elevation data from + which will be mapped to the enhanced grid of the file_paths input. 
+ Pixels from this exo_source will be mapped to their nearest low-res + pixel in the file_paths input. Accordingly, exo_source should be a + significantly higher resolution than file_paths. Warnings will be + raised if the low-resolution pixels in file_paths do not have + unique nearest pixels from exo_source. File format can be .h5 for + TopoExtractH5 or .nc for TopoExtractNC s_enhance : int Factor by which the Sup3rGan model will enhance the spatial dimensions of low resolution data from file_paths input. For @@ -72,12 +77,13 @@ def __init__(self, t_enhance is 4, this class will output a sza raster corresponding to the file_paths temporally enhanced 4x to 15 min t_agg_factor : int - Factor by which to aggregate the exo_source data to the resolution - of the file_paths input enhanced by t_enhance. For example, if - getting sza data, file_paths have hourly data, and t_enhance - is 4 resulting in a desired resolution of 5 min and exo_source - has a resolution of 5 min, the t_agg_factor should be 4 so that - every fourth timestep in the exo_source data is skipped. + Factor by which to aggregate / subsample the exo_source data to the + resolution of the file_paths input enhanced by t_enhance. For + example, if getting sza data, file_paths have hourly data, and + t_enhance is 4 resulting in a target resolution of 15 min and + exo_source has a resolution of 5 min, the t_agg_factor should be 3 + so that only timesteps that are a multiple of 15min are selected + e.g., [0, 5, 10, 15, 20, 25, 30][slice(0, None, 3)] = [0, 15, 30] target : tuple (lat, lon) lower left corner of raster. Either need target+shape or raster_file. 
@@ -308,6 +314,8 @@ def distance_upper_bound(self): diff = np.diff(self.source_lat_lon, axis=0) diff = np.max(np.median(diff, axis=0)) self._distance_upper_bound = diff + logger.info('Set distance upper bound to {:.4f}' + .format(self._distance_upper_bound)) return self._distance_upper_bound @property diff --git a/sup3r/preprocessing/data_handling/exogenous_data_handling.py b/sup3r/preprocessing/data_handling/exogenous_data_handling.py index 50cf0ecdd..0c5519dd7 100644 --- a/sup3r/preprocessing/data_handling/exogenous_data_handling.py +++ b/sup3r/preprocessing/data_handling/exogenous_data_handling.py @@ -245,9 +245,13 @@ def __init__(self, source. e.g. {'spatial': '4km', 'temporal': '60min'}. This is used only if agg factors are not provided in the steps list. source_file : str - Filepath to source wtk, nsrdb, or netcdf file to get hi-res (2km or - 4km) data from which will be mapped to the enhanced grid of the - file_paths input + Filepath to source wtk, nsrdb, or netcdf file to get hi-res data + from which will be mapped to the enhanced grid of the file_paths + input. Pixels from this file will be mapped to their nearest + low-res pixel in the file_paths input. Accordingly, the input + should be a significantly higher resolution than file_paths. + Warnings will be raised if the low-resolution pixels in file_paths + do not have unique nearest pixels from this exo source data. target : tuple (lat, lon) lower left corner of raster. Either need target+shape or raster_file. 
diff --git a/tests/data_handling/test_utils_topo.py b/tests/data_handling/test_utils_topo.py index eee1b54a9..01826a754 100644 --- a/tests/data_handling/test_utils_topo.py +++ b/tests/data_handling/test_utils_topo.py @@ -104,7 +104,7 @@ def test_topo_extraction_h5(s_enhance, plot=False): true_out = te.source_data[iloc].mean() assert np.allclose(test_out, true_out) - shutil.rmtree('./exo_cache/') + shutil.rmtree('./exo_cache/', ignore_errors=True) if plot: a = plt.scatter(te.source_lat_lon[:, 1], te.source_lat_lon[:, 0], diff --git a/tests/forward_pass/test_forward_pass_exo.py b/tests/forward_pass/test_forward_pass_exo.py index c6bfd4d32..747eeae36 100644 --- a/tests/forward_pass/test_forward_pass_exo.py +++ b/tests/forward_pass/test_forward_pass_exo.py @@ -2,6 +2,7 @@ """pytests for data handling""" import json import os +import shutil import tempfile import matplotlib.pyplot as plt @@ -963,6 +964,8 @@ def test_fwp_multi_step_model_multi_exo(): 'U_100m', 'V_100m', 'topography' ] + shutil.rmtree('./exo_cache', ignore_errors=True) + def test_fwp_multi_step_exo_hi_res_topo_and_sza(): """Test the forward pass with multiple ExoGan models requiring @@ -1197,3 +1200,5 @@ def test_fwp_multi_step_exo_hi_res_topo_and_sza(): for fp in handler.out_files: assert os.path.exists(fp) + + shutil.rmtree('./exo_cache', ignore_errors=True)