Skip to content

Commit

Permalink
bias calc to calculate distance threshold by default
Browse files Browse the repository at this point in the history
  • Loading branch information
grantbuster committed Dec 8, 2023
1 parent fe030c2 commit 73912b4
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 19 deletions.
26 changes: 20 additions & 6 deletions sup3r/bias/bias_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def __init__(self,
bias_fps,
base_dset,
bias_feature,
distance_upper_bound,
distance_upper_bound=None,
target=None,
shape=None,
base_handler='Resource',
Expand Down Expand Up @@ -66,7 +66,8 @@ def __init__(self,
distance_upper_bound : float
Upper bound on the nearest neighbor distance in decimal degrees.
This should be the approximate resolution of the low-resolution
bias data.
bias data. If None (default), this is calculated from the median
coordinate spacing between consecutive points in bias_fps.
target : tuple
(lat, lon) lower left corner of raster to retrieve from bias_fps.
If None then the lower left corner of the full domain will be used.
Expand Down Expand Up @@ -108,6 +109,7 @@ class to be retrieved from the rex/sup3r library. If a
self.base_handler_kwargs = base_handler_kwargs or {}
self.bias_handler_kwargs = bias_handler_kwargs or {}
self.bad_bias_gids = []
self._distance_upper_bound = distance_upper_bound

if isinstance(self.base_fps, str):
self.base_fps = sorted(glob(self.base_fps))
Expand Down Expand Up @@ -153,10 +155,9 @@ class to be retrieved from the rex/sup3r library. If a
self.bias_gid_raster = np.arange(lats.size)
self.bias_gid_raster = self.bias_gid_raster.reshape(raster_shape)

out = self.bias_tree.query(self.base_meta[['latitude', 'longitude']],
k=1,
distance_upper_bound=distance_upper_bound)
self.nn_dist, self.nn_ind = out
self.nn_dist, self.nn_ind = self.bias_tree.query(
self.base_meta[['latitude', 'longitude']], k=1,
distance_upper_bound=self.distance_upper_bound)

self.out = None
self._init_out()
Expand All @@ -180,6 +181,19 @@ def meta(self):
'version_record': VERSION_RECORD}
return meta

@property
def distance_upper_bound(self):
    """Upper bound on the nearest neighbor distance passed to the bias
    tree query that maps base meta coordinates to bias grid points.

    Returns
    -------
    float
        The value supplied at initialization. If that value was None, the
        bound is computed once from the bias meta as the larger of the
        median latitude spacing and the median longitude spacing between
        consecutive rows, and then cached for subsequent calls.
    """
    if self._distance_upper_bound is None:
        coords = self.bias_meta[['latitude', 'longitude']].values
        # Use the magnitude of the spacing: with signed differences, a
        # coordinate axis stored in descending order yields a median <= 0,
        # which would make the nearest neighbor query reject every point.
        spacing = np.abs(np.diff(coords, axis=0))
        self._distance_upper_bound = np.max(np.median(spacing, axis=0))
        logger.info('Set distance upper bound to {:.4f}'
                    .format(self._distance_upper_bound))
    return self._distance_upper_bound

@staticmethod
def compare_dists(base_data, bias_data, adder=0, scalar=1):
"""Compare two distributions using the two-sample Kolmogorov-Smirnov.
Expand Down
26 changes: 17 additions & 9 deletions sup3r/preprocessing/data_handling/exo_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,14 @@ def __init__(self,
typically low-res WRF output or GCM netcdf data files that is
source low-resolution data intended to be sup3r resolved.
exo_source : str
Filepath to source wtk or nsrdb file to get hi-res (2km or 4km)
elevation data from which will be mapped to the enhanced grid of
the file_paths input
Filepath to source data file to get hi-res elevation data from
which will be mapped to the enhanced grid of the file_paths input.
Pixels from this exo_source will be mapped to their nearest low-res
pixel in the file_paths input. Accordingly, exo_source should be of
significantly higher resolution than file_paths. Warnings will be
raised if the low-resolution pixels in file_paths do not have
unique nearest pixels from exo_source. File format can be .h5 for
TopoExtractH5 or .nc for TopoExtractNC
s_enhance : int
Factor by which the Sup3rGan model will enhance the spatial
dimensions of low resolution data from file_paths input. For
Expand All @@ -72,12 +77,13 @@ def __init__(self,
t_enhance is 4, this class will output a sza raster
corresponding to the file_paths temporally enhanced 4x to 15 min
t_agg_factor : int
Factor by which to aggregate the exo_source data to the resolution
of the file_paths input enhanced by t_enhance. For example, if
getting sza data, file_paths have hourly data, and t_enhance
is 4 resulting in a desired resolution of 5 min and exo_source
has a resolution of 5 min, the t_agg_factor should be 4 so that
every fourth timestep in the exo_source data is skipped.
Factor by which to aggregate / subsample the exo_source data to the
resolution of the file_paths input enhanced by t_enhance. For
example, if getting sza data, file_paths have hourly data, and
t_enhance is 4 resulting in a target resolution of 15 min and
exo_source has a resolution of 5 min, the t_agg_factor should be 3
so that only timesteps that are a multiple of 15min are selected
e.g., [0, 5, 10, 15, 20, 25, 30][slice(0, None, 3)] = [0, 15, 30]
target : tuple
(lat, lon) lower left corner of raster. Either need target+shape or
raster_file.
Expand Down Expand Up @@ -308,6 +314,8 @@ def distance_upper_bound(self):
diff = np.diff(self.source_lat_lon, axis=0)
diff = np.max(np.median(diff, axis=0))
self._distance_upper_bound = diff
logger.info('Set distance upper bound to {:.4f}'
.format(self._distance_upper_bound))
return self._distance_upper_bound

@property
Expand Down
10 changes: 7 additions & 3 deletions sup3r/preprocessing/data_handling/exogenous_data_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,9 +245,13 @@ def __init__(self,
source. e.g. {'spatial': '4km', 'temporal': '60min'}. This is used
only if agg factors are not provided in the steps list.
source_file : str
Filepath to source wtk, nsrdb, or netcdf file to get hi-res (2km or
4km) data from which will be mapped to the enhanced grid of the
file_paths input
Filepath to source wtk, nsrdb, or netcdf file to get hi-res data
from which will be mapped to the enhanced grid of the file_paths
input. Pixels from this file will be mapped to their nearest
low-res pixel in the file_paths input. Accordingly, source_file
should be of significantly higher resolution than file_paths.
Warnings will be raised if the low-resolution pixels in file_paths
do not have unique nearest pixels from this exo source data.
target : tuple
(lat, lon) lower left corner of raster. Either need target+shape or
raster_file.
Expand Down
2 changes: 1 addition & 1 deletion tests/data_handling/test_utils_topo.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def test_topo_extraction_h5(s_enhance, plot=False):
true_out = te.source_data[iloc].mean()
assert np.allclose(test_out, true_out)

shutil.rmtree('./exo_cache/')
shutil.rmtree('./exo_cache/', ignore_errors=True)

if plot:
a = plt.scatter(te.source_lat_lon[:, 1], te.source_lat_lon[:, 0],
Expand Down
5 changes: 5 additions & 0 deletions tests/forward_pass/test_forward_pass_exo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""pytests for data handling"""
import json
import os
import shutil
import tempfile

import matplotlib.pyplot as plt
Expand Down Expand Up @@ -963,6 +964,8 @@ def test_fwp_multi_step_model_multi_exo():
'U_100m', 'V_100m', 'topography'
]

shutil.rmtree('./exo_cache', ignore_errors=True)


def test_fwp_multi_step_exo_hi_res_topo_and_sza():
"""Test the forward pass with multiple ExoGan models requiring
Expand Down Expand Up @@ -1197,3 +1200,5 @@ def test_fwp_multi_step_exo_hi_res_topo_and_sza():

for fp in handler.out_files:
assert os.path.exists(fp)

shutil.rmtree('./exo_cache', ignore_errors=True)

0 comments on commit 73912b4

Please sign in to comment.