Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gb/bc #96

Merged
merged 29 commits into from
Oct 4, 2022
Merged
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
efcf50e
added bias module with a place for bias transformation functions and …
grantbuster Sep 20, 2022
115e9c2
feature specific bias correction
grantbuster Sep 20, 2022
c184917
removed warnings about excessive padding - not a bad thing
grantbuster Sep 20, 2022
1d823e3
added a site-by-site linear bias correction calculation method
grantbuster Sep 21, 2022
8b7f25b
bug fix and logging
grantbuster Sep 21, 2022
56148db
bias calc mods and new functions
grantbuster Sep 22, 2022
50c3692
added bias calc cli
grantbuster Sep 22, 2022
2f7d4f7
added bias calc to main cli
grantbuster Sep 22, 2022
c132b81
make bias out dir
grantbuster Sep 22, 2022
915edd5
bug fixes and minor refactor to run on eagle
grantbuster Sep 22, 2022
7b28943
added local linear bias correct to forward pass bc options
grantbuster Sep 22, 2022
0cee671
added option to smooth spatial bias correction factors outside of the…
grantbuster Sep 23, 2022
aa0a040
better enumerated progress logging for fwp
grantbuster Sep 23, 2022
240a0d6
added bias correction option to QA
grantbuster Sep 23, 2022
9ac0905
minor refactor to bias correct u and v instead of windspeed and direc…
grantbuster Sep 23, 2022
d2fb1e2
fixed up the u/v QA with bias correction
grantbuster Sep 27, 2022
01fbeda
added meta data to bc h5 output attrs
grantbuster Sep 27, 2022
b370b9f
more bc convenience functions
grantbuster Sep 28, 2022
24c40b6
added monthly bias correction
grantbuster Sep 28, 2022
0deab95
added montly bias correction data transformation method and integrate…
grantbuster Sep 29, 2022
1f28ccc
fixed collection logic for undefined mask meta variable when file is …
grantbuster Oct 2, 2022
5d93283
added bias correction calc tests
grantbuster Oct 3, 2022
0638d7b
added bias transform calcs
grantbuster Oct 3, 2022
6cc6ced
added fwp+bc integration test
grantbuster Oct 3, 2022
ca24793
added qa+bc integration test
grantbuster Oct 3, 2022
b75b3fc
added version record to bias calc output files and incremented versio…
grantbuster Oct 4, 2022
b0a2c49
simplify qa test and pylint issue
grantbuster Oct 4, 2022
7b9c88f
fixed test on h5 meta attrs dtype and docstrings
grantbuster Oct 4, 2022
2ea15e3
serial data handling for QA+BC bug
grantbuster Oct 4, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
added monthly bias correction
  • Loading branch information
grantbuster committed Oct 4, 2022
commit 24c40b6b25635eb4287180d6b8e26dd5ec42165a
115 changes: 88 additions & 27 deletions sup3r/bias/bias_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def __init__(self, base_fps, bias_fps, base_dset, bias_feature,
lats = self.bias_dh.lat_lon[..., 0].flatten()
lons = self.bias_dh.lat_lon[..., 1].flatten()
self.bias_meta = pd.DataFrame({'latitude': lats, 'longitude': lons})
self.bias_ti = self.bias_dh.time_index

raster_shape = self.bias_dh.lat_lon[..., 0].shape
self.bias_tree = KDTree(self.bias_meta[['latitude', 'longitude']])
Expand All @@ -108,7 +109,9 @@ def meta(self):
'base_dset': self.base_dset,
'bias_feature': self.bias_feature,
'target': self.target,
'shape': self.shape}
'shape': self.shape,
'class': str(self.__class__),
}
return meta

@staticmethod
Expand Down Expand Up @@ -162,7 +165,8 @@ def get_node_cmd(cls, config):
# pylint: disable=E1101
init_str = get_fun_call_str(cls, config)
fun_str = get_fun_call_str(cls.run, config)
fun_str = fun_str.replace(cls.__name__, 'bc')
fun_str = fun_str.partition('.')[-1]
fun_str = 'bc.' + fun_str

log_file = config.get('log_file', None)
log_level = config.get('log_level', 'INFO')
Expand Down Expand Up @@ -271,6 +275,7 @@ def get_data_pair(self, coord, knn, daily_avg=True):
bias_data = self.get_bias_data(bias_gid)
base_data = self.get_base_data(self.base_fps, self.base_dset, base_gid,
self.base_handler, daily_avg=daily_avg)
base_data = base_data[0]
return base_data, bias_data, dist

def get_bias_data(self, bias_gid):
Expand Down Expand Up @@ -329,9 +334,13 @@ def get_base_data(base_fps, base_dset, base_gid, base_handler,
out : np.ndarray
1D array of base data spatially averaged across the base_gid input
and possibly daily-averaged as well.
out_ti : pd.DatetimeIndex
DatetimeIndex object of datetimes corresponding to the
output data.
"""

out = []
out_ti = []
for fp in base_fps:
with base_handler(fp) as res:
base_ti = res.time_index
Expand Down Expand Up @@ -359,16 +368,24 @@ def get_base_data(base_fps, base_dset, base_gid, base_handler,
for date in sorted(set(base_ti.date))]
base_data = np.array([base_data[s0].mean()
for s0 in slices])
base_ti = np.array(sorted(set(base_ti.date)))

out.append(base_data)
out_ti.append(base_ti)

return np.hstack(out)
return np.hstack(out), pd.DatetimeIndex(np.hstack(out_ti))


class LinearCorrection(DataRetrievalBase):
"""Calculate linear correction *scalar +adder factors to bias correct data

This calculation operates on single bias sites for the full time series of
available data (no season bias correction)
"""

# size of the time dimension, 1 is no time-based bias correction
NT = 1

@staticmethod
def get_linear_correction(bias_data, base_data):
"""Get the linear correction factors based on 1D bias and base datasets
Expand All @@ -395,13 +412,13 @@ def get_linear_correction(bias_data, base_data):

@classmethod
def _run_single(cls, bias_data, base_fps, base_dset, base_gid,
base_handler, daily_avg):
base_handler, daily_avg, bias_ti):
"""Find the nominal scalar + adder combination to bias correct data
at a single site"""

base_data = cls.get_base_data(base_fps, base_dset,
base_gid, base_handler,
daily_avg=daily_avg)
base_data, _ = cls.get_base_data(base_fps, base_dset,
base_gid, base_handler,
daily_avg=daily_avg)

scalar, adder = cls.get_linear_correction(bias_data, base_data)

Expand All @@ -415,11 +432,11 @@ def write_outputs(self, fp_out, scalar, adder):
fp_out : str | None
Optional .h5 output file to write scalar and adder arrays.
scalar : np.ndarray
2D array of scalar factors corresponding to the bias raster data
shape (lat, lon)
3D array of scalar factors corresponding to the bias raster data
shape (lat, lon, time)
adder : np.ndarray
2D array of adder factors corresponding to the bias raster data
shape (lat, lon)
3D array of adder factors corresponding to the bias raster data
shape (lat, lon, time)
"""

if not os.path.exists(os.path.dirname(fp_out)):
Expand Down Expand Up @@ -471,16 +488,21 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
Returns
-------
scalar : np.ndarray
2D array of scalar factors corresponding to the bias raster data
shape (lat, lon)
3D array of scalar factors corresponding to the bias raster data
shape (lat, lon, time)
adder : np.ndarray
2D array of adder factors corresponding to the bias raster data
shape (lat, lon)
3D array of adder factors corresponding to the bias raster data
shape (lat, lon, time)
"""
logger.debug('Starting linear correction calculation...')

scalar = np.full(self.bias_gid_raster.shape, np.nan, np.float32)
adder = np.full(self.bias_gid_raster.shape, np.nan, np.float32)
scalar = np.full(self.bias_gid_raster.shape + (self.NT,),
np.nan, np.float32)
adder = np.full(self.bias_gid_raster.shape + (self.NT,),
np.nan, np.float32)

logger.info('Initialized scalar / adder with shape: {}'
.format(scalar.shape))

if max_workers == 1:
logger.debug('Running serial calculation.')
Expand All @@ -493,7 +515,8 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
bias_data = self.get_bias_data(bias_gid)
out = self._run_single(bias_data, self.base_fps,
self.base_dset, base_gid,
self.base_handler, daily_avg)
self.base_handler, daily_avg,
self.bias_ti)
scalar[raster_loc] = out[0]
adder[raster_loc] = out[1]

Expand All @@ -516,7 +539,7 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
future = exe.submit(self._run_single, bias_data,
self.base_fps, self.base_dset,
base_gid, self.base_handler,
daily_avg)
daily_avg, self.bias_ti)
futures[future] = raster_loc

logger.debug('Finished launching futures.')
Expand All @@ -532,16 +555,54 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
'Mean scalar: {:.3f} mean adder: {:.3f}'
.format(np.nanmean(scalar), np.nanmean(adder)))

nan_mask = np.isnan(scalar)
scalar = nn_fill_array(scalar)
adder = nn_fill_array(adder)
nan_mask = np.isnan(scalar[..., 0])

if smoothing > 0:
scalar[nan_mask] = gaussian_filter(scalar, smoothing,
mode='nearest')[nan_mask]
adder[nan_mask] = gaussian_filter(adder, smoothing,
mode='nearest')[nan_mask]
for idt in range(self.NT):
scalar[..., idt] = nn_fill_array(scalar[..., idt])
adder[..., idt] = nn_fill_array(adder[..., idt])
if smoothing > 0:
scalar_smooth = gaussian_filter(scalar[..., idt], smoothing,
mode='nearest')
adder_smooth = gaussian_filter(adder[..., idt], smoothing,
mode='nearest')
scalar[nan_mask, idt] = scalar_smooth[nan_mask]
adder[nan_mask, idt] = adder_smooth[nan_mask]

self.write_outputs(fp_out, scalar, adder)

return scalar, adder


class MonthlyLinearCorrection(LinearCorrection):
    """Calculate linear correction *scalar +adder factors to bias correct data

    This calculation operates on single bias sites on a monthly basis: one
    scalar / adder pair is computed per calendar month instead of a single
    pair for the full time series.
    """

    # size of the time dimension, 12 is monthly bias correction
    NT = 12

    @classmethod
    def _run_single(cls, bias_data, base_fps, base_dset, base_gid,
                    base_handler, daily_avg, bias_ti):
        """Find the nominal scalar + adder combination to bias correct data
        at a single site, separately for each calendar month.

        Parameters
        ----------
        bias_data : np.ndarray
            Bias data for a single site. It is indexed with boolean masks
            derived from bias_ti, so it is assumed to be aligned with
            bias_ti along its first axis (TODO confirm against caller).
        base_fps : list | str
            Passed through unchanged to cls.get_base_data()
        base_dset : str
            Passed through unchanged to cls.get_base_data()
        base_gid : int | np.ndarray
            Passed through unchanged to cls.get_base_data()
        base_handler : type
            Passed through unchanged to cls.get_base_data()
        daily_avg : bool
            Passed through unchanged to cls.get_base_data() as the
            daily_avg keyword argument.
        bias_ti : pd.DatetimeIndex
            Time index for bias_data; only its .month attribute is used.

        Returns
        -------
        scalar : np.ndarray
            1D float32 array of length cls.NT holding the scalar factor for
            each month (index 0 is month 1, index 11 is month 12).
        adder : np.ndarray
            1D float32 array of length cls.NT holding the adder factor for
            each month, ordered like scalar.
        """

        base_data, base_ti = cls.get_base_data(base_fps, base_dset,
                                               base_gid, base_handler,
                                               daily_avg=daily_avg)

        # start from NaN so every slot is explicitly assigned by the loop
        adder = np.full(cls.NT, np.nan, dtype=np.float32)
        scalar = np.full(cls.NT, np.nan, dtype=np.float32)

        # calendar months are 1-based while the output slots are 0-based
        for slot, month in enumerate(range(1, 13)):
            in_month_bias = bias_ti.month == month
            in_month_base = base_ti.month == month

            factors = cls.get_linear_correction(bias_data[in_month_bias],
                                                base_data[in_month_base])
            month_scalar, month_adder = factors

            scalar[slot] = month_scalar
            adder[slot] = month_adder

        return scalar, adder