Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Gb/bc cs ratio #127

Merged
merged 4 commits into from
Dec 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 112 additions & 33 deletions sup3r/bias/bias_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ def get_data_pair(self, coord, knn, daily_reduction='avg'):
-------
base_data : np.ndarray
1D array of base data spatially averaged across the base_gid input
and possibly daily-averaged as well.
and possibly daily-averaged or min/max'd as well.
bias_data : np.ndarray
1D array of temporal data at the requested gid.
base_dist : np.ndarray
Expand Down Expand Up @@ -327,8 +327,8 @@ def get_bias_data(self, bias_gid):

return bias_data

@staticmethod
def get_base_data(base_fps, base_dset, base_gid, base_handler,
@classmethod
def get_base_data(cls, base_fps, base_dset, base_gid, base_handler,
daily_reduction='avg', decimals=None):
"""Get data from the baseline data source, possibly for many high-res
base gids corresponding to a single coarse low-res bias gid.
Expand Down Expand Up @@ -360,7 +360,7 @@ def get_base_data(base_fps, base_dset, base_gid, base_handler,
-------
out : np.ndarray
1D array of base data spatially averaged across the base_gid input
and possibly daily-averaged as well.
and possibly daily-averaged or min/max'd as well.
out_ti : pd.DatetimeIndex
DatetimeIndex object of datetimes corresponding to the
output data.
Expand All @@ -372,37 +372,13 @@ def get_base_data(base_fps, base_dset, base_gid, base_handler,
with base_handler(fp) as res:
base_ti = res.time_index

if base_dset.startswith(('U_', 'V_')):
dset_ws = base_dset.replace('U_', 'windspeed_')
dset_ws = dset_ws.replace('V_', 'windspeed_')
dset_wd = dset_ws.replace('speed', 'direction')
base_ws = res[dset_ws, :, base_gid]
base_wd = res[dset_wd, :, base_gid]

if base_dset.startswith('U_'):
base_data = -base_ws * np.sin(np.radians(base_wd))
else:
base_data = -base_ws * np.cos(np.radians(base_wd))

else:
base_data = res[base_dset, :, base_gid]

if len(base_data.shape) == 2:
base_data = base_data.mean(axis=1)

base_data, base_cs_ghi = cls._read_base_data(res, base_dset,
base_gid)
if daily_reduction is not None:
slices = [np.where(base_ti.date == date)
for date in sorted(set(base_ti.date))]
base_data = cls._reduce_base_data(base_ti, base_data,
base_cs_ghi, base_dset,
daily_reduction)
base_ti = np.array(sorted(set(base_ti.date)))
if daily_reduction.lower() == 'avg':
base_data = np.array([base_data[s0].mean()
for s0 in slices])
elif daily_reduction.lower() == 'max':
base_data = np.array([base_data[s0].max()
for s0 in slices])
elif daily_reduction.lower() == 'min':
base_data = np.array([base_data[s0].min()
for s0 in slices])

out.append(base_data)
out_ti.append(base_ti)
Expand All @@ -414,6 +390,109 @@ def get_base_data(base_fps, base_dset, base_gid, base_handler,

return out, pd.DatetimeIndex(np.hstack(out_ti))

@staticmethod
def _read_base_data(res, base_dset, base_gid):
"""Read baseline data from the resource handler with extra logic for
special datasets (e.g. u/v wind components or clearsky_ratio)

Parameters
----------
res : rex.Resource
rex Resource handler that is an open file handler of the base
file(s)
base_dset : str
A single dataset from the base_fps to retrieve.
base_gid : int | np.ndarray
One or more spatial gids to retrieve from base_fps. The data will
be spatially averaged across all of these sites.

Returns
-------
base_data : np.ndarray
1D array of base data spatially averaged across the base_gid input
base_cs_ghi : np.ndarray | None
If base_dset == "clearsky_ratio", the base_data array is GHI and
this base_cs_ghi is clearsky GHI. Otherwise this is None
"""

base_cs_ghi = None

if base_dset.startswith(('U_', 'V_')):
dset_ws = base_dset.replace('U_', 'windspeed_')
dset_ws = dset_ws.replace('V_', 'windspeed_')
dset_wd = dset_ws.replace('speed', 'direction')
base_ws = res[dset_ws, :, base_gid]
base_wd = res[dset_wd, :, base_gid]

if base_dset.startswith('U_'):
base_data = -base_ws * np.sin(np.radians(base_wd))
else:
base_data = -base_ws * np.cos(np.radians(base_wd))

elif base_dset == 'clearsky_ratio':
base_data = res['ghi', :, base_gid]
base_cs_ghi = res['clearsky_ghi', :, base_gid]

else:
base_data = res[base_dset, :, base_gid]

if len(base_data.shape) == 2:
base_data = base_data.mean(axis=1)
if base_cs_ghi is not None:
base_cs_ghi = base_cs_ghi.mean(axis=1)

return base_data, base_cs_ghi

@staticmethod
def _reduce_base_data(base_ti, base_data, base_cs_ghi, base_dset,
daily_reduction):
"""Reduce the base timeseries data using some sort of daily reduction
function.

Parameters
----------
base_ti : pd.DatetimeIndex
Time index associated with base_data
base_data : np.ndarray
1D array of base data spatially averaged across the base_gid input
base_cs_ghi : np.ndarray | None
If base_dset == "clearsky_ratio", the base_data array is GHI and
this base_cs_ghi is clearsky GHI. Otherwise this is None
base_dset : str
A single dataset from the base_fps to retrieve.
daily_reduction : str
Option to do a reduction of the hourly+ source base data to daily
data. Can be None (no reduction, keep source time frequency), "avg"
(daily average), "max" (daily max), or "min" (daily min)

Returns
-------
base_data : np.ndarray
1D array of base data spatially averaged across the base_gid input
and possibly daily-averaged or min/max'd as well.
"""

if daily_reduction is None:
return base_data

slices = [np.where(base_ti.date == date)
for date in sorted(set(base_ti.date))]

if base_dset == 'clearsky_ratio' and daily_reduction.lower() == 'avg':
base_data = np.array([base_data[s0].sum() / base_cs_ghi[s0].sum()
for s0 in slices])

elif daily_reduction.lower() == 'avg':
base_data = np.array([base_data[s0].mean() for s0 in slices])

elif daily_reduction.lower() == 'max':
base_data = np.array([base_data[s0].max() for s0 in slices])

elif daily_reduction.lower() == 'min':
base_data = np.array([base_data[s0].min() for s0 in slices])

return base_data


class LinearCorrection(DataRetrievalBase):
"""Calculate linear correction *scalar +adder factors to bias correct data
Expand Down
13 changes: 10 additions & 3 deletions sup3r/preprocessing/data_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -2371,9 +2371,11 @@ def get_clearsky_ghi(self):
ti_deltas_hours = ti_deltas.total_seconds()[1:-1] / 3600
time_freq = float(mode(ti_deltas_hours).mode[0])
t_start = self.temporal_slice.start or 0
t_end = self.temporal_slice.stop or len(self.raw_time_index)
t_slice = slice(int(t_start * 24 * (1 / time_freq)),
int(t_end * 24 * (1 / time_freq)))
t_end_target = self.temporal_slice.stop or len(self.raw_time_index)
t_start = int(t_start * 24 * (1 / time_freq))
t_end = int(t_end_target * 24 * (1 / time_freq))
t_end = np.minimum(t_end, len(ti_nsrdb))
t_slice = slice(t_start, t_end)

# pylint: disable=E1136
lat = self.lat_lon[:, :, 0].flatten()
Expand Down Expand Up @@ -2414,6 +2416,11 @@ def get_clearsky_ghi(self):
self._nsrdb_smoothing,
mode='nearest')

if cs_ghi.shape[-1] < t_end_target:
n = int(np.ceil(t_end_target / cs_ghi.shape[-1]))
cs_ghi = np.repeat(cs_ghi, n, axis=2)
cs_ghi = cs_ghi[..., :t_end_target]

logger.info('Reshaped clearsky_ghi data to final shape {} to '
'correspond with CC daily average data over source '
'temporal_slice {} with (lat, lon) grid shape of {}'
Expand Down
22 changes: 22 additions & 0 deletions tests/test_bias_correction.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,28 @@ def test_montly_linear_transform():
assert np.allclose(truth, out[..., i])


def test_clearsky_ratio():
    """Test that bias correction of daily clearsky ratio instead of raw ghi
    works."""
    handler_kwargs = {'nsrdb_source_fp': FP_NSRDB,
                      'nsrdb_agg': 4,
                      'temporal_slice': [0, 30, 1]}
    lc = LinearCorrection(FP_NSRDB, FP_CC,
                          'clearsky_ratio', 'clearsky_ratio',
                          TARGET, SHAPE,
                          bias_handler_kwargs=handler_kwargs,
                          bias_handler='DataHandlerNCforCC')
    result = lc.run(knn=1, threshold=100, fill_extend=False, max_workers=1)

    # correction factors must be fully populated (no NaN holes)
    for key in ('clearsky_ratio_scalar', 'clearsky_ratio_adder'):
        assert not np.isnan(result[key]).any()

    # daily clearsky ratios should be physical for this test domain
    for key in ('base_clearsky_ratio_mean', 'bias_clearsky_ratio_mean'):
        assert (result[key] > 0.3).all()
        assert (result[key] < 1.0).all()


def test_fwp_integration():
"""Test the integration of the bias correction method into the forward pass
framework"""
Expand Down