added zero rate patch for precip data skill assessment
grantbuster committed Mar 28, 2024
1 parent 986a1fe commit 6f776eb
Showing 2 changed files with 92 additions and 9 deletions.
79 changes: 72 additions & 7 deletions sup3r/bias/bias_calc.py
@@ -44,7 +44,8 @@ def __init__(self,
bias_handler='DataHandlerNCforCC',
base_handler_kwargs=None,
bias_handler_kwargs=None,
decimals=None):
decimals=None,
match_zero_rate=False):
"""
Parameters
----------
@@ -94,6 +95,13 @@ class to be retrieved from the rex/sup3r library. If a
decimals, this gets passed to np.around(). If decimals
is negative, it specifies the number of positions to
the left of the decimal point.
match_zero_rate : bool
Option to fix the frequency of zero values in the biased data. The
lowest percentile of values in the biased data will be set to zero
to match the percentile of zeros in the base data. This helps
resolve the issue where global climate models produce too many days
with small precipitation totals, e.g., the "drizzle problem".
Ref: Polade et al., 2014 https://doi.org/10.1038/srep04364
"""

logger.info('Initializing DataRetrievalBase for base dset "{}" '
@@ -110,6 +118,7 @@ class to be retrieved from the rex/sup3r library. If a
self.bias_handler_kwargs = bias_handler_kwargs or {}
self.bad_bias_gids = []
self._distance_upper_bound = distance_upper_bound
self.match_zero_rate = match_zero_rate

if isinstance(self.base_fps, str):
self.base_fps = sorted(glob(self.base_fps))
@@ -367,6 +376,7 @@ def get_bias_data(self, bias_gid):
bias_data : np.ndarray
1D array of temporal data at the requested gid.
"""

idx = np.where(self.bias_gid_raster == bias_gid)
if self.bias_dh.data is None:
self.bias_dh.load_cached_data()
@@ -486,6 +496,46 @@ def get_base_data(cls,

return out_data, out_ti

@staticmethod
def _match_zero_rate(bias_data, base_data):
"""The lowest percentile of values in the biased data will be set to
zero to match the percentile of zeros in the base data. This helps
resolve the issue where global climate models produce too many days
with small precipitation totals e.g., the "drizzle problem".
Ref: Polade et al., 2014 https://doi.org/10.1038/srep04364
Parameters
----------
bias_data : np.ndarray
1D array of biased data observations.
base_data : np.ndarray
1D array of base data observations.
Returns
-------
bias_data : np.ndarray
1D array of biased data observations. Values below the quantile
associated with zeros in base_data will be set to zero
"""

q_zero_base_in = (base_data == 0).mean()
q_zero_bias_in = (bias_data == 0).mean()

q_bias = np.linspace(0, 1, len(bias_data))
min_value_bias = np.interp(q_zero_base_in, q_bias, sorted(bias_data))

bias_data[bias_data < min_value_bias] = 0

q_zero_base_out = (base_data == 0).mean()
q_zero_bias_out = (bias_data == 0).mean()

logger.debug('Input bias/base zero rate is {:.3e}/{:.3e}, '
'output is {:.3e}/{:.3e}'
.format(q_zero_bias_in, q_zero_base_in,
q_zero_bias_out, q_zero_base_out))

return bias_data
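For context, a minimal standalone sketch of the same quantile-matching idea using only numpy; the synthetic data, seed, and variable names below are illustrative assumptions and not part of the sup3r API:

```python
import numpy as np

# Synthetic example: the "base" (observed) series has ~20% exact zeros,
# while the "bias" (modeled) series drizzles small nonzero values instead.
rng = np.random.default_rng(0)
base = rng.gamma(2.0, 1.0, size=5000)
base[rng.random(5000) < 0.2] = 0.0
bias = rng.gamma(2.0, 1.0, size=5000) + 0.01  # no exact zeros

# Fraction of exact zeros in the base data (~0.2 here).
zero_rate_base = (base == 0).mean()

# Bias-data value at that same quantile; everything below it becomes zero,
# so the zero rates of the two series match.
quantiles = np.linspace(0, 1, len(bias))
threshold = np.interp(zero_rate_base, quantiles, np.sort(bias))
bias_matched = np.where(bias < threshold, 0.0, bias)

print((bias == 0).mean(), (bias_matched == 0).mean(), zero_rate_base)
# -> roughly 0.0, ~0.2, ~0.2
```

Unlike the in-place method above, this sketch returns a new array; the thresholding behavior is otherwise the same.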

@staticmethod
def _read_base_sup3r_data(dh, base_dset, base_gid):
"""Read baseline data from a sup3r DataHandler
@@ -727,7 +777,8 @@ def _run_single(cls,
daily_reduction,
bias_ti,
decimals,
base_dh_inst=None):
base_dh_inst=None,
match_zero_rate=False):
"""Find the nominal scalar + adder combination to bias correct data
at a single site"""

@@ -739,6 +790,9 @@ def _run_single(cls,
decimals=decimals,
base_dh_inst=base_dh_inst)

if match_zero_rate:
cls._match_zero_rate(bias_data, base_data)

out = cls.get_linear_correction(bias_data, base_data, bias_feature,
base_dset)
return out
@@ -931,6 +985,7 @@ def run(self,
self.bias_ti,
self.decimals,
base_dh_inst=self.base_dh,
match_zero_rate=self.match_zero_rate,
)
for key, arr in single_out.items():
self.out[key][raster_loc] = arr
@@ -963,6 +1018,7 @@ def run(self,
daily_reduction,
self.bias_ti,
self.decimals,
match_zero_rate=self.match_zero_rate,
)
futures[future] = raster_loc

@@ -1006,7 +1062,8 @@ def _run_single(cls,
daily_reduction,
bias_ti,
decimals,
base_dh_inst=None):
base_dh_inst=None,
match_zero_rate=False):
"""Find the nominal scalar + adder combination to bias correct data
at a single site"""

@@ -1018,6 +1075,9 @@ def _run_single(cls,
decimals=decimals,
base_dh_inst=base_dh_inst)

if match_zero_rate:
cls._match_zero_rate(bias_data, base_data)

base_arr = np.full(cls.NT, np.nan, dtype=np.float32)
out = {}

@@ -1117,10 +1177,12 @@ def _init_out(self):
f'bias_{self.bias_feature}_std',
f'bias_{self.bias_feature}_skew',
f'bias_{self.bias_feature}_kurtosis',
f'bias_{self.bias_feature}_zero_rate',
f'base_{self.base_dset}_mean',
f'base_{self.base_dset}_std',
f'base_{self.base_dset}_skew',
f'base_{self.base_dset}_kurtosis',
f'base_{self.base_dset}_zero_rate',
]

self.out = {k: np.full((*self.bias_gid_raster.shape, self.NT),
@@ -1154,11 +1216,13 @@ def _run_skill_eval(cls, bias_data, base_data, bias_feature, base_dset):
out[f'bias_{bias_feature}_std'] = np.nanstd(bias_data)
out[f'bias_{bias_feature}_skew'] = stats.skew(bias_data)
out[f'bias_{bias_feature}_kurtosis'] = stats.kurtosis(bias_data)
out[f'bias_{bias_feature}_zero_rate'] = (bias_data == 0).mean()

out[f'base_{base_dset}_mean'] = base_mean
out[f'base_{base_dset}_std'] = np.nanstd(base_data)
out[f'base_{base_dset}_skew'] = stats.skew(base_data)
out[f'base_{base_dset}_kurtosis'] = stats.kurtosis(base_data)
out[f'base_{base_dset}_zero_rate'] = (base_data == 0).mean()

ks_out = stats.ks_2samp(base_data - base_mean, bias_data - bias_mean)
out[f'{bias_feature}_ks_stat'] = ks_out.statistic
@@ -1175,7 +1239,7 @@ def _run_skill_eval(cls, bias_data, base_data, bias_feature, base_dset):
@classmethod
def _run_single(cls, bias_data, base_fps, bias_feature, base_dset,
base_gid, base_handler, daily_reduction, bias_ti,
decimals, base_dh_inst=None):
decimals, base_dh_inst=None, match_zero_rate=False):
"""Do a skill assessment at a single site"""

base_data, base_ti = cls.get_base_data(base_fps, base_dset,
@@ -1184,13 +1248,14 @@ def _run_single(cls, bias_data, base_fps, bias_feature, base_dset,
decimals=decimals,
base_dh_inst=base_dh_inst)

if match_zero_rate:
cls._match_zero_rate(bias_data, base_data)

arr = np.full(cls.NT, np.nan, dtype=np.float32)
out = {f'bias_{bias_feature}_mean_monthly': arr.copy(),
f'bias_{bias_feature}_std_monthly': arr.copy(),
f'base_{base_dset}_mean_monthly': arr.copy(),
f'base_{base_dset}_std_monthly': arr.copy(),
f'{bias_feature}_scalar': arr.copy(),
f'{bias_feature}_adder': arr.copy(),
}

out.update(cls._run_skill_eval(bias_data, base_data,
Expand All @@ -1208,6 +1273,6 @@ def _run_single(cls, bias_data, base_fps, bias_feature, base_dset,
for k, v in mout.items():
if not k.endswith(('_scalar', '_adder')):
k += '_monthly'
out[k][month - 1] = v

return out
22 changes: 20 additions & 2 deletions tests/bias/test_bias_correction.py
@@ -526,7 +526,25 @@ def test_nc_base_file():

out = calc.run(fill_extend=True, max_workers=1)

assert (out['rsds_scalar'] == 1).all()
assert (out['rsds_adder'] == 0).all()
assert np.allclose(out['base_rsds_mean_monthly'],
out['bias_rsds_mean_monthly'])
assert np.allclose(out['base_rsds_mean'], out['bias_rsds_mean'])
assert np.allclose(out['base_rsds_std'], out['bias_rsds_std'])


def test_match_zero_rate():
"""Test feature to match the rate of zeros in the bias data based on the
zero rate in the base data. Useful for precip where GCMs have a low-precip
"drizzle" problem."""
bias_data = np.random.uniform(0, 1, 1000)
base_data = np.random.uniform(0, 1, 1000)
base_data[base_data < 0.1] = 0

skill = SkillAssessment._run_skill_eval(bias_data, base_data, 'f1', 'f1')
assert skill['bias_f1_zero_rate'] != skill['base_f1_zero_rate']
assert (bias_data == 0).mean() != (base_data == 0).mean()

bias_data = SkillAssessment._match_zero_rate(bias_data, base_data)
skill = SkillAssessment._run_skill_eval(bias_data, base_data, 'f1', 'f1')
assert (bias_data == 0).mean() == (base_data == 0).mean()
assert skill['bias_f1_zero_rate'] == skill['base_f1_zero_rate']
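A rough sanity check on why these asserts hold (illustrative reasoning, not part of the test suite): with base_data drawn from a uniform distribution on [0, 1) and values below 0.1 zeroed, the base zero rate is about 0.1, so the interpolated threshold lands near the 10th percentile of bias_data and roughly the lowest 100 of the 1000 bias values are set to zero, bringing the two *_zero_rate skill metrics into agreement. A minimal numpy check of that arithmetic, with an assumed seed and hypothetical array names:

```python
import numpy as np

rng = np.random.default_rng(42)
bias_data = rng.uniform(0, 1, 1000)   # no exact zeros -> zero rate ~0.0
base_data = rng.uniform(0, 1, 1000)
base_data[base_data < 0.1] = 0        # ~10% exact zeros

# Same quantile-matching logic as _match_zero_rate above.
q_zero_base = (base_data == 0).mean()
threshold = np.interp(q_zero_base, np.linspace(0, 1, len(bias_data)),
                      np.sort(bias_data))
bias_data[bias_data < threshold] = 0

# The empirical zero rates now agree (to within one sample in a thousand),
# which is exactly what the bias/base *_zero_rate skill metrics compare.
assert abs((bias_data == 0).mean() - (base_data == 0).mean()) <= 1e-3
```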
