Gb/bc #96

Merged
merged 29 commits into from Oct 4, 2022

Changes from 1 commit

Commits (29)
efcf50e
added bias module with a place for bias transformation functions and …
grantbuster Sep 20, 2022
115e9c2
feature specific bias correction
grantbuster Sep 20, 2022
c184917
removed warnings about excessive padding - not a bad thing
grantbuster Sep 20, 2022
1d823e3
added a site-by-site linear bias correction calculation method
grantbuster Sep 21, 2022
8b7f25b
bug fix and logging
grantbuster Sep 21, 2022
56148db
bias calc mods and new functions
grantbuster Sep 22, 2022
50c3692
added bias calc cli
grantbuster Sep 22, 2022
2f7d4f7
added bias calc to main cli
grantbuster Sep 22, 2022
c132b81
make bias out dir
grantbuster Sep 22, 2022
915edd5
bug fixes and minor refactor to run on eagle
grantbuster Sep 22, 2022
7b28943
added local linear bias correct to forward pass bc options
grantbuster Sep 22, 2022
0cee671
added option to smooth spatial bias correction factors outside of the…
grantbuster Sep 23, 2022
aa0a040
better enumerated progress logging for fwp
grantbuster Sep 23, 2022
240a0d6
added bias correction option to QA
grantbuster Sep 23, 2022
9ac0905
minor refactor to bias correct u and v instead of windspeed and direc…
grantbuster Sep 23, 2022
d2fb1e2
fixed up the u/v QA with bias correction
grantbuster Sep 27, 2022
01fbeda
added meta data to bc h5 output attrs
grantbuster Sep 27, 2022
b370b9f
more bc convenience functions
grantbuster Sep 28, 2022
24c40b6
added monthly bias correction
grantbuster Sep 28, 2022
0deab95
added monthly bias correction data transformation method and integrate…
grantbuster Sep 29, 2022
1f28ccc
fixed collection logic for undefined mask meta variable when file is …
grantbuster Oct 2, 2022
5d93283
added bias correction calc tests
grantbuster Oct 3, 2022
0638d7b
added bias transform calcs
grantbuster Oct 3, 2022
6cc6ced
added fwp+bc integration test
grantbuster Oct 3, 2022
ca24793
added qa+bc integration test
grantbuster Oct 3, 2022
b75b3fc
added version record to bias calc output files and incremented versio…
grantbuster Oct 4, 2022
b0a2c49
simplify qa test and pylint issue
grantbuster Oct 4, 2022
7b9c88f
fixed test on h5 meta attrs dtype and docstrings
grantbuster Oct 4, 2022
2ea15e3
serial data handling for QA+BC bug
grantbuster Oct 4, 2022
added bias correction calc tests
grantbuster committed Oct 4, 2022
commit 5d932838a40f6fa7d879ecbe8bacdcfbb9e6577b
56 changes: 32 additions & 24 deletions sup3r/bias/bias_calc.py
@@ -406,8 +406,8 @@ def get_linear_correction(bias_data, base_data):
             Factor to adjust the biased data before comparing distributions:
             bias_data * scalar + adder
         """
-        scalar = (base_data.std() / bias_data.std())
-        adder = (base_data.mean() - bias_data.mean() * scalar)
+        scalar = base_data.std() / bias_data.std()
+        adder = base_data.mean() - bias_data.mean() * scalar
         return scalar, adder
 
     @classmethod
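For intuition, here is a quick standalone check of the linear factors computed above; this is a sketch and not part of the diff, and the synthetic series, seed, and variable names are illustrative assumptions. After applying bias_data * scalar + adder, the biased series matches the mean and standard deviation of the base series.

```python
# Sketch: verify that the linear scalar/adder factors match the first two
# moments of the base data. The synthetic series below are assumptions.
import numpy as np

rng = np.random.default_rng(0)
base_data = rng.normal(10, 2, 1000)   # stand-in for the reference (base) series
bias_data = rng.normal(12, 3, 1000)   # stand-in for the biased series

scalar = base_data.std() / bias_data.std()
adder = base_data.mean() - bias_data.mean() * scalar
corrected = bias_data * scalar + adder

assert np.isclose(corrected.mean(), base_data.mean())
assert np.isclose(corrected.std(), base_data.std())
```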
@@ -439,10 +439,10 @@ def write_outputs(self, fp_out, scalar, adder):
             shape (lat, lon, time)
         """
 
-        if not os.path.exists(os.path.dirname(fp_out)):
-            os.makedirs(os.path.dirname(fp_out), exist_ok=True)
-
         if fp_out is not None:
+            if not os.path.exists(os.path.dirname(fp_out)):
+                os.makedirs(os.path.dirname(fp_out), exist_ok=True)
+
             with h5py.File(fp_out, 'w') as f:
                 # pylint: disable=E1136
                 lat = self.bias_dh.lat_lon[..., 0]
@@ -459,7 +459,7 @@ def write_outputs(self, fp_out, scalar, adder):
                         .format(fp_out))
 
     def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
-            daily_avg=True, smoothing=0):
+            daily_avg=True, fill_extend=True, smooth_extend=0):
         """Run linear correction factor calculations for every site in the bias
         dataset
 
@@ -472,18 +472,22 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
             If the bias data coordinate is on average further from the base
             data coordinates than this threshold, no bias correction factors
             will be calculated directly and will just be filled from nearest
-            neighbor.
+            neighbor (if fill_extend=True, else it will be nan).
         fp_out : str | None
             Optional .h5 output file to write scalar and adder arrays.
         max_workers : int
             Number of workers to run in parallel. 1 is serial and None is all
             available.
         daily_avg : bool
             Flag to do temporal daily averaging of the base data.
-        smoothing : float
+        fill_extend : bool
+            Flag to fill data past threshold using spatial nearest neighbor. If
+            False, the extended domain will be left as NaN.
+        smooth_extend : float
             Option to smooth the scalar/adder data outside of the spatial
             domain set by the threshold input. This alleviates the weird seams
-            far from the domain of interest.
+            far from the domain of interest. This value is the standard
+            deviation for the gaussian_filter kernel
 
         Returns
         -------
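A hedged usage sketch of the updated run() signature documented above; the file paths and the target/shape values are placeholders, and the constructor arguments follow the pattern used in the tests added later in this PR.

```python
# Sketch: compute linear correction factors with the new fill_extend and
# smooth_extend options and write them to an .h5 file. Paths, target, and
# shape below are placeholder assumptions.
from sup3r.bias.bias_calc import LinearCorrection

target = (39.0, -105.7)   # placeholder (min lat, min lon) of the bias grid
shape = (10, 10)          # placeholder (n_lat, n_lon) of the bias grid

lc = LinearCorrection('/path/to/reference.h5', '/path/to/biased_data.nc',
                      'ghi', 'rsds', target, shape,
                      bias_handler='DataHandlerNCforCC')
scalar, adder = lc.run(knn=1, threshold=0.6, fp_out='./bias_factors.h5',
                       max_workers=None, daily_avg=True,
                       fill_extend=True, smooth_extend=2)
```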
@@ -533,7 +537,7 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
                 coord = bias_row[['latitude', 'longitude']]
                 dist, base_gid = self.base_tree.query(coord, k=knn)
 
-                if dist.mean() < threshold:
+                if np.mean(dist) < threshold:
                     bias_data = self.get_bias_data(bias_gid)
 
                     future = exe.submit(self._run_single, bias_data,
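For context, the check above compares the mean KD-tree distance of the knn nearest base neighbors against the threshold, in the same coordinate units as the lat/lon metadata. A standalone sketch of that lookup; the coordinates, knn, and threshold values here are illustrative assumptions.

```python
# Sketch: nearest-neighbor lookup deciding whether a bias grid cell is close
# enough to the base (reference) grid to get its own correction factors.
import numpy as np
from scipy.spatial import cKDTree

base_latlon = np.array([[39.7, -105.2], [39.8, -105.1], [40.0, -105.0]])
bias_coord = np.array([39.75, -105.15])

base_tree = cKDTree(base_latlon)
dist, base_gid = base_tree.query(bias_coord, k=2)
if np.mean(dist) < 0.6:   # threshold, assumed to be in decimal degrees
    print('computing correction factors from base gids:', base_gid)
else:
    print('too far from the base grid; left for fill_extend')
```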
@@ -557,16 +561,19 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
 
         nan_mask = np.isnan(scalar[..., 0])
 
-        for idt in range(self.NT):
-            scalar[..., idt] = nn_fill_array(scalar[..., idt])
-            adder[..., idt] = nn_fill_array(adder[..., idt])
-            if smoothing > 0:
-                scalar_smooth = gaussian_filter(scalar[..., idt], smoothing,
-                                                mode='nearest')
-                adder_smooth = gaussian_filter(adder[..., idt], smoothing,
-                                               mode='nearest')
-                scalar[nan_mask, idt] = scalar_smooth[nan_mask]
-                adder[nan_mask, idt] = adder_smooth[nan_mask]
+        if fill_extend:
+            for idt in range(self.NT):
+                scalar[..., idt] = nn_fill_array(scalar[..., idt])
+                adder[..., idt] = nn_fill_array(adder[..., idt])
+                if smooth_extend > 0:
+                    scalar_smooth = gaussian_filter(scalar[..., idt],
+                                                    smooth_extend,
+                                                    mode='nearest')
+                    adder_smooth = gaussian_filter(adder[..., idt],
+                                                   smooth_extend,
+                                                   mode='nearest')
+                    scalar[nan_mask, idt] = scalar_smooth[nan_mask]
+                    adder[nan_mask, idt] = adder_smooth[nan_mask]
 
         self.write_outputs(fp_out, scalar, adder)
 
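The block above only writes smoothed values back into the originally-NaN (out-of-domain) pixels, so in-domain correction factors are preserved. A standalone 2D sketch of the same idea using only scipy; nn_fill_array is a sup3r utility, and scipy's nearest-neighbor griddata stands in for it here as an assumption.

```python
# Sketch: fill NaNs by nearest neighbor, then optionally smooth only the
# filled (out-of-domain) pixels. A stand-in for nn_fill_array plus the
# gaussian_filter logic above, not the actual sup3r implementation.
import numpy as np
from scipy.interpolate import griddata
from scipy.ndimage import gaussian_filter

def fill_and_smooth(arr2d, smooth_extend=2):
    nan_mask = np.isnan(arr2d)
    if not nan_mask.any():
        return arr2d
    rows, cols = np.indices(arr2d.shape)
    known = np.column_stack((rows[~nan_mask], cols[~nan_mask]))
    missing = np.column_stack((rows[nan_mask], cols[nan_mask]))
    filled = arr2d.copy()
    filled[nan_mask] = griddata(known, arr2d[~nan_mask], missing,
                                method='nearest')
    if smooth_extend > 0:
        smooth = gaussian_filter(filled, smooth_extend, mode='nearest')
        filled[nan_mask] = smooth[nan_mask]  # in-domain pixels untouched
    return filled
```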
@@ -599,10 +606,11 @@ def _run_single(cls, bias_data, base_fps, base_dset, base_gid,
             bias_mask = bias_ti.month == month
             base_mask = base_ti.month == month
 
-            ms, ma = cls.get_linear_correction(bias_data[bias_mask],
-                                               base_data[base_mask])
+            if any(bias_mask) and any(base_mask):
+                ms, ma = cls.get_linear_correction(bias_data[bias_mask],
+                                                   base_data[base_mask])
 
-            scalar[month - 1] = ms
-            adder[month - 1] = ma
+                scalar[month - 1] = ms
+                adder[month - 1] = ma
 
         return scalar, adder
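For months with data, the factors land in scalar[month - 1] / adder[month - 1]; months absent from the time index stay NaN, which the new test below asserts. A hedged sketch of how such monthly factors could later be applied to a biased daily series using bias_data * scalar + adder; the function name and inputs are illustrative, not part of this PR.

```python
# Sketch: apply 12 monthly scalar/adder factors to a biased daily series,
# month by month, via bias_data * scalar + adder. Illustrative only.
import numpy as np
import pandas as pd

def apply_monthly_correction(bias_data, bias_ti, scalar, adder):
    """bias_data: 1D array, bias_ti: pd.DatetimeIndex, scalar/adder: length-12 arrays."""
    out = np.full(bias_data.shape, np.nan)
    for month in range(1, 13):
        mask = bias_ti.month == month
        if any(mask):
            out[mask] = bias_data[mask] * scalar[month - 1] + adder[month - 1]
    return out

# toy usage with identity factors (scalar=1, adder=0)
ti = pd.date_range('2018-01-01', '2018-12-31', freq='D')
data = np.random.default_rng(0).normal(12, 3, len(ti))
corrected = apply_monthly_correction(data, ti, np.ones(12), np.zeros(12))
```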
127 changes: 127 additions & 0 deletions tests/test_bias_calc.py
@@ -0,0 +1,127 @@
# -*- coding: utf-8 -*-
"""pytests bias correction calculations"""
import os
import numpy as np
import xarray as xr

from sup3r import TEST_DATA_DIR
from sup3r.bias.bias_calc import LinearCorrection, MonthlyLinearCorrection


FP_NSRDB = os.path.join(TEST_DATA_DIR, 'test_nsrdb_co_2018.h5')
FP_CC = os.path.join(TEST_DATA_DIR, 'rsds_test.nc')


def test_linear_bc():
    """Test linear bias correction"""
    with xr.open_mfdataset(FP_CC) as fh:
        min_lat = np.min(fh.lat.values)
        min_lon = np.min(fh.lon.values) - 360
        target = (min_lat, min_lon)
        shape = (len(fh.lat.values), len(fh.lon.values))

    calc = LinearCorrection(FP_NSRDB, FP_CC, 'ghi', 'rsds',
                            target, shape, bias_handler='DataHandlerNCforCC')

    # test a known in-bounds gid
    bias_gid = 5
    dist, base_gid = calc.get_base_gid(bias_gid, 1)
    bias_data = calc.get_bias_data(bias_gid)
    base_data, base_ti = calc.get_base_data(calc.base_fps, calc.base_dset,
                                            base_gid, calc.base_handler,
                                            daily_avg=True)
    bias_coord = calc.bias_meta.loc[bias_gid, ['latitude', 'longitude']]
    base_coord = calc.base_meta.loc[base_gid, ['latitude', 'longitude']]
    true_dist = bias_coord.values - base_coord.values
    true_dist = np.hypot(true_dist[0], true_dist[1])
    assert np.allclose(true_dist, dist)
    assert true_dist < 0.1
    true_scalar = base_data.std() / bias_data.std()
    true_adder = base_data.mean() - bias_data.mean() * true_scalar

    scalar, adder = calc.run(knn=1, threshold=0.6, fill_extend=False,
                             max_workers=1)

    assert len(scalar.shape) == 3
    assert len(adder.shape) == 3
    assert scalar.shape[-1] == 1
    assert adder.shape[-1] == 1

    iloc = np.where(calc.bias_gid_raster == bias_gid)
    assert np.allclose(true_scalar, scalar[iloc])
    assert np.allclose(true_adder, adder[iloc])

    corners = ((0, 0, 0), (-1, 0, 0), (0, -1, 0), (-1, -1, 0))
    for corner in corners:
        assert np.isnan(scalar[corner])
        assert np.isnan(adder[corner])
    nan_mask = np.isnan(scalar)
    assert np.isnan(adder[nan_mask]).all()

    # make sure the NN fill works for out-of-bounds pixels
    scalar, adder = calc.run(knn=1, threshold=0.6, fill_extend=True,
                             max_workers=1)

    iloc = np.where(calc.bias_gid_raster == bias_gid)
    assert np.allclose(true_scalar, scalar[iloc])
    assert np.allclose(true_adder, adder[iloc])

    assert not np.isnan(scalar[nan_mask]).any()
    assert not np.isnan(adder[nan_mask]).any()

    # make sure smoothing affects the out-of-bounds pixels but not the in-bound
    smooth_scalar, smooth_adder = calc.run(knn=1, threshold=0.6,
                                           fill_extend=True, smooth_extend=2,
                                           max_workers=1)
    assert np.allclose(smooth_scalar[~nan_mask], scalar[~nan_mask])
    assert np.allclose(smooth_adder[~nan_mask], adder[~nan_mask])
    assert not np.allclose(smooth_scalar[nan_mask], scalar[nan_mask])
    assert not np.allclose(smooth_adder[nan_mask], adder[nan_mask])


def test_monthly_linear_bc():
    """Test linear bias correction on a month-by-month basis"""
    with xr.open_mfdataset(FP_CC) as fh:
        min_lat = np.min(fh.lat.values)
        min_lon = np.min(fh.lon.values) - 360
        target = (min_lat, min_lon)
        shape = (len(fh.lat.values), len(fh.lon.values))

    calc = MonthlyLinearCorrection(FP_NSRDB, FP_CC, 'ghi', 'rsds',
                                   target, shape,
                                   bias_handler='DataHandlerNCforCC')

    # test a known in-bounds gid
    bias_gid = 5
    dist, base_gid = calc.get_base_gid(bias_gid, 1)
    bias_data = calc.get_bias_data(bias_gid)
    base_data, base_ti = calc.get_base_data(calc.base_fps, calc.base_dset,
                                            base_gid, calc.base_handler,
                                            daily_avg=True)
    bias_coord = calc.bias_meta.loc[bias_gid, ['latitude', 'longitude']]
    base_coord = calc.base_meta.loc[base_gid, ['latitude', 'longitude']]
    true_dist = bias_coord.values - base_coord.values
    true_dist = np.hypot(true_dist[0], true_dist[1])
    assert np.allclose(true_dist, dist)
    assert true_dist < 0.1
    base_data = base_data[:31]  # just take Jan for testing
    bias_data = bias_data[:31]  # just take Jan for testing
    true_scalar = base_data.std() / bias_data.std()
    true_adder = base_data.mean() - bias_data.mean() * true_scalar

    scalar, adder = calc.run(knn=1, threshold=0.6, fill_extend=True,
                             max_workers=1)

    assert len(scalar.shape) == 3
    assert len(adder.shape) == 3
    assert scalar.shape[-1] == 12
    assert adder.shape[-1] == 12

    iloc = np.where(calc.bias_gid_raster == bias_gid)
    iloc += (0, )
    assert np.allclose(true_scalar, scalar[iloc])
    assert np.allclose(true_adder, adder[iloc])

    last_mon = base_ti.month[-1]
    assert np.isnan(scalar[..., last_mon:]).all()
    assert np.isnan(adder[..., last_mon:]).all()