Gb/bc #96

Merged
merged 29 commits into from Oct 4, 2022

Changes from 1 commit

Commits (29)
efcf50e
added bias module with a place for bias transformation functions and …
grantbuster Sep 20, 2022
115e9c2
feature specific bias correction
grantbuster Sep 20, 2022
c184917
removed warnings about excessive padding - not a bad thing
grantbuster Sep 20, 2022
1d823e3
added a site-by-site linear bias correction calculation method
grantbuster Sep 21, 2022
8b7f25b
bug fix and logging
grantbuster Sep 21, 2022
56148db
bias calc mods and new functions
grantbuster Sep 22, 2022
50c3692
added bias calc cli
grantbuster Sep 22, 2022
2f7d4f7
added bias calc to main cli
grantbuster Sep 22, 2022
c132b81
make bias out dir
grantbuster Sep 22, 2022
915edd5
bug fixes and minor refactor to run on eagle
grantbuster Sep 22, 2022
7b28943
added local linear bias correct to forward pass bc options
grantbuster Sep 22, 2022
0cee671
added option to smooth spatial bias correction factors outside of the…
grantbuster Sep 23, 2022
aa0a040
better enumerated progress logging for fwp
grantbuster Sep 23, 2022
240a0d6
added bias correction option to QA
grantbuster Sep 23, 2022
9ac0905
minor refactor to bias correct u and v instead of windspeed and direc…
grantbuster Sep 23, 2022
d2fb1e2
fixed up the u/v QA with bias correction
grantbuster Sep 27, 2022
01fbeda
added meta data to bc h5 output attrs
grantbuster Sep 27, 2022
b370b9f
more bc convenience functions
grantbuster Sep 28, 2022
24c40b6
added monthly bias correction
grantbuster Sep 28, 2022
0deab95
added monthly bias correction data transformation method and integrate…
grantbuster Sep 29, 2022
1f28ccc
fixed collection logic for undefined mask meta variable when file is …
grantbuster Oct 2, 2022
5d93283
added bias correction calc tests
grantbuster Oct 3, 2022
0638d7b
added bias transform calcs
grantbuster Oct 3, 2022
6cc6ced
added fwp+bc integration test
grantbuster Oct 3, 2022
ca24793
added qa+bc integration test
grantbuster Oct 3, 2022
b75b3fc
added version record to bias calc output files and incremented versio…
grantbuster Oct 4, 2022
b0a2c49
simplify qa test and pylint issue
grantbuster Oct 4, 2022
7b9c88f
fixed test on h5 meta attrs dtype and docstrings
grantbuster Oct 4, 2022
2ea15e3
serial data handling for QA+BC bug
grantbuster Oct 4, 2022
added bias correction calc tests
grantbuster committed Oct 4, 2022
commit 5d932838a40f6fa7d879ecbe8bacdcfbb9e6577b
56 changes: 32 additions & 24 deletions sup3r/bias/bias_calc.py
@@ -406,8 +406,8 @@ def get_linear_correction(bias_data, base_data):
             Factor to adjust the biased data before comparing distributions:
             bias_data * scalar + adder
         """
-        scalar = (base_data.std() / bias_data.std())
-        adder = (base_data.mean() - bias_data.mean() * scalar)
+        scalar = base_data.std() / bias_data.std()
+        adder = base_data.mean() - bias_data.mean() * scalar
         return scalar, adder
 
     @classmethod
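For intuition, here is a quick standalone check of the linear factors computed above; this is a sketch and not part of the diff, and the synthetic series, seed, and variable names are illustrative assumptions. After applying bias_data * scalar + adder, the biased series matches the mean and standard deviation of the base series.

```python
# Sketch: verify that the linear scalar/adder factors match the first two
# moments of the base data. The synthetic series below are assumptions.
import numpy as np

rng = np.random.default_rng(0)
base_data = rng.normal(10, 2, 1000)   # stand-in for the reference (base) series
bias_data = rng.normal(12, 3, 1000)   # stand-in for the biased series

scalar = base_data.std() / bias_data.std()
adder = base_data.mean() - bias_data.mean() * scalar
corrected = bias_data * scalar + adder

assert np.isclose(corrected.mean(), base_data.mean())
assert np.isclose(corrected.std(), base_data.std())
```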
@@ -439,10 +439,10 @@ def write_outputs(self, fp_out, scalar, adder):
             shape (lat, lon, time)
         """
 
-        if not os.path.exists(os.path.dirname(fp_out)):
-            os.makedirs(os.path.dirname(fp_out), exist_ok=True)
-
         if fp_out is not None:
+            if not os.path.exists(os.path.dirname(fp_out)):
+                os.makedirs(os.path.dirname(fp_out), exist_ok=True)
+
             with h5py.File(fp_out, 'w') as f:
                 # pylint: disable=E1136
                 lat = self.bias_dh.lat_lon[..., 0]
@@ -459,7 +459,7 @@ def write_outputs(self, fp_out, scalar, adder):
                         .format(fp_out))
 
     def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
-            daily_avg=True, smoothing=0):
+            daily_avg=True, fill_extend=True, smooth_extend=0):
         """Run linear correction factor calculations for every site in the bias
         dataset
 
@@ -472,18 +472,22 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
             If the bias data coordinate is on average further from the base
             data coordinates than this threshold, no bias correction factors
             will be calculated directly and will just be filled from nearest
-            neighbor.
+            neighbor (if fill_extend=True, else it will be nan).
         fp_out : str | None
             Optional .h5 output file to write scalar and adder arrays.
         max_workers : int
             Number of workers to run in parallel. 1 is serial and None is all
             available.
         daily_avg : bool
             Flag to do temporal daily averaging of the base data.
-        smoothing : float
+        fill_extend : bool
+            Flag to fill data past threshold using spatial nearest neighbor. If
+            False, the extended domain will be left as NaN.
+        smooth_extend : float
             Option to smooth the scalar/adder data outside of the spatial
             domain set by the threshold input. This alleviates the weird seams
-            far from the domain of interest.
+            far from the domain of interest. This value is the standard
+            deviation for the gaussian_filter kernel
 
         Returns
         -------
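A hedged usage sketch of the updated run() signature documented above; the file paths and the target/shape values are placeholders, and the constructor arguments follow the pattern used in the tests added later in this PR.

```python
# Sketch: compute linear correction factors with the new fill_extend and
# smooth_extend options and write them to an .h5 file. Paths, target, and
# shape below are placeholder assumptions.
from sup3r.bias.bias_calc import LinearCorrection

target = (39.0, -105.7)   # placeholder (min lat, min lon) of the bias grid
shape = (10, 10)          # placeholder (n_lat, n_lon) of the bias grid

lc = LinearCorrection('/path/to/reference.h5', '/path/to/biased_data.nc',
                      'ghi', 'rsds', target, shape,
                      bias_handler='DataHandlerNCforCC')
scalar, adder = lc.run(knn=1, threshold=0.6, fp_out='./bias_factors.h5',
                       max_workers=None, daily_avg=True,
                       fill_extend=True, smooth_extend=2)
```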
@@ -533,7 +537,7 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
                 coord = bias_row[['latitude', 'longitude']]
                 dist, base_gid = self.base_tree.query(coord, k=knn)
 
-                if dist.mean() < threshold:
+                if np.mean(dist) < threshold:
                     bias_data = self.get_bias_data(bias_gid)
 
                     future = exe.submit(self._run_single, bias_data,
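For context, the check above compares the mean KD-tree distance of the knn nearest base neighbors against the threshold, in the same coordinate units as the lat/lon metadata. A standalone sketch of that lookup; the coordinates, knn, and threshold values here are illustrative assumptions.

```python
# Sketch: nearest-neighbor lookup deciding whether a bias grid cell is close
# enough to the base (reference) grid to get its own correction factors.
import numpy as np
from scipy.spatial import cKDTree

base_latlon = np.array([[39.7, -105.2], [39.8, -105.1], [40.0, -105.0]])
bias_coord = np.array([39.75, -105.15])

base_tree = cKDTree(base_latlon)
dist, base_gid = base_tree.query(bias_coord, k=2)
if np.mean(dist) < 0.6:   # threshold, assumed to be in decimal degrees
    print('computing correction factors from base gids:', base_gid)
else:
    print('too far from the base grid; left for fill_extend')
```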
@@ -557,16 +561,19 @@ def run(self, knn, threshold=0.6, fp_out=None, max_workers=None,
 
         nan_mask = np.isnan(scalar[..., 0])
 
-        for idt in range(self.NT):
-            scalar[..., idt] = nn_fill_array(scalar[..., idt])
-            adder[..., idt] = nn_fill_array(adder[..., idt])
-            if smoothing > 0:
-                scalar_smooth = gaussian_filter(scalar[..., idt], smoothing,
-                                                mode='nearest')
-                adder_smooth = gaussian_filter(adder[..., idt], smoothing,
-                                               mode='nearest')
-                scalar[nan_mask, idt] = scalar_smooth[nan_mask]
-                adder[nan_mask, idt] = adder_smooth[nan_mask]
+        if fill_extend:
+            for idt in range(self.NT):
+                scalar[..., idt] = nn_fill_array(scalar[..., idt])
+                adder[..., idt] = nn_fill_array(adder[..., idt])
+                if smooth_extend > 0:
+                    scalar_smooth = gaussian_filter(scalar[..., idt],
+                                                    smooth_extend,
+                                                    mode='nearest')
+                    adder_smooth = gaussian_filter(adder[..., idt],
+                                                   smooth_extend,
+                                                   mode='nearest')
+                    scalar[nan_mask, idt] = scalar_smooth[nan_mask]
+                    adder[nan_mask, idt] = adder_smooth[nan_mask]
 
         self.write_outputs(fp_out, scalar, adder)
 
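The block above only writes smoothed values back into the originally-NaN (out-of-domain) pixels, so in-domain correction factors are preserved. A standalone 2D sketch of the same idea using only scipy; nn_fill_array is a sup3r utility, and scipy's nearest-neighbor griddata stands in for it here as an assumption.

```python
# Sketch: fill NaNs by nearest neighbor, then optionally smooth only the
# filled (out-of-domain) pixels. A stand-in for nn_fill_array plus the
# gaussian_filter logic above, not the actual sup3r implementation.
import numpy as np
from scipy.interpolate import griddata
from scipy.ndimage import gaussian_filter

def fill_and_smooth(arr2d, smooth_extend=2):
    nan_mask = np.isnan(arr2d)
    if not nan_mask.any():
        return arr2d
    rows, cols = np.indices(arr2d.shape)
    known = np.column_stack((rows[~nan_mask], cols[~nan_mask]))
    missing = np.column_stack((rows[nan_mask], cols[nan_mask]))
    filled = arr2d.copy()
    filled[nan_mask] = griddata(known, arr2d[~nan_mask], missing,
                                method='nearest')
    if smooth_extend > 0:
        smooth = gaussian_filter(filled, smooth_extend, mode='nearest')
        filled[nan_mask] = smooth[nan_mask]  # in-domain pixels untouched
    return filled
```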
@@ -599,10 +606,11 @@ def _run_single(cls, bias_data, base_fps, base_dset, base_gid,
             bias_mask = bias_ti.month == month
             base_mask = base_ti.month == month
 
-            ms, ma = cls.get_linear_correction(bias_data[bias_mask],
-                                               base_data[base_mask])
+            if any(bias_mask) and any(base_mask):
+                ms, ma = cls.get_linear_correction(bias_data[bias_mask],
+                                                   base_data[base_mask])
 
-            scalar[month - 1] = ms
-            adder[month - 1] = ma
+                scalar[month - 1] = ms
+                adder[month - 1] = ma
 
         return scalar, adder
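For months with data, the factors land in scalar[month - 1] / adder[month - 1]; months absent from the time index stay NaN, which the new test below asserts. A hedged sketch of how such monthly factors could later be applied to a biased daily series using bias_data * scalar + adder; the function name and inputs are illustrative, not part of this PR.

```python
# Sketch: apply 12 monthly scalar/adder factors to a biased daily series,
# month by month, via bias_data * scalar + adder. Illustrative only.
import numpy as np
import pandas as pd

def apply_monthly_correction(bias_data, bias_ti, scalar, adder):
    """bias_data: 1D array, bias_ti: pd.DatetimeIndex, scalar/adder: length-12 arrays."""
    out = np.full(bias_data.shape, np.nan)
    for month in range(1, 13):
        mask = bias_ti.month == month
        if any(mask):
            out[mask] = bias_data[mask] * scalar[month - 1] + adder[month - 1]
    return out

# toy usage with identity factors (scalar=1, adder=0)
ti = pd.date_range('2018-01-01', '2018-12-31', freq='D')
data = np.random.default_rng(0).normal(12, 3, len(ti))
corrected = apply_monthly_correction(data, ti, np.ones(12), np.zeros(12))
```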
127 changes: 127 additions & 0 deletions tests/test_bias_calc.py
@@ -0,0 +1,127 @@
# -*- coding: utf-8 -*-
"""pytests bias correction calculations"""
import os
import numpy as np
import xarray as xr

from sup3r import TEST_DATA_DIR
from sup3r.bias.bias_calc import LinearCorrection, MonthlyLinearCorrection


FP_NSRDB = os.path.join(TEST_DATA_DIR, 'test_nsrdb_co_2018.h5')
FP_CC = os.path.join(TEST_DATA_DIR, 'rsds_test.nc')


def test_linear_bc():
    """Test linear bias correction"""
    with xr.open_mfdataset(FP_CC) as fh:
        min_lat = np.min(fh.lat.values)
        min_lon = np.min(fh.lon.values) - 360
        target = (min_lat, min_lon)
        shape = (len(fh.lat.values), len(fh.lon.values))

    calc = LinearCorrection(FP_NSRDB, FP_CC, 'ghi', 'rsds',
                            target, shape, bias_handler='DataHandlerNCforCC')

    # test a known in-bounds gid
    bias_gid = 5
    dist, base_gid = calc.get_base_gid(bias_gid, 1)
    bias_data = calc.get_bias_data(bias_gid)
    base_data, base_ti = calc.get_base_data(calc.base_fps, calc.base_dset,
                                            base_gid, calc.base_handler,
                                            daily_avg=True)
    bias_coord = calc.bias_meta.loc[bias_gid, ['latitude', 'longitude']]
    base_coord = calc.base_meta.loc[base_gid, ['latitude', 'longitude']]
    true_dist = bias_coord.values - base_coord.values
    true_dist = np.hypot(true_dist[0], true_dist[1])
    assert np.allclose(true_dist, dist)
    assert true_dist < 0.1
    true_scalar = base_data.std() / bias_data.std()
    true_adder = base_data.mean() - bias_data.mean() * true_scalar

    scalar, adder = calc.run(knn=1, threshold=0.6, fill_extend=False,
                             max_workers=1)

    assert len(scalar.shape) == 3
    assert len(adder.shape) == 3
    assert scalar.shape[-1] == 1
    assert adder.shape[-1] == 1

    iloc = np.where(calc.bias_gid_raster == bias_gid)
    assert np.allclose(true_scalar, scalar[iloc])
    assert np.allclose(true_adder, adder[iloc])

    corners = ((0, 0, 0), (-1, 0, 0), (0, -1, 0), (-1, -1, 0))
    for corner in corners:
        assert np.isnan(scalar[corner])
        assert np.isnan(adder[corner])
    nan_mask = np.isnan(scalar)
    assert np.isnan(adder[nan_mask]).all()

    # make sure the NN fill works for out-of-bounds pixels
    scalar, adder = calc.run(knn=1, threshold=0.6, fill_extend=True,
                             max_workers=1)

    iloc = np.where(calc.bias_gid_raster == bias_gid)
    assert np.allclose(true_scalar, scalar[iloc])
    assert np.allclose(true_adder, adder[iloc])

    assert not np.isnan(scalar[nan_mask]).any()
    assert not np.isnan(adder[nan_mask]).any()

    # make sure smoothing affects the out-of-bounds pixels but not the in-bound
    smooth_scalar, smooth_adder = calc.run(knn=1, threshold=0.6,
                                           fill_extend=True, smooth_extend=2,
                                           max_workers=1)
    assert np.allclose(smooth_scalar[~nan_mask], scalar[~nan_mask])
    assert np.allclose(smooth_adder[~nan_mask], adder[~nan_mask])
    assert not np.allclose(smooth_scalar[nan_mask], scalar[nan_mask])
    assert not np.allclose(smooth_adder[nan_mask], adder[nan_mask])


def test_monthly_linear_bc():
    """Test linear bias correction on a month-by-month basis"""
    with xr.open_mfdataset(FP_CC) as fh:
        min_lat = np.min(fh.lat.values)
        min_lon = np.min(fh.lon.values) - 360
        target = (min_lat, min_lon)
        shape = (len(fh.lat.values), len(fh.lon.values))

    calc = MonthlyLinearCorrection(FP_NSRDB, FP_CC, 'ghi', 'rsds',
                                   target, shape,
                                   bias_handler='DataHandlerNCforCC')

    # test a known in-bounds gid
    bias_gid = 5
    dist, base_gid = calc.get_base_gid(bias_gid, 1)
    bias_data = calc.get_bias_data(bias_gid)
    base_data, base_ti = calc.get_base_data(calc.base_fps, calc.base_dset,
                                            base_gid, calc.base_handler,
                                            daily_avg=True)
    bias_coord = calc.bias_meta.loc[bias_gid, ['latitude', 'longitude']]
    base_coord = calc.base_meta.loc[base_gid, ['latitude', 'longitude']]
    true_dist = bias_coord.values - base_coord.values
    true_dist = np.hypot(true_dist[0], true_dist[1])
    assert np.allclose(true_dist, dist)
    assert true_dist < 0.1
    base_data = base_data[:31]  # just take Jan for testing
    bias_data = bias_data[:31]  # just take Jan for testing
    true_scalar = base_data.std() / bias_data.std()
    true_adder = base_data.mean() - bias_data.mean() * true_scalar

    scalar, adder = calc.run(knn=1, threshold=0.6, fill_extend=True,
                             max_workers=1)

    assert len(scalar.shape) == 3
    assert len(adder.shape) == 3
    assert scalar.shape[-1] == 12
    assert adder.shape[-1] == 12

    iloc = np.where(calc.bias_gid_raster == bias_gid)
    iloc += (0, )
    assert np.allclose(true_scalar, scalar[iloc])
    assert np.allclose(true_adder, adder[iloc])

    last_mon = base_ti.month[-1]
    assert np.isnan(scalar[..., last_mon:]).all()
    assert np.isnan(adder[..., last_mon:]).all()