added local linear bias correct to forward pass bc options

NREL · grantbuster · Oct 4, 2022 · Sep 20, 2022 · Sep 20, 2022 · Sep 20, 2022
commit 7b28943dd9338df42445e5c72dc8ec7f775b708b
diff --git a/sup3r/bias/__init__.py b/sup3r/bias/__init__.py
@@ -1,3 +1,3 @@
 # -*- coding: utf-8 -*-
 """Bias calculation and correction modules."""
-from .bias_transforms import bc_scalar_adder
+from .bias_transforms import global_linear_bc, local_linear_bc
diff --git a/sup3r/bias/bias_calc.py b/sup3r/bias/bias_calc.py
@@ -183,8 +183,8 @@ def get_node_cmd(cls, config):
  "t_elap = time.time() - t0;\n")
 
  job_name = config.get('job_name', None)
- if job_name is not None:
-  status_dir = config.get('status_dir', None)
+ status_dir = config.get('status_dir', None)
+ if job_name is not None and status_dir is not None:
  status_file_arg_str = f'"{status_dir}", '
  status_file_arg_str += f'module="{ModuleName.BIAS_CALC}", '
  status_file_arg_str += f'job_name="{job_name}", '

diff --git a/sup3r/bias/bias_calc_cli.py b/sup3r/bias/bias_calc_cli.py
@@ -75,6 +75,7 @@ def from_config(ctx, config_file, verbose):
  jobs = config['jobs']
  for i_node, job in enumerate(jobs):
  node_config = copy.deepcopy(job)
+ node_config['status_dir'] = status_dir
  node_config['log_file'] = (
  log_pattern if log_pattern is None
  else os.path.normpath(log_pattern.format(node_index=i_node)))

diff --git a/sup3r/bias/bias_transforms.py b/sup3r/bias/bias_transforms.py
@@ -1,8 +1,10 @@
 # -*- coding: utf-8 -*-
 """Bias correction transformation functions."""
+import numpy as np
+from rex import Resource
 
 
-def bc_scalar_adder(input, scalar, adder):
+def global_linear_bc(input, scalar, adder, out_range=None):
  """Bias correct data using a simple global *scalar +adder method.
 
  Parameters
@@ -13,10 +15,71 @@ def bc_scalar_adder(input, scalar, adder):
  Scalar (multiplicative) value to apply to input data.
  adder : float
  Adder value to apply to input data.
+ out_range : None | tuple
+ Option to set floor/ceiling values on the output data.
 
  Returns
  -------
  out : np.ndarray
  out = input * scalar + adder
  """
- return input * scalar + adder
+ out = input * scalar + adder
+ if out_range is not None:
+ out = np.maximum(out, np.min(out_range))
+ out = np.minimum(out, np.max(out_range))
+ return out
+
+
+def local_linear_bc(input, feature_name, bias_fp, lr_padded_slice,
+ out_range=None):
+ """Bias correct data using a simple local *scalar +adder method.
+
+ Parameters
+ ----------
+ input : np.ndarray
+ Any data to be bias corrected
+ feature_name : str
+ Name of feature that is being corrected. Datasets with names
+ "{feature_name}_scalar" and "{feature_name}_adder" will be retrieved
+ from bias_fp.
+ bias_fp : str
+ Filepath to bias correction file from the bias calc module. Must have
+ datasets "{feature_name}_scalar" and "{feature_name}_adder" that are
+ the full low-resolution shape of the forward pass input that will be
+ sliced using lr_padded_slice for the current chunk.
+ lr_padded_slice : tuple
+ Tuple of length four that slices (spatial_1, spatial_2, temporal,
+ features) where each tuple entry is a slice object for that axis.
+ Note that if this method is called as part of a sup3r forward pass, the
+ lr_padded_slice will be included in the kwargs for the active chunk.
+ out_range : None | tuple
+ Option to set floor/ceiling values on the output data.
+
+ Returns
+ -------
+ out : np.ndarray
+ out = input * scalar + adder
+ """
+
+ scalar = f'{feature_name}_scalar'
+ adder = f'{feature_name}_adder'
+ with Resource(bias_fp) as res:
+ scalar = res[scalar]
+ adder = res[adder]
+
+ spatial_slice = (lr_padded_slice[0], lr_padded_slice[1])
+ scalar = scalar[spatial_slice]
+ adder = adder[spatial_slice]
+
+ scalar = np.expand_dims(scalar, axis=-1)
+ adder = np.expand_dims(adder, axis=-1)
+
+ scalar = np.repeat(scalar, input.shape[-1], axis=-1)
+ adder = np.repeat(adder, input.shape[-1], axis=-1)
+
+ out = input * scalar + adder
+ if out_range is not None:
+ out = np.maximum(out, np.min(out_range))
+ out = np.minimum(out, np.max(out_range))
+
+ return out
diff --git a/sup3r/pipeline/forward_pass.py b/sup3r/pipeline/forward_pass.py
@@ -12,6 +12,7 @@
 import copy
 from datetime import datetime as dt
 import psutil
+from inspect import signature
 
 from rex.utilities.fun_utils import get_fun_call_str
 from rex.utilities.execution import SpawnProcessPool
@@ -1138,17 +1139,18 @@ def __init__(self, strategy, chunk_index=0, node_index=0):
  input_handler_kwargs.update(self.strategy._input_handler_kwargs)
  self.data_handler = self.input_handler_class(**input_handler_kwargs)
  self.data_handler.load_cached_data()
+ self.input_data = self.data_handler.data
+
+ self.input_data = self.bias_correct_source_data(self.input_data)
 
  exo_s_en = self.exo_kwargs.get('s_enhancements', None)
- out = self.pad_source_data(self.data_handler.data,
+ out = self.pad_source_data(self.input_data,
  self.pad_s1_start, self.pad_s1_end,
  self.pad_s2_start, self.pad_s2_end,
  self.pad_t_start, self.pad_t_end,
  self.exogenous_data, exo_s_en)
  self.input_data, self.exogenous_data = out
 
- self.input_data = self.bias_correct_source_data(self.input_data)
-
  @property
  def file_paths(self):
  """Get a list of source filepaths to get data from. This list is
@@ -1168,17 +1170,31 @@ def temporal_pad_slice(self):
 
  return ti_pad_slice
 
+ @property
+ def lr_padded_slice(self):
+ """Get the padded slice argument that can be used to slice the full
+ domain source low res data to return just the extent used for the
+ current chunk.
+
+ Returns
+ -------
+ lr_padded_slice : tuple
+ Tuple of length four that slices (spatial_1, spatial_2, temporal,
+ features) where each tuple entry is a slice object for that axis.
+ """
+ return self.strategy.lr_pad_slices[self.spatial_chunk_index]
+
  @property
  def target(self):
  """Get target for current spatial chunk"""
- lr_slice = self.strategy.lr_pad_slices[self.spatial_chunk_index]
- return self.strategy.lr_lat_lon[lr_slice[0], lr_slice[1]][-1, 0]
+ spatial_slice = self.lr_padded_slice[0], self.lr_padded_slice[1]
+ return self.strategy.lr_lat_lon[spatial_slice][-1, 0]
 
  @property
  def shape(self):
  """Get shape for current spatial chunk"""
- lr_slice = self.strategy.lr_pad_slices[self.spatial_chunk_index]
- return self.strategy.lr_lat_lon[lr_slice[0], lr_slice[1]].shape[:-1]
+ spatial_slice = self.lr_padded_slice[0], self.lr_padded_slice[1]
+ return self.strategy.lr_lat_lon[spatial_slice].shape[:-1]
 
  @property
  def chunks(self):
@@ -1382,11 +1398,16 @@ def bias_correct_source_data(self, data):
  logger.info('Running bias correction with: {}'.format(method))
  for feature, feature_kwargs in kwargs.items():
  idf = self.data_handler.features.index(feature)
- data[..., idf] = method(data[..., idf], **feature_kwargs)
- logger.debug('Bias corrected feature "{}" at axis index {} '
+
+ if 'lr_padded_slice' in signature(method).parameters:
+ feature_kwargs['lr_padded_slice'] = self.lr_padded_slice
+
+ logger.debug('Bias correcting feature "{}" at axis index {} '
  'using function: {} with kwargs: {}'
  .format(feature, idf, method, feature_kwargs))
 
+ data[..., idf] = method(data[..., idf], **feature_kwargs)
+
  return data
 
  def _prep_exogenous_input(self, chunk_shape):
@@ -1623,11 +1644,17 @@ def incremental_check_run(cls, strategy, node_index, chunk_index):
  logger.info('Not running chunk index {}, output file '
  'exists: {}'.format(chunk_index, out_file))
  else:
- fwp = cls(strategy, chunk_index, node_index)
- logger.info(f'Running forward pass for chunk_index={chunk_index}, '
- f'node_index={node_index}, '
- f'file_paths={fwp.file_paths}')
- fwp.run_chunk()
+ try:
+ fwp = cls(strategy, chunk_index, node_index)
+ logger.info(f'Running forward pass for '
+ f'chunk_index={chunk_index}, '
+ f'node_index={node_index}, '
+ f'file_paths={fwp.file_paths}')
+ fwp.run_chunk()
+ except Exception as e:
+ msg = ('Sup3r ForwardPass chunk failed!')
+ logger.exception(msg)
+ raise RuntimeError(msg) from e
 
  @classmethod
  def run(cls, strategy, node_index):
@@ -1667,9 +1694,10 @@ def run(cls, strategy, node_index):
  for chunk_index in strategy.node_chunks[node_index]:
  future = exe.submit(cls.incremental_check_run,
  strategy=strategy,
- chunk_index=chunk_index,
- node_index=node_index)
+ node_index=node_index,
+ chunk_index=chunk_index)
  futures[future] = chunk_index
+
  logger.info(f'Started {len(futures)} forward passes '
  f'in {dt.now() - now}.')