Merge remote-tracking branch 'origin/main' into pp/gaps_integration
Conflicts:
	requirements.txt
	sup3r/batch/batch.py
	sup3r/bias/bias_calc.py
	sup3r/pipeline/__init__.py
	sup3r/pipeline/config.py
	sup3r/pipeline/forward_pass.py
	sup3r/pipeline/pipeline.py
	sup3r/postprocessing/collection.py
	sup3r/preprocessing/data_handling.py
	sup3r/qa/qa.py
	sup3r/qa/stats.py
	sup3r/qa/visual_qa.py
	sup3r/solar/solar.py
	sup3r/utilities/cli.py
	sup3r/utilities/regridder.py
ppinchuk committed Sep 8, 2023
2 parents 40fd9be + fb000fa commit 52de2ee
Showing 33 changed files with 8,285 additions and 5,249 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -43,6 +43,7 @@ nosetests.xml
coverage.xml
*.cover
.hypothesis/
*.png

# Translations
*.mo
4 changes: 3 additions & 1 deletion requirements.txt
@@ -3,8 +3,9 @@ matplotlib>=3.1
NREL-rex>=0.2.84
NREL-phygnn>=0.0.23
NREL-gaps>=0.4.5
numpy==1.22
NREL-rev<0.8.0
NREL-farms>=1.0.4
google-auth-oauthlib==0.5.3
pytest>=5.2
pillow
tensorflow>2.4
@@ -13,3 +14,4 @@ netCDF4==1.5.8
dask
sphinx
pandas
numpy==1.22
200 changes: 130 additions & 70 deletions sup3r/bias/bias_calc.py

Large diffs are not rendered by default.

408 changes: 260 additions & 148 deletions sup3r/pipeline/forward_pass.py

Large diffs are not rendered by default.

10 changes: 0 additions & 10 deletions sup3r/postprocessing/collection.py
@@ -132,23 +132,13 @@ def get_slices(
raise RuntimeError(msg)

row_slice = slice(np.min(row_loc), np.max(row_loc) + 1)
col_slice = slice(np.min(col_loc), np.max(col_loc) + 1)

msg = (
f'row_slice={row_slice} conflict with row_indices={row_loc}. '
'Indices do not seem to be increasing and/or contiguous.'
)
assert (row_slice.stop - row_slice.start) == len(row_loc), msg

msg = (
f'col_slice={col_slice} conflict with col_indices={col_loc}. '
'Indices do not seem to be increasing and/or contiguous.'
)
check = (col_slice.stop - col_slice.start) == len(col_loc)
if not check:
logger.warning(msg)
warn(msg)

return row_slice, col_loc

def get_coordinate_indices(self, target_meta, full_meta, threshold=1e-4):
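The get_slices hunk above revolves around a contiguity check: a slice built from an index array only covers exactly those indices when they are increasing and contiguous, and the rendered lines show both a hard assert (rows) and a soft logged warning (columns). Since this hunk is 0 additions and 10 deletions, the lines shown mix removed and surviving code, so the following is only a standalone sketch of that check, not the post-merge method; the helper name and the strict flag are illustrative.

import logging
from warnings import warn

import numpy as np

logger = logging.getLogger(__name__)


def check_contiguous(indices, name, strict=True):
    """Illustrative sketch, not sup3r API: build a slice from an index
    array and flag non-increasing or non-contiguous indices, either as a
    hard assertion or as a logged warning."""
    sl = slice(np.min(indices), np.max(indices) + 1)
    msg = (f'{name}_slice={sl} conflict with {name}_indices={indices}. '
           'Indices do not seem to be increasing and/or contiguous.')
    contiguous = (sl.stop - sl.start) == len(indices)
    if strict:
        assert contiguous, msg
    elif not contiguous:
        logger.warning(msg)
        warn(msg)
    return sl


row_slice = check_contiguous(np.array([10, 11, 12]), 'row')             # passes
col_slice = check_contiguous(np.array([4, 5, 7]), 'col', strict=False)  # warns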
36 changes: 20 additions & 16 deletions sup3r/postprocessing/file_handling.py
@@ -2,28 +2,30 @@
author : @bbenton
"""
import json
import logging
import os
import re
from abc import abstractmethod
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime as dt
from warnings import warn

import numpy as np
import xarray as xr
import pandas as pd
import logging
import xarray as xr
from rex.outputs import Outputs as BaseRexOutputs
from scipy.interpolate import griddata
import re
from datetime import datetime as dt
import json
import os
from warnings import warn

from sup3r.version import __version__
from sup3r.utilities import VERSION_RECORD
from sup3r.utilities.utilities import (invert_uv,
get_time_dim_name,
estimate_max_workers,
pd_date_range)
from sup3r.preprocessing.feature_handling import Feature

from rex.outputs import Outputs as BaseRexOutputs
from sup3r.utilities import VERSION_RECORD
from sup3r.utilities.utilities import (
estimate_max_workers,
get_time_dim_name,
invert_uv,
pd_date_range,
)
from sup3r.version import __version__

logger = logging.getLogger(__name__)

@@ -235,6 +237,8 @@ def write_data(cls, out_file, dsets, time_index, data_list, meta,
Pre-existing H5 file output path
dsets : list
list of datasets to write to out_file
time_index : pd.DatetimeIndex()
Pandas datetime index to use for file time_index.
data_list : list
List of np.ndarray objects to write to out_file
meta : pd.DataFrame
@@ -260,7 +264,7 @@

os.replace(tmp_file, out_file)
msg = ('Saved output of size '
f'{(len(data_list),) + data_list[0].shape} to: {out_file}')
f'{(len(data_list), *data_list[0].shape)} to: {out_file}')
logger.info(msg)


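The last hunk in this file changes only how the saved-output size is reported: tuple concatenation in the f-string is replaced with star-unpacking. Both spellings build the same tuple, as this small check with made-up array shapes illustrates.

import numpy as np

# Hypothetical stand-ins for the arrays passed to write_data as data_list
data_list = [np.zeros((10, 20, 5)), np.zeros((10, 20, 5))]

old_style = (len(data_list),) + data_list[0].shape    # tuple concatenation
new_style = (len(data_list), *data_list[0].shape)     # star-unpacking
assert old_style == new_style == (2, 10, 20, 5)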
85 changes: 42 additions & 43 deletions sup3r/preprocessing/batch_handling.py
@@ -12,7 +12,9 @@
from rex.utilities import log_mem
from scipy.ndimage.filters import gaussian_filter

from sup3r.preprocessing.data_handling import DataHandlerDCforH5
from sup3r.preprocessing.data_handling.h5_data_handling import (
DataHandlerDCforH5,
)
from sup3r.utilities.utilities import (
estimate_max_workers,
nn_fill_array,
@@ -221,20 +223,21 @@ def __init__(
handler_shapes = np.array([d.sample_shape for d in data_handlers])
assert np.all(handler_shapes[0] == handler_shapes)

self.handlers = data_handlers
self.s_enhance = s_enhance
self.t_enhance = t_enhance
self.data_handlers = data_handlers
self.batch_size = batch_size
self.sample_shape = handler_shapes[0]
self.val_indices = self._get_val_indices()
self.max = np.ceil(len(self.val_indices) / (batch_size))
self.s_enhance = s_enhance
self.t_enhance = t_enhance
self._remaining_observations = len(self.val_indices)
self.temporal_coarsening_method = temporal_coarsening_method
self._i = 0
self.output_features_ind = output_features_ind
self.output_features = output_features
self.smoothing = smoothing
self.smoothing_ignore = smoothing_ignore
self.current_batch_indices = []

def _get_val_indices(self):
"""List of dicts to index each validation data observation across all
Expand All @@ -249,7 +252,7 @@ def _get_val_indices(self):
"""

val_indices = []
for i, h in enumerate(self.handlers):
for i, h in enumerate(self.data_handlers):
if h.val_data is not None:
for _ in range(h.val_data.shape[2]):
spatial_slice = uniform_box_sampler(
@@ -286,13 +289,13 @@ def shape(self):
dimension
"""
time_steps = 0
for h in self.handlers:
for h in self.data_handlers:
time_steps += h.val_data.shape[2]
return (
self.handlers[0].val_data.shape[0],
self.handlers[0].val_data.shape[1],
self.data_handlers[0].val_data.shape[0],
self.data_handlers[0].val_data.shape[1],
time_steps,
self.handlers[0].val_data.shape[3],
self.data_handlers[0].val_data.shape[3],
)

def __iter__(self):
@@ -343,35 +346,30 @@ def __next__(self):
validation data batch with low and high res data each with
n_observations = batch_size
"""
self.current_batch_indices = []
if self._remaining_observations > 0:
if self._remaining_observations > self.batch_size:
high_res = np.zeros(
(
self.batch_size,
self.sample_shape[0],
self.sample_shape[1],
self.sample_shape[2],
self.handlers[0].shape[-1],
),
dtype=np.float32,
)
n_obs = self.batch_size
else:
high_res = np.zeros(
(
self._remaining_observations,
self.sample_shape[0],
self.sample_shape[1],
self.sample_shape[2],
self.handlers[0].shape[-1],
),
dtype=np.float32,
)
n_obs = self._remaining_observations

high_res = np.zeros(
(
n_obs,
self.sample_shape[0],
self.sample_shape[1],
self.sample_shape[2],
self.data_handlers[0].shape[-1],
),
dtype=np.float32,
)
for i in range(high_res.shape[0]):
val_index = self.val_indices[self._i + i]
high_res[i, ...] = self.handlers[
high_res[i, ...] = self.data_handlers[
val_index['handler_index']
].val_data[val_index['tuple_index']]
self._remaining_observations -= 1
self.current_batch_indices.append(val_index['handler_index'])

if self.sample_shape[2] == 1:
high_res = high_res[..., 0, :]
@@ -663,7 +661,8 @@ def parallel_load(self):
max_workers = self.load_workers
if max_workers == 1:
for d in self.data_handlers:
d.load_cached_data()
if d.data is None:
d.load_cached_data()
else:
with ThreadPoolExecutor(max_workers=max_workers) as exe:
futures = {}
@@ -1296,8 +1295,8 @@ def _get_val_indices(self):
val_indices = {}
for t in range(self.N_TIME_BINS):
val_indices[t] = []
h_idx = np.random.choice(np.arange(len(self.handlers)))
h = self.handlers[h_idx]
h_idx = np.random.choice(np.arange(len(self.data_handlers)))
h = self.data_handlers[h_idx]
for _ in range(self.batch_size):
spatial_slice = uniform_box_sampler(
h.data, self.sample_shape[:2]
@@ -1319,8 +1318,8 @@
)
for s in range(self.N_SPACE_BINS):
val_indices[s + self.N_TIME_BINS] = []
h_idx = np.random.choice(np.arange(len(self.handlers)))
h = self.handlers[h_idx]
h_idx = np.random.choice(np.arange(len(self.data_handlers)))
h = self.data_handlers[h_idx]
for _ in range(self.batch_size):
weights = np.zeros(self.N_SPACE_BINS)
weights[s] = 1
@@ -1350,15 +1349,15 @@ def __next__(self):
self.sample_shape[0],
self.sample_shape[1],
self.sample_shape[2],
self.handlers[0].shape[-1],
self.data_handlers[0].shape[-1],
),
dtype=np.float32,
)
val_indices = self.val_indices[self._i]
for i, idx in enumerate(val_indices):
high_res[i, ...] = self.handlers[idx['handler_index']].data[
idx['tuple_index']
]
high_res[i, ...] = self.data_handlers[
idx['handler_index']
].data[idx['tuple_index']]

batch = self.BATCH_CLASS.get_coarse_batch(
high_res,
@@ -1394,15 +1393,15 @@ def __next__(self):
self.batch_size,
self.sample_shape[0],
self.sample_shape[1],
self.handlers[0].shape[-1],
self.data_handlers[0].shape[-1],
),
dtype=np.float32,
)
val_indices = self.val_indices[self._i]
for i, idx in enumerate(val_indices):
high_res[i, ...] = self.handlers[idx['handler_index']].data[
idx['tuple_index']
][..., 0, :]
high_res[i, ...] = self.data_handlers[
idx['handler_index']
].data[idx['tuple_index']][..., 0, :]

batch = self.BATCH_CLASS.get_coarse_batch(
high_res,
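Besides the handlers to data_handlers rename, the main functional change in this file's validation __next__ hunk is that the two nearly identical np.zeros allocations are collapsed into one by deciding the batch length first. A minimal sketch of that pattern, with illustrative argument names rather than the actual class attributes:

import numpy as np


def allocate_val_batch(remaining_observations, batch_size, sample_shape,
                       n_features):
    """Sketch of the single-allocation pattern from the __next__ hunk above:
    pick the number of observations first, then build the high-res array
    once instead of duplicating the allocation in both branches."""
    n_obs = min(remaining_observations, batch_size)
    return np.zeros((n_obs, *sample_shape, n_features), dtype=np.float32)


# e.g. 3 observations left with a batch size of 8 gives a 3-sample batch
batch = allocate_val_batch(3, 8, (20, 20, 4), 2)
assert batch.shape == (3, 20, 20, 4, 2)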
(Remaining changed files not rendered.)
