Merge remote-tracking branch 'origin/main' into pp/gaps_integration
Conflicts:
	requirements.txt
	sup3r/batch/batch.py
	sup3r/bias/bias_calc.py
	sup3r/pipeline/__init__.py
	sup3r/pipeline/config.py
	sup3r/pipeline/forward_pass.py
	sup3r/pipeline/pipeline.py
	sup3r/postprocessing/collection.py
	sup3r/preprocessing/data_handling.py
	sup3r/qa/qa.py
	sup3r/qa/stats.py
	sup3r/qa/visual_qa.py
	sup3r/solar/solar.py
	sup3r/utilities/cli.py
	sup3r/utilities/regridder.py
ppinchuk committed Sep 8, 2023
2 parents 40fd9be + fb000fa commit 52de2ee
Showing 33 changed files with 8,285 additions and 5,249 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -43,6 +43,7 @@ nosetests.xml
coverage.xml
*.cover
.hypothesis/
*.png

# Translations
*.mo
4 changes: 3 additions & 1 deletion requirements.txt
@@ -3,8 +3,9 @@ matplotlib>=3.1
NREL-rex>=0.2.84
NREL-phygnn>=0.0.23
NREL-gaps>=0.4.5
numpy==1.22
NREL-rev<0.8.0
NREL-farms>=1.0.4
google-auth-oauthlib==0.5.3
pytest>=5.2
pillow
tensorflow>2.4
@@ -13,3 +14,4 @@ netCDF4==1.5.8
dask
sphinx
pandas
numpy==1.22
200 changes: 130 additions & 70 deletions sup3r/bias/bias_calc.py

Large diffs are not rendered by default.

408 changes: 260 additions & 148 deletions sup3r/pipeline/forward_pass.py

Large diffs are not rendered by default.

10 changes: 0 additions & 10 deletions sup3r/postprocessing/collection.py
@@ -132,23 +132,13 @@ def get_slices(
raise RuntimeError(msg)

row_slice = slice(np.min(row_loc), np.max(row_loc) + 1)
col_slice = slice(np.min(col_loc), np.max(col_loc) + 1)

msg = (
f'row_slice={row_slice} conflict with row_indices={row_loc}. '
'Indices do not seem to be increasing and/or contiguous.'
)
assert (row_slice.stop - row_slice.start) == len(row_loc), msg

msg = (
f'col_slice={col_slice} conflict with col_indices={col_loc}. '
'Indices do not seem to be increasing and/or contiguous.'
)
check = (col_slice.stop - col_slice.start) == len(col_loc)
if not check:
logger.warning(msg)
warn(msg)

return row_slice, col_loc

def get_coordinate_indices(self, target_meta, full_meta, threshold=1e-4):
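The get_slices hunk above revolves around a contiguity check: a slice built from an index array only covers exactly those indices when they are increasing and contiguous, and the rendered lines show both a hard assert (rows) and a soft logged warning (columns). Since this hunk is 0 additions and 10 deletions, the lines shown mix removed and surviving code, so the following is only a standalone sketch of that check, not the post-merge method; the helper name and the strict flag are illustrative.

import logging
from warnings import warn

import numpy as np

logger = logging.getLogger(__name__)


def check_contiguous(indices, name, strict=True):
    """Illustrative sketch, not sup3r API: build a slice from an index
    array and flag non-increasing or non-contiguous indices, either as a
    hard assertion or as a logged warning."""
    sl = slice(np.min(indices), np.max(indices) + 1)
    msg = (f'{name}_slice={sl} conflict with {name}_indices={indices}. '
           'Indices do not seem to be increasing and/or contiguous.')
    contiguous = (sl.stop - sl.start) == len(indices)
    if strict:
        assert contiguous, msg
    elif not contiguous:
        logger.warning(msg)
        warn(msg)
    return sl


row_slice = check_contiguous(np.array([10, 11, 12]), 'row')             # passes
col_slice = check_contiguous(np.array([4, 5, 7]), 'col', strict=False)  # warns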
36 changes: 20 additions & 16 deletions sup3r/postprocessing/file_handling.py
@@ -2,28 +2,30 @@
author : @bbenton
"""
import json
import logging
import os
import re
from abc import abstractmethod
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime as dt
from warnings import warn

import numpy as np
import xarray as xr
import pandas as pd
import logging
import xarray as xr
from rex.outputs import Outputs as BaseRexOutputs
from scipy.interpolate import griddata
import re
from datetime import datetime as dt
import json
import os
from warnings import warn

from sup3r.version import __version__
from sup3r.utilities import VERSION_RECORD
from sup3r.utilities.utilities import (invert_uv,
get_time_dim_name,
estimate_max_workers,
pd_date_range)
from sup3r.preprocessing.feature_handling import Feature

from rex.outputs import Outputs as BaseRexOutputs
from sup3r.utilities import VERSION_RECORD
from sup3r.utilities.utilities import (
estimate_max_workers,
get_time_dim_name,
invert_uv,
pd_date_range,
)
from sup3r.version import __version__

logger = logging.getLogger(__name__)

@@ -235,6 +237,8 @@ def write_data(cls, out_file, dsets, time_index, data_list, meta,
Pre-existing H5 file output path
dsets : list
list of datasets to write to out_file
time_index : pd.DatetimeIndex()
Pandas datetime index to use for file time_index.
data_list : list
List of np.ndarray objects to write to out_file
meta : pd.DataFrame
@@ -260,7 +264,7 @@

os.replace(tmp_file, out_file)
msg = ('Saved output of size '
f'{(len(data_list),) + data_list[0].shape} to: {out_file}')
f'{(len(data_list), *data_list[0].shape)} to: {out_file}')
logger.info(msg)


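The last hunk in this file changes only how the saved-output size is reported: tuple concatenation in the f-string is replaced with star-unpacking. Both spellings build the same tuple, as this small check with made-up array shapes illustrates.

import numpy as np

# Hypothetical stand-ins for the arrays passed to write_data as data_list
data_list = [np.zeros((10, 20, 5)), np.zeros((10, 20, 5))]

old_style = (len(data_list),) + data_list[0].shape    # tuple concatenation
new_style = (len(data_list), *data_list[0].shape)     # star-unpacking
assert old_style == new_style == (2, 10, 20, 5)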
85 changes: 42 additions & 43 deletions sup3r/preprocessing/batch_handling.py
@@ -12,7 +12,9 @@
from rex.utilities import log_mem
from scipy.ndimage.filters import gaussian_filter

from sup3r.preprocessing.data_handling import DataHandlerDCforH5
from sup3r.preprocessing.data_handling.h5_data_handling import (
DataHandlerDCforH5,
)
from sup3r.utilities.utilities import (
estimate_max_workers,
nn_fill_array,
@@ -221,20 +223,21 @@ def __init__(
handler_shapes = np.array([d.sample_shape for d in data_handlers])
assert np.all(handler_shapes[0] == handler_shapes)

self.handlers = data_handlers
self.s_enhance = s_enhance
self.t_enhance = t_enhance
self.data_handlers = data_handlers
self.batch_size = batch_size
self.sample_shape = handler_shapes[0]
self.val_indices = self._get_val_indices()
self.max = np.ceil(len(self.val_indices) / (batch_size))
self.s_enhance = s_enhance
self.t_enhance = t_enhance
self._remaining_observations = len(self.val_indices)
self.temporal_coarsening_method = temporal_coarsening_method
self._i = 0
self.output_features_ind = output_features_ind
self.output_features = output_features
self.smoothing = smoothing
self.smoothing_ignore = smoothing_ignore
self.current_batch_indices = []

def _get_val_indices(self):
"""List of dicts to index each validation data observation across all
Expand All @@ -249,7 +252,7 @@ def _get_val_indices(self):
"""

val_indices = []
for i, h in enumerate(self.handlers):
for i, h in enumerate(self.data_handlers):
if h.val_data is not None:
for _ in range(h.val_data.shape[2]):
spatial_slice = uniform_box_sampler(
@@ -286,13 +289,13 @@ def shape(self):
dimension
"""
time_steps = 0
for h in self.handlers:
for h in self.data_handlers:
time_steps += h.val_data.shape[2]
return (
self.handlers[0].val_data.shape[0],
self.handlers[0].val_data.shape[1],
self.data_handlers[0].val_data.shape[0],
self.data_handlers[0].val_data.shape[1],
time_steps,
self.handlers[0].val_data.shape[3],
self.data_handlers[0].val_data.shape[3],
)

def __iter__(self):
@@ -343,35 +346,30 @@ def __next__(self):
validation data batch with low and high res data each with
n_observations = batch_size
"""
self.current_batch_indices = []
if self._remaining_observations > 0:
if self._remaining_observations > self.batch_size:
high_res = np.zeros(
(
self.batch_size,
self.sample_shape[0],
self.sample_shape[1],
self.sample_shape[2],
self.handlers[0].shape[-1],
),
dtype=np.float32,
)
n_obs = self.batch_size
else:
high_res = np.zeros(
(
self._remaining_observations,
self.sample_shape[0],
self.sample_shape[1],
self.sample_shape[2],
self.handlers[0].shape[-1],
),
dtype=np.float32,
)
n_obs = self._remaining_observations

high_res = np.zeros(
(
n_obs,
self.sample_shape[0],
self.sample_shape[1],
self.sample_shape[2],
self.data_handlers[0].shape[-1],
),
dtype=np.float32,
)
for i in range(high_res.shape[0]):
val_index = self.val_indices[self._i + i]
high_res[i, ...] = self.handlers[
high_res[i, ...] = self.data_handlers[
val_index['handler_index']
].val_data[val_index['tuple_index']]
self._remaining_observations -= 1
self.current_batch_indices.append(val_index['handler_index'])

if self.sample_shape[2] == 1:
high_res = high_res[..., 0, :]
@@ -663,7 +661,8 @@ def parallel_load(self):
max_workers = self.load_workers
if max_workers == 1:
for d in self.data_handlers:
d.load_cached_data()
if d.data is None:
d.load_cached_data()
else:
with ThreadPoolExecutor(max_workers=max_workers) as exe:
futures = {}
@@ -1296,8 +1295,8 @@ def _get_val_indices(self):
val_indices = {}
for t in range(self.N_TIME_BINS):
val_indices[t] = []
h_idx = np.random.choice(np.arange(len(self.handlers)))
h = self.handlers[h_idx]
h_idx = np.random.choice(np.arange(len(self.data_handlers)))
h = self.data_handlers[h_idx]
for _ in range(self.batch_size):
spatial_slice = uniform_box_sampler(
h.data, self.sample_shape[:2]
@@ -1319,8 +1318,8 @@
)
for s in range(self.N_SPACE_BINS):
val_indices[s + self.N_TIME_BINS] = []
h_idx = np.random.choice(np.arange(len(self.handlers)))
h = self.handlers[h_idx]
h_idx = np.random.choice(np.arange(len(self.data_handlers)))
h = self.data_handlers[h_idx]
for _ in range(self.batch_size):
weights = np.zeros(self.N_SPACE_BINS)
weights[s] = 1
@@ -1350,15 +1349,15 @@ def __next__(self):
self.sample_shape[0],
self.sample_shape[1],
self.sample_shape[2],
self.handlers[0].shape[-1],
self.data_handlers[0].shape[-1],
),
dtype=np.float32,
)
val_indices = self.val_indices[self._i]
for i, idx in enumerate(val_indices):
high_res[i, ...] = self.handlers[idx['handler_index']].data[
idx['tuple_index']
]
high_res[i, ...] = self.data_handlers[
idx['handler_index']
].data[idx['tuple_index']]

batch = self.BATCH_CLASS.get_coarse_batch(
high_res,
@@ -1394,15 +1393,15 @@ def __next__(self):
self.batch_size,
self.sample_shape[0],
self.sample_shape[1],
self.handlers[0].shape[-1],
self.data_handlers[0].shape[-1],
),
dtype=np.float32,
)
val_indices = self.val_indices[self._i]
for i, idx in enumerate(val_indices):
high_res[i, ...] = self.handlers[idx['handler_index']].data[
idx['tuple_index']
][..., 0, :]
high_res[i, ...] = self.data_handlers[
idx['handler_index']
].data[idx['tuple_index']][..., 0, :]

batch = self.BATCH_CLASS.get_coarse_batch(
high_res,
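Besides the handlers to data_handlers rename, the main functional change in this file's validation __next__ hunk is that the two nearly identical np.zeros allocations are collapsed into one by deciding the batch length first. A minimal sketch of that pattern, with illustrative argument names rather than the actual class attributes:

import numpy as np


def allocate_val_batch(remaining_observations, batch_size, sample_shape,
                       n_features):
    """Sketch of the single-allocation pattern from the __next__ hunk above:
    pick the number of observations first, then build the high-res array
    once instead of duplicating the allocation in both branches."""
    n_obs = min(remaining_observations, batch_size)
    return np.zeros((n_obs, *sample_shape, n_features), dtype=np.float32)


# e.g. 3 observations left with a batch size of 8 gives a 3-sample batch
batch = allocate_val_batch(3, 8, (20, 20, 4), 2)
assert batch.shape == (3, 20, 20, 4, 2)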
(Remaining changed files not rendered.)
