Skip to content

Commit

Permalink
Merge pull request #210 from NREL/bnb/nc4_dep_removal
Browse files Browse the repository at this point in the history
removed netCDF4 dependency in favor of h5netcdf and cftime.
added lr_padded_slice arg to qdm bias transform method.
some streamlining for era_downloader
  • Loading branch information
bnb32 committed May 2, 2024
2 parents a171300 + 7c29d0b commit d6f3288
Show file tree
Hide file tree
Showing 13 changed files with 239 additions and 288 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/publish_to_pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ jobs:
id-token: write
steps:
- uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.ref }}
fetch-depth: 0
fetch-tags: true
- name: Set up Python
uses: actions/setup-python@v4
with:
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@ dependencies = [
"NREL-farms>=1.0.4",
"dask>=2022.0",
"google-auth-oauthlib==0.5.3",
"h5netcdf>=1.1.0",
"cftime>=1.6.2",
"matplotlib>=3.1",
"numpy>=1.7.0",
"netCDF4==1.5.8",
"pandas>=2.0",
"pillow>=10.0",
"pytest>=5.2",
Expand Down
14 changes: 0 additions & 14 deletions requirements.txt

This file was deleted.

43 changes: 7 additions & 36 deletions sup3r/bias/bias_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,17 @@
import os
from abc import abstractmethod
from concurrent.futures import ProcessPoolExecutor, as_completed
from glob import glob
from pathlib import Path

import h5py
import numpy as np
import pandas as pd
import rex
from rex.utilities.fun_utils import get_fun_call_str
from rex.utilities.bc_utils import (
sample_q_invlog,
sample_q_linear,
sample_q_log,
sample_q_invlog,
)
from rex.utilities.fun_utils import get_fun_call_str
from scipy import stats
from scipy.ndimage import gaussian_filter
from scipy.spatial import KDTree
Expand All @@ -29,39 +27,11 @@
from sup3r.preprocessing.data_handling.base import DataHandler
from sup3r.utilities import VERSION_RECORD, ModuleName
from sup3r.utilities.cli import BaseCLI
from sup3r.utilities.utilities import nn_fill_array
from sup3r.utilities.utilities import expand_paths, nn_fill_array

logger = logging.getLogger(__name__)


def _expand_paths(fps):
"""Expand path(s)
Parameter
---------
fps : str or pathlib.Path or any Sequence of those
One or multiple paths to file
Returns
-------
list[str]
A list of expanded unique and sorted paths as str
Examples
--------
>>> _expand_paths("myfile.h5")
>>> _expand_paths(["myfile.h5", "*.hdf"])
"""
if isinstance(fps, (str, Path)):
fps = (fps, )

out = []
for f in fps:
out.extend(glob(f))
return sorted(set(out))


class DataRetrievalBase:
"""Base class to handle data retrieval for the biased data and the
baseline data
Expand Down Expand Up @@ -163,8 +133,8 @@ class to be retrieved from the rex/sup3r library. If a
self._distance_upper_bound = distance_upper_bound
self.match_zero_rate = match_zero_rate

self.base_fps = _expand_paths(self.base_fps)
self.bias_fps = _expand_paths(self.bias_fps)
self.base_fps = expand_paths(self.base_fps)
self.bias_fps = expand_paths(self.bias_fps)

base_sup3r_handler = getattr(sup3r.preprocessing.data_handling,
base_handler, None)
Expand Down Expand Up @@ -1224,6 +1194,7 @@ class QuantileDeltaMappingCorrection(DataRetrievalBase):
:func:`~sup3r.bias.bias_transforms.local_qdm_bc` to actually correct
a dataset.
"""

def __init__(self,
base_fps,
bias_fps,
Expand Down Expand Up @@ -1308,7 +1279,7 @@ def __init__(self,

self.bias_fut_fps = bias_fut_fps

self.bias_fut_fps = _expand_paths(self.bias_fut_fps)
self.bias_fut_fps = expand_paths(self.bias_fut_fps)

self.bias_fut_dh = self.bias_handler(self.bias_fut_fps,
[self.bias_feature],
Expand Down
27 changes: 20 additions & 7 deletions sup3r/bias/bias_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def local_linear_bc(input,
lat_lon,
feature_name,
bias_fp,
lr_padded_slice,
lr_padded_slice=None,
out_range=None,
smoothing=0,
):
Expand Down Expand Up @@ -292,8 +292,8 @@ def monthly_local_linear_bc(input,
lat_lon,
feature_name,
bias_fp,
lr_padded_slice,
time_index,
lr_padded_slice=None,
temporal_avg=True,
out_range=None,
smoothing=0,
Expand All @@ -318,18 +318,18 @@ def monthly_local_linear_bc(input,
datasets "{feature_name}_scalar" and "{feature_name}_adder" that are
the full low-resolution shape of the forward pass input that will be
sliced using lr_padded_slice for the current chunk.
time_index : pd.DatetimeIndex
DatetimeIndex object associated with the input data temporal axis
(assumed 3rd axis e.g. axis=2). Note that if this method is called as
part of a sup3r resolution forward pass, the time_index will be
included automatically for the current chunk.
lr_padded_slice : tuple | None
Tuple of length four that slices (spatial_1, spatial_2, temporal,
features) where each tuple entry is a slice object for that axes.
Note that if this method is called as part of a sup3r forward pass, the
lr_padded_slice will be included automatically in the kwargs for the
active chunk. If this is None, no slicing will be done and the full
bias correction source shape will be used.
time_index : pd.DatetimeIndex
DatetimeIndex object associated with the input data temporal axis
(assumed 3rd axis e.g. axis=2). Note that if this method is called as
part of a sup3r resolution forward pass, the time_index will be
included automatically for the current chunk.
temporal_avg : bool
Take the average scalars and adders for the chunk's time index, this
will smooth the transition of scalars/adders from month to month if
Expand Down Expand Up @@ -403,6 +403,7 @@ def local_qdm_bc(data: np.array,
base_dset: str,
feature_name: str,
bias_fp,
lr_padded_slice=None,
threshold=0.1,
relative=True,
no_trend=False):
Expand Down Expand Up @@ -433,6 +434,13 @@ def local_qdm_bc(data: np.array,
"bias_fut_{feature_name}_params", and "base_{base_dset}_params" that
are the parameters to define the statistical distributions to be used
to correct the given `data`.
lr_padded_slice : tuple | None
Tuple of length four that slices (spatial_1, spatial_2, temporal,
features) where each tuple entry is a slice object for that axes.
Note that if this method is called as part of a sup3r forward pass, the
lr_padded_slice will be included automatically in the kwargs for the
active chunk. If this is None, no slicing will be done and the full
bias correction source shape will be used.
no_trend: bool, default=False
An option to ignore the trend component of the correction, thus
resulting in an ordinary Quantile Mapping, i.e. corrects the bias by
Expand Down Expand Up @@ -485,6 +493,11 @@ def local_qdm_bc(data: np.array,
feature_name,
bias_fp,
threshold)
if lr_padded_slice is not None:
spatial_slice = (lr_padded_slice[0], lr_padded_slice[1])
base = base[spatial_slice]
bias = bias[spatial_slice]
bias_fut = bias_fut[spatial_slice]

if no_trend:
mf = None
Expand Down
6 changes: 4 additions & 2 deletions sup3r/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,8 @@ def train_epoch(self,

if only_gen or (train_gen and not gen_too_good):
trained_gen = True
b_loss_details = self.run_gradient_descent(
b_loss_details = self.timer(
self.run_gradient_descent,
batch.low_res,
batch.high_res,
self.generator_weights,
Expand All @@ -700,7 +701,8 @@ def train_epoch(self,

if only_disc or (train_disc and not disc_too_good):
trained_disc = True
b_loss_details = self.run_gradient_descent(
b_loss_details = self.timer(
self.run_gradient_descent,
batch.low_res,
batch.high_res,
self.discriminator_weights,
Expand Down
17 changes: 5 additions & 12 deletions sup3r/preprocessing/data_handling/mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
@author: bbenton
"""

import glob
import logging
import os
import pickle
Expand All @@ -18,6 +17,7 @@

from sup3r.utilities.utilities import (
estimate_max_workers,
expand_paths,
get_source_type,
ignore_case_path_fetch,
uniform_box_sampler,
Expand Down Expand Up @@ -644,22 +644,15 @@ def file_paths(self, file_paths):
----------
file_paths : str | list
A list of files to extract raster data from. Each file must have
the same number of timesteps. Can also pass a string with a
unix-style file path which will be passed through glob.glob
the same number of timesteps. Can also pass a string or list of
strings with a unix-style file path which will be passed through
glob.glob
"""
self._file_paths = file_paths
if isinstance(self._file_paths, str):
if '*' in file_paths:
self._file_paths = glob.glob(self._file_paths)
else:
self._file_paths = [self._file_paths]

self._file_paths = expand_paths(file_paths)
msg = ('No valid files provided to DataHandler. '
f'Received file_paths={file_paths}. Aborting.')
assert file_paths is not None and len(self._file_paths) > 0, msg

self._file_paths = sorted(self._file_paths)

@property
def ti_workers(self):
"""Get max number of workers for computing time index"""
Expand Down
18 changes: 9 additions & 9 deletions sup3r/utilities/__init__.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
"""Sup3r utilities"""
import sys
import pandas as pd
import numpy as np
import tensorflow as tf
import sklearn
import dask
import xarray
import netCDF4
from enum import Enum

import dask
import h5netcdf
import numpy as np
import pandas as pd
import phygnn
import rex
import sklearn
import tensorflow as tf
import xarray

from sup3r import __version__


VERSION_RECORD = {'sup3r': __version__,
'tensorflow': tf.__version__,
'sklearn': sklearn.__version__,
Expand All @@ -24,7 +23,7 @@
'nrel-rex': rex.__version__,
'python': sys.version,
'xarray': xarray.__version__,
'netCDF4': netCDF4.__version__,
'h5netcdf': h5netcdf.__version__,
'dask': dask.__version__,
}

Expand Down Expand Up @@ -56,6 +55,7 @@ def __format__(self, format_spec):
@classmethod
def all_names(cls):
"""All module names.
Returns
-------
set
Expand Down

0 comments on commit d6f3288

Please sign in to comment.