From d88de6f32d059338be10a766fe8380e2a36edf0c Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 22 Apr 2024 21:38:02 -0600 Subject: [PATCH 01/92] Initializing PresRat with reference --- sup3r/bias/bias_calc.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sup3r/bias/bias_calc.py b/sup3r/bias/bias_calc.py index 05dfd7c7d..2fe7ecb7c 100644 --- a/sup3r/bias/bias_calc.py +++ b/sup3r/bias/bias_calc.py @@ -1092,6 +1092,21 @@ def get_linear_correction(bias_data, base_data, bias_feature, base_dset): return out +class PresRat(DataRetrievalBase): + """PresRat bias correction method (precipitation) + + The PresRat correction is defined as the combination of using the + model-predicted change ratio, the treatment of zero-precipitation days, + and the final correction factor (K) [Pierce2015]_. + + References + ---------- + .. [Pierce2015] Pierce, D. W., Cayan, D. R., Maurer, E. P., Abatzoglou, J. + T., & Hegewisch, K. C. (2015). Improved bias correction techniques for + hydrological simulations of climate change. Journal of Hydrometeorology, + 16(6), 2421-2442. + """ + class SkillAssessment(MonthlyLinearCorrection): """Calculate historical skill of one dataset compared to another.""" From 3d6f9ae512a0c9ce8507f38548c2ce9edc4dc71a Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 23 Apr 2024 11:47:51 -0600 Subject: [PATCH 02/92] feat: ZeroRateMixin() --- sup3r/bias/bias_calc.py | 73 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/sup3r/bias/bias_calc.py b/sup3r/bias/bias_calc.py index 2fe7ecb7c..b8a3d40b2 100644 --- a/sup3r/bias/bias_calc.py +++ b/sup3r/bias/bias_calc.py @@ -1092,6 +1092,79 @@ def get_linear_correction(bias_data, base_data, bias_feature, base_dset): return out +class ZeroRateMixin(DataRetrievalBase): + """Estimate zero rate + + + [Pierce2015]_. + + References + ---------- + .. [Pierce2015] Pierce, D. W., Cayan, D. R., Maurer, E. P., Abatzoglou, J. + T., & Hegewisch, K. C. (2015). 
Improved bias correction techniques for + hydrological simulations of climate change. Journal of Hydrometeorology, + 16(6), 2421-2442. + """ + def _zero_precipitation_rate(arr: np.ndarray, threshold: float = 0.01): + """Rate of (nearly) zero precipitation days + + Estimate the rate of values less than a given ``threshold``. In concept + the threshold would be zero (thus the name zero precipitation rate) + but it is often used a small threshold to truncate negligible values. + For instance, [Pierce2015]_ uses 0.01 (mm/day) for PresRat correction. + + Parameters + ---------- + arr : np.array + An array of values to be analyzed. Usually precipitation but it + could be applied to other quantities. + threshold : float + Minimum value accepted. Less than that is assumed to be zero. + + Returns + ------- + rate : float + Rate of days with negligible precipitation. (see Z_gf in + [Pierce2015]_) + + Notes + ----- + The ``NaN`` are ignored for the rate estimate. Therefore, a large + number of ``NaN`` might mislead this rate estimate. + """ + return np.nanmean((arr < 0.01).astype('i')) + + def apply_zero_precipitation_rate(arr: np.ndarray, rate: float): + """Enforce the zero precipitation rate + + Replace lowest values by zero to satisfy the given rate of zero + precipitation. + + Parameters + ---------- + arr : np.array + An array of values to be analyzed. Usually precipitation but it + rate : float + Rate of zero, or negligible, days of precipitation. + + Returns + ------- + corrected : np.array + A copy of given array that satisfies the rate of zero precipitation + days, i.e. the lowest values of precipitation are changed to zero + to satisfy that rate. + + Examples + -------- + >>> data = np.array([5, 0.1, np.nan, 0.2, 1] + >>> apply_zero_precipitation_rate(data, 0.30) + array([5. , 0. , nan, 0.2, 1. 
]) + """ + valid = arr[np.isfinite(arr)] + threshold = np.sort(valid)[round(rate * len(valid))] + return np.where(arr < threshold, 0, arr) + + class PresRat(DataRetrievalBase): """PresRat bias correction method (precipitation) From 8d3fb02471da950b035a2e3fe8b23099fd42bede Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 24 May 2024 11:36:50 -0600 Subject: [PATCH 03/92] refact: Moving ZeroRateMixin to mixins --- sup3r/bias/bias_calc.py | 74 +---------------------------------------- sup3r/bias/mixins.py | 73 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 73 deletions(-) diff --git a/sup3r/bias/bias_calc.py b/sup3r/bias/bias_calc.py index b8a3d40b2..21fd3772b 100644 --- a/sup3r/bias/bias_calc.py +++ b/sup3r/bias/bias_calc.py @@ -1092,79 +1092,6 @@ def get_linear_correction(bias_data, base_data, bias_feature, base_dset): return out -class ZeroRateMixin(DataRetrievalBase): - """Estimate zero rate - - - [Pierce2015]_. - - References - ---------- - .. [Pierce2015] Pierce, D. W., Cayan, D. R., Maurer, E. P., Abatzoglou, J. - T., & Hegewisch, K. C. (2015). Improved bias correction techniques for - hydrological simulations of climate change. Journal of Hydrometeorology, - 16(6), 2421-2442. - """ - def _zero_precipitation_rate(arr: np.ndarray, threshold: float = 0.01): - """Rate of (nearly) zero precipitation days - - Estimate the rate of values less than a given ``threshold``. In concept - the threshold would be zero (thus the name zero precipitation rate) - but it is often used a small threshold to truncate negligible values. - For instance, [Pierce2015]_ uses 0.01 (mm/day) for PresRat correction. - - Parameters - ---------- - arr : np.array - An array of values to be analyzed. Usually precipitation but it - could be applied to other quantities. - threshold : float - Minimum value accepted. Less than that is assumed to be zero. - - Returns - ------- - rate : float - Rate of days with negligible precipitation. 
(see Z_gf in - [Pierce2015]_) - - Notes - ----- - The ``NaN`` are ignored for the rate estimate. Therefore, a large - number of ``NaN`` might mislead this rate estimate. - """ - return np.nanmean((arr < 0.01).astype('i')) - - def apply_zero_precipitation_rate(arr: np.ndarray, rate: float): - """Enforce the zero precipitation rate - - Replace lowest values by zero to satisfy the given rate of zero - precipitation. - - Parameters - ---------- - arr : np.array - An array of values to be analyzed. Usually precipitation but it - rate : float - Rate of zero, or negligible, days of precipitation. - - Returns - ------- - corrected : np.array - A copy of given array that satisfies the rate of zero precipitation - days, i.e. the lowest values of precipitation are changed to zero - to satisfy that rate. - - Examples - -------- - >>> data = np.array([5, 0.1, np.nan, 0.2, 1] - >>> apply_zero_precipitation_rate(data, 0.30) - array([5. , 0. , nan, 0.2, 1. ]) - """ - valid = arr[np.isfinite(arr)] - threshold = np.sort(valid)[round(rate * len(valid))] - return np.where(arr < threshold, 0, arr) - - class PresRat(DataRetrievalBase): """PresRat bias correction method (precipitation) @@ -1180,6 +1107,7 @@ class PresRat(DataRetrievalBase): 16(6), 2421-2442. """ + class SkillAssessment(MonthlyLinearCorrection): """Calculate historical skill of one dataset compared to another.""" diff --git a/sup3r/bias/mixins.py b/sup3r/bias/mixins.py index 1d8c8174a..e16863e7f 100644 --- a/sup3r/bias/mixins.py +++ b/sup3r/bias/mixins.py @@ -92,3 +92,76 @@ def fill_and_smooth(self, out[key][~nan_mask, idt] = arr_smooth_int[~nan_mask] return out + + +class ZeroRateMixin(DataRetrievalBase): + """Estimate zero rate + + + [Pierce2015]_. + + References + ---------- + .. [Pierce2015] Pierce, D. W., Cayan, D. R., Maurer, E. P., Abatzoglou, J. + T., & Hegewisch, K. C. (2015). Improved bias correction techniques for + hydrological simulations of climate change. Journal of Hydrometeorology, + 16(6), 2421-2442. 
+ """ + def _zero_precipitation_rate(arr: np.ndarray, threshold: float = 0.01): + """Rate of (nearly) zero precipitation days + + Estimate the rate of values less than a given ``threshold``. In concept + the threshold would be zero (thus the name zero precipitation rate) + but it is often used a small threshold to truncate negligible values. + For instance, [Pierce2015]_ uses 0.01 (mm/day) for PresRat correction. + + Parameters + ---------- + arr : np.array + An array of values to be analyzed. Usually precipitation but it + could be applied to other quantities. + threshold : float + Minimum value accepted. Less than that is assumed to be zero. + + Returns + ------- + rate : float + Rate of days with negligible precipitation. (see Z_gf in + [Pierce2015]_) + + Notes + ----- + The ``NaN`` are ignored for the rate estimate. Therefore, a large + number of ``NaN`` might mislead this rate estimate. + """ + return np.nanmean((arr < 0.01).astype('i')) + + def apply_zero_precipitation_rate(arr: np.ndarray, rate: float): + """Enforce the zero precipitation rate + + Replace lowest values by zero to satisfy the given rate of zero + precipitation. + + Parameters + ---------- + arr : np.array + An array of values to be analyzed. Usually precipitation but it + rate : float + Rate of zero, or negligible, days of precipitation. + + Returns + ------- + corrected : np.array + A copy of given array that satisfies the rate of zero precipitation + days, i.e. the lowest values of precipitation are changed to zero + to satisfy that rate. + + Examples + -------- + >>> data = np.array([5, 0.1, np.nan, 0.2, 1] + >>> apply_zero_precipitation_rate(data, 0.30) + array([5. , 0. , nan, 0.2, 1. 
]) + """ + valid = arr[np.isfinite(arr)] + threshold = np.sort(valid)[round(rate * len(valid))] + return np.where(arr < threshold, 0, arr) From c8ad983eb5239f1ea2a36cfc745f984fcb389c7d Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 24 May 2024 11:37:13 -0600 Subject: [PATCH 04/92] Adding ipython into dev environment --- pixi.lock | 276 ++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + 2 files changed, 275 insertions(+), 2 deletions(-) diff --git a/pixi.lock b/pixi.lock index 82d2b6519..7da0dc1b9 100644 --- a/pixi.lock +++ b/pixi.lock @@ -624,6 +624,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/distributed-2024.6.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/docutils-0.20.1-py311h38be061_3.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/executing-2.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/flatbuffers-23.5.26-h59595ed_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2 @@ -666,9 +667,11 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/importlib_metadata-8.0.0-hd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ipython-8.24.0-pyh707e725_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jaraco.classes-3.4.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jaraco.context-5.3.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jaraco.functools-4.0.0-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/jedi-0.19.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jeepney-0.8.0-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.4-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/keras-2.15.0-pyhd8ed1ab_0.conda @@ -788,10 +791,13 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/pkginfo-1.10.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prompt-toolkit-3.0.42-pyha770c72_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/protobuf-4.24.4-py311h46cbc50_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/psutil-6.0.0-py311h331c9d8_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/ptyprocess-0.7.0-pyhd3deb0d_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-17.0-hb77b528_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pure_eval-0.2.2-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/pyarrow-15.0.0-py311h39c9aba_0_cpu.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pyarrow-hotfix-0.6-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pyasn1-0.6.0-pyhd8ed1ab_0.conda @@ -843,6 +849,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.7-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/stack_data-0.6.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tblib-3.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tensorboard-2.15.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/tensorboard-data-server-0.7.0-py311h63ff55d_1.conda @@ -928,6 +935,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aiohttp-3.9.5-py311h05b510d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/aiosignal-1.3.1-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.16-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/asttokens-2.4.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/astunparse-1.6.3-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/attrs-23.2.0-pyh71513ae_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-auth-0.7.11-ha4ce7b8_1.conda @@ -976,6 +984,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/distributed-2024.6.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/docutils-0.20.1-py311h267d04e_3.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/executing-2.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/flatbuffers-23.5.26-h13dd4ca_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/fonttools-4.53.0-py311hd3f4193_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/freetype-2.12.1-hadb7bae_2.conda @@ -1002,6 +1011,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/importlib_metadata-8.0.0-hd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ipython-8.24.0-pyh707e725_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jaraco.classes-3.4.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jaraco.context-5.3.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jaraco.functools-4.0.0-pyhd8ed1ab_0.conda @@ -1087,9 +1097,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pillow-10.3.0-py311hd7951ec_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pkginfo-1.10.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prompt-toolkit-3.0.42-pyha770c72_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/protobuf-4.24.4-py311h4d1eceb_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/psutil-6.0.0-py311hd3f4193_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pthread-stubs-0.4-h27ca646_1001.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/ptyprocess-0.7.0-pyhd3deb0d_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/pure_eval-0.2.2-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyarrow-15.0.0-py311hd7bc329_0_cpu.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pyarrow-hotfix-0.6-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pyasn1-0.6.0-pyhd8ed1ab_0.conda @@ -1134,6 +1147,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.7-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/stack_data-0.6.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tblib-3.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tensorboard-2.15.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tensorboard-data-server-0.7.0-py311h5fb2c35_1.conda @@ -2467,6 +2481,24 @@ packages: requires_dist: - typing-extensions>=4.0.0 ; python_version < '3.11' requires_python: '>=3.8.0' +- kind: conda + name: asttokens + version: 2.4.1 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/asttokens-2.4.1-pyhd8ed1ab_0.conda + sha256: 708168f026df19a0344983754d27d1f7b28bb21afc7b97a82f02c4798a3d2111 + md5: 5f25798dcefd8252ce5f9dc494d5f571 + depends: + - python >=3.5 + - six >=1.12.0 + license: Apache-2.0 + license_family: Apache + purls: + - pkg:pypi/asttokens + size: 28922 + timestamp: 1698341257884 - kind: conda name: astunparse version: 1.6.3 @@ -3850,6 +3882,23 @@ packages: purls: [] size: 618596 timestamp: 1640112124844 +- kind: conda + name: decorator + version: 5.1.1 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/decorator-5.1.1-pyhd8ed1ab_0.tar.bz2 + sha256: 328a6a379f9bdfd0230e51de291ce858e6479411ea4b0545fb377c71662ef3e2 + md5: 43afe5ab04e35e17ba28649471dd7364 + depends: + - python >=3.5 + license: BSD-2-Clause + license_family: BSD + purls: + - pkg:pypi/decorator + size: 12072 + timestamp: 1641555714315 - kind: pypi name: dill version: 0.3.8 @@ -3951,6 +4000,23 @@ packages: - pkg:pypi/exceptiongroup?source=conda-forge-mapping size: 20551 timestamp: 1704921321122 +- kind: conda + name: executing + version: 2.0.1 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/executing-2.0.1-pyhd8ed1ab_0.conda + sha256: c738804ab1e6376f8ea63372229a04c8d658dc90fd5a218c6273a2eaf02f4057 + md5: 
e16be50e378d8a4533b989035b196ab8 + depends: + - python >=2.7 + license: MIT + license_family: MIT + purls: + - pkg:pypi/executing + size: 27689 + timestamp: 1698580072627 - kind: conda name: expat version: 2.6.2 @@ -4971,6 +5037,35 @@ packages: - pkg:pypi/iniconfig?source=conda-forge-mapping size: 11101 timestamp: 1673103208955 +- kind: conda + name: ipython + version: 8.24.0 + build: pyh707e725_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/ipython-8.24.0-pyh707e725_0.conda + sha256: d3ce492dac53a8f1c6cd682a25313f02993a1333b5e4787a15259a6e7cb28562 + md5: 1fb1f1fcbe053a762748dbf0ae4cfd0d + depends: + - __unix + - decorator + - exceptiongroup + - jedi >=0.16 + - matplotlib-inline + - pexpect >4.3 + - pickleshare + - prompt-toolkit >=3.0.41,<3.1.0 + - pygments >=2.4.0 + - python >=3.10 + - stack_data + - traitlets >=5.13.0 + - typing_extensions >=4.6 + license: BSD-3-Clause + license_family: BSD + purls: + - pkg:pypi/ipython + size: 596366 + timestamp: 1715263505659 - kind: pypi name: isort version: 5.13.2 @@ -5035,6 +5130,24 @@ packages: - pkg:pypi/jaraco-functools?source=conda-forge-mapping size: 15192 timestamp: 1701695329516 +- kind: conda + name: jedi + version: 0.19.1 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/jedi-0.19.1-pyhd8ed1ab_0.conda + sha256: 362f0936ef37dfd1eaa860190e42a6ebf8faa094eaa3be6aa4d9ace95f40047a + md5: 81a3be0b2023e1ea8555781f0ad904a2 + depends: + - parso >=0.8.3,<0.9.0 + - python >=3.6 + license: MIT + license_family: MIT + purls: + - pkg:pypi/jedi + size: 841312 + timestamp: 1696326218364 - kind: conda name: jeepney version: 0.8.0 @@ -8483,6 +8596,23 @@ packages: - pkg:pypi/pandas?source=conda-forge-mapping size: 14742444 timestamp: 1715898315491 +- kind: conda + name: parso + version: 0.8.4 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/parso-0.8.4-pyhd8ed1ab_0.conda + 
sha256: bfe404eebb930cc41782d34f8fc04c0388ea692eeebe2c5fc28df8ec8d4d61ae + md5: 81534b420deb77da8833f2289b8d47ac + depends: + - python >=3.6 + license: MIT + license_family: MIT + purls: + - pkg:pypi/parso + size: 75191 + timestamp: 1712320447201 - kind: conda name: partd version: 1.4.2 @@ -8537,6 +8667,41 @@ packages: - pkg:pypi/pep517?source=conda-forge-mapping size: 19044 timestamp: 1667916747996 +- kind: conda + name: pexpect + version: 4.9.0 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/pexpect-4.9.0-pyhd8ed1ab_0.conda + sha256: 90a09d134a4a43911b716d4d6eb9d169238aff2349056f7323d9db613812667e + md5: 629f3203c99b32e0988910c93e77f3b6 + depends: + - ptyprocess >=0.5 + - python >=3.7 + license: ISC + purls: + - pkg:pypi/pexpect + size: 53600 + timestamp: 1706113273252 +- kind: conda + name: pickleshare + version: 0.7.5 + build: py_1003 + build_number: 1003 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/pickleshare-0.7.5-py_1003.tar.bz2 + sha256: a1ed1a094dd0d1b94a09ed85c283a0eb28943f2e6f22161fb45e128d35229738 + md5: 415f0ebb6198cc2801c73438a9fb5761 + depends: + - python >=3 + license: MIT + license_family: MIT + purls: + - pkg:pypi/pickleshare + size: 9332 + timestamp: 1602536313357 - kind: conda name: pillow version: 10.3.0 @@ -8689,6 +8854,26 @@ packages: - pyyaml>=5.1 - virtualenv>=20.10.0 requires_python: '>=3.9' +- kind: conda + name: prompt-toolkit + version: 3.0.42 + build: pyha770c72_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/prompt-toolkit-3.0.42-pyha770c72_0.conda + sha256: 58525b2a9305fb154b2b0d43a48b9a6495441b80e4fbea44f2a34a597d2cef16 + md5: 0bf64bf10eee21f46ac83c161917fa86 + depends: + - python >=3.7 + - wcwidth + constrains: + - prompt_toolkit 3.0.42 + license: BSD-3-Clause + license_family: BSD + purls: + - pkg:pypi/prompt-toolkit + size: 270398 + timestamp: 1702399557137 - kind: conda name: protobuf 
version: 4.24.4 @@ -8803,6 +8988,22 @@ packages: purls: [] size: 5625 timestamp: 1606147468727 +- kind: conda + name: ptyprocess + version: 0.7.0 + build: pyhd3deb0d_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/ptyprocess-0.7.0-pyhd3deb0d_0.tar.bz2 + sha256: fb31e006a25eb2e18f3440eb8d17be44c8ccfae559499199f73584566d0a444a + md5: 359eeb6536da0e687af562ed265ec263 + depends: + - python + license: ISC + purls: + - pkg:pypi/ptyprocess + size: 16546 + timestamp: 1609419417991 - kind: conda name: pulseaudio-client version: '17.0' @@ -8824,6 +9025,23 @@ packages: purls: [] size: 757633 timestamp: 1705690081905 +- kind: conda + name: pure_eval + version: 0.2.2 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/pure_eval-0.2.2-pyhd8ed1ab_0.tar.bz2 + sha256: 72792f9fc2b1820e37cc57f84a27bc819c71088c3002ca6db05a2e56404f9d44 + md5: 6784285c7e55cb7212efabc79e4c2883 + depends: + - python >=3.5 + license: MIT + license_family: MIT + purls: + - pkg:pypi/pure-eval + size: 14551 + timestamp: 1642876055775 - kind: conda name: pyarrow version: 15.0.0 @@ -8986,8 +9204,8 @@ packages: - kind: pypi name: pyjson5 version: 1.6.6 - url: https://files.pythonhosted.org/packages/0c/8b/24c7d8f7785bdaab55481b67e736e892b01e10be2366860d67484f48ad50/pyjson5-1.6.6-cp311-cp311-macosx_11_0_arm64.whl - sha256: 7d1fe850aa1763e8f9945a7120f032f5f707d0372d52c8e0fecda2a79640820e + url: https://files.pythonhosted.org/packages/26/ba/c21b9f0fad649cada64fdfbed8546c74c998f137b487affb1be451b23bb8/pyjson5-1.6.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl + sha256: 0abc60630bb1601ae9005e251d30cef363cb4d6cef2d2fb7a160329569170e38 requires_python: ~=3.5 - kind: pypi name: pyjson5 @@ -10251,6 +10469,26 @@ packages: - pkg:pypi/sphinxcontrib-serializinghtml?source=conda-forge-mapping size: 28776 timestamp: 1705118378942 +- kind: conda + name: stack_data + version: 0.6.2 + build: pyhd8ed1ab_0 + subdir: noarch 
+ noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/stack_data-0.6.2-pyhd8ed1ab_0.conda + sha256: a58433e75229bec39f3be50c02efbe9b7083e53a1f31d8ee247564f370191eec + md5: e7df0fdd404616638df5ece6e69ba7af + depends: + - asttokens + - executing + - pure_eval + - python >=3.5 + license: MIT + license_family: MIT + purls: + - pkg:pypi/stack-data + size: 26205 + timestamp: 1669632203115 - kind: pypi name: tabulate version: 0.9.0 @@ -10697,6 +10935,23 @@ packages: - pkg:pypi/tornado?source=conda-forge-mapping size: 857353 timestamp: 1717722957594 +- kind: conda + name: traitlets + version: 5.14.3 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.3-pyhd8ed1ab_0.conda + sha256: 8a64fa0f19022828513667c2c7176cfd125001f3f4b9bc00d33732e627dd2592 + md5: 3df84416a021220d8b5700c613af2dc5 + depends: + - python >=3.8 + license: BSD-3-Clause + license_family: BSD + purls: + - pkg:pypi/traitlets + size: 110187 + timestamp: 1713535244513 - kind: conda name: twine version: 5.1.1 @@ -10825,6 +11080,23 @@ packages: - setuptools>=68 ; extra == 'test' - time-machine>=2.10 ; platform_python_implementation == 'CPython' and extra == 'test' requires_python: '>=3.7' +- kind: conda + name: wcwidth + version: 0.2.13 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/wcwidth-0.2.13-pyhd8ed1ab_0.conda + sha256: b6cd2fee7e728e620ec736d8dfee29c6c9e2adbd4e695a31f1d8f834a83e57e3 + md5: 68f0738df502a14213624b288c60c9ad + depends: + - python >=3.8 + license: MIT + license_family: MIT + purls: + - pkg:pypi/wcwidth + size: 32709 + timestamp: 1704731373922 - kind: conda name: werkzeug version: 3.0.3 diff --git a/pyproject.toml b/pyproject.toml index 865a26b2a..ce1988db8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -291,3 +291,4 @@ pytest = ">=5.2" build = ">=0.6" twine = ">=5.0" ruff = ">=0.4" +ipython = ">=8.0" From 
86ac9cfaa5d4bea2bb9eb05fa8d070c336f4ad0e Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 24 May 2024 11:42:17 -0600 Subject: [PATCH 05/92] fix: ZeroRateMixin shouldn't inherit DataRetrievalBase --- sup3r/bias/mixins.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sup3r/bias/mixins.py b/sup3r/bias/mixins.py index e16863e7f..61c7f9491 100644 --- a/sup3r/bias/mixins.py +++ b/sup3r/bias/mixins.py @@ -94,7 +94,7 @@ def fill_and_smooth(self, return out -class ZeroRateMixin(DataRetrievalBase): +class ZeroRateMixin(): """Estimate zero rate @@ -107,6 +107,7 @@ class ZeroRateMixin(DataRetrievalBase): hydrological simulations of climate change. Journal of Hydrometeorology, 16(6), 2421-2442. """ + @staticmethod def _zero_precipitation_rate(arr: np.ndarray, threshold: float = 0.01): """Rate of (nearly) zero precipitation days @@ -136,6 +137,7 @@ def _zero_precipitation_rate(arr: np.ndarray, threshold: float = 0.01): """ return np.nanmean((arr < 0.01).astype('i')) + @staticmethod def apply_zero_precipitation_rate(arr: np.ndarray, rate: float): """Enforce the zero precipitation rate From a91dad7cd1e7c5c80167ee2c529bf50316c13b9f Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 24 May 2024 13:19:29 -0600 Subject: [PATCH 06/92] refact: Moving PresRat to qdm sub-module --- sup3r/bias/bias_calc.py | 16 --------------- sup3r/bias/qdm.py | 45 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/sup3r/bias/bias_calc.py b/sup3r/bias/bias_calc.py index 21fd3772b..05dfd7c7d 100644 --- a/sup3r/bias/bias_calc.py +++ b/sup3r/bias/bias_calc.py @@ -1092,22 +1092,6 @@ def get_linear_correction(bias_data, base_data, bias_feature, base_dset): return out -class PresRat(DataRetrievalBase): - """PresRat bias correction method (precipitation) - - The PresRat correction is defined as the combination of using the - model-predicted change ratio, the treatment of zero-precipitation days, - and the final correction 
factor (K) [Pierce2015]_. - - References - ---------- - .. [Pierce2015] Pierce, D. W., Cayan, D. R., Maurer, E. P., Abatzoglou, J. - T., & Hegewisch, K. C. (2015). Improved bias correction techniques for - hydrological simulations of climate change. Journal of Hydrometeorology, - 16(6), 2421-2442. - """ - - class SkillAssessment(MonthlyLinearCorrection): """Calculate historical skill of one dataset compared to another.""" diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index 259254f68..944d568ff 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -495,3 +495,48 @@ def run(self, self.write_outputs(fp_out, self.out) return copy.deepcopy(self.out) + + +class PresRat(QuantileDeltaMappingCorrection): + """PresRat bias correction method (precipitation) + + The PresRat correction is defined as the combination of using the + model-predicted change ratio, the treatment of zero-precipitation days, + and the final correction factor (K) [Pierce2015]_. + + References + ---------- + .. [Pierce2015] Pierce, D. W., Cayan, D. R., Maurer, E. P., Abatzoglou, J. + T., & Hegewisch, K. C. (2015). Improved bias correction techniques for + hydrological simulations of climate change. Journal of Hydrometeorology, + 16(6), 2421-2442. + # Todo: + # - Identify Z_gf. (0.01 mm) Also have to save on output params + # - Estimate K = / + """ + + def correction_factor(): + """Preserve the mean precipitation change (K factor) + + When bias correcting, the mean can fall in a different quantile, thus + modifying the corrected mean if the shape of the distribution changes. + That effect is more pronounced in skewed distributions, such as + precipitation. + """ + pass + self.bias_dh.bias_ti + + def dev(): + # Estimate means per month. Later will split this into resolution (n + # of chunks and window width such as 30 days) and end up with one + # value per month to be used later as the K numberator. 
+ t = self.base_dh.time_index + t.month + + base_data, base_ti = cls.get_base_data(base_fps, + base_dset, + base_gid, + base_handler, + daily_reduction=daily_reduction, + decimals=decimals, + base_dh_inst=base_dh_inst) From 41504a04b5e51c6d906b2611b71e1b9db4c1b26a Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 24 May 2024 13:22:57 -0600 Subject: [PATCH 07/92] fix: Use the given threshold (it was hardcoded before) --- sup3r/bias/mixins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sup3r/bias/mixins.py b/sup3r/bias/mixins.py index 61c7f9491..12e400627 100644 --- a/sup3r/bias/mixins.py +++ b/sup3r/bias/mixins.py @@ -135,7 +135,7 @@ def _zero_precipitation_rate(arr: np.ndarray, threshold: float = 0.01): The ``NaN`` are ignored for the rate estimate. Therefore, a large number of ``NaN`` might mislead this rate estimate. """ - return np.nanmean((arr < 0.01).astype('i')) + return np.nanmean((arr < threshold).astype('i')) @staticmethod def apply_zero_precipitation_rate(arr: np.ndarray, rate: float): From 1686520d743f84ee9354c21143b143f84b657b78 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 24 May 2024 13:59:21 -0600 Subject: [PATCH 08/92] PresRat using ZeroRateMixin --- sup3r/bias/qdm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index 944d568ff..70969c7f3 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -21,7 +21,7 @@ from sup3r.preprocessing.data_handling.base import DataHandler from sup3r.utilities.utilities import expand_paths from .bias_calc import DataRetrievalBase -from .mixins import FillAndSmoothMixin +from .mixins import FillAndSmoothMixin, ZeroRateMixin logger = logging.getLogger(__name__) @@ -497,7 +497,7 @@ def run(self, return copy.deepcopy(self.out) -class PresRat(QuantileDeltaMappingCorrection): +class PresRat(ZeroRateMixin, QuantileDeltaMappingCorrection): """PresRat bias correction method (precipitation) The PresRat correction is defined as 
the combination of using the From b92ec3c578e29fe10783157af1c274fba7452416 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 27 May 2024 17:14:39 -0600 Subject: [PATCH 09/92] doc: Explicit steps for PresRat --- sup3r/bias/qdm.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index 70969c7f3..f8b529a8a 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -500,9 +500,12 @@ def run(self, class PresRat(ZeroRateMixin, QuantileDeltaMappingCorrection): """PresRat bias correction method (precipitation) - The PresRat correction is defined as the combination of using the - model-predicted change ratio, the treatment of zero-precipitation days, - and the final correction factor (K) [Pierce2015]_. + The PresRat correction [Pierce2015]_ is defined as the combination of + three steps: + * Use the model-predicted change ratio - require CDFs; + * The treatment of zero-precipitation days - require fraction of dry days; + * The final correction factor (K) to preserve the mean - require mean + estimate; References ---------- From 720848d877643844f50dc10b7abdf2d1b9c78001 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 27 May 2024 21:39:19 -0600 Subject: [PATCH 10/92] Prototype for quantiles on time windows --- sup3r/bias/qdm.py | 58 +++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index f8b529a8a..2e1f89ef7 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -518,28 +518,38 @@ class PresRat(ZeroRateMixin, QuantileDeltaMappingCorrection): # - Estimate K = / """ - def correction_factor(): - """Preserve the mean precipitation change (K factor) + @staticmethod + def _window_mask(d, d0, window_size=31): + d_start = d0 - window_size + d_end = d0 + window_size + if d_start < 0: + idx = (d >= 365 + d_start) | (d <= d_end) + elif d_end > 365: + idx = (d >= d_start) | (d <= 365 - d_end) + else: + idx = (d >= 
d_start) & (d <= d_end) + return idx - When bias correcting, the mean can fall in a different quantile, thus - modifying the corrected mean if the shape of the distribution changes. - That effect is more pronounced in skewed distributions, such as - precipitation. - """ - pass - self.bias_dh.bias_ti - - def dev(): - # Estimate means per month. Later will split this into resolution (n - # of chunks and window width such as 30 days) and end up with one - # value per month to be used later as the K numberator. - t = self.base_dh.time_index - t.month - - base_data, base_ti = cls.get_base_data(base_fps, - base_dset, - base_gid, - base_handler, - daily_reduction=daily_reduction, - decimals=decimals, - base_dh_inst=base_dh_inst) + @staticmethod + def _single_quantile(da, d0, window_size, quantiles): + idx = self._window_mask( + da.time.dt.dayofyear, d0, window_size=window_size + ) + q = ( + da.where(idx, drop=True) + .chunk(dict(time=-1)) + .quantile(quantiles, dim=['time']) + ) + return q.expand_dims({'day_of_year': [d0]}) + + @staticmethod + def windowed_quantiles( + da: xr.DataArray, day_of_year, quantiles, window_size=90 + ): + q = ( + _single_quantile( + da, d0, window_size=window_size, quantiles=quantiles + ) + for d0 in day_of_year + ) + return xr.merge(q) From b173b6cf9a5a9f57ddfca0d7d4502466e0faf930 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 28 May 2024 09:33:59 -0600 Subject: [PATCH 11/92] fix: Missing load PresRat at module --- sup3r/bias/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sup3r/bias/__init__.py b/sup3r/bias/__init__.py index 6fd24ddfe..877f92e8c 100644 --- a/sup3r/bias/__init__.py +++ b/sup3r/bias/__init__.py @@ -4,7 +4,7 @@ local_qdm_bc, monthly_local_linear_bc) from .bias_calc import (LinearCorrection, MonthlyLinearCorrection, MonthlyScalarCorrection, SkillAssessment) -from .qdm import QuantileDeltaMappingCorrection +from .qdm import PresRat, QuantileDeltaMappingCorrection __all__ = [ 
"global_linear_bc", @@ -14,6 +14,7 @@ "LinearCorrection", "MonthlyLinearCorrection", "MonthlyScalarCorrection", + "PresRat", "QuantileDeltaMappingCorrection", "SkillAssessment", ] From 72ebb5e6439c63ee67f95bd6102612789e0fc985 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 3 Jun 2024 11:10:32 -0600 Subject: [PATCH 12/92] Overwritting QDM methods for PresRat requirements --- sup3r/bias/qdm.py | 352 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 352 insertions(+) diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index 2e1f89ef7..933144aa8 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -518,6 +518,358 @@ class PresRat(ZeroRateMixin, QuantileDeltaMappingCorrection): # - Estimate K = / """ + def __init__( + self, + base_fps, + bias_fps, + bias_fut_fps, + base_dset, + bias_feature, + distance_upper_bound=None, + target=None, + shape=None, + base_handler='Resource', + bias_handler='DataHandlerNCforCC', + base_handler_kwargs=None, + bias_handler_kwargs=None, + decimals=None, + match_zero_rate=False, + n_quantiles=101, + dist='empirical', + sampling='linear', + log_base=10, + zero_rate_threshold=0.01, + ): + """ + Parameters + ---------- + base_fps : list | str + One or more baseline .h5 filepaths representing non-biased data to + use + to correct the biased dataset (observed historical in Cannon et. + al. (2015)). This is typically several years of WTK or NSRDB files. + bias_fps : list | str + One or more biased .nc or .h5 filepaths representing the biased + data + to be compared with the baseline data (modeled historical in Cannon + et. al. (2015)). This is typically several years of GCM .nc files. + bias_fut_fps : list | str + Consistent data to `bias_fps` but for a different time period + (modeled + future in Cannon et. al. (2015)). This is the dataset that would be + corrected, while `bias_fsp` is used to provide a transformation map + with the baseline data. + base_dset : str + A single dataset from the base_fps to retrieve. 
In the case of wind + components, this can be U_100m or V_100m which will retrieve + windspeed and winddirection and derive the U/V component. + bias_feature : str + This is the biased feature from bias_fps to retrieve. This should + be a single feature name corresponding to base_dset + distance_upper_bound : float + Upper bound on the nearest neighbor distance in decimal degrees. + This should be the approximate resolution of the low-resolution + bias data. None (default) will calculate this based on the median + distance between points in bias_fps + target : tuple + (lat, lon) lower left corner of raster to retrieve from bias_fps. + If None then the lower left corner of the full domain will be used. + shape : tuple + (rows, cols) grid size to retrieve from bias_fps. If None then the + full domain shape will be used. + base_handler : str + Name of rex resource handler or sup3r.preprocessing.data_handling + class to be retrieved from the rex/sup3r library. If a + sup3r.preprocessing.data_handling class is used, all data will be + loaded in this class' initialization and the subsequent bias + calculation will be done in serial + bias_handler : str + Name of the bias data handler class to be retrieved from the + sup3r.preprocessing.data_handling library. + base_handler_kwargs : dict | None + Optional kwargs to send to the initialization of the base_handler + class + bias_handler_kwargs : dict | None + Optional kwargs to send to the initialization of the bias_handler + class + decimals : int | None + Option to round bias and base data to this number of + decimals, this gets passed to np.around(). If decimals + is negative, it specifies the number of positions to + the left of the decimal point. + match_zero_rate : bool + Option to fix the frequency of zero values in the biased data. The + lowest percentile of values in the biased data will be set to zero + to match the percentile of zeros in the base data. 
If + SkillAssessment is being run and this is True, the distributions + will not be mean-centered. This helps resolve the issue where + global climate models produce too many days with small + precipitation totals e.g., the "drizzle problem" [Polade2014]_. + dist : str, default="empirical", + Define the type of distribution, which can be "empirical" or any + parametric distribution defined in "scipy". + n_quantiles : int, default=101 + Defines the number of quantiles (between 0 and 1) for an empirical + distribution. + sampling : str, default="linear", + Defines how the quantiles are sampled. For instance, 'linear' will + result in a linearly spaced quantiles. Other options are: 'log' + and 'invlog'. + log_base : int or float, default=10 + Log base value if sampling is "log" or "invlog". + zero_rate_threshold : float, default=0.01 + !!!!TODO!!!! + + See Also + -------- + sup3r.bias.bias_transforms.local_qdm_bc : + Bias correction using QDM. + sup3r.preprocessing.data_handling.DataHandler : + Bias correction using QDM directly from a derived handler. + rex.utilities.bc_utils.QuantileDeltaMapping + Quantile Delta Mapping method and support functions. Since + :mod:`rex.utilities.bc_utils` is used here, the arguments + ``dist``, ``n_quantiles``, ``sampling``, and ``log_base`` + must be consitent with that package/module. + + Notes + ----- + One way of using this class is by saving the distributions definitions + obtained here with the method :meth:`.write_outputs` and then use that + file with :func:`~sup3r.bias.bias_transforms.local_qdm_bc` or through + a derived :class:`~sup3r.preprocessing.data_handling.base.DataHandler`. + **ATTENTION**, be careful handling that file of parameters. There is + no checking process and one could missuse the correction estimated for + the wrong dataset. + + References + ---------- + .. [Cannon2015] Cannon, A. J., Sobie, S. R., & Murdock, T. Q. (2015). 
+ Bias correction of GCM precipitation by quantile mapping: how well + do methods preserve changes in quantiles and extremes?. Journal of + Climate, 28(17), 6938-6959. + """ + super().__init__(base_fps=base_fps, + bias_fps=bias_fps, + bias_fut_fps=bias_fut_fps, + base_dset=base_dset, + bias_feature=bias_feature, + distance_upper_bound=distance_upper_bound, + target=target, + shape=shape, + base_handler=base_handler, + bias_handler=bias_handler, + base_handler_kwargs=base_handler_kwargs, + bias_handler_kwargs=bias_handler_kwargs, + decimals=decimals, + match_zero_rate=match_zero_rate, + n_quantiles=n_quantiles, + dist=dist, + sampling=sampling, + log_base=log_base, + ) + self.zero_rate_threshold = zero_rate_threshold + + # pylint: disable=W0613 + @classmethod + def _run_single( + cls, + bias_data, + bias_fut_data, + base_fps, + bias_feature, + base_dset, + base_gid, + base_handler, + daily_reduction, + decimals, + sampling, + n_samples, + log_base, + *, + zero_rate_threshold, + base_dh_inst=None, + ): + """Estimate probability distributions at a single site + + ATTENTION: This should be refactored. There is too much redundancy in + the code. Let's make it work first, and optimize later. 
+ """ + base_data, base_ti = cls.get_base_data( + base_fps, + base_dset, + base_gid, + base_handler, + daily_reduction=daily_reduction, + decimals=decimals, + base_dh_inst=base_dh_inst, + ) + + out = cls.get_qdm_params( + bias_data, + bias_fut_data, + base_data, + bias_feature, + base_dset, + sampling, + n_samples, + log_base, + ) + + out[f'{base_dset}_zero_rate'] = cls.zero_precipitation_rate( + base_data, + zero_rate_threshold, + ) + + return out + + def run( + self, + fp_out=None, + max_workers=None, + daily_reduction='avg', + fill_extend=True, + smooth_extend=0, + smooth_interior=0, + zero_rate_threshold=0.01, + ): + """Estimate the statistical distributions for each location + + Parameters + ---------- + fp_out : str | None + Optional .h5 output file to write scalar and adder arrays. + max_workers : int, optional + Number of workers to run in parallel. 1 is serial and None is all + available. + daily_reduction : None | str + Option to do a reduction of the hourly+ source base data to daily + data. Can be None (no reduction, keep source time frequency), "avg" + (daily average), "max" (daily max), "min" (daily min), + "sum" (daily sum/total) + + Returns + ------- + out : dict + Dictionary with parameters defining the statistical distributions + for each of the three given datasets. Each value has dimensions + (lat, lon, n-parameters). 
+ """ + + logger.debug('Calculate CDF parameters for QDM') + + logger.info( + 'Initialized params with shape: {}'.format( + self.bias_gid_raster.shape + ) + ) + self.bad_bias_gids = [] + + # sup3r DataHandler opening base files will load all data in parallel + # during the init and should not be passed in parallel to workers + if isinstance(self.base_dh, DataHandler): + max_workers = 1 + + if max_workers == 1: + logger.debug('Running serial calculation.') + for i, bias_gid in enumerate(self.bias_meta.index): + raster_loc = np.where(self.bias_gid_raster == bias_gid) + _, base_gid = self.get_base_gid(bias_gid) + + if not base_gid.any(): + self.bad_bias_gids.append(bias_gid) + logger.debug( + f'No base data for bias_gid: {bias_gid}. ' + 'Adding it to bad_bias_gids' + ) + else: + bias_data = self.get_bias_data(bias_gid) + bias_fut_data = self.get_bias_data( + bias_gid, self.bias_fut_dh + ) + single_out = self._run_single( + bias_data, + bias_fut_data, + self.base_fps, + self.bias_feature, + self.base_dset, + base_gid, + self.base_handler, + daily_reduction, + self.decimals, + sampling=self.sampling, + n_samples=self.n_quantiles, + log_base=self.log_base, + base_dh_inst=self.base_dh, + zero_rate_threshold=zero_rate_threshold, + ) + for key, arr in single_out.items(): + self.out[key][raster_loc] = arr + + logger.info( + 'Completed bias calculations for {} out of {} ' + 'sites'.format(i + 1, len(self.bias_meta)) + ) + + else: + logger.debug( + 'Running parallel calculation with {} workers.'.format( + max_workers + ) + ) + with ProcessPoolExecutor(max_workers=max_workers) as exe: + futures = {} + for bias_gid in self.bias_meta.index: + raster_loc = np.where(self.bias_gid_raster == bias_gid) + _, base_gid = self.get_base_gid(bias_gid) + + if not base_gid.any(): + self.bad_bias_gids.append(bias_gid) + else: + bias_data = self.get_bias_data(bias_gid) + bias_fut_data = self.get_bias_data( + bias_gid, self.bias_fut_dh + ) + future = exe.submit( + self._run_single, + bias_data, 
+ bias_fut_data, + self.base_fps, + self.bias_feature, + self.base_dset, + base_gid, + self.base_handler, + daily_reduction, + self.decimals, + sampling=self.sampling, + n_samples=self.n_quantiles, + log_base=self.log_base, + zero_rate_threshold=zero_rate_threshold, + ) + futures[future] = raster_loc + + logger.debug('Finished launching futures.') + for i, future in enumerate(as_completed(futures)): + raster_loc = futures[future] + single_out = future.result() + for key, arr in single_out.items(): + self.out[key][raster_loc] = arr + + logger.info( + 'Completed bias calculations for {} out of {} ' + 'sites'.format(i + 1, len(futures)) + ) + + logger.info('Finished calculating bias correction factors.') + + self.out = self.fill_and_smooth( + self.out, fill_extend, smooth_extend, smooth_interior + ) + + self.write_outputs(fp_out, self.out) + + return copy.deepcopy(self.out) + @staticmethod def _window_mask(d, d0, window_size=31): d_start = d0 - window_size From 67a3387833c934ab4b463939ccda1a2e9a0f0744 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 3 Jun 2024 11:13:32 -0600 Subject: [PATCH 13/92] Don't need to overwrite __init__ if defining threshold in the runtime --- sup3r/bias/qdm.py | 152 ---------------------------------------------- 1 file changed, 152 deletions(-) diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index 933144aa8..34c976932 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -518,158 +518,6 @@ class PresRat(ZeroRateMixin, QuantileDeltaMappingCorrection): # - Estimate K = / """ - def __init__( - self, - base_fps, - bias_fps, - bias_fut_fps, - base_dset, - bias_feature, - distance_upper_bound=None, - target=None, - shape=None, - base_handler='Resource', - bias_handler='DataHandlerNCforCC', - base_handler_kwargs=None, - bias_handler_kwargs=None, - decimals=None, - match_zero_rate=False, - n_quantiles=101, - dist='empirical', - sampling='linear', - log_base=10, - zero_rate_threshold=0.01, - ): - """ - Parameters - ---------- - base_fps 
: list | str - One or more baseline .h5 filepaths representing non-biased data to - use - to correct the biased dataset (observed historical in Cannon et. - al. (2015)). This is typically several years of WTK or NSRDB files. - bias_fps : list | str - One or more biased .nc or .h5 filepaths representing the biased - data - to be compared with the baseline data (modeled historical in Cannon - et. al. (2015)). This is typically several years of GCM .nc files. - bias_fut_fps : list | str - Consistent data to `bias_fps` but for a different time period - (modeled - future in Cannon et. al. (2015)). This is the dataset that would be - corrected, while `bias_fsp` is used to provide a transformation map - with the baseline data. - base_dset : str - A single dataset from the base_fps to retrieve. In the case of wind - components, this can be U_100m or V_100m which will retrieve - windspeed and winddirection and derive the U/V component. - bias_feature : str - This is the biased feature from bias_fps to retrieve. This should - be a single feature name corresponding to base_dset - distance_upper_bound : float - Upper bound on the nearest neighbor distance in decimal degrees. - This should be the approximate resolution of the low-resolution - bias data. None (default) will calculate this based on the median - distance between points in bias_fps - target : tuple - (lat, lon) lower left corner of raster to retrieve from bias_fps. - If None then the lower left corner of the full domain will be used. - shape : tuple - (rows, cols) grid size to retrieve from bias_fps. If None then the - full domain shape will be used. - base_handler : str - Name of rex resource handler or sup3r.preprocessing.data_handling - class to be retrieved from the rex/sup3r library. 
If a - sup3r.preprocessing.data_handling class is used, all data will be - loaded in this class' initialization and the subsequent bias - calculation will be done in serial - bias_handler : str - Name of the bias data handler class to be retrieved from the - sup3r.preprocessing.data_handling library. - base_handler_kwargs : dict | None - Optional kwargs to send to the initialization of the base_handler - class - bias_handler_kwargs : dict | None - Optional kwargs to send to the initialization of the bias_handler - class - decimals : int | None - Option to round bias and base data to this number of - decimals, this gets passed to np.around(). If decimals - is negative, it specifies the number of positions to - the left of the decimal point. - match_zero_rate : bool - Option to fix the frequency of zero values in the biased data. The - lowest percentile of values in the biased data will be set to zero - to match the percentile of zeros in the base data. If - SkillAssessment is being run and this is True, the distributions - will not be mean-centered. This helps resolve the issue where - global climate models produce too many days with small - precipitation totals e.g., the "drizzle problem" [Polade2014]_. - dist : str, default="empirical", - Define the type of distribution, which can be "empirical" or any - parametric distribution defined in "scipy". - n_quantiles : int, default=101 - Defines the number of quantiles (between 0 and 1) for an empirical - distribution. - sampling : str, default="linear", - Defines how the quantiles are sampled. For instance, 'linear' will - result in a linearly spaced quantiles. Other options are: 'log' - and 'invlog'. - log_base : int or float, default=10 - Log base value if sampling is "log" or "invlog". - zero_rate_threshold : float, default=0.01 - !!!!TODO!!!! - - See Also - -------- - sup3r.bias.bias_transforms.local_qdm_bc : - Bias correction using QDM. 
- sup3r.preprocessing.data_handling.DataHandler : - Bias correction using QDM directly from a derived handler. - rex.utilities.bc_utils.QuantileDeltaMapping - Quantile Delta Mapping method and support functions. Since - :mod:`rex.utilities.bc_utils` is used here, the arguments - ``dist``, ``n_quantiles``, ``sampling``, and ``log_base`` - must be consitent with that package/module. - - Notes - ----- - One way of using this class is by saving the distributions definitions - obtained here with the method :meth:`.write_outputs` and then use that - file with :func:`~sup3r.bias.bias_transforms.local_qdm_bc` or through - a derived :class:`~sup3r.preprocessing.data_handling.base.DataHandler`. - **ATTENTION**, be careful handling that file of parameters. There is - no checking process and one could missuse the correction estimated for - the wrong dataset. - - References - ---------- - .. [Cannon2015] Cannon, A. J., Sobie, S. R., & Murdock, T. Q. (2015). - Bias correction of GCM precipitation by quantile mapping: how well - do methods preserve changes in quantiles and extremes?. Journal of - Climate, 28(17), 6938-6959. 
- """ - super().__init__(base_fps=base_fps, - bias_fps=bias_fps, - bias_fut_fps=bias_fut_fps, - base_dset=base_dset, - bias_feature=bias_feature, - distance_upper_bound=distance_upper_bound, - target=target, - shape=shape, - base_handler=base_handler, - bias_handler=bias_handler, - base_handler_kwargs=base_handler_kwargs, - bias_handler_kwargs=bias_handler_kwargs, - decimals=decimals, - match_zero_rate=match_zero_rate, - n_quantiles=n_quantiles, - dist=dist, - sampling=sampling, - log_base=log_base, - ) - self.zero_rate_threshold = zero_rate_threshold - # pylint: disable=W0613 @classmethod def _run_single( From d53d8b63e2cff88659d7188824ac44c604213858 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 3 Jun 2024 11:37:08 -0600 Subject: [PATCH 14/92] doc: More details on what is PresRat --- sup3r/bias/qdm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index 34c976932..13c8741f4 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -502,10 +502,10 @@ class PresRat(ZeroRateMixin, QuantileDeltaMappingCorrection): The PresRat correction [Pierce2015]_ is defined as the combination of three steps: - * Use the model-predicted change ratio - require CDFs; - * The treatment of zero-precipitation days - require fraction of dry days; - * The final correction factor (K) to preserve the mean - require mean - estimate; + * Use the model-predicted change ratio (with the CDFs); + * The treatment of zero-precipitation days (with the fraction of dry days); + * The final correction factor (K) to preserve the mean (ratio between both + estimated means); References ---------- From cf16c8222e04cd03cea3bb04514bb9e81d24b7a1 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 6 Jun 2024 10:12:34 -0600 Subject: [PATCH 15/92] Testing apply_zero_precipitation_rate() --- tests/bias/test_qdm_bias_correction.py | 96 +++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 3 deletions(-) diff --git 
a/tests/bias/test_qdm_bias_correction.py b/tests/bias/test_qdm_bias_correction.py index 457b5411a..5eff2cae5 100644 --- a/tests/bias/test_qdm_bias_correction.py +++ b/tests/bias/test_qdm_bias_correction.py @@ -11,7 +11,13 @@ from sup3r import CONFIG_DIR, TEST_DATA_DIR from sup3r.models import Sup3rGan from sup3r.pipeline.forward_pass import ForwardPass, ForwardPassStrategy -from sup3r.bias import local_qdm_bc, QuantileDeltaMappingCorrection +from sup3r.bias import ( + apply_zero_precipitation_rate, + local_qdm_bc, + local_presrat_bc, + PresRat, + QuantileDeltaMappingCorrection, +) from sup3r.preprocessing.data_handling import DataHandlerNC, DataHandlerNCforCC FP_NSRDB = os.path.join(TEST_DATA_DIR, "test_nsrdb_co_2018.h5") @@ -465,5 +471,89 @@ def test_fwp_integration(tmp_path): ), "V reference offset is 1" delta = bc_fwp.run_chunk() - fwp.run_chunk() - assert delta[..., 0].mean() < 0, "Predicted U should trend <0" - assert delta[..., 1].mean() > 0, "Predicted V should trend >0" + assert delta[..., 0].mean() < 0, 'Predicted U should trend <0' + assert delta[..., 1].mean() > 0, 'Predicted V should trend >0' + + +def test_apply_zero_precipitation_rate(): + data = np.array([5, 0.1, 3, 0.2, 1]) + out = apply_zero_precipitation_rate(data, 0.25) + + assert np.allclose(np.array([5.0, 0.0, 3, 0.2, 1.0]), out, equal_nan=True) + + +def test_apply_zero_precipitation_rate_nan(): + data = np.array([5, 0.1, np.nan, 0.2, 1]) + out = apply_zero_precipitation_rate(data, 0.25) + + assert np.allclose( + np.array([5.0, 0.0, np.nan, 0.2, 1.0]), out, equal_nan=True + ) + + +def test_apply_zero_precipitation_rate_2D(): + data = np.array( + [ + [5, 0.1, np.nan, 0.2, 1], + [5, 0.1, 3, 0.2, 1], + ] + ) + out = apply_zero_precipitation_rate(data, [0.25, 0.41]) + + assert np.allclose( + np.array([[5.0, 0.0, np.nan, 0.2, 1.0], [5.0, 0.0, 3, 0.0, 1.0]]), + out, + equal_nan=True, + ) + +@pytest.mark.skip() +def test_presrat(fp_fut_cc): + """Test PresRat correction procedure + + Basic standard 
run. Using only required arguments. If this fails, + something fundamental is wrong. + """ + calc = PresRat( + FP_NSRDB, + FP_CC, + fp_fut_cc, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + bias_handler='DataHandlerNCforCC', + ) + + # A high zero_rate_threshold to gets at least something. + out = calc.run(max_workers=1, zero_rate_threshold=50) + + # Guarantee that we have some actual values, otherwise most of the + # remaining tests would be useless + for v in out: + assert np.isfinite(out[v]).any(), 'Something wrong, all CDFs are NaN.' + + # Check possible range + for v in out: + assert np.nanmin(out[v]) > 0, f'{v} should be all greater than zero.' + assert np.nanmax(out[v]) < 1300, f'{v} should be all less than 1300.' + + # Each location can be all finite or all NaN, but not both + for v in (v for v in out if len(out[v].shape) > 2): + tmp = np.isfinite(out[v].reshape(-1, *out[v].shape[2:])) + assert np.all( + np.all(tmp, axis=1) == ~np.all(~tmp, axis=1) + ), f'For each location of {v} it should be all finite or nonte' + + +@pytest.mark.skip() +def test_presrat_transform(presrat_params): + """ + WIP: Confirm it runs, but don't verify anything yet. + """ + data = np.ones((*FP_CC_LAT_LON.shape[:-1], 2)) + corrected = local_presrat_bc( + data, FP_CC_LAT_LON, 'ghi', 'rsds', presrat_params + ) + + assert not np.isnan(corrected).all(), "Can't compare if only NaN" + assert not np.allclose(data, corrected, equal_nan=False) From 41baa19656b87e8c6febcb5511333c3b1bfaedeb Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 10 Jun 2024 18:18:40 -0600 Subject: [PATCH 16/92] test: zero_precipitation_rate() Checking some edge cases that were wrong before. 
--- tests/bias/test_presrat_bias_correction.py | 138 +++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 tests/bias/test_presrat_bias_correction.py diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py new file mode 100644 index 000000000..5599965a8 --- /dev/null +++ b/tests/bias/test_presrat_bias_correction.py @@ -0,0 +1,138 @@ + + +@pytest.fixture(scope='module') +def fp_fut_cc(tmpdir_factory): + """Sample future CC dataset + + The same CC but with an offset (75.0) and negligible noise. + """ + fn = tmpdir_factory.mktemp('data').join('test_mf.nc') + ds = xr.open_dataset(FP_CC) + # Adding an offset + ds['rsds'] += 75.0 + # adding a noise + ds['rsds'] += np.random.randn(*ds['rsds'].shape) + ds.to_netcdf(fn) + # DataHandlerNCforCC requires a string + fn = str(fn) + return fn + + +@pytest.fixture(scope='module') +def fp_fut_cc_notrend(tmpdir_factory): + """Sample future CC dataset identical to historical CC + + This is currently a copy of FP_CC, thus no trend on time. 
+ """ + fn = tmpdir_factory.mktemp('data').join('test_mf_notrend.nc') + shutil.copyfile(FP_CC, fn) + # DataHandlerNCforCC requires a string + fn = str(fn) + return fn + + +@pytest.fixture(scope='module') +def dist_params(tmpdir_factory, fp_fut_cc): + """Distribution parameters for standard datasets + + Use the standard datasets to estimate the distributions and save + in a temporary place to be re-used + """ + calc = QuantileDeltaMappingCorrection( + FP_NSRDB, + FP_CC, + fp_fut_cc, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + distance_upper_bound=0.7, + bias_handler='DataHandlerNCforCC', + ) + fn = tmpdir_factory.mktemp('params').join('standard.h5') + _ = calc.run(max_workers=1, fp_out=fn) + + # DataHandlerNCforCC requires a string + fn = str(fn) + + return fn + + +@pytest.fixture(scope='module') +def presrat_params(tmpdir_factory, fp_fut_cc): + """PresRat parameters for standard datasets + + Use the standard datasets to estimate the distributions and save + in a temporary place to be re-used + """ + calc = PresRat( + FP_NSRDB, + FP_CC, + fp_fut_cc, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + distance_upper_bound=0.7, + bias_handler='DataHandlerNCforCC', + ) + fn = tmpdir_factory.mktemp('params').join('presrat.h5') + # Physically non-sense threshold choosed to result in gridpoints with and + # without zero rate correction for the given testing dataset. 
+    _ = calc.run(zero_rate_threshold=80, fp_out=fn)
+
+    # DataHandlerNCforCC requires a string
+    fn = str(fn)
+
+    return fn
+
+
+def test_zero_precipitation_rate():
+    """Zero rate estimate using the sample median as the threshold"""
+    f = ZeroRateMixin().zero_precipitation_rate
+    arr = np.random.randn(100)
+
+    rate = f(arr, threshold=np.median(arr))
+    assert rate == 0.5
+
+
+def test_zero_precipitation_rate_extremes():
+    """Zero rate estimate with extreme thresholds"""
+    f = ZeroRateMixin().zero_precipitation_rate
+    arr = np.arange(10)
+
+    rate = f(arr, threshold=-1)
+    assert rate == 0
+
+    rate = f(arr, threshold=0)
+    assert rate == 0
+
+    # Remember, 9 is the last value, i.e. the 10th value
+    rate = f(arr, threshold=9)
+    assert rate == 0.9
+
+    rate = f(arr, threshold=100)
+    assert rate == 1
+
+
+def test_zero_precipitation_rate_nanonly():
+    """Zero rate estimate with only NaNs gives NaN"""
+    f = ZeroRateMixin().zero_precipitation_rate
+    arr = np.arange(10)
+
+    # All NaN gives NaN rate
+    rate = f(np.nan * arr)
+    assert np.isnan(rate)
+
+
+def test_zero_precipitation_rate_nan():
+    """Zero rate estimate with NaNs
+
+    NaN shouldn't be counted to find the rate.
+ """ + f = ZeroRateMixin().zero_precipitation_rate + arr = np.arange(10) + + r1 = f(arr, threshold=5) + r2 = f(np.concatenate([5*[np.nan], arr]), threshold=5) + assert r1 == r2 From a1d484ad8eddfa015590d430da0a3fd25fdcf8db Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Wed, 12 Jun 2024 11:40:49 -0600 Subject: [PATCH 17/92] test: presrat_zero_rate with multiple thresholds --- tests/bias/test_presrat_bias_correction.py | 91 ++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 5599965a8..f31eb8907 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -136,3 +136,94 @@ def test_zero_precipitation_rate_nan(): r1 = f(arr, threshold=5) r2 = f(np.concatenate([5*[np.nan], arr]), threshold=5) assert r1 == r2 + +""" + breakpoint() + + # Physically non sense threshold. + out = calc.run(zero_rate_threshold=0) + + assert 'ghi_zero_rate' in out, 'Missing ghi_zero_rate in calc output' + zero_rate = out['ghi_zero_rate'] + assert np.all(np.isfinite(zero_rate)), "Unexpected NaN for ghi_zero_rate" + assert np.all(zero_rate==0), "It should be all zero percent" + + # Physically non sense threshold. + out = calc.run(zero_rate_threshold=1e6) + + assert 'ghi_zero_rate' in out, 'Missing ghi_zero_rate in calc output' + zero_rate = out['ghi_zero_rate'] + assert np.all(np.isfinite(zero_rate)), "Unexpected NaN for ghi_zero_rate" + assert np.all(zero_rate==1), "It should be all zero percent" +""" + +def test_presrat_zero_rate(fp_fut_cc): + """Estimate zero_rate within PresRat.run()""" + calc = PresRat( + FP_NSRDB, + FP_CC, + fp_fut_cc, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + bias_handler='DataHandlerNCforCC', + ) + + # Physically non sense threshold. 
+ out = calc.run(zero_rate_threshold=50) + + assert 'ghi_zero_rate' in out, 'Missing ghi_zero_rate in calc output' + zero_rate = out['ghi_zero_rate'] + assert np.all(np.isfinite(zero_rate)), "Unexpected NaN for ghi_zero_rate" + assert np.all((zero_rate>=0) & (zero_rate<=1)), "Out of range [0, 1]" + + +def test_presrat_zero_rate_threshold_zero(fp_fut_cc): + """Estimate zero_rate within PresRat.run(), zero threshold + + This should give a zero rate answer, since all values are higher. + """ + calc = PresRat( + FP_NSRDB, + FP_CC, + fp_fut_cc, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + bias_handler='DataHandlerNCforCC', + ) + + # Physically non sense threshold. + out = calc.run(zero_rate_threshold=0) + + assert 'ghi_zero_rate' in out, 'Missing ghi_zero_rate in calc output' + zero_rate = out['ghi_zero_rate'] + assert np.all(np.isfinite(zero_rate)), "Unexpected NaN for ghi_zero_rate" + assert np.all(zero_rate==0), "Threshold=0, rate should be 0" + + +def test_presrat_zero_rate_threshold_1e9(fp_fut_cc): + """Estimate zero_rate within PresRat.run(), zero threshold + + This should give a zero rate answer, since all values are lower. + """ + calc = PresRat( + FP_NSRDB, + FP_CC, + fp_fut_cc, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + bias_handler='DataHandlerNCforCC', + ) + + # Physically non sense threshold. 
+ out = calc.run(zero_rate_threshold=1e9) + + assert 'ghi_zero_rate' in out, 'Missing ghi_zero_rate in calc output' + zero_rate = out['ghi_zero_rate'] + assert np.all(np.isfinite(zero_rate)), "Unexpected NaN for ghi_zero_rate" + assert np.all(zero_rate==1), "Threshold=0, rate should be 0" From d3a7ba47a56f6608545d0cd85f8a2253216f4bf8 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Wed, 12 Jun 2024 11:49:01 -0600 Subject: [PATCH 18/92] fix, doc: Typo 'monthly' --- sup3r/bias/bias_calc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sup3r/bias/bias_calc.py b/sup3r/bias/bias_calc.py index 05dfd7c7d..5fa77e98d 100644 --- a/sup3r/bias/bias_calc.py +++ b/sup3r/bias/bias_calc.py @@ -984,7 +984,7 @@ def run(self, class MonthlyLinearCorrection(LinearCorrection): """Calculate linear correction *scalar +adder factors to bias correct data - This calculation operates on single bias sites on a montly basis + This calculation operates on single bias sites on a monthly basis """ NT = 12 @@ -1044,7 +1044,7 @@ class MonthlyScalarCorrection(MonthlyLinearCorrection): scalar factors are computed as mean(base_data) / mean(bias_data). Adder factors are still written but are exactly zero. 
- This calculation operates on single bias sites on a montly basis + This calculation operates on single bias sites on a monthly basis """ @staticmethod From 3ea12ee479859f81912cdb877de4d21fcff2a679 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 13 Jun 2024 11:00:58 -0600 Subject: [PATCH 19/92] test: apply_zero_precipitation_rate() --- tests/bias/test_presrat_bias_correction.py | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index f31eb8907..72a0d1a32 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -227,3 +227,35 @@ def test_presrat_zero_rate_threshold_1e9(fp_fut_cc): zero_rate = out['ghi_zero_rate'] assert np.all(np.isfinite(zero_rate)), "Unexpected NaN for ghi_zero_rate" assert np.all(zero_rate==1), "Threshold=0, rate should be 0" + + +def test_apply_zero_precipitation_rate(): + data = np.array([[[5, 0.1, 3, 0.2, 1]]]) + out = apply_zero_precipitation_rate(data, np.array([[[0.25]]])) + + assert np.allclose([5.0, 0.0, 3, 0.2, 1.0], out, equal_nan=True) + + +def test_apply_zero_precipitation_rate_nan(): + data = np.array([[[5, 0.1, np.nan, 0.2, 1]]]) + out = apply_zero_precipitation_rate(data, np.array([[[0.25]]])) + + assert np.allclose( + [5.0, 0.0, np.nan, 0.2, 1.0], out, equal_nan=True + ) + + +def test_apply_zero_precipitation_rate_2D(): + data = np.array( + [[ + [5, 0.1, np.nan, 0.2, 1], + [5, 0.1, 3, 0.2, 1], + ]] + ) + out = apply_zero_precipitation_rate(data, np.array([[[0.25], [0.41]]])) + + assert np.allclose( + [[5.0, 0.0, np.nan, 0.2, 1.0], [5.0, 0.0, 3, 0.0, 1.0]], + out, + equal_nan=True, + ) From d3a9d3f0d5d945f9570b6cf16e25de933ec3e3fd Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 13 Jun 2024 11:04:20 -0600 Subject: [PATCH 20/92] test: Minimalist check for test_presrat_transform() --- tests/bias/test_presrat_bias_correction.py | 53 ++++++++++++++++++++++ 1 file 
changed, 53 insertions(+) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 72a0d1a32..5d6ade493 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -259,3 +259,56 @@ def test_apply_zero_precipitation_rate_2D(): out, equal_nan=True, ) + +def test_presrat(fp_fut_cc): + """Test PresRat correction procedure + + Basic standard run. Using only required arguments. If this fails, + something fundamental is wrong. + """ + calc = PresRat( + FP_NSRDB, + FP_CC, + fp_fut_cc, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + bias_handler='DataHandlerNCforCC', + ) + + # A high zero_rate_threshold to gets at least something. + out = calc.run(zero_rate_threshold=50) + + # Guarantee that we have some actual values, otherwise most of the + # remaining tests would be useless + for v in out: + assert np.isfinite(out[v]).any(), 'Something wrong, all CDFs are NaN.' + + # Check possible range + for v in out: + assert np.nanmin(out[v]) > 0, f'{v} should be all greater than zero.' + assert np.nanmax(out[v]) < 1300, f'{v} should be all less than 1300.' + + # Each location can be all finite or all NaN, but not both + for v in (v for v in out if len(out[v].shape) > 2): + tmp = np.isfinite(out[v].reshape(-1, *out[v].shape[2:])) + assert np.all( + np.all(tmp, axis=1) == ~np.all(~tmp, axis=1) + ), f'For each location of {v} it should be all finite or nonte' + + +def test_presrat_transform(presrat_params, fp_fut_cc): + """ + WIP: Confirm it runs, but don't verify anything yet. 
+ """ + data = np.ones((*FP_CC_LAT_LON.shape[:-1], 3)) + time = pd.to_datetime( + ['2015-01-01 12:00:00', '2015-01-02 12:00:00', '2015-01-01 12:00:00'] + ) + corrected = local_presrat_bc( + data, time, FP_CC_LAT_LON, 'ghi', 'rsds', presrat_params + ) + + assert not np.isnan(corrected).all(), "Can't compare if only NaN" + assert not np.allclose(data, corrected, equal_nan=False) From 5d9a09d03c3de723358db3c3ab2eecf3b58d3f04 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 13 Jun 2024 12:35:11 -0600 Subject: [PATCH 21/92] test: parallel results must equal serial --- tests/bias/test_presrat_bias_correction.py | 42 ++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 5d6ade493..a73cae12a 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -157,6 +157,48 @@ def test_zero_precipitation_rate_nan(): assert np.all(zero_rate==1), "It should be all zero percent" """ + +@pytest.mark.parametrize("threshold", [0, 50, 1e6]) +def test_parallel(fp_fut_cc, threshold): + """Running in parallel must not alter results + + Check with different thresholds that will result in different zero rates. + """ + s = PresRat( + FP_NSRDB, + FP_CC, + fp_fut_cc, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + bias_handler='DataHandlerNCforCC', + ) + + # Physically non sense threshold. + out_s = s.run(max_workers=1, zero_rate_threshold=threshold) + + p = PresRat( + FP_NSRDB, + FP_CC, + fp_fut_cc, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + bias_handler='DataHandlerNCforCC', + ) + + # Physically non sense threshold. 
+ out_p = p.run(max_workers=2, zero_rate_threshold=threshold) + + for k in out_s.keys(): + assert k in out_p, f'Missing {k} in parallel run' + assert np.allclose( + out_s[k], out_p[k], equal_nan=True + ), f'Different results for {k}' + + def test_presrat_zero_rate(fp_fut_cc): """Estimate zero_rate within PresRat.run()""" calc = PresRat( From 6a89536b620534b862175bf4ae4210db7eaf5dd5 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 17 Jun 2024 10:39:25 -0600 Subject: [PATCH 22/92] style: Fixing code style --- tests/bias/test_presrat_bias_correction.py | 28 ++++++++++++---------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index a73cae12a..db6bbe393 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -134,9 +134,10 @@ def test_zero_precipitation_rate_nan(): arr = np.arange(10) r1 = f(arr, threshold=5) - r2 = f(np.concatenate([5*[np.nan], arr]), threshold=5) + r2 = f(np.concatenate([5 * [np.nan], arr]), threshold=5) assert r1 == r2 + """ breakpoint() @@ -158,7 +159,7 @@ def test_zero_precipitation_rate_nan(): """ -@pytest.mark.parametrize("threshold", [0, 50, 1e6]) +@pytest.mark.parametrize('threshold', [0, 50, 1e6]) def test_parallel(fp_fut_cc, threshold): """Running in parallel must not alter results @@ -242,8 +243,8 @@ def test_presrat_zero_rate_threshold_zero(fp_fut_cc): assert 'ghi_zero_rate' in out, 'Missing ghi_zero_rate in calc output' zero_rate = out['ghi_zero_rate'] - assert np.all(np.isfinite(zero_rate)), "Unexpected NaN for ghi_zero_rate" - assert np.all(zero_rate==0), "Threshold=0, rate should be 0" + assert np.all(np.isfinite(zero_rate)), 'Unexpected NaN for ghi_zero_rate' + assert np.all(zero_rate == 0), 'Threshold=0, rate should be 0' def test_presrat_zero_rate_threshold_1e9(fp_fut_cc): @@ -267,8 +268,8 @@ def test_presrat_zero_rate_threshold_1e9(fp_fut_cc): assert 'ghi_zero_rate' in 
out, 'Missing ghi_zero_rate in calc output' zero_rate = out['ghi_zero_rate'] - assert np.all(np.isfinite(zero_rate)), "Unexpected NaN for ghi_zero_rate" - assert np.all(zero_rate==1), "Threshold=0, rate should be 0" + assert np.all(np.isfinite(zero_rate)), 'Unexpected NaN for ghi_zero_rate' + assert np.all(zero_rate == 1), 'Threshold=0, rate should be 0' def test_apply_zero_precipitation_rate(): @@ -282,17 +283,17 @@ def test_apply_zero_precipitation_rate_nan(): data = np.array([[[5, 0.1, np.nan, 0.2, 1]]]) out = apply_zero_precipitation_rate(data, np.array([[[0.25]]])) - assert np.allclose( - [5.0, 0.0, np.nan, 0.2, 1.0], out, equal_nan=True - ) + assert np.allclose([5.0, 0.0, np.nan, 0.2, 1.0], out, equal_nan=True) def test_apply_zero_precipitation_rate_2D(): data = np.array( - [[ - [5, 0.1, np.nan, 0.2, 1], - [5, 0.1, 3, 0.2, 1], - ]] + [ + [ + [5, 0.1, np.nan, 0.2, 1], + [5, 0.1, 3, 0.2, 1], + ] + ] ) out = apply_zero_precipitation_rate(data, np.array([[[0.25], [0.41]]])) @@ -302,6 +303,7 @@ def test_apply_zero_precipitation_rate_2D(): equal_nan=True, ) + def test_presrat(fp_fut_cc): """Test PresRat correction procedure From 4461c413618877a21987216a9bf479dc2605324c Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 20 Jun 2024 09:03:32 -0600 Subject: [PATCH 23/92] test: test_presrat_transform_nochanges() --- tests/bias/test_presrat_bias_correction.py | 48 ++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index db6bbe393..4c4f92d8c 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -356,3 +356,51 @@ def test_presrat_transform(presrat_params, fp_fut_cc): assert not np.isnan(corrected).all(), "Can't compare if only NaN" assert not np.allclose(data, corrected, equal_nan=False) + + +def test_presrat_transform_nochanges(presrat_nochanges_params, fp_fut_cc_notrend): + """No changes if the three datasets 
are the same and no zeros""" + # data = np.ones((*FP_CC_LAT_LON.shape[:-1], 3)) + # time = pd.to_datetime( ['2015-01-01 12:00:00', '2015-01-02 12:00:00', '2015-01-01 12:00:00']) + breakpoint() + ds = xr.open_dataset(fp_fut_cc_notrend) + da = ds['rsds'].compute().transpose('lat', 'lon', 'time') + # Unfortunatelly, _get_factors() assume latitude as descending + da = da.sortby('lat', ascending=False) + data = da.data + time = pd.to_datetime(da.time) + # latlon = np.stack(xr.broadcast(da["lat"], da["lon"] - 360), axis=-1).astype('float32') + latlon = np.stack(np.meshgrid(da['lon'] - 360, da['lat']), axis=-1)[ + :, :, ::-1 + ].astype('float32') + # da.sel(lat=latlon[ii,jj,0], method="nearest").sel(lon=latlon[ii,jj,1], method="nearest")[0] - data[ii,jj,0] + for ii in range(4): + for jj in range(4): + np.allclose( + da.sel(lat=latlon[ii, jj, 0], method='nearest').sel( + lon=latlon[ii, jj, 1] + 360, method='nearest' + )[0], + data[ii, jj, 0], + ) + np.allclose(latlon, FP_CC_LAT_LON) + + # data = np.ones((*FP_CC_LAT_LON.shape[:-1], 3)) + # time = pd.to_datetime( + # ['2015-01-01 12:00:00', '2015-01-02 12:00:00', '2015-01-01 12:00:00'] + # ) + corrected = local_presrat_bc( + data, + time, + latlon, + 'ghi', + 'rsds', + presrat_nochanges_params, + ) + + assert not np.isnan(corrected).all(), "Can't compare if only NaN" + assert np.allclose(data, corrected, equal_nan=False) + + +""" +Test concepts to implement: +""" From df92e2c7c6a9f4a3ad8d0b1a07680c3536942580 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 20 Jun 2024 09:04:41 -0600 Subject: [PATCH 24/92] test: Missing to update PresRat standard params for testing --- tests/bias/test_presrat_bias_correction.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 4c4f92d8c..23d7a2810 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -32,13 
+32,13 @@ def fp_fut_cc_notrend(tmpdir_factory): @pytest.fixture(scope='module') -def dist_params(tmpdir_factory, fp_fut_cc): - """Distribution parameters for standard datasets +def presrat_params(tmpdir_factory, fp_fut_cc): + """PresRat parameters for standard datasets Use the standard datasets to estimate the distributions and save in a temporary place to be re-used """ - calc = QuantileDeltaMappingCorrection( + calc = PresRat( FP_NSRDB, FP_CC, fp_fut_cc, @@ -49,8 +49,10 @@ def dist_params(tmpdir_factory, fp_fut_cc): distance_upper_bound=0.7, bias_handler='DataHandlerNCforCC', ) - fn = tmpdir_factory.mktemp('params').join('standard.h5') - _ = calc.run(max_workers=1, fp_out=fn) + fn = tmpdir_factory.mktemp('params').join('presrat.h5') + # Physically non-sense threshold choosed to result in gridpoints with and + # without zero rate correction for the given testing dataset. + _ = calc.run(max_workers=1, zero_rate_threshold=80, fp_out=fn) # DataHandlerNCforCC requires a string fn = str(fn) From 35931a39aa2c33d196bbfc8c2c90e1b7da433599 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 20 Jun 2024 09:21:30 -0600 Subject: [PATCH 25/92] test: Adding back a small noise on future projections This should avoid singularities while not changing the averages. 
--- tests/bias/test_presrat_bias_correction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 23d7a2810..6fd6fad9a 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -10,8 +10,8 @@ def fp_fut_cc(tmpdir_factory): ds = xr.open_dataset(FP_CC) # Adding an offset ds['rsds'] += 75.0 - # adding a noise - ds['rsds'] += np.random.randn(*ds['rsds'].shape) + # adding a small noise + ds['rsds'] += 1e-4 * np.random.randn(*ds['rsds'].shape) ds.to_netcdf(fn) # DataHandlerNCforCC requires a string fn = str(fn) From 5f414b3231f876464a1d37587f2f389c805496b0 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 20 Jun 2024 09:34:18 -0600 Subject: [PATCH 26/92] test: Defining some resources used in the tests It should help to follow the logic of the tests applied. --- tests/bias/test_presrat_bias_correction.py | 44 ++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 6fd6fad9a..2d37aa917 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -1,3 +1,47 @@ +"""Validating PresRat correction procedures + + +Relevant sources used in the tests: +- fp_fut_cc: Future dataset based on FP_CC + an offset + small noise +- fp_fut_cc_notrend: Future dataset identical to FP_CC +- presrat_params: Parameters of reference to test PresRat +- presrat_notrend_params: Bias historical is identical to bias reference + (historical). Thus, there is no trend in the model. +- presrat_identity_params: All distributions are identical (oh & mf) to mh, + i.e. observations equal to model that doesn't change on time. +- presrat_nochanges_params: Like presrat_identity_params, but also all + zero_rate are zeros, i.e. no values should be forced to be zero. 
+- presrat_nozeros_params: Same of presrat_params, but no zero_rate, i.e. + all zero_rate values are equal to 0 (percent). +""" + +import os +import shutil + +import h5py +import numpy as np +import pandas as pd +import pytest +import xarray as xr + +from sup3r import TEST_DATA_DIR +from sup3r.bias import ( + apply_zero_precipitation_rate, + local_presrat_bc, + PresRat, +) +from sup3r.bias.mixins import ZeroRateMixin +from sup3r.preprocessing.data_handling import DataHandlerNC + +FP_NSRDB = os.path.join(TEST_DATA_DIR, 'test_nsrdb_co_2018.h5') +FP_CC = os.path.join(TEST_DATA_DIR, 'rsds_test.nc') +FP_CC_LAT_LON = DataHandlerNC(FP_CC, 'rsds').lat_lon + +with xr.open_dataset(FP_CC) as fh: + MIN_LAT = np.min(fh.lat.values.astype(np.float32)) + MIN_LON = np.min(fh.lon.values.astype(np.float32)) - 360 + TARGET = (float(MIN_LAT), float(MIN_LON)) + SHAPE = (len(fh.lat.values), len(fh.lon.values)) @pytest.fixture(scope='module') From 10747464c222afc4bdd39dc634920e2c926aa34e Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 20 Jun 2024 11:43:10 -0600 Subject: [PATCH 27/92] clean, test: --- tests/bias/test_presrat_bias_correction.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 2d37aa917..6bd6c7e22 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -222,7 +222,6 @@ def test_parallel(fp_fut_cc, threshold): bias_handler='DataHandlerNCforCC', ) - # Physically non sense threshold. out_s = s.run(max_workers=1, zero_rate_threshold=threshold) p = PresRat( @@ -236,7 +235,6 @@ def test_parallel(fp_fut_cc, threshold): bias_handler='DataHandlerNCforCC', ) - # Physically non sense threshold. 
out_p = p.run(max_workers=2, zero_rate_threshold=threshold) for k in out_s.keys(): From 5d172f35f368143be66b9529bc8f73ae457d449f Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 20 Jun 2024 11:50:49 -0600 Subject: [PATCH 28/92] test: test_presrat_zero_rate with multiple thresholds --- tests/bias/test_presrat_bias_correction.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 6bd6c7e22..82fb08287 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -244,8 +244,12 @@ def test_parallel(fp_fut_cc, threshold): ), f'Different results for {k}' -def test_presrat_zero_rate(fp_fut_cc): - """Estimate zero_rate within PresRat.run()""" +@pytest.mark.parametrize('threshold', [0, 50, 1e6]) +def test_presrat_zero_rate(fp_fut_cc, threshold): + """Estimate zero_rate within PresRat.run() + + Use thresholds that gives 0%, 100%, and something between. + """ calc = PresRat( FP_NSRDB, FP_CC, @@ -257,13 +261,13 @@ def test_presrat_zero_rate(fp_fut_cc): bias_handler='DataHandlerNCforCC', ) - # Physically non sense threshold. 
- out = calc.run(zero_rate_threshold=50) + out = calc.run(zero_rate_threshold=threshold) assert 'ghi_zero_rate' in out, 'Missing ghi_zero_rate in calc output' zero_rate = out['ghi_zero_rate'] - assert np.all(np.isfinite(zero_rate)), "Unexpected NaN for ghi_zero_rate" - assert np.all((zero_rate>=0) & (zero_rate<=1)), "Out of range [0, 1]" + assert np.all(np.isfinite(zero_rate)), 'Unexpected NaN for ghi_zero_rate' + assert np.all((zero_rate >= 0) & (zero_rate <= 1)), 'Out of range [0, 1]' + def test_presrat_zero_rate_threshold_zero(fp_fut_cc): From 6b67870641c6bce4efa5c94403499e3e9ceb6c0d Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 20 Jun 2024 11:57:40 -0600 Subject: [PATCH 29/92] test, clean: Combining tests --- tests/bias/test_presrat_bias_correction.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 82fb08287..bb62d3bb5 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -268,12 +268,9 @@ def test_presrat_zero_rate(fp_fut_cc, threshold): assert np.all(np.isfinite(zero_rate)), 'Unexpected NaN for ghi_zero_rate' assert np.all((zero_rate >= 0) & (zero_rate <= 1)), 'Out of range [0, 1]' + if threshold == 0: + assert np.all(zero_rate >= 0), "It should be rate 0 for threhold==0" - -def test_presrat_zero_rate_threshold_zero(fp_fut_cc): - """Estimate zero_rate within PresRat.run(), zero threshold - - This should give a zero rate answer, since all values are higher. 
""" calc = PresRat( FP_NSRDB, From d5b86478dd8eef5df28de4e2ad1a6c1edb0eccf5 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 20 Jun 2024 13:03:58 -0600 Subject: [PATCH 30/92] test: A fixture with fut_cc dataset itself for efficiency --- tests/bias/test_presrat_bias_correction.py | 41 ++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index bb62d3bb5..2b9d34564 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -62,6 +62,47 @@ def fp_fut_cc(tmpdir_factory): return fn +@pytest.fixture(scope='module') +def fut_cc(fp_fut_cc): + """Gives the dataset itself related to fp_fut_cc + + Giving an object in memory makes everything more efficient by avoiding I/O + reading files and overhead for multiple uses. + + Note that ``Resources`` modifies the dataset so we cannot just load the + NetCDF. Here we run a couple of checks to confirm that the output + dataset is as expected by sup3r. + + To use time as expected by sup3r: time = pd.to_datetime(da.time) + To use latlon as expected by sup3r: + latlon = np.stack(xr.broadcast(da["lat"], da["lon"] - 360), + axis=-1).astype('float32') + """ + ds = xr.open_dataset(fp_fut_cc) + # This compute here is required. 
+ da = ds['rsds'].compute().transpose('lat', 'lon', 'time') + # Unfortunatelly, _get_factors() assume latitude as descending + da = da.sortby('lat', ascending=False) + # data = da.data + time = pd.to_datetime(da.time) + latlon = np.stack(xr.broadcast(da["lat"], da["lon"] - 360), + axis=-1).astype('float32') + # latlon = np.stack(np.meshgrid(da['lon'] - 360, da['lat']), axis=-1)[ + # :, :, ::-1 + # ].astype('float32') + for ii in range(4): + for jj in range(4): + assert np.allclose( + da.sel(lat=latlon[ii, jj, 0], method='nearest').sel( + lon=latlon[ii, jj, 1] + 360, method='nearest' + )[0], + da.data[ii, jj, 0], + ) + assert np.allclose(latlon, FP_CC_LAT_LON) + + return da.compute() + + @pytest.fixture(scope='module') def fp_fut_cc_notrend(tmpdir_factory): """Sample future CC dataset identical to historical CC From 13533504d470e8fd4ec3c3269ff27e8641d91bed Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 20 Jun 2024 13:09:44 -0600 Subject: [PATCH 31/92] test: Standard presrat transform test with dataset from memory --- tests/bias/test_presrat_bias_correction.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 2b9d34564..0883d7e77 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -428,16 +428,16 @@ def test_presrat(fp_fut_cc): ), f'For each location of {v} it should be all finite or nonte' -def test_presrat_transform(presrat_params, fp_fut_cc): +def test_presrat_transform(presrat_params, fut_cc): """ WIP: Confirm it runs, but don't verify anything yet. 
""" - data = np.ones((*FP_CC_LAT_LON.shape[:-1], 3)) - time = pd.to_datetime( - ['2015-01-01 12:00:00', '2015-01-02 12:00:00', '2015-01-01 12:00:00'] - ) + data = fut_cc.values + time = pd.to_datetime(fut_cc.time) + latlon = np.stack(xr.broadcast(fut_cc["lat"], fut_cc["lon"] - 360), axis=-1).astype('float32') + corrected = local_presrat_bc( - data, time, FP_CC_LAT_LON, 'ghi', 'rsds', presrat_params + data, time, latlon, 'ghi', 'rsds', presrat_params ) assert not np.isnan(corrected).all(), "Can't compare if only NaN" From 59d8b2d1058b1ead1d6ebbd804e443852d5f5dab Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 20 Jun 2024 13:54:52 -0600 Subject: [PATCH 32/92] test: test_presrat_transform_nozerochanges() --- tests/bias/test_presrat_bias_correction.py | 27 +++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 0883d7e77..1595f316b 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -487,6 +487,27 @@ def test_presrat_transform_nochanges(presrat_nochanges_params, fp_fut_cc_notrend assert np.allclose(data, corrected, equal_nan=False) -""" -Test concepts to implement: -""" +def test_presrat_transform_nozerochanges( + presrat_nozeros_params, fut_cc +): + """No changes if the three fraction of zeros + + Same correction as the standard case, but no values are forced to zero, + i.e. 
corrected to dry + """ + data = fut_cc.values + time = pd.to_datetime(fut_cc.time) + latlon = np.stack(xr.broadcast(fut_cc["lat"], fut_cc["lon"] - 360), axis=-1).astype('float32') + + corrected = local_presrat_bc( + data, + time, + latlon, + 'ghi', + 'rsds', + presrat_nozeros_params, + ) + + assert np.isfinite(data).any(), "Can't compare if only NaN" + assert not np.allclose(data, corrected, equal_nan=False), "Expected changes due to bias correction" + assert not ((data != 0) & (corrected == 0)).any(), "Unexpected value corrected (zero_rate) to zero (dry day)" From 1fa54db1505b5512af95e94250be89968f5a2d92 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 20 Jun 2024 14:12:50 -0600 Subject: [PATCH 33/92] test: test_presrat_transform_nochanges() working --- tests/bias/test_presrat_bias_correction.py | 66 ++++++++++------------ 1 file changed, 30 insertions(+), 36 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 1595f316b..b75fdd7ce 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -116,6 +116,27 @@ def fp_fut_cc_notrend(tmpdir_factory): return fn +@pytest.fixture(scope='module') +def fut_cc_notrend(fp_fut_cc_notrend): + ds = xr.open_dataset(fp_fut_cc_notrend) + da = ds['rsds'].compute().transpose('lat', 'lon', 'time') + # Unfortunatelly, _get_factors() assume latitude as descending + da = da.sortby('lat', ascending=False) + latlon = np.stack(xr.broadcast(da["lat"], da["lon"] - 360), + axis=-1).astype('float32') + for ii in range(4): + for jj in range(4): + np.allclose( + da.sel(lat=latlon[ii, jj, 0], method='nearest').sel( + lon=latlon[ii, jj, 1] + 360, method='nearest' + )[0], + da.data[ii, jj, 0], + ) + assert np.allclose(latlon, FP_CC_LAT_LON) + + return da.compute() + + @pytest.fixture(scope='module') def presrat_params(tmpdir_factory, fp_fut_cc): """PresRat parameters for standard datasets @@ -444,47 +465,20 @@ def 
test_presrat_transform(presrat_params, fut_cc): assert not np.allclose(data, corrected, equal_nan=False) -def test_presrat_transform_nochanges(presrat_nochanges_params, fp_fut_cc_notrend): +def test_presrat_transform_nochanges( + presrat_nochanges_params, fut_cc_notrend +): """No changes if the three datasets are the same and no zeros""" - # data = np.ones((*FP_CC_LAT_LON.shape[:-1], 3)) - # time = pd.to_datetime( ['2015-01-01 12:00:00', '2015-01-02 12:00:00', '2015-01-01 12:00:00']) - breakpoint() - ds = xr.open_dataset(fp_fut_cc_notrend) - da = ds['rsds'].compute().transpose('lat', 'lon', 'time') - # Unfortunatelly, _get_factors() assume latitude as descending - da = da.sortby('lat', ascending=False) - data = da.data - time = pd.to_datetime(da.time) - # latlon = np.stack(xr.broadcast(da["lat"], da["lon"] - 360), axis=-1).astype('float32') - latlon = np.stack(np.meshgrid(da['lon'] - 360, da['lat']), axis=-1)[ - :, :, ::-1 - ].astype('float32') - # da.sel(lat=latlon[ii,jj,0], method="nearest").sel(lon=latlon[ii,jj,1], method="nearest")[0] - data[ii,jj,0] - for ii in range(4): - for jj in range(4): - np.allclose( - da.sel(lat=latlon[ii, jj, 0], method='nearest').sel( - lon=latlon[ii, jj, 1] + 360, method='nearest' - )[0], - data[ii, jj, 0], - ) - np.allclose(latlon, FP_CC_LAT_LON) + data = fut_cc_notrend.values + time = pd.to_datetime(fut_cc_notrend.time) + latlon = np.stack(xr.broadcast(fut_cc_notrend["lat"], fut_cc_notrend["lon"] - 360), axis=-1).astype('float32') - # data = np.ones((*FP_CC_LAT_LON.shape[:-1], 3)) - # time = pd.to_datetime( - # ['2015-01-01 12:00:00', '2015-01-02 12:00:00', '2015-01-01 12:00:00'] - # ) corrected = local_presrat_bc( - data, - time, - latlon, - 'ghi', - 'rsds', - presrat_nochanges_params, + data, time, latlon, 'ghi', 'rsds', presrat_nochanges_params ) - assert not np.isnan(corrected).all(), "Can't compare if only NaN" - assert np.allclose(data, corrected, equal_nan=False) + assert np.isfinite(corrected).any(), "Can't compare if only 
NaN" + assert np.allclose(data, corrected, equal_nan=False), "This case shouldn't modify the data" def test_presrat_transform_nozerochanges( From 5968726ef4cbba16e9ba330c980dc5ddc5aebade Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 20 Jun 2024 14:16:36 -0600 Subject: [PATCH 34/92] style: --- tests/bias/test_presrat_bias_correction.py | 47 +++++++++++++--------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index b75fdd7ce..acfc41425 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -84,9 +84,10 @@ def fut_cc(fp_fut_cc): # Unfortunatelly, _get_factors() assume latitude as descending da = da.sortby('lat', ascending=False) # data = da.data - time = pd.to_datetime(da.time) - latlon = np.stack(xr.broadcast(da["lat"], da["lon"] - 360), - axis=-1).astype('float32') + # time = pd.to_datetime(da.time) + latlon = np.stack( + xr.broadcast(da['lat'], da['lon'] - 360), axis=-1 + ).astype('float32') # latlon = np.stack(np.meshgrid(da['lon'] - 360, da['lat']), axis=-1)[ # :, :, ::-1 # ].astype('float32') @@ -122,8 +123,9 @@ def fut_cc_notrend(fp_fut_cc_notrend): da = ds['rsds'].compute().transpose('lat', 'lon', 'time') # Unfortunatelly, _get_factors() assume latitude as descending da = da.sortby('lat', ascending=False) - latlon = np.stack(xr.broadcast(da["lat"], da["lon"] - 360), - axis=-1).astype('float32') + latlon = np.stack( + xr.broadcast(da['lat'], da['lon'] - 360), axis=-1 + ).astype('float32') for ii in range(4): for jj in range(4): np.allclose( @@ -331,7 +333,7 @@ def test_presrat_zero_rate(fp_fut_cc, threshold): assert np.all((zero_rate >= 0) & (zero_rate <= 1)), 'Out of range [0, 1]' if threshold == 0: - assert np.all(zero_rate >= 0), "It should be rate 0 for threhold==0" + assert np.all(zero_rate >= 0), 'It should be rate 0 for threshold==0' """ calc = PresRat( @@ -455,7 +457,9 @@ def 
test_presrat_transform(presrat_params, fut_cc): """ data = fut_cc.values time = pd.to_datetime(fut_cc.time) - latlon = np.stack(xr.broadcast(fut_cc["lat"], fut_cc["lon"] - 360), axis=-1).astype('float32') + latlon = np.stack( + xr.broadcast(fut_cc['lat'], fut_cc['lon'] - 360), axis=-1 + ).astype('float32') corrected = local_presrat_bc( data, time, latlon, 'ghi', 'rsds', presrat_params @@ -465,25 +469,26 @@ def test_presrat_transform(presrat_params, fut_cc): assert not np.allclose(data, corrected, equal_nan=False) -def test_presrat_transform_nochanges( - presrat_nochanges_params, fut_cc_notrend -): +def test_presrat_transform_nochanges(presrat_nochanges_params, fut_cc_notrend): """No changes if the three datasets are the same and no zeros""" data = fut_cc_notrend.values time = pd.to_datetime(fut_cc_notrend.time) - latlon = np.stack(xr.broadcast(fut_cc_notrend["lat"], fut_cc_notrend["lon"] - 360), axis=-1).astype('float32') + latlon = np.stack( + xr.broadcast(fut_cc_notrend['lat'], fut_cc_notrend['lon'] - 360), + axis=-1, + ).astype('float32') corrected = local_presrat_bc( data, time, latlon, 'ghi', 'rsds', presrat_nochanges_params ) assert np.isfinite(corrected).any(), "Can't compare if only NaN" - assert np.allclose(data, corrected, equal_nan=False), "This case shouldn't modify the data" + assert np.allclose( + data, corrected, equal_nan=False + ), "This case shouldn't modify the data" -def test_presrat_transform_nozerochanges( - presrat_nozeros_params, fut_cc -): +def test_presrat_transform_nozerochanges(presrat_nozeros_params, fut_cc): """No changes if the three fraction of zeros Same correction as the standard case, but no values are forced to zero, @@ -491,7 +496,9 @@ def test_presrat_transform_nozerochanges( """ data = fut_cc.values time = pd.to_datetime(fut_cc.time) - latlon = np.stack(xr.broadcast(fut_cc["lat"], fut_cc["lon"] - 360), axis=-1).astype('float32') + latlon = np.stack( + xr.broadcast(fut_cc['lat'], fut_cc['lon'] - 360), axis=-1 + 
).astype('float32') corrected = local_presrat_bc( data, @@ -503,5 +510,9 @@ def test_presrat_transform_nozerochanges( ) assert np.isfinite(data).any(), "Can't compare if only NaN" - assert not np.allclose(data, corrected, equal_nan=False), "Expected changes due to bias correction" - assert not ((data != 0) & (corrected == 0)).any(), "Unexpected value corrected (zero_rate) to zero (dry day)" + assert not np.allclose( + data, corrected, equal_nan=False + ), 'Expected changes due to bias correction' + assert not ( + (data != 0) & (corrected == 0) + ).any(), 'Unexpected value corrected (zero_rate) to zero (dry day)' From 88ce7c89f7b97f57b75b170265fbd3b787bb80e7 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Thu, 20 Jun 2024 16:04:18 -0600 Subject: [PATCH 35/92] test, doc: test_presrat_transform_nochanges() --- tests/bias/test_presrat_bias_correction.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index acfc41425..637b5c82c 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -489,10 +489,11 @@ def test_presrat_transform_nochanges(presrat_nochanges_params, fut_cc_notrend): def test_presrat_transform_nozerochanges(presrat_nozeros_params, fut_cc): - """No changes if the three fraction of zeros + """No adjustment to zero - Same correction as the standard case, but no values are forced to zero, - i.e. corrected to dry + Correction procedure results in some changes, but since the zero_rate + correction is all set to zero percent, there are no values adjusted to + zero. 
""" data = fut_cc.values time = pd.to_datetime(fut_cc.time) From 68ab5ab507c343b71f14216b102268cb0ff4c23e Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 21 Jun 2024 07:58:36 -0600 Subject: [PATCH 36/92] Saving used threshold in metadata --- sup3r/bias/qdm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index 13c8741f4..9992a00d3 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -714,7 +714,8 @@ def run( self.out, fill_extend, smooth_extend, smooth_interior ) - self.write_outputs(fp_out, self.out) + self.zero_rate_threshold = zero_rate_threshold + self.write_outputs(fp_out, self.out, extra_attrs={'zero_rate_threshold': zero_rate_threshold}) return copy.deepcopy(self.out) From 65f54c9db8f6a9301dd39c7c0d1d5cafed2cf11c Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 21 Jun 2024 08:00:37 -0600 Subject: [PATCH 37/92] Entry point for presrat functions --- sup3r/bias/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sup3r/bias/__init__.py b/sup3r/bias/__init__.py index 877f92e8c..c77171070 100644 --- a/sup3r/bias/__init__.py +++ b/sup3r/bias/__init__.py @@ -1,15 +1,19 @@ """Bias calculation and correction modules.""" -from .bias_transforms import (global_linear_bc, local_linear_bc, - local_qdm_bc, monthly_local_linear_bc) from .bias_calc import (LinearCorrection, MonthlyLinearCorrection, MonthlyScalarCorrection, SkillAssessment) from .qdm import PresRat, QuantileDeltaMappingCorrection +from .bias_transforms import (apply_zero_precipitation_rate, + global_linear_bc, local_linear_bc, + local_qdm_bc, local_presrat_bc, + monthly_local_linear_bc) __all__ = [ + "apply_zero_precipitation_rate", "global_linear_bc", "local_linear_bc", "local_qdm_bc", + "local_presrat_bc", "monthly_local_linear_bc", "LinearCorrection", "MonthlyLinearCorrection", From 98c839d167004a3ee2fb11e8313c1aaf84fbba68 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 21 Jun 2024 08:05:52 
-0600 Subject: [PATCH 38/92] feat: apply_zero_precipitation_rate() Rolling back to pass the tests before moving again to the newer formulation. --- sup3r/bias/bias_transforms.py | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/sup3r/bias/bias_transforms.py b/sup3r/bias/bias_transforms.py index de20fe28d..cbf27083c 100644 --- a/sup3r/bias/bias_transforms.py +++ b/sup3r/bias/bias_transforms.py @@ -520,3 +520,45 @@ def local_qdm_bc(data: np.array, tmp = QDM(tmp) # Reorgnize array back from (time, space) to (spatial, spatial, temporal) return tmp.T.reshape(data.shape) + +def apply_zero_precipitation_rate(arr: np.ndarray, rate): + """Enforce the zero precipitation rate + Replace lowest values by zero to satisfy the given rate of zero + precipitation. + + Parameters + ---------- + arr : np.array + An array of values to be analyzed. Usually precipitation but it + rate : float or np.array + Rate of zero, or negligible, days of precipitation. + + Returns + ------- + corrected : np.array + A copy of given array that satisfies the rate of zero precipitation + days, i.e. the lowest values of precipitation are changed to zero + to satisfy that rate. + + Examples + -------- + >>> data = np.array([5, 0.1, np.nan, 0.2, 1]) + >>> apply_zero_precipitation_rate(data, 0.30) + array([5. , 0. , nan, 0.2, 1. ]) + """ + assert arr.ndim == 3 + assert rate.ndim >= 2 + assert arr.shape[:2] == rate.shape[:2] + + I, J = rate.shape[:2] + for i in range(I): + for j in range(J): + r = rate[i, j, 0] + if np.isfinite(r): + a = arr[i, j] + valid = np.isfinite(a) + idx = np.argsort(a)[valid][:round(r * valid.astype('i').sum())] + a[idx] = 0 + arr[i, j] = a + + return arr From 6a766d77c886ad37dd047b2f64e1220a0585efba Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 21 Jun 2024 08:12:44 -0600 Subject: [PATCH 39/92] feat: get_spatial_bc_presrat() Hopefuly we won't need to use it, but let's lock what is working now to help to figure out where is the problem. 
--- sup3r/bias/bias_transforms.py | 100 ++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/sup3r/bias/bias_transforms.py b/sup3r/bias/bias_transforms.py index cbf27083c..816ff42fb 100644 --- a/sup3r/bias/bias_transforms.py +++ b/sup3r/bias/bias_transforms.py @@ -562,3 +562,103 @@ def apply_zero_precipitation_rate(arr: np.ndarray, rate): arr[i, j] = a return arr + + +def get_spatial_bc_presrat(lat_lon: np.array, + base_dset: str, + feature_name: str, + bias_fp: str, + threshold: float = 0.1): + """Statistical distributions previously estimated for given lat/lon points + + Recover the parameters that describe the statistical distribution + previously estimated with + :class:`~sup3r.bias.bias_calc.QuantileDeltaMappingCorrection` for three + datasets: ``base`` (historical reference), ``bias`` (historical biased + reference), and ``bias_fut`` (the future biased dataset, usually the data + to correct). + + Parameters + ---------- + lat_lon : ndarray + Array of latitudes and longitudes for the domain to bias correct + (n_lats, n_lons, 2) + base_dset : str + Name of feature used as historical reference. A Dataset with name + "base_{base_dset}_params" will be retrieved from ``bias_fp``. + feature_name : str + Name of the feature that is being corrected. Datasets with names + "bias_{feature_name}_params" and "bias_fut_{feature_name}_params" will + be retrieved from ``bias_fp``. + bias_fp : str + Filepath to bias correction file from the bias calc module. Must have + datasets "base_{base_dset}_params", "bias_{feature_name}_params", and + "bias_fut_{feature_name}_params" that define the statistical + distributions. + threshold : float + Nearest neighbor euclidean distance threshold. If the coordinates are + more than this value away from the bias correction lat/lon, an error + is raised. + + Returns + ------- + base : np.array + Parameters used to define the statistical distribution estimated for + the ``base_dset``. 
It has a shape of (I, J, P), where (I, J) are the + same first two dimensions of the given `lat_lon` and P is the number + of parameters and depends on the type of distribution. See + :class:`~sup3r.bias.bias_calc.QuantileDeltaMappingCorrection` for more + details. + bias : np.array + Parameters used to define the statistical distribution estimated for + (historical) ``feature_name``. It has a shape of (I, J, P), where + (I, J) are the same first two dimensions of the given `lat_lon` and P + is the number of parameters and depends on the type of distribution. + See :class:`~sup3r.bias.bias_calc.QuantileDeltaMappingCorrection` for + more details. + bias_fut : np.array + Parameters used to define the statistical distribution estimated for + (future) ``feature_name``. It has a shape of (I, J, P), where (I, J) + are the same first two dimensions of the given `lat_lon` and P is the + number of parameters used and depends on the type of distribution. See + :class:`~sup3r.bias.bias_calc.QuantileDeltaMappingCorrection` for more + details. + cfg : dict + Metadata used to guide how to use of the previous parameters on + reconstructing the statistical distributions. For instance, + `cfg['dist']` defines the type of distribution. See + :class:`~sup3r.bias.bias_calc.QuantileDeltaMappingCorrection` for more + details, including which metadata is saved. + + Warnings + -------- + Be careful selecting which `bias_fp` to use. In particular, if + "bias_fut_{feature_name}_params" is representative for the desired target + period. + + See Also + -------- + sup3r.bias.bias_calc.QuantileDeltaMappingCorrection + Estimate the statistical distributions loaded here. + + Examples + -------- + >>> lat_lon = np.array([ + ... [39.649033, -105.46875 ], + ... [39.649033, -104.765625]]) + >>> params, cfg = get_spatial_bc_quantiles( + ... 
lat_lon, "ghi", "rsds", "./dist_params.hdf") + """ + ds = {'base': f'base_{base_dset}_params', + 'base_zero_rate': f'{base_dset}_zero_rate', + 'bias': f'bias_{feature_name}_params', + 'bias_fut': f'bias_fut_{feature_name}_params', + 'bias_mean_mh': f'{feature_name}_mean_mh', + 'bias_mean_mf': f'{feature_name}_mean_mf', + } + params = _get_factors(lat_lon, ds, bias_fp, threshold) + + with Resource(bias_fp) as res: + cfg = res.global_attrs + + return params, cfg From b0c5bebd053727e4641318261ba316076d01d7eb Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 21 Jun 2024 13:13:07 -0600 Subject: [PATCH 40/92] fix, test: test_presrat_transform_nochanges() Fixed this test. The issue is that gridpoints missing historical references are skipped, and then there is an implicit interpolation resulting in approximated values that fail precise comparisons. --- tests/bias/test_presrat_bias_correction.py | 26 +++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 637b5c82c..046be9ae5 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -470,11 +470,23 @@ def test_presrat_transform(presrat_params, fut_cc): def test_presrat_transform_nochanges(presrat_nochanges_params, fut_cc_notrend): - """No changes if the three datasets are the same and no zeros""" + """The correction should result in no changes at all + + Note that there are a lot of implicit transformations, so we can't expect + to be able to esily compare all gridpoints. + + The corrected output must be the same if: + - The three CDFs are the same, so no change due to QDM. There is one + caveat here. 
The data to be corrected is compared with the mf's CDF, and + if out of distribution, it would lead to differences; + - All zero rate set to zero percent, so no value is forced to zero; + - The value to be corrected is the same used to estimate the means for the + K factor; + """ data = fut_cc_notrend.values time = pd.to_datetime(fut_cc_notrend.time) latlon = np.stack( - xr.broadcast(fut_cc_notrend['lat'], fut_cc_notrend['lon'] - 360), + xr.broadcast(fut_cc_notrend['lat'], fut_cc_notrend['lon']), axis=-1, ).astype('float32') @@ -483,8 +495,16 @@ def test_presrat_transform_nochanges(presrat_nochanges_params, fut_cc_notrend): ) assert np.isfinite(corrected).any(), "Can't compare if only NaN" + + # The calculations are set in such a way that `run()` only applies to + # gripoints where there are historical reference and biased data. This is + # hidden by an implicit interpolation procedure, which results in values + # that can't be easily reproduced for validation. One solution is to + # allow the implicit interpolation but compare only where non-interpolated + # values are available. Let's call it the 'Magic index'. + idx = (slice(1,3), slice(0,3)) assert np.allclose( - data, corrected, equal_nan=False + data[idx], corrected[idx], equal_nan=False ), "This case shouldn't modify the data" From bdab03d69a7ad45cf89ee9181458c36b3ceed4f9 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 21 Jun 2024 13:22:53 -0600 Subject: [PATCH 41/92] feat: Saving requirements to estimate K factor Saving mf and mh explicitly since we should be able to simplify the operation. For now, saving monthly but it should transitioned later. 
--- sup3r/bias/qdm.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index 9992a00d3..ade7a20d0 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -518,6 +518,15 @@ class PresRat(ZeroRateMixin, QuantileDeltaMappingCorrection): # - Estimate K = / """ + def _init_out(self): + super()._init_out() + + shape = (*self.bias_gid_raster.shape, 1) + self.out[f'{self.base_dset}_zero_rate'] = np.full(shape, np.nan, np.float32) + shape = (*self.bias_gid_raster.shape, 12) + self.out[f'{self.bias_feature}_mean_mh'] = np.full(shape, np.nan, np.float32) + self.out[f'{self.bias_feature}_mean_mf'] = np.full(shape, np.nan, np.float32) + # pylint: disable=W0613 @classmethod def _run_single( @@ -537,6 +546,7 @@ def _run_single( *, zero_rate_threshold, base_dh_inst=None, + ): """Estimate probability distributions at a single site @@ -569,6 +579,19 @@ def _run_single( zero_rate_threshold, ) + # Let's save the means for mh and mf instead of the `x` ratio. It + # seems that we should be able to simplify the mh component from + # the `K` coefficient. + # For now it is monthly but later it will be modified for a generic + # time window. + mh = np.full(12, np.nan, np.float32) + mf = np.full(12, np.nan, np.float32) + for m in range(12): + mh[m] = bias_data[bias_ti.month==(m + 1)].mean() + mf[m] = bias_fut_data[bias_fut_ti.month==(m + 1)].mean() + out[f'{bias_feature}_mean_mh'] = mh + out[f'{bias_feature}_mean_mf'] = mf + return out def run( From f8c88e37bce5183a9bec1d961fa09d7d9ba66e4d Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 21 Jun 2024 13:28:57 -0600 Subject: [PATCH 42/92] Adding time indices Coordinate and time are decoupled from the data. To be able to run monthly means we shall include it. To keep consistent with the rest of the library, let's add a couple more arguments. 
--- sup3r/bias/qdm.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index ade7a20d0..c2b077087 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -544,6 +544,8 @@ def _run_single( n_samples, log_base, *, + bias_ti, + bias_fut_ti, zero_rate_threshold, base_dh_inst=None, @@ -673,6 +675,8 @@ def run( log_base=self.log_base, base_dh_inst=self.base_dh, zero_rate_threshold=zero_rate_threshold, + bias_ti = self.bias_fut_dh.time_index, + bias_fut_ti = self.bias_fut_dh.time_index, ) for key, arr in single_out.items(): self.out[key][raster_loc] = arr @@ -716,6 +720,8 @@ def run( n_samples=self.n_quantiles, log_base=self.log_base, zero_rate_threshold=zero_rate_threshold, + bias_ti = self.bias_fut_dh.time_index, + bias_fut_ti = self.bias_fut_dh.time_index, ) futures[future] = raster_loc From 7cafe23e3ac2439c98b3d077e48429c412e8dc28 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 21 Jun 2024 13:57:00 -0600 Subject: [PATCH 43/92] fix: zero_precipitation_rate() dealing with NaN The non-finite values should be ignored in the statistics. --- sup3r/bias/mixins.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/sup3r/bias/mixins.py b/sup3r/bias/mixins.py index 12e400627..68be0ac2c 100644 --- a/sup3r/bias/mixins.py +++ b/sup3r/bias/mixins.py @@ -108,7 +108,7 @@ class ZeroRateMixin(): 16(6), 2421-2442. """ @staticmethod - def _zero_precipitation_rate(arr: np.ndarray, threshold: float = 0.01): + def zero_precipitation_rate(arr: np.ndarray, threshold: float = 0.01): """Rate of (nearly) zero precipitation days Estimate the rate of values less than a given ``threshold``. In concept @@ -127,15 +127,33 @@ def _zero_precipitation_rate(arr: np.ndarray, threshold: float = 0.01): Returns ------- rate : float - Rate of days with negligible precipitation. (see Z_gf in - [Pierce2015]_) + Rate of days with negligible precipitation (see Z_gf in + [Pierce2015]_). 
Notes ----- The ``NaN`` are ignored for the rate estimate. Therefore, a large - number of ``NaN`` might mislead this rate estimate. + number of ``NaN`` might compromise the confidence of the estimator. + + If the input values are all non-finite, it returns NaN. + + Examples + -------- + >>> ZeroRateMixin().zero_precipitation_rate([2, 3, 4], 1) + 0.0 + + >>> ZeroRateMixin().zero_precipitation_rate([0, 1, 2, 3], 1) + 0.25 + + >>> ZeroRateMixin().zero_precipitation_rate([0, 1, 2, 3, np.nan], 1) + 0.25 """ - return np.nanmean((arr < threshold).astype('i')) + idx = np.isfinite(arr) + if not idx.any(): + return np.nan + + return np.nanmean((arr[idx] < threshold).astype('i')) + @staticmethod def apply_zero_precipitation_rate(arr: np.ndarray, rate: float): From b17b8d14e788dea510cde53a528071717be9899a Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 21 Jun 2024 14:01:52 -0600 Subject: [PATCH 44/92] refact: Moved apply_zero_precipitation_rate() into bias_tranforms --- sup3r/bias/mixins.py | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/sup3r/bias/mixins.py b/sup3r/bias/mixins.py index 68be0ac2c..be225b4d1 100644 --- a/sup3r/bias/mixins.py +++ b/sup3r/bias/mixins.py @@ -153,35 +153,3 @@ def zero_precipitation_rate(arr: np.ndarray, threshold: float = 0.01): return np.nan return np.nanmean((arr[idx] < threshold).astype('i')) - - - @staticmethod - def apply_zero_precipitation_rate(arr: np.ndarray, rate: float): - """Enforce the zero precipitation rate - - Replace lowest values by zero to satisfy the given rate of zero - precipitation. - - Parameters - ---------- - arr : np.array - An array of values to be analyzed. Usually precipitation but it - rate : float - Rate of zero, or negligible, days of precipitation. - - Returns - ------- - corrected : np.array - A copy of given array that satisfies the rate of zero precipitation - days, i.e. the lowest values of precipitation are changed to zero - to satisfy that rate. 
- - Examples - -------- - >>> data = np.array([5, 0.1, np.nan, 0.2, 1] - >>> apply_zero_precipitation_rate(data, 0.30) - array([5. , 0. , nan, 0.2, 1. ]) - """ - valid = arr[np.isfinite(arr)] - threshold = np.sort(valid)[round(rate * len(valid))] - return np.where(arr < threshold, 0, arr) From 9140482da7b1d244021d73c33f64adc51b6e1263 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Fri, 21 Jun 2024 14:18:56 -0600 Subject: [PATCH 45/92] fix: Missing to load xarray It's actually only used in the prototype of the monthly means replacement, but let's add it already anyways. --- sup3r/bias/qdm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index c2b077087..d137b2bfd 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -17,6 +17,7 @@ sample_q_linear, sample_q_log, ) +import xarray as xr from sup3r.preprocessing.data_handling.base import DataHandler from sup3r.utilities.utilities import expand_paths From 0a696389b4c5d3c79464130cf62e9f00d4092600 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sun, 23 Jun 2024 18:15:27 -0600 Subject: [PATCH 46/92] refact: get_spatial_bc_quantiles returing params as dict This avoids too many explicit arguments. For instance, PresRat requires variables related to the K parameter. --- sup3r/bias/bias_transforms.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/sup3r/bias/bias_transforms.py b/sup3r/bias/bias_transforms.py index 816ff42fb..8e1f9cc64 100644 --- a/sup3r/bias/bias_transforms.py +++ b/sup3r/bias/bias_transforms.py @@ -158,18 +158,19 @@ def get_spatial_bc_quantiles(lat_lon: np.array, >>> lat_lon = np.array([ ... [39.649033, -105.46875 ], ... [39.649033, -104.765625]]) - >>> params = get_spatial_bc_quantiles( - ... lat_lon, "ghi", "rsds", "./dist_params.hdf") + >>> params, cfg = get_spatial_bc_quantiles( + ... 
lat_lon, "ghi", "rsds", "./dist_params.hdf") """ ds = {'base': f'base_{base_dset}_params', 'bias': f'bias_{feature_name}_params', - 'bias_fut': f'bias_fut_{feature_name}_params'} - out = _get_factors(lat_lon, ds, bias_fp, threshold) + 'bias_fut': f'bias_fut_{feature_name}_params', + } + params = _get_factors(lat_lon, ds, bias_fp, threshold) with Resource(bias_fp) as res: cfg = res.global_attrs - return out["base"], out["bias"], out["bias_fut"], cfg + return params, cfg def global_linear_bc(input, scalar, adder, out_range=None): @@ -488,11 +489,15 @@ def local_qdm_bc(data: np.array, >>> unbiased = local_qdm_bc(biased_array, lat_lon_array, "ghi", "rsds", ... "./dist_params.hdf") """ - base, bias, bias_fut, cfg = get_spatial_bc_quantiles(lat_lon, - base_dset, - feature_name, - bias_fp, - threshold) + params, cfg = get_spatial_bc_quantiles(lat_lon, + base_dset, + feature_name, + bias_fp, + threshold) + base = params["base"] + bias = params["bias"] + bias_fut = params["bias_fut"] + if lr_padded_slice is not None: spatial_slice = (lr_padded_slice[0], lr_padded_slice[1]) base = base[spatial_slice] From a68fa7a6231938223a0c574306feb4c3bc55216a Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sun, 23 Jun 2024 18:32:11 -0600 Subject: [PATCH 47/92] feat: local_presrat_bc() Pass on all (current) tests. Recording it before refactoring. 
--- sup3r/bias/bias_transforms.py | 60 +++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/sup3r/bias/bias_transforms.py b/sup3r/bias/bias_transforms.py index 8e1f9cc64..7a726262a 100644 --- a/sup3r/bias/bias_transforms.py +++ b/sup3r/bias/bias_transforms.py @@ -667,3 +667,63 @@ def get_spatial_bc_presrat(lat_lon: np.array, cfg = res.global_attrs return params, cfg + + +def local_presrat_bc(data: np.array, + time: np.array, + lat_lon: np.array, + base_dset: str, + feature_name: str, + bias_fp, + lr_padded_slice=None, + threshold=0.1, + relative=True, + no_trend=False): + + params, cfg = get_spatial_bc_presrat(lat_lon, + base_dset, + feature_name, + bias_fp, + threshold) + base = params["base"] + bias = params["bias"] + bias_fut = params["bias_fut"] + + if lr_padded_slice is not None: + spatial_slice = (lr_padded_slice[0], lr_padded_slice[1]) + base = base[spatial_slice] + bias = bias[spatial_slice] + bias_fut = bias_fut[spatial_slice] + + if no_trend: + mf = None + else: + mf = bias_fut.reshape(-1, bias_fut.shape[-1]) + # The distributions are 3D (space, space, N-params) + # Collapse 3D (space, space, N) into 2D (space**2, N) + QDM = QuantileDeltaMapping(base.reshape(-1, base.shape[-1]), + bias.reshape(-1, bias.shape[-1]), + mf, + dist=cfg['dist'], + relative=relative, + sampling=cfg["sampling"], + log_base=cfg["log_base"]) + + # input 3D shape (spatial, spatial, temporal) + # QDM expects input arr with shape (time, space) + tmp = data.reshape(-1, data.shape[-1]).T + # Apply QDM correction + tmp = QDM(tmp) + # Reorgnize array back from (time, space) to (spatial, spatial, temporal) + tmp = tmp.T.reshape(data.shape) + + tmp = apply_zero_precipitation_rate(tmp, params["base_zero_rate"]) + + month = time.month + for m in range(12): + idx = month == m + 1 + x_hat = tmp[:, :, idx].mean(axis=-1) + k = params["bias_mean_mf"][:, :, m] / x_hat + tmp[:, :, idx] *= k[:, :, np.newaxis] + + return tmp From 757e0b0d3440204e2c1427905491b06f231f6745 
Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sun, 23 Jun 2024 18:39:59 -0600 Subject: [PATCH 48/92] style: Mostly moving to single quote which is the standard used in sup3r --- sup3r/bias/bias_transforms.py | 1 + sup3r/bias/qdm.py | 7 +- tests/bias/test_qdm_bias_correction.py | 113 +++++++++++++------------ 3 files changed, 65 insertions(+), 56 deletions(-) diff --git a/sup3r/bias/bias_transforms.py b/sup3r/bias/bias_transforms.py index 7a726262a..189de0cff 100644 --- a/sup3r/bias/bias_transforms.py +++ b/sup3r/bias/bias_transforms.py @@ -526,6 +526,7 @@ def local_qdm_bc(data: np.array, # Reorgnize array back from (time, space) to (spatial, spatial, temporal) return tmp.T.reshape(data.shape) + def apply_zero_precipitation_rate(arr: np.ndarray, rate): """Enforce the zero precipitation rate Replace lowest values by zero to satisfy the given rate of zero diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index d137b2bfd..24af6df2e 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -62,9 +62,10 @@ def __init__(self, decimals=None, match_zero_rate=False, n_quantiles=101, - dist="empirical", - sampling="linear", - log_base=10): + dist='empirical', + sampling='linear', + log_base=10, + ): """ Parameters ---------- diff --git a/tests/bias/test_qdm_bias_correction.py b/tests/bias/test_qdm_bias_correction.py index 5eff2cae5..76eb804b6 100644 --- a/tests/bias/test_qdm_bias_correction.py +++ b/tests/bias/test_qdm_bias_correction.py @@ -20,9 +20,9 @@ ) from sup3r.preprocessing.data_handling import DataHandlerNC, DataHandlerNCforCC -FP_NSRDB = os.path.join(TEST_DATA_DIR, "test_nsrdb_co_2018.h5") -FP_CC = os.path.join(TEST_DATA_DIR, "rsds_test.nc") -FP_CC_LAT_LON = DataHandlerNC(FP_CC, "rsds").lat_lon +FP_NSRDB = os.path.join(TEST_DATA_DIR, 'test_nsrdb_co_2018.h5') +FP_CC = os.path.join(TEST_DATA_DIR, 'rsds_test.nc') +FP_CC_LAT_LON = DataHandlerNC(FP_CC, 'rsds').lat_lon with xr.open_dataset(FP_CC) as fh: MIN_LAT = np.min(fh.lat.values.astype(np.float32)) @@ -31,13 
+31,13 @@ SHAPE = (len(fh.lat.values), len(fh.lon.values)) -@pytest.fixture(scope="module") +@pytest.fixture(scope='module') def fp_fut_cc(tmpdir_factory): """Sample future CC dataset The same CC but with an offset (75.0) and negligible noise. """ - fn = tmpdir_factory.mktemp("data").join("test_mf.nc") + fn = tmpdir_factory.mktemp('data').join('test_mf.nc') ds = xr.open_dataset(FP_CC) # Adding an offset ds['rsds'] += 75.0 @@ -49,39 +49,38 @@ def fp_fut_cc(tmpdir_factory): return fn -@pytest.fixture(scope="module") +@pytest.fixture(scope='module') def fp_fut_cc_notrend(tmpdir_factory): """Sample future CC dataset identical to historical CC This is currently a copy of FP_CC, thus no trend on time. """ - fn = tmpdir_factory.mktemp("data").join("test_mf_notrend.nc") + fn = tmpdir_factory.mktemp('data').join('test_mf_notrend.nc') shutil.copyfile(FP_CC, fn) # DataHandlerNCforCC requires a string fn = str(fn) return fn -@pytest.fixture(scope="module") +@pytest.fixture(scope='module') def dist_params(tmpdir_factory, fp_fut_cc): """Distribution parameters for standard datasets Use the standard datasets to estimate the distributions and save in a temporary place to be re-used """ - calc = QuantileDeltaMappingCorrection( - FP_NSRDB, - FP_CC, - fp_fut_cc, - "ghi", - "rsds", - target=TARGET, - shape=SHAPE, - distance_upper_bound=0.7, - bias_handler="DataHandlerNCforCC", - ) - fn = tmpdir_factory.mktemp("params").join("standard.h5") - _ = calc.run(max_workers=1, fp_out=fn) + calc = QuantileDeltaMappingCorrection(FP_NSRDB, + FP_CC, + fp_fut_cc, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + distance_upper_bound=0.7, + bias_handler='DataHandlerNCforCC', + ) + fn = tmpdir_factory.mktemp('params').join('standard.h5') + _ = calc.run(fp_out=fn) # DataHandlerNCforCC requires a string fn = str(fn) @@ -96,29 +95,34 @@ def test_qdm_bc(fp_fut_cc): something fundamental is wrong. 
""" - calc = QuantileDeltaMappingCorrection(FP_NSRDB, FP_CC, fp_fut_cc, - 'ghi', 'rsds', - target=TARGET, shape=SHAPE, - bias_handler='DataHandlerNCforCC') + calc = QuantileDeltaMappingCorrection(FP_NSRDB, + FP_CC, + fp_fut_cc, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + bias_handler='DataHandlerNCforCC', + ) out = calc.run() # Guarantee that we have some actual values, otherwise most of the # remaining tests would be useless for v in out: - assert np.isfinite(out[v]).any(), "Something wrong, all CDFs are NaN." + assert np.isfinite(out[v]).any(), 'Something wrong, all CDFs are NaN.' # Check possible range for v in out: - assert np.nanmin(out[v]) > 0, f"{v} should be all greater than zero." - assert np.nanmax(out[v]) < 1300, f"{v} should be all less than 1300." + assert np.nanmin(out[v]) > 0, f'{v} should be all greater than zero.' + assert np.nanmax(out[v]) < 1300, f'{v} should be all less than 1300.' # Each location can be all finite or all NaN, but not both for v in out: tmp = np.isfinite(out[v].reshape(-1, out[v].shape[-1])) assert np.all( np.all(tmp, axis=1) == ~np.all(~tmp, axis=1) - ), f"For each location of {v} it should be all finite or nonte" + ), f'For each location of {v} it should be all finite or nonte' def test_parallel(fp_fut_cc): @@ -142,10 +146,10 @@ def test_parallel(fp_fut_cc): out_p = p.run(max_workers=2) for k in out_s.keys(): - assert k in out_p, f"Missing {k} in parallel run" + assert k in out_p, f'Missing {k} in parallel run' assert np.allclose( out_s[k], out_p[k], equal_nan=True - ), f"Different results for {k}" + ), f'Different results for {k}' def test_fill_nan(fp_fut_cc): @@ -159,14 +163,14 @@ def test_fill_nan(fp_fut_cc): # Without filling, at least one NaN or this test is useless. 
out = c.run(fill_extend=False) - assert np.all([np.isnan(v).any() for v in out.values()]), ( - "Assume at least one NaN value for each param" - ) + assert np.all( + [np.isnan(v).any() for v in out.values()] + ), 'Assume at least one NaN value for each param' out = c.run() - assert ~np.any([np.isnan(v) for v in out.values()]), ( - "All NaN values where supposed to be filled" - ) + assert ~np.any( + [np.isnan(v) for v in out.values()] + ), 'All NaN values where supposed to be filled' def test_save_file(tmp_path, fp_fut_cc): @@ -181,14 +185,14 @@ def test_save_file(tmp_path, fp_fut_cc): distance_upper_bound=0.7, bias_handler='DataHandlerNCforCC') - filename = os.path.join(tmp_path, "test_saving.hdf") + filename = os.path.join(tmp_path, 'test_saving.hdf') _ = calc.run(filename) # File was created os.path.isfile(filename) # A valid HDF5, can open and read - with h5py.File(filename, "r") as f: - assert "latitude" in f.keys() + with h5py.File(filename, 'r') as f: + assert 'latitude' in f.keys() def test_qdm_transform(dist_params): @@ -196,7 +200,7 @@ def test_qdm_transform(dist_params): WIP: Confirm it runs, but don't verify anything yet. """ data = np.ones((*FP_CC_LAT_LON.shape[:-1], 2)) - corrected = local_qdm_bc(data, FP_CC_LAT_LON, "ghi", "rsds", dist_params) + corrected = local_qdm_bc(data, FP_CC_LAT_LON, 'ghi', 'rsds', dist_params) assert not np.isnan(corrected).all(), "Can't compare if only NaN" assert not np.allclose(data, corrected, equal_nan=False) @@ -219,7 +223,7 @@ def test_qdm_transform_notrend(tmp_path, dist_params): no_trend=True) # Creates a new distribution with mo == mf - notrend_params = os.path.join(tmp_path, "notrend.hdf") + notrend_params = os.path.join(tmp_path, 'notrend.hdf') shutil.copyfile(dist_params, notrend_params) with h5py.File(notrend_params, 'r+') as f: f['bias_fut_rsds_params'][:] = f['bias_rsds_params'][:] @@ -256,7 +260,7 @@ def test_bc_identity(tmp_path, fp_fut_cc, dist_params): anything. Note that NaNs in any component, i.e. 
any dataset, would propagate into a NaN transformation. """ - ident_params = os.path.join(tmp_path, "identity.hdf") + ident_params = os.path.join(tmp_path, 'identity.hdf') shutil.copyfile(dist_params, ident_params) with h5py.File(ident_params, 'r+') as f: f['base_ghi_params'][:] = f['bias_fut_rsds_params'][:] @@ -280,7 +284,7 @@ def test_bc_identity_absolute(tmp_path, fp_fut_cc, dist_params): anything. Note that NaNs in any component, i.e. any dataset, would propagate into a NaN transformation. """ - ident_params = os.path.join(tmp_path, "identity.hdf") + ident_params = os.path.join(tmp_path, 'identity.hdf') shutil.copyfile(dist_params, ident_params) with h5py.File(ident_params, 'r+') as f: f['base_ghi_params'][:] = f['bias_fut_rsds_params'][:] @@ -304,7 +308,7 @@ def test_bc_model_constant(tmp_path, fp_fut_cc, dist_params): has an offset with historical observed, that same offset should be corrected in the target (future modeled). """ - offset_params = os.path.join(tmp_path, "offset.hdf") + offset_params = os.path.join(tmp_path, 'offset.hdf') shutil.copyfile(dist_params, offset_params) with h5py.File(offset_params, 'r+') as f: f['base_ghi_params'][:] = f['bias_fut_rsds_params'][:] - 10 @@ -328,7 +332,7 @@ def test_bc_trend(tmp_path, fp_fut_cc, dist_params): is a trend between modeled historical vs future, that same trend should be applied to correct """ - offset_params = os.path.join(tmp_path, "offset.hdf") + offset_params = os.path.join(tmp_path, 'offset.hdf') shutil.copyfile(dist_params, offset_params) with h5py.File(offset_params, 'r+') as f: f['base_ghi_params'][:] = f['bias_fut_rsds_params'][:] @@ -351,7 +355,7 @@ def test_bc_trend_same_hist(tmp_path, fp_fut_cc, dist_params): If there was no bias in historical (obs vs mod), there is nothing to correct, but trust the forecast. 
""" - offset_params = os.path.join(tmp_path, "offset.hdf") + offset_params = os.path.join(tmp_path, 'offset.hdf') shutil.copyfile(dist_params, offset_params) with h5py.File(offset_params, 'r+') as f: f['base_ghi_params'][:] = f['bias_fut_rsds_params'][:] - 10 @@ -385,7 +389,8 @@ def test_fwp_integration(tmp_path): input_files = [os.path.join(TEST_DATA_DIR, 'ua_test.nc'), os.path.join(TEST_DATA_DIR, 'va_test.nc'), os.path.join(TEST_DATA_DIR, 'orog_test.nc'), - os.path.join(TEST_DATA_DIR, 'zg_test.nc')] + os.path.join(TEST_DATA_DIR, 'zg_test.nc'), + ] n_samples = 101 quantiles = np.linspace(0, 1, n_samples) @@ -415,9 +420,9 @@ def test_fwp_integration(tmp_path): out_dir = os.path.join(tmp_path, 'st_gan') model.save(out_dir) - with h5py.File(bias_fp, "w") as f: - f.create_dataset("latitude", data=lat_lon[..., 0]) - f.create_dataset("longitude", data=lat_lon[..., 1]) + with h5py.File(bias_fp, 'w') as f: + f.create_dataset('latitude', data=lat_lon[..., 0]) + f.create_dataset('longitude', data=lat_lon[..., 1]) s = lat_lon.shape[:2] for k, v in params.items(): @@ -443,7 +448,8 @@ def test_fwp_integration(tmp_path): worker_kwargs=dict(max_workers=1)), out_pattern=os.path.join(tmp_path, 'out_{file_id}.nc'), worker_kwargs=dict(max_workers=1), - input_handler='DataHandlerNCforCC') + input_handler='DataHandlerNCforCC', + ) bc_strat = ForwardPassStrategy( input_files, model_kwargs={'model_dir': out_dir}, @@ -456,7 +462,8 @@ def test_fwp_integration(tmp_path): worker_kwargs=dict(max_workers=1), input_handler='DataHandlerNCforCC', bias_correct_method='local_qdm_bc', - bias_correct_kwargs=bias_correct_kwargs) + bias_correct_kwargs=bias_correct_kwargs, + ) for ichunk in range(strat.chunks): fwp = ForwardPass(strat, chunk_index=ichunk) From e0d8c568b13ff91afb241a7547ce88bd0586f29d Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Sun, 23 Jun 2024 18:51:23 -0600 Subject: [PATCH 49/92] test, clean: PresRat tests were already moved into its own module --- 
tests/bias/test_qdm_bias_correction.py | 87 -------------------------- 1 file changed, 87 deletions(-) diff --git a/tests/bias/test_qdm_bias_correction.py b/tests/bias/test_qdm_bias_correction.py index 76eb804b6..96df52503 100644 --- a/tests/bias/test_qdm_bias_correction.py +++ b/tests/bias/test_qdm_bias_correction.py @@ -12,10 +12,7 @@ from sup3r.models import Sup3rGan from sup3r.pipeline.forward_pass import ForwardPass, ForwardPassStrategy from sup3r.bias import ( - apply_zero_precipitation_rate, local_qdm_bc, - local_presrat_bc, - PresRat, QuantileDeltaMappingCorrection, ) from sup3r.preprocessing.data_handling import DataHandlerNC, DataHandlerNCforCC @@ -480,87 +477,3 @@ def test_fwp_integration(tmp_path): delta = bc_fwp.run_chunk() - fwp.run_chunk() assert delta[..., 0].mean() < 0, 'Predicted U should trend <0' assert delta[..., 1].mean() > 0, 'Predicted V should trend >0' - - -def test_apply_zero_precipitation_rate(): - data = np.array([5, 0.1, 3, 0.2, 1]) - out = apply_zero_precipitation_rate(data, 0.25) - - assert np.allclose(np.array([5.0, 0.0, 3, 0.2, 1.0]), out, equal_nan=True) - - -def test_apply_zero_precipitation_rate_nan(): - data = np.array([5, 0.1, np.nan, 0.2, 1]) - out = apply_zero_precipitation_rate(data, 0.25) - - assert np.allclose( - np.array([5.0, 0.0, np.nan, 0.2, 1.0]), out, equal_nan=True - ) - - -def test_apply_zero_precipitation_rate_2D(): - data = np.array( - [ - [5, 0.1, np.nan, 0.2, 1], - [5, 0.1, 3, 0.2, 1], - ] - ) - out = apply_zero_precipitation_rate(data, [0.25, 0.41]) - - assert np.allclose( - np.array([[5.0, 0.0, np.nan, 0.2, 1.0], [5.0, 0.0, 3, 0.0, 1.0]]), - out, - equal_nan=True, - ) - -@pytest.mark.skip() -def test_presrat(fp_fut_cc): - """Test PresRat correction procedure - - Basic standard run. Using only required arguments. If this fails, - something fundamental is wrong. 
- """ - calc = PresRat( - FP_NSRDB, - FP_CC, - fp_fut_cc, - 'ghi', - 'rsds', - target=TARGET, - shape=SHAPE, - bias_handler='DataHandlerNCforCC', - ) - - # A high zero_rate_threshold to gets at least something. - out = calc.run(max_workers=1, zero_rate_threshold=50) - - # Guarantee that we have some actual values, otherwise most of the - # remaining tests would be useless - for v in out: - assert np.isfinite(out[v]).any(), 'Something wrong, all CDFs are NaN.' - - # Check possible range - for v in out: - assert np.nanmin(out[v]) > 0, f'{v} should be all greater than zero.' - assert np.nanmax(out[v]) < 1300, f'{v} should be all less than 1300.' - - # Each location can be all finite or all NaN, but not both - for v in (v for v in out if len(out[v].shape) > 2): - tmp = np.isfinite(out[v].reshape(-1, *out[v].shape[2:])) - assert np.all( - np.all(tmp, axis=1) == ~np.all(~tmp, axis=1) - ), f'For each location of {v} it should be all finite or nonte' - - -@pytest.mark.skip() -def test_presrat_transform(presrat_params): - """ - WIP: Confirm it runs, but don't verify anything yet. - """ - data = np.ones((*FP_CC_LAT_LON.shape[:-1], 2)) - corrected = local_presrat_bc( - data, FP_CC_LAT_LON, 'ghi', 'rsds', presrat_params - ) - - assert not np.isnan(corrected).all(), "Can't compare if only NaN" - assert not np.allclose(data, corrected, equal_nan=False) From dd26ec421adbb6407245cda65fa9be4f4568a5ad Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 07:16:17 -0600 Subject: [PATCH 50/92] Locking numpy to < 2.0 It's safer to hold for now and wait for the ecosystem to stabilize before moving. 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ce1988db8..1920c4b53 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -259,7 +259,7 @@ dask = ">=2022.0" h5netcdf = ">=1.1.0" pillow = ">=10.0" matplotlib = ">=3.1" -numpy = ">=1.7.0" +numpy = "~=1.7" pandas = ">=2.0" scipy = ">=1.0.0" tensorflow = ">2.4,<2.16" From ca9123a4f3707b499895cfa50c267241b554cb47 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 07:20:30 -0600 Subject: [PATCH 51/92] doc: Updating one-line description of PresRat --- sup3r/bias/qdm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index 24af6df2e..11ec18304 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -608,7 +608,7 @@ def run( smooth_interior=0, zero_rate_threshold=0.01, ): - """Estimate the statistical distributions for each location + """Estimate the required information for PresRat correction Parameters ---------- @@ -630,7 +630,6 @@ def run( for each of the three given datasets. Each value has dimensions (lat, lon, n-parameters). """ - logger.debug('Calculate CDF parameters for QDM') logger.info( From 24e47d858256c52026b958c570eeef3c983ccec5 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 07:21:55 -0600 Subject: [PATCH 52/92] test: Updating fut_cc_notrend() to match the 'resource' Despite using the very same file, sup3r manipulate the dataset and expect some patterns without reinforcing it, leading to quiet errors. The modifications here match the expected dataset by sup3r so we can validate results. 
---
 tests/bias/test_presrat_bias_correction.py | 41 ++++++++++++++++------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py
index 046be9ae5..d8d5e2e94 100644
--- a/tests/bias/test_presrat_bias_correction.py
+++ b/tests/bias/test_presrat_bias_correction.py
@@ -119,24 +119,43 @@ def fp_fut_cc_notrend(tmpdir_factory):
 
 @pytest.fixture(scope='module')
 def fut_cc_notrend(fp_fut_cc_notrend):
+    """Extract the dataset from fp_fut_cc_notrend
+
+    The internal process to read such dataset is way more complex than just
+    reading it and there are some transformations. This function must provide
+    a dataset compatible with the one expected from the standard processing.
+    """
     ds = xr.open_dataset(fp_fut_cc_notrend)
+
+    # Although it is the same file, somewhere in the data reading process
+    # the longitude is transformed to the standard [-180 to 180] and it is
+    # expected to be like that everywhere.
+    ds['lon'] = ds['lon'] - 360
+
+    # Operating with numpy arrays impose a fixed dimensions order
+    # Somehow this compute is required here.
     da = ds['rsds'].compute().transpose('lat', 'lon', 'time')
-    # Unfortunatelly, _get_factors() assume latitude as descending
+
+    # The _get_factors() assume latitude as descending and it will
+    # silently return wrong values otherwise.
da = da.sortby('lat', ascending=False) + latlon = np.stack( - xr.broadcast(da['lat'], da['lon'] - 360), axis=-1 - ).astype('float32') - for ii in range(4): - for jj in range(4): + xr.broadcast(da['lat'], da['lon']), axis=-1 + ) + # Confirm that dataset order is consistent + # Somewhere in pipeline latlon are downgraded to f32 + assert np.allclose(latlon.astype('float32'), FP_CC_LAT_LON) + + # Verify data alignment in comparison with expected for FP_CC + for ii in range(ds.lat.size): + for jj in range(ds.lon.size): np.allclose( - da.sel(lat=latlon[ii, jj, 0], method='nearest').sel( - lon=latlon[ii, jj, 1] + 360, method='nearest' - )[0], - da.data[ii, jj, 0], + da.sel(lat=latlon[ii, jj, 0]).sel( lon=latlon[ii, jj, 1]), + da.data[ii, jj], ) - assert np.allclose(latlon, FP_CC_LAT_LON) - return da.compute() + return da @pytest.fixture(scope='module') From 3f46c6957df50b287b6fb978fd0ab82eb6e03439 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 08:14:41 -0600 Subject: [PATCH 53/92] test, doc: Better explaing the expected effect of NaNs --- tests/bias/test_presrat_bias_correction.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index d8d5e2e94..42c696a1a 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -247,17 +247,18 @@ def test_zero_precipitation_rate_extremes(): def test_zero_precipitation_rate_nanonly(): """Zero rate estimate with only NaNs gives NaN""" f = ZeroRateMixin().zero_precipitation_rate - arr = np.arange(10) + arr = np.nan * np.zeros(10) # All NaN gives NaN rate - rate = f(np.nan * arr) + rate = f(arr) assert np.isnan(rate) def test_zero_precipitation_rate_nan(): """Zero rate estimate with NaNs - NaN shouldn't be counted to find the rate. + NaN shouldn't be counted to find the rate. 
Thus an array with NaNs should + give the same results if the NaN were removed before the calculation. """ f = ZeroRateMixin().zero_precipitation_rate arr = np.arange(10) From 0571a14a82d3e148336790ddec8715ca54ca2839 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 08:35:38 -0600 Subject: [PATCH 54/92] feat: PresRat.write_outputs() Recording it before the refactoring and integration with QDM. --- sup3r/bias/qdm.py | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index 11ec18304..7eb8f9b34 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -17,6 +17,7 @@ sample_q_linear, sample_q_log, ) +from typing import Optional import xarray as xr from sup3r.preprocessing.data_handling.base import DataHandler @@ -784,3 +785,54 @@ def windowed_quantiles( for d0 in day_of_year ) return xr.merge(q) + + + def write_outputs(self, fp_out: str, + out: dict = None, + extra_attrs: Optional[dict] = None): + """Write outputs to an .h5 file. + + Parameters + ---------- + fp_out : str | None + An HDF5 filename to write the estimated statistical distributions. + out : dict, optional + A dictionary with the three statistical distribution parameters. + If not given, it uses :attr:`.out`. + extra_attrs: dict, optional + Extra attributes to be exported together with the dataset. + + Examples + -------- + >>> mycalc = PresRat(...) + >>> mycalc.write_outputs(fp_out="myfile.h5", out=mydictdataset, + ... 
extra_attrs={'zero_rate_threshold': 80}) + """ + + out = out or self.out + + if fp_out is not None: + if not os.path.exists(os.path.dirname(fp_out)): + os.makedirs(os.path.dirname(fp_out), exist_ok=True) + + with h5py.File(fp_out, 'w') as f: + # pylint: disable=E1136 + lat = self.bias_dh.lat_lon[..., 0] + lon = self.bias_dh.lat_lon[..., 1] + f.create_dataset('latitude', data=lat) + f.create_dataset('longitude', data=lon) + for dset, data in out.items(): + f.create_dataset(dset, data=data) + + for k, v in self.meta.items(): + f.attrs[k] = json.dumps(v) + f.attrs['dist'] = self.dist + f.attrs['sampling'] = self.sampling + f.attrs['log_base'] = self.log_base + f.attrs['base_fps'] = self.base_fps + f.attrs['bias_fps'] = self.bias_fps + f.attrs['bias_fut_fps'] = self.bias_fut_fps + if extra_attrs is not None: + for a, v in extra_attrs.items(): + f.attrs[a] = v + logger.info('Wrote quantiles to file: {}'.format(fp_out)) From d869c3106f33672f926a72a8b4566e19364f32ed Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 08:49:58 -0600 Subject: [PATCH 55/92] fix, doc: Weird string literal issue It looks like a bug in pylinter, but it's easier to just avoid it. --- tests/bias/test_presrat_bias_correction.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 42c696a1a..ed04f77cd 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -472,8 +472,9 @@ def test_presrat(fp_fut_cc): def test_presrat_transform(presrat_params, fut_cc): - """ - WIP: Confirm it runs, but don't verify anything yet. + """A standard run with local_presrat_bc + + WIP: Confirm it runs only. 
""" data = fut_cc.values time = pd.to_datetime(fut_cc.time) @@ -492,7 +493,7 @@ def test_presrat_transform(presrat_params, fut_cc): def test_presrat_transform_nochanges(presrat_nochanges_params, fut_cc_notrend): """The correction should result in no changes at all - Note that there are a lot of implicit transformations, so we can't expect + Note that there are a lot of implicit transformations, so we cannot expect to be able to esily compare all gridpoints. The corrected output must be the same if: From 2fae27d1f68702cb7fc40c1626e304040b9651bb Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 09:25:11 -0600 Subject: [PATCH 56/92] style: Sorting imports --- sup3r/bias/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sup3r/bias/__init__.py b/sup3r/bias/__init__.py index c77171070..ac5ad9f34 100644 --- a/sup3r/bias/__init__.py +++ b/sup3r/bias/__init__.py @@ -2,11 +2,11 @@ from .bias_calc import (LinearCorrection, MonthlyLinearCorrection, MonthlyScalarCorrection, SkillAssessment) -from .qdm import PresRat, QuantileDeltaMappingCorrection from .bias_transforms import (apply_zero_precipitation_rate, global_linear_bc, local_linear_bc, local_qdm_bc, local_presrat_bc, monthly_local_linear_bc) +from .qdm import PresRat, QuantileDeltaMappingCorrection __all__ = [ "apply_zero_precipitation_rate", From 82a98140e59251865d7e29aec0d82051fb0d2f97 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 10:29:20 -0600 Subject: [PATCH 57/92] test, doc: More info in PresRat tests --- tests/bias/test_presrat_bias_correction.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index ed04f77cd..7429c4347 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -1,13 +1,16 @@ """Validating PresRat correction procedures -Relevant sources used in the tests: -- 
fp_fut_cc: Future dataset based on FP_CC + an offset + small noise
-- fp_fut_cc_notrend: Future dataset identical to FP_CC
-- presrat_params: Parameters of reference to test PresRat
-- presrat_notrend_params: Bias historical is identical to bias reference
-  (historical). Thus, there is no trend in the model.
-- presrat_identity_params: All distributions are identical (oh & mf) to mh,
+Relevant resources used in the tests:
+- FP_CC: Filename of standard biased dataset.
+- fut_cc: Future dataset sample based on FP_CC + an offset + small noise
+- fp_fut_cc: Filename to `fut_cc`.
+- fut_cc_notrend: Future dataset identical to FP_CC, i.e. no trend.
+- fp_fut_cc_notrend: Filename to fut_cc_notrend.
+- presrat_params: Parameters of reference to test PresRat (using fp_fut_cc).
+- presrat_notrend_params: Quantiles of future (mf) are identical to bias
+  reference (mh). Thus, there is no trend in the model.
+- presrat_identity_params: All distributions (oh & mf) are identical to mh,
   i.e. observations equal to model that doesn't change on time.
 - presrat_nochanges_params: Like presrat_identity_params, but also all
   zero_rate are zeros, i.e. no values should be forced to be zero.
@@ -293,7 +296,7 @@ def test_zero_precipitation_rate_nan():
 def test_parallel(fp_fut_cc, threshold):
     """Running in parallel must not alter results
 
-    Check with different thresholds that will result in different zero rates.
+    Check with different thresholds, which will result in different zero rates.
     """
     s = PresRat(
         FP_NSRDB,

From 8b402eaeede271e07e436b8e1099d071cd6eab2f Mon Sep 17 00:00:00 2001
From: Gui Castelao
Date: Mon, 24 Jun 2024 10:41:51 -0600
Subject: [PATCH 58/92] test: Improving test_presrat_calc()

Checking more details in the output.
--- tests/bias/test_presrat_bias_correction.py | 28 +++++++++++++++------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 7429c4347..ac81ac910 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -39,6 +39,9 @@ FP_NSRDB = os.path.join(TEST_DATA_DIR, 'test_nsrdb_co_2018.h5') FP_CC = os.path.join(TEST_DATA_DIR, 'rsds_test.nc') FP_CC_LAT_LON = DataHandlerNC(FP_CC, 'rsds').lat_lon +# A reference zero rate threshold. This might change in the future to result +# in edge cases. +ZR_THRESHOLD = 80 with xr.open_dataset(FP_CC) as fh: MIN_LAT = np.min(fh.lat.values.astype(np.float32)) @@ -331,11 +334,12 @@ def test_parallel(fp_fut_cc, threshold): ), f'Different results for {k}' -@pytest.mark.parametrize('threshold', [0, 50, 1e6]) -def test_presrat_zero_rate(fp_fut_cc, threshold): - """Estimate zero_rate within PresRat.run() +def test_presrat_calc(fp_fut_cc): + """Standard PresRat (pre) calculation + + Estimate the required parameters with a standard setup. - Use thresholds that gives 0%, 100%, and something between. + WIP: Just confirm it runs, but not checking much yet. """ calc = PresRat( FP_NSRDB, @@ -348,15 +352,21 @@ def test_presrat_zero_rate(fp_fut_cc, threshold): bias_handler='DataHandlerNCforCC', ) - out = calc.run(zero_rate_threshold=threshold) + out = calc.run(zero_rate_threshold=ZR_THRESHOLD) + + expected_vars = ['bias_rsds_params', 'bias_fut_rsds_params', + 'base_ghi_params', 'ghi_zero_rate', 'rsds_mean_mh', + 'rsds_mean_mf'] + sref = FP_CC_LAT_LON.shape[:2] + for v in expected_vars: + assert v in out, f"Missing {v} in the calculated output" + assert out[v].shape[:2] == sref, "Doesn't match expected spatial shape" + # This is only true because fill and extend are applied by default. 
+ assert np.all(np.isfinite(out[v])), f"Unexpected NaN for {v}" - assert 'ghi_zero_rate' in out, 'Missing ghi_zero_rate in calc output' zero_rate = out['ghi_zero_rate'] - assert np.all(np.isfinite(zero_rate)), 'Unexpected NaN for ghi_zero_rate' assert np.all((zero_rate >= 0) & (zero_rate <= 1)), 'Out of range [0, 1]' - if threshold == 0: - assert np.all(zero_rate >= 0), 'It should be rate 0 for threshold==0' """ calc = PresRat( From 9dd55e9093a6be0accf9a05c25add1f01b011cba Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 11:18:11 -0600 Subject: [PATCH 59/92] test, refact: Combining tests --- tests/bias/test_presrat_bias_correction.py | 40 ++++++++-------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index ac81ac910..9136227ed 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -368,31 +368,14 @@ def test_presrat_calc(fp_fut_cc): assert np.all((zero_rate >= 0) & (zero_rate <= 1)), 'Out of range [0, 1]' - """ - calc = PresRat( - FP_NSRDB, - FP_CC, - fp_fut_cc, - 'ghi', - 'rsds', - target=TARGET, - shape=SHAPE, - bias_handler='DataHandlerNCforCC', - ) - - # Physically non sense threshold. - out = calc.run(zero_rate_threshold=0) - - assert 'ghi_zero_rate' in out, 'Missing ghi_zero_rate in calc output' - zero_rate = out['ghi_zero_rate'] - assert np.all(np.isfinite(zero_rate)), 'Unexpected NaN for ghi_zero_rate' - assert np.all(zero_rate == 0), 'Threshold=0, rate should be 0' - +@pytest.mark.parametrize('threshold', [0, 50, 1e6]) +def test_presrat_zero_rate(fp_fut_cc, threshold): + """Estimate zero_rate within PresRat.run() -def test_presrat_zero_rate_threshold_1e9(fp_fut_cc): - """Estimate zero_rate within PresRat.run(), zero threshold + Use thresholds that gives 0%, 100%, and something between. - This should give a zero rate answer, since all values are lower. 
+ Notes: + - Rate should be zero if threshold is zero for this dataset. """ calc = PresRat( FP_NSRDB, @@ -405,13 +388,18 @@ def test_presrat_zero_rate_threshold_1e9(fp_fut_cc): bias_handler='DataHandlerNCforCC', ) - # Physically non sense threshold. - out = calc.run(zero_rate_threshold=1e9) + out = calc.run(zero_rate_threshold=threshold) assert 'ghi_zero_rate' in out, 'Missing ghi_zero_rate in calc output' zero_rate = out['ghi_zero_rate'] + # True for this dataset because fill and extend are applied by default. assert np.all(np.isfinite(zero_rate)), 'Unexpected NaN for ghi_zero_rate' - assert np.all(zero_rate == 1), 'Threshold=0, rate should be 0' + assert np.all((zero_rate >= 0) & (zero_rate <= 1)), 'Out of range [0, 1]' + + if threshold <= 0: + assert np.all(zero_rate == 0), 'It should be rate 0 for threshold==0' + elif threshold >= 1e4: + assert np.all(zero_rate == 1), 'It should be rate 1 for threshold>=1e4' def test_apply_zero_precipitation_rate(): From 5a439ac5506c52119c90ba5c8ac3d2e1357744ff Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 11:22:23 -0600 Subject: [PATCH 60/92] style: Code formating --- sup3r/bias/qdm.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index 7eb8f9b34..b05702150 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -192,7 +192,8 @@ class to be retrieved from the rex/sup3r library. If a base_handler_kwargs=base_handler_kwargs, bias_handler_kwargs=bias_handler_kwargs, decimals=decimals, - match_zero_rate=match_zero_rate) + match_zero_rate=match_zero_rate, + ) self.bias_fut_fps = bias_fut_fps @@ -203,7 +204,8 @@ class to be retrieved from the rex/sup3r library. 
If a target=self.target, shape=self.shape, val_split=0.0, - **self.bias_handler_kwargs) + **self.bias_handler_kwargs, + ) def _init_out(self): """Initialize output arrays `self.out` @@ -377,7 +379,8 @@ def run(self, daily_reduction='avg', fill_extend=True, smooth_extend=0, - smooth_interior=0): + smooth_interior=0, + ): """Estimate the statistical distributions for each location Parameters From 7ebbe877f0035b851df44cc766da5c911b19bcef Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 11:48:31 -0600 Subject: [PATCH 61/92] test, refact: Conforming normalizing dataset sup3r expects data in a certain way and implicitly apply some transformations, so it is required to conform with that to be able to compare answers. --- tests/bias/test_presrat_bias_correction.py | 40 ++++++++++++---------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 9136227ed..a6729a389 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -85,29 +85,33 @@ def fut_cc(fp_fut_cc): axis=-1).astype('float32') """ ds = xr.open_dataset(fp_fut_cc) - # This compute here is required. + + # Operating with numpy arrays impose a fixed dimensions order + # This compute is required here. da = ds['rsds'].compute().transpose('lat', 'lon', 'time') - # Unfortunatelly, _get_factors() assume latitude as descending + + # The _get_factors() assume latitude as descending and it will + # silently return wrong values otherwise. 
da = da.sortby('lat', ascending=False) - # data = da.data - # time = pd.to_datetime(da.time) + latlon = np.stack( xr.broadcast(da['lat'], da['lon'] - 360), axis=-1 - ).astype('float32') - # latlon = np.stack(np.meshgrid(da['lon'] - 360, da['lat']), axis=-1)[ - # :, :, ::-1 - # ].astype('float32') - for ii in range(4): - for jj in range(4): + ) + # Confirm that dataset order is consistent + # Somewhere in pipeline latlon are downgraded to f32 + assert np.allclose(latlon.astype('float32'), FP_CC_LAT_LON) + + # Verify data alignment in comparison with expected for FP_CC + for ii in range(ds.lat.size): + for jj in range(ds.lon.size): assert np.allclose( - da.sel(lat=latlon[ii, jj, 0], method='nearest').sel( - lon=latlon[ii, jj, 1] + 360, method='nearest' - )[0], - da.data[ii, jj, 0], + da.sel(lat=latlon[ii, jj, 0]).sel( + lon=latlon[ii, jj, 1] + 360 + ), + da.data[ii, jj], ) - assert np.allclose(latlon, FP_CC_LAT_LON) - return da.compute() + return da @pytest.fixture(scope='module') @@ -139,7 +143,7 @@ def fut_cc_notrend(fp_fut_cc_notrend): ds['lon'] = ds['lon'] - 360 # Operating with numpy arrays impose a fixed dimensions order - # Somehow this compute is required here. + # This compute is required here. 
da = ds['rsds'].compute().transpose('lat', 'lon', 'time') # The _get_factors() assume latitude as descending and it will @@ -157,7 +161,7 @@ def fut_cc_notrend(fp_fut_cc_notrend): for ii in range(ds.lat.size): for jj in range(ds.lon.size): np.allclose( - da.sel(lat=latlon[ii, jj, 0]).sel( lon=latlon[ii, jj, 1]), + da.sel(lat=latlon[ii, jj, 0]).sel(lon=latlon[ii, jj, 1]), da.data[ii, jj], ) From 0657edcb0c3bdcc5c629db6b5215484f9f302b15 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 12:47:27 -0600 Subject: [PATCH 62/92] style: Missing space --- tests/bias/test_presrat_bias_correction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index a6729a389..304f34b15 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -528,7 +528,7 @@ def test_presrat_transform_nochanges(presrat_nochanges_params, fut_cc_notrend): # that can't be easily reproduced for validation. One solution is to # allow the implicit interpolation but compare only where non-interpolated # values are available. Let's call it the 'Magic index'. 
- idx = (slice(1,3), slice(0,3)) + idx = (slice(1, 3), slice(0, 3)) assert np.allclose( data[idx], corrected[idx], equal_nan=False ), "This case shouldn't modify the data" From a98838a7a2a6718a10c610f9da15e339292be4d7 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 13:54:02 -0600 Subject: [PATCH 63/92] fix, feat: Missing to include presrat_notrend_params() --- tests/bias/test_presrat_bias_correction.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 304f34b15..8a7ce30ee 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -198,16 +198,20 @@ def presrat_params(tmpdir_factory, fp_fut_cc): @pytest.fixture(scope='module') -def presrat_params(tmpdir_factory, fp_fut_cc): - """PresRat parameters for standard datasets +def presrat_notrend_params(tmpdir_factory, fp_fut_cc_notrend): + """No change in time - Use the standard datasets to estimate the distributions and save - in a temporary place to be re-used + The bias_fut distribution is equal to bias (mod_his), so no change in + time. + + We could save some overhead here copying fp_fut_cc and replacing some + values there. That was done before but missed some variables resulting + in errors. """ calc = PresRat( FP_NSRDB, FP_CC, - fp_fut_cc, + fp_fut_cc_notrend, 'ghi', 'rsds', target=TARGET, @@ -215,10 +219,8 @@ def presrat_params(tmpdir_factory, fp_fut_cc): distance_upper_bound=0.7, bias_handler='DataHandlerNCforCC', ) - fn = tmpdir_factory.mktemp('params').join('presrat.h5') - # Physically non-sense threshold choosed to result in gridpoints with and - # without zero rate correction for the given testing dataset. 
- _ = calc.run(zero_rate_threshold=80, fp_out=fn) + fn = tmpdir_factory.mktemp('params').join('presrat_notrend.h5') + _ = calc.run(zero_rate_threshold=ZR_THRESHOLD, fp_out=fn) # DataHandlerNCforCC requires a string fn = str(fn) From de068e54602dc507ceaf3d41930159c8e877e567 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 13:59:07 -0600 Subject: [PATCH 64/92] style: Unused import --- tests/bias/test_presrat_bias_correction.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 8a7ce30ee..da8e7e875 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -21,7 +21,6 @@ import os import shutil -import h5py import numpy as np import pandas as pd import pytest From b87c7472b4690e83e016694fa6ca97c0a8317324 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 14:40:32 -0600 Subject: [PATCH 65/92] test, fix: Missing a few fixtures Figuring out the minimum requirements to run. 
--- tests/bias/test_presrat_bias_correction.py | 58 ++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index da8e7e875..025790c09 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -21,6 +21,7 @@ import os import shutil +import h5py import numpy as np import pandas as pd import pytest @@ -227,6 +228,63 @@ def presrat_notrend_params(tmpdir_factory, fp_fut_cc_notrend): return fn +@pytest.fixture(scope='module') +def presrat_identity_params(tmpdir_factory, presrat_params): + """Identical distribution""" + + fn = tmpdir_factory.mktemp('params').join('presrat_identity.h5') + shutil.copyfile(presrat_params, fn) + + with h5py.File(fn, 'r+') as f: + f['bias_rsds_params'][:] = f['bias_fut_rsds_params'][:] + f['base_ghi_params'][:] = f['bias_fut_rsds_params'][:] + f.flush() + + return str(fn) + + +@pytest.fixture(scope='module') +def presrat_nochanges_params(tmpdir_factory, presrat_params): + """Identical distribution and no zero rate + + All distributions are identical and zero rate changes are all zero, + therefore, the PresRat correction should not change anything. + + Note that distributions are based on bias_fut, so it is assumed that the + test cases will be datasets coherent with that bias_fut distribution, + otherwise it could lead to differences if out of that scale. 
+ """ + fn = tmpdir_factory.mktemp('params').join('presrat_nochanges.h5') + shutil.copyfile(presrat_params, fn) + + with h5py.File(fn, 'r+') as f: + f['bias_fut_rsds_params'][:] = f['bias_rsds_params'][:] + f['base_ghi_params'][:] = f['bias_rsds_params'][:] + f['ghi_zero_rate'][:] *= 0 + f['rsds_mean_mf'][:] = f['rsds_mean_mh'][:] + f.flush() + + return str(fn) + + +@pytest.fixture(scope='module') +def presrat_nozeros_params(tmpdir_factory, presrat_params): + """PresRat parameters without zero rate correction + + The same standard parameters but all zero_rate values are equal to zero, + which means that zero percent, i.e. none, of the values should be forced + to be zero. + """ + fn = tmpdir_factory.mktemp('params').join('presrat_nozeros.h5') + shutil.copyfile(presrat_params, fn) + + with h5py.File(fn, 'r+') as f: + f['ghi_zero_rate'][:] *= 0 + f.flush() + + return str(fn) + + def test_zero_precipitation_rate(): """Zero rate estimate with extremme thresholds""" f = ZeroRateMixin().zero_precipitation_rate From fa32ff89ec903a90757754806fc7f2b6874496f1 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 14:56:38 -0600 Subject: [PATCH 66/92] doc: Missing documentation on testing functions --- tests/bias/test_presrat_bias_correction.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 025790c09..a1ad72ed2 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -466,6 +466,7 @@ def test_presrat_zero_rate(fp_fut_cc, threshold): def test_apply_zero_precipitation_rate(): + """Reinforce the zero precipitation rate, standard run""" data = np.array([[[5, 0.1, 3, 0.2, 1]]]) out = apply_zero_precipitation_rate(data, np.array([[[0.25]]])) @@ -473,6 +474,7 @@ def test_apply_zero_precipitation_rate(): def test_apply_zero_precipitation_rate_nan(): + """Validate with NaN in the input""" data = np.array([[[5, 0.1, np.nan, 
0.2, 1]]]) out = apply_zero_precipitation_rate(data, np.array([[[0.25]]])) @@ -480,6 +482,7 @@ def test_apply_zero_precipitation_rate_nan(): def test_apply_zero_precipitation_rate_2D(): + """Validate a 2D input""" data = np.array( [ [ From c9169c4464a132e91a54f6a3ae232cb769f1fde7 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 14:58:04 -0600 Subject: [PATCH 67/92] test: Global zero rate threshold (ZR_THRESHOLD) --- tests/bias/test_presrat_bias_correction.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index a1ad72ed2..2357ededc 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -39,8 +39,8 @@ FP_NSRDB = os.path.join(TEST_DATA_DIR, 'test_nsrdb_co_2018.h5') FP_CC = os.path.join(TEST_DATA_DIR, 'rsds_test.nc') FP_CC_LAT_LON = DataHandlerNC(FP_CC, 'rsds').lat_lon -# A reference zero rate threshold. This might change in the future to result -# in edge cases. +# A reference zero rate threshold that might not make sense physically but for +# testing purposes only. This might change in the future to force edge cases. ZR_THRESHOLD = 80 with xr.open_dataset(FP_CC) as fh: @@ -189,7 +189,7 @@ def presrat_params(tmpdir_factory, fp_fut_cc): fn = tmpdir_factory.mktemp('params').join('presrat.h5') # Physically non-sense threshold choosed to result in gridpoints with and # without zero rate correction for the given testing dataset. 
- _ = calc.run(max_workers=1, zero_rate_threshold=80, fp_out=fn) + _ = calc.run(max_workers=1, zero_rate_threshold=ZR_THRESHOLD, fp_out=fn) # DataHandlerNCforCC requires a string fn = str(fn) From b5f6d94168cf667f9973c44d446212b13f7a7002 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 14:58:57 -0600 Subject: [PATCH 68/92] clean: Removing ideas for later --- tests/bias/test_presrat_bias_correction.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 2357ededc..e5b38d522 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -337,27 +337,6 @@ def test_zero_precipitation_rate_nan(): assert r1 == r2 -""" - breakpoint() - - # Physically non sense threshold. - out = calc.run(zero_rate_threshold=0) - - assert 'ghi_zero_rate' in out, 'Missing ghi_zero_rate in calc output' - zero_rate = out['ghi_zero_rate'] - assert np.all(np.isfinite(zero_rate)), "Unexpected NaN for ghi_zero_rate" - assert np.all(zero_rate==0), "It should be all zero percent" - - # Physically non sense threshold. - out = calc.run(zero_rate_threshold=1e6) - - assert 'ghi_zero_rate' in out, 'Missing ghi_zero_rate in calc output' - zero_rate = out['ghi_zero_rate'] - assert np.all(np.isfinite(zero_rate)), "Unexpected NaN for ghi_zero_rate" - assert np.all(zero_rate==1), "It should be all zero percent" -""" - - @pytest.mark.parametrize('threshold', [0, 50, 1e6]) def test_parallel(fp_fut_cc, threshold): """Running in parallel must not alter results From 9647c530f76a372db69e22fd91abb27e238d531d Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 24 Jun 2024 20:50:45 -0600 Subject: [PATCH 69/92] Bump Python to 3.11.9 It should work fine with any Python 3.11. 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1920c4b53..67832510e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -253,7 +253,7 @@ channels = ["conda-forge", "anaconda", "main"] platforms = ["osx-arm64", "linux-64"] [tool.pixi.dependencies] -python = "==3.11" +python = "~=3.11.0" cftime = ">=1.6.2" dask = ">=2022.0" h5netcdf = ">=1.1.0" From 66376f253ba0f003f76fc233c860c739dcf733f5 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Wed, 26 Jun 2024 16:01:44 -0600 Subject: [PATCH 70/92] style: Conforming with sup3r style --- sup3r/bias/qdm.py | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index b05702150..ac757e2f1 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -364,14 +364,13 @@ def write_outputs(self, fp_out, out=None): for k, v in self.meta.items(): f.attrs[k] = json.dumps(v) - f.attrs["dist"] = self.dist - f.attrs["sampling"] = self.sampling - f.attrs["log_base"] = self.log_base - f.attrs["base_fps"] = self.base_fps - f.attrs["bias_fps"] = self.bias_fps - f.attrs["bias_fut_fps"] = self.bias_fut_fps - logger.info( - 'Wrote quantiles to file: {}'.format(fp_out)) + f.attrs['dist'] = self.dist + f.attrs['sampling'] = self.sampling + f.attrs['log_base'] = self.log_base + f.attrs['base_fps'] = self.base_fps + f.attrs['bias_fps'] = self.bias_fps + f.attrs['bias_fut_fps'] = self.bias_fut_fps + logger.info('Wrote quantiles to file: {}'.format(fp_out)) def run(self, fp_out=None, @@ -528,10 +527,16 @@ def _init_out(self): super()._init_out() shape = (*self.bias_gid_raster.shape, 1) - self.out[f'{self.base_dset}_zero_rate'] = np.full(shape, np.nan, np.float32) + self.out[f'{self.base_dset}_zero_rate'] = np.full(shape, + np.nan, + np.float32) shape = (*self.bias_gid_raster.shape, 12) - self.out[f'{self.bias_feature}_mean_mh'] = np.full(shape, np.nan, np.float32) - 
self.out[f'{self.bias_feature}_mean_mf'] = np.full(shape, np.nan, np.float32) + self.out[f'{self.bias_feature}_mean_mh'] = np.full(shape, + np.nan, + np.float32) + self.out[f'{self.bias_feature}_mean_mf'] = np.full(shape, + np.nan, + np.float32) # pylint: disable=W0613 @classmethod @@ -595,8 +600,8 @@ def _run_single( mh = np.full(12, np.nan, np.float32) mf = np.full(12, np.nan, np.float32) for m in range(12): - mh[m] = bias_data[bias_ti.month==(m + 1)].mean() - mf[m] = bias_fut_data[bias_fut_ti.month==(m + 1)].mean() + mh[m] = bias_data[bias_ti.month == (m + 1)].mean() + mf[m] = bias_fut_data[bias_fut_ti.month == (m + 1)].mean() out[f'{bias_feature}_mean_mh'] = mh out[f'{bias_feature}_mean_mf'] = mf @@ -680,8 +685,8 @@ def run( log_base=self.log_base, base_dh_inst=self.base_dh, zero_rate_threshold=zero_rate_threshold, - bias_ti = self.bias_fut_dh.time_index, - bias_fut_ti = self.bias_fut_dh.time_index, + bias_ti=self.bias_fut_dh.time_index, + bias_fut_ti=self.bias_fut_dh.time_index, ) for key, arr in single_out.items(): self.out[key][raster_loc] = arr @@ -725,8 +730,8 @@ def run( n_samples=self.n_quantiles, log_base=self.log_base, zero_rate_threshold=zero_rate_threshold, - bias_ti = self.bias_fut_dh.time_index, - bias_fut_ti = self.bias_fut_dh.time_index, + bias_ti=self.bias_fut_dh.time_index, + bias_fut_ti=self.bias_fut_dh.time_index, ) futures[future] = raster_loc @@ -749,7 +754,11 @@ def run( ) self.zero_rate_threshold = zero_rate_threshold - self.write_outputs(fp_out, self.out, extra_attrs={'zero_rate_threshold': zero_rate_threshold}) + extra_attrs = {'zero_rate_threshold': zero_rate_threshold} + self.write_outputs(fp_out, + self.out, + extra_attrs=extra_attrs, + ) return copy.deepcopy(self.out) From bd026d11d58edc5468fe9b244b06fca255cbdfb2 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 1 Jul 2024 13:04:49 -0600 Subject: [PATCH 71/92] style: UP039 --- sup3r/bias/mixins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/sup3r/bias/mixins.py b/sup3r/bias/mixins.py index be225b4d1..f047b6875 100644 --- a/sup3r/bias/mixins.py +++ b/sup3r/bias/mixins.py @@ -94,7 +94,7 @@ def fill_and_smooth(self, return out -class ZeroRateMixin(): +class ZeroRateMixin: """Estimate zero rate From c35f911b74b5c12da2b8c97c7a6191c67b091431 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 1 Jul 2024 13:10:17 -0600 Subject: [PATCH 72/92] fix: Wrong type Thanks to @bnb32 for noticing that! --- sup3r/bias/bias_transforms.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sup3r/bias/bias_transforms.py b/sup3r/bias/bias_transforms.py index 189de0cff..1010ab73e 100644 --- a/sup3r/bias/bias_transforms.py +++ b/sup3r/bias/bias_transforms.py @@ -670,8 +670,8 @@ def get_spatial_bc_presrat(lat_lon: np.array, return params, cfg -def local_presrat_bc(data: np.array, - time: np.array, +def local_presrat_bc(data: np.ndarray, + time: np.ndarray, lat_lon: np.array, base_dset: str, feature_name: str, From 6679323f27b8e479a23296d61d8061c666be5fd0 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 1 Jul 2024 13:18:19 -0600 Subject: [PATCH 73/92] doc: local_presrat_bc() Initializing documentation but more work to be done here. --- sup3r/bias/bias_transforms.py | 39 +++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/sup3r/bias/bias_transforms.py b/sup3r/bias/bias_transforms.py index 1010ab73e..a894b42b8 100644 --- a/sup3r/bias/bias_transforms.py +++ b/sup3r/bias/bias_transforms.py @@ -680,6 +680,45 @@ def local_presrat_bc(data: np.ndarray, threshold=0.1, relative=True, no_trend=False): + """Bias correction using PresRat + + Parameters + ---------- + data : np.ndarray + Sup3r input data to be bias corrected, assumed to be 3D with shape + (spatial, spatial, temporal) for a single feature. + lat_lon : ndarray + Array of latitudes and longitudes for the domain to bias correct + (n_lats, n_lons, 2) + base_dset : + Name of feature that is used as (historical) reference. 
Dataset with + names "base_{base_dset}_params" will be retrieved. + feature_name : str + Name of feature that is being corrected. Datasets with names + "bias_{feature_name}_params" and "bias_fut_{feature_name}_params" will + be retrieved. + bias_fp : str + Filepath to statistical distributions file from the bias calc module. + Must have datasets "bias_{feature_name}_params", + "bias_fut_{feature_name}_params", and "base_{base_dset}_params" that + are the parameters to define the statistical distributions to be used + to correct the given `data`. + lr_padded_slice : tuple | None + Tuple of length four that slices (spatial_1, spatial_2, temporal, + features) where each tuple entry is a slice object for that axes. + Note that if this method is called as part of a sup3r forward pass, the + lr_padded_slice will be included automatically in the kwargs for the + active chunk. If this is None, no slicing will be done and the full + bias correction source shape will be used. + no_trend: bool, default=False + An option to ignore the trend component of the correction, thus + resulting in an ordinary Quantile Mapping, i.e. corrects the bias by + comparing the distributions of the biased dataset with a reference + datasets. See + ``params_mf`` of :class:`rex.utilities.bc_utils.QuantileDeltaMapping`. + Note that this assumes that params_mh is the data distribution + representative for the target data. 
+ """ params, cfg = get_spatial_bc_presrat(lat_lon, base_dset, From d6ffc84d4c005e24bf83516c35a57daedca7b695 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 1 Jul 2024 13:20:37 -0600 Subject: [PATCH 74/92] style: E741 - ambiguous-variable-name --- sup3r/bias/bias_transforms.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sup3r/bias/bias_transforms.py b/sup3r/bias/bias_transforms.py index a894b42b8..e96890351 100644 --- a/sup3r/bias/bias_transforms.py +++ b/sup3r/bias/bias_transforms.py @@ -556,9 +556,8 @@ def apply_zero_precipitation_rate(arr: np.ndarray, rate): assert rate.ndim >= 2 assert arr.shape[:2] == rate.shape[:2] - I, J = rate.shape[:2] - for i in range(I): - for j in range(J): + for i in range(rate.shape[0]): + for j in range(rate.shape[1]): r = rate[i, j, 0] if np.isfinite(r): a = arr[i, j] From 5822e3a998bcbf15dd7374af6f47e2786b81fa87 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 1 Jul 2024 13:21:34 -0600 Subject: [PATCH 75/92] cfg: Adding UP039 as fixable Simple enough to have now doubts, so take advantage of `--fix`. 
--- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 67832510e..8120bce2d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,7 +77,9 @@ exclude = [ ] [tool.ruff.lint] -fixable = [] +fixable = [ + "UP039", # unnecessary-class-parentheses + ] # preview = true # logger-objects = [] task-tags = ["TODO", "FIXME", "XXX"] From ec261012b31a250cb62c562a662987765ec71930 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 1 Jul 2024 13:45:05 -0600 Subject: [PATCH 76/92] style: Fixing notes to conform with numpydoc --- tests/bias/test_presrat_bias_correction.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index e5b38d522..7e52fb801 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -416,7 +416,8 @@ def test_presrat_zero_rate(fp_fut_cc, threshold): Use thresholds that gives 0%, 100%, and something between. - Notes: + Notes + ----- - Rate should be zero if threshold is zero for this dataset. """ calc = PresRat( From b1ef3b9a5c11f23d8694b6585aed367a391e0218 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 1 Jul 2024 14:36:49 -0600 Subject: [PATCH 77/92] doc: Adding TODO label As suggested by @bnb32. --- sup3r/bias/qdm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index ac757e2f1..bd3e6c303 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -595,8 +595,8 @@ def _run_single( # Let's save the means for mh and mf instead of the `x` ratio. It # seems that we should be able to simplify the mh component from # the `K` coefficient. - # For now it is monthly but later it will be modified for a generic - # time window. + # TODO: For now it is monthly but later it will be modified to a + # generic time window. 
mh = np.full(12, np.nan, np.float32) mf = np.full(12, np.nan, np.float32) for m in range(12): From 8a5d7b7010c676c7d0e76b39c688e3015d0899c8 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 1 Jul 2024 14:46:52 -0600 Subject: [PATCH 78/92] style: missing-trailing-comma --- sup3r/bias/qdm.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index bd3e6c303..0c84b0700 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -247,7 +247,8 @@ def _run_single(cls, base_handler, daily_reduction=daily_reduction, decimals=decimals, - base_dh_inst=base_dh_inst) + base_dh_inst=base_dh_inst, + ) out = cls.get_qdm_params(bias_data, bias_fut_data, @@ -256,7 +257,9 @@ def _run_single(cls, base_dset, sampling, n_samples, - log_base) + log_base, + ) + return out @staticmethod @@ -267,7 +270,8 @@ def get_qdm_params(bias_data, base_dset, sampling, n_samples, - log_base): + log_base, + ): """Get quantiles' cut point for given datasets Estimate the quantiles' cut points for each of the three given @@ -323,7 +327,8 @@ def get_qdm_params(bias_data, quantiles = sample_q_invlog(n_samples, log_base) else: msg = ('sampling option must be linear, log, or invlog, but ' - 'received: {}'.format(sampling)) + 'received: {}'.format(sampling) + ) logger.error(msg) raise KeyError(msg) @@ -331,7 +336,8 @@ def get_qdm_params(bias_data, f'bias_{bias_feature}_params': np.quantile(bias_data, quantiles), f'bias_fut_{bias_feature}_params': np.quantile(bias_fut_data, quantiles), - f'base_{base_dset}_params': np.quantile(base_data, quantiles)} + f'base_{base_dset}_params': np.quantile(base_data, quantiles), + } return out From a47595ce12424a2f1a8d2d65a5b3fe0eebf0827c Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 2 Jul 2024 10:27:16 -0600 Subject: [PATCH 79/92] style: Conforming with sup3r coding style --- sup3r/bias/qdm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sup3r/bias/qdm.py 
b/sup3r/bias/qdm.py index 0c84b0700..31eb7712a 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -428,8 +428,8 @@ def run(self, if not base_gid.any(): self.bad_bias_gids.append(bias_gid) - logger.debug(f"No base data for bias_gid: {bias_gid}. " - "Adding it to bad_bias_gids") + logger.debug(f'No base data for bias_gid: {bias_gid}. ' + 'Adding it to bad_bias_gids') else: bias_data = self.get_bias_data(bias_gid) bias_fut_data = self.get_bias_data(bias_gid, From cd5020c0327dd733bbe6238f436ebf6885805659 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 2 Jul 2024 13:57:58 -0600 Subject: [PATCH 80/92] fix: Wrong type --- sup3r/bias/mixins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sup3r/bias/mixins.py b/sup3r/bias/mixins.py index f047b6875..75f5bb6d2 100644 --- a/sup3r/bias/mixins.py +++ b/sup3r/bias/mixins.py @@ -118,7 +118,7 @@ def zero_precipitation_rate(arr: np.ndarray, threshold: float = 0.01): Parameters ---------- - arr : np.array + arr : np.ndarray An array of values to be analyzed. Usually precipitation but it could be applied to other quantities. 
threshold : float From a87527cc4ea03970924804f1dbd75058da46acbe Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 2 Jul 2024 13:58:58 -0600 Subject: [PATCH 81/92] fix: Renaming variable to conform with requirements from forward pass --- sup3r/bias/bias_transforms.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sup3r/bias/bias_transforms.py b/sup3r/bias/bias_transforms.py index e96890351..06d483ae7 100644 --- a/sup3r/bias/bias_transforms.py +++ b/sup3r/bias/bias_transforms.py @@ -670,8 +670,8 @@ def get_spatial_bc_presrat(lat_lon: np.array, def local_presrat_bc(data: np.ndarray, - time: np.ndarray, - lat_lon: np.array, + time_index: np.ndarray, + lat_lon: np.ndarray, base_dset: str, feature_name: str, bias_fp, @@ -758,7 +758,7 @@ def local_presrat_bc(data: np.ndarray, tmp = apply_zero_precipitation_rate(tmp, params["base_zero_rate"]) - month = time.month + month = time_index.month for m in range(12): idx = month == m + 1 x_hat = tmp[:, :, idx].mean(axis=-1) From d8ed99c194fec7a269e264695e10b444356ce43f Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 2 Jul 2024 14:00:59 -0600 Subject: [PATCH 82/92] cfg: Adding E303 as auto fix --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 8120bce2d..8dcf3484f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,6 +78,7 @@ exclude = [ [tool.ruff.lint] fixable = [ + "E303", # too-many-blank-lines "UP039", # unnecessary-class-parentheses ] # preview = true From 3a6c500e5dbd9a12e55edd31c66b76779b29d3e1 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Tue, 2 Jul 2024 14:15:57 -0600 Subject: [PATCH 83/92] test: Prototype of foward pass test --- tests/bias/test_presrat_bias_correction.py | 86 +++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 7e52fb801..d084ce930 100644 --- 
a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -27,7 +27,9 @@ import pytest import xarray as xr -from sup3r import TEST_DATA_DIR +from sup3r import CONFIG_DIR, TEST_DATA_DIR +from sup3r.models import Sup3rGan +from sup3r.pipeline.forward_pass import ForwardPass, ForwardPassStrategy from sup3r.bias import ( apply_zero_precipitation_rate, local_presrat_bc, @@ -605,3 +607,85 @@ def test_presrat_transform_nozerochanges(presrat_nozeros_params, fut_cc): assert not ( (data != 0) & (corrected == 0) ).any(), 'Unexpected value corrected (zero_rate) to zero (dry day)' + + +def precip_sample(tmpdir_factory): + t_start = np.datetime64('2015-01-01') + t_end = np.datetime64('2015-01-20') + nt = 20 + + lat = np.linspace(13.66, 31.57, 20) + long = np.linspace(125.0, 148.75, 20) + t0 = np.datetime64('2015-01-01') + time = t0 + np.linspace(0, 19, 20, dtype='timedelta64[D]') + + ds = xr.Dataset() + + +def test_fwp_integration(tmp_path, fp_fut_cc, presrat_params): + """Integration of the PresRat correction method into the forward pass + + Validate two aspects: + - We should be able to run a forward pass with unbiased data. + - The bias trend should be observed in the predicted output. 
+ """ + fp_gen = os.path.join(CONFIG_DIR, 'spatiotemporal/gen_3x_4x_2f.json') + fp_disc = os.path.join(CONFIG_DIR, 'spatiotemporal/disc.json') + features = ['rsds'] + target = (39.0, -104.5) + # shape = (8, 8) + shape = (2, 2) + temporal_slice = slice(None, None, 1) + fwp_chunk_shape = (4, 4, 150) + input_files = [fp_fut_cc] + + Sup3rGan.seed() + model = Sup3rGan(fp_gen, fp_disc, learning_rate=1e-4) + _ = model.generate(np.ones((4, 10, 10, 6, len(features)))) + model.meta['lr_features'] = features + model.meta['hr_out_features'] = features + model.meta['s_enhance'] = 3 + model.meta['t_enhance'] = 4 + + out_dir = os.path.join(tmp_path, 'st_gan') + model.save(out_dir) + + bias_correct_kwargs = { + 'rsds': {'feature_name': 'rsds', + 'base_dset': 'ghi', + 'bias_fp': presrat_params}} + + strat = ForwardPassStrategy( + input_files, + model_kwargs={'model_dir': out_dir}, + fwp_chunk_shape=fwp_chunk_shape, + spatial_pad=0, temporal_pad=0, + input_handler_kwargs=dict(target=target, shape=shape, + temporal_slice=temporal_slice, + worker_kwargs=dict(max_workers=1)), + out_pattern=os.path.join(tmp_path, 'out_{file_id}.nc'), + worker_kwargs=dict(max_workers=1), + input_handler='DataHandlerNCforCC', + ) + bc_strat = ForwardPassStrategy( + input_files, + model_kwargs={'model_dir': out_dir}, + fwp_chunk_shape=fwp_chunk_shape, + spatial_pad=0, temporal_pad=0, + input_handler_kwargs=dict(target=target, shape=shape, + temporal_slice=temporal_slice, + worker_kwargs=dict(max_workers=1)), + out_pattern=os.path.join(tmp_path, 'out_{file_id}.nc'), + worker_kwargs=dict(max_workers=1), + input_handler='DataHandlerNCforCC', + bias_correct_method='local_presrat_bc', + bias_correct_kwargs=bias_correct_kwargs, + ) + + for ichunk in range(strat.chunks): + fwp = ForwardPass(strat, chunk_index=ichunk) + bc_fwp = ForwardPass(bc_strat, chunk_index=ichunk) + + delta = bc_fwp.input_data - fwp.input_data + + delta = bc_fwp.run_chunk() - fwp.run_chunk() From 46740f514d6748348c7e19cb991d33e277b56e23 Mon 
Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 8 Jul 2024 09:21:20 -0600 Subject: [PATCH 84/92] test, refact: test_parallel working with new test dataset --- tests/bias/test_presrat_bias_correction.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index d084ce930..95bb326da 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -340,15 +340,15 @@ def test_zero_precipitation_rate_nan(): @pytest.mark.parametrize('threshold', [0, 50, 1e6]) -def test_parallel(fp_fut_cc, threshold): +def test_parallel(fp_precip, fp_precip_fut, threshold): """Running in parallel must not alter results Check with different thresholds, which will result in different zero rates. """ s = PresRat( FP_NSRDB, - FP_CC, - fp_fut_cc, + fp_precip, + fp_precip_fut, 'ghi', 'rsds', target=TARGET, @@ -360,8 +360,8 @@ def test_parallel(fp_fut_cc, threshold): p = PresRat( FP_NSRDB, - FP_CC, - fp_fut_cc, + fp_precip, + fp_precip_fut, 'ghi', 'rsds', target=TARGET, From ed72336a65b2b72d3e2fecfdcc4b5e4d1c629b68 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 8 Jul 2024 09:24:00 -0600 Subject: [PATCH 85/92] test, reafct: test_presrat_calc() with new testing dataset --- tests/bias/test_presrat_bias_correction.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 95bb326da..d2d4bd6be 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -378,7 +378,7 @@ def test_parallel(fp_precip, fp_precip_fut, threshold): ), f'Different results for {k}' -def test_presrat_calc(fp_fut_cc): +def test_presrat_calc(fp_precip, fp_precip_fut): """Standard PresRat (pre) calculation Estimate the required parameters with a standard setup. 
@@ -387,8 +387,8 @@ def test_presrat_calc(fp_fut_cc): """ calc = PresRat( FP_NSRDB, - FP_CC, - fp_fut_cc, + fp_precip, + fp_precip_fut, 'ghi', 'rsds', target=TARGET, @@ -535,7 +535,8 @@ def test_presrat_transform(presrat_params, fut_cc): data, time, latlon, 'ghi', 'rsds', presrat_params ) - assert not np.isnan(corrected).all(), "Can't compare if only NaN" + assert np.isfinite(corrected).any(), "Can't compare if only NaN" + # Confirm that there were changes, but at this point stop there. assert not np.allclose(data, corrected, equal_nan=False) From c1cb1e7eabb7191e08c5ad042b36115319366c9c Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 8 Jul 2024 09:29:22 -0600 Subject: [PATCH 86/92] test, refact: test_presrat_zero_rate() using new sample test --- tests/bias/test_presrat_bias_correction.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index d2d4bd6be..2a76215af 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -413,7 +413,7 @@ def test_presrat_calc(fp_precip, fp_precip_fut): @pytest.mark.parametrize('threshold', [0, 50, 1e6]) -def test_presrat_zero_rate(fp_fut_cc, threshold): +def test_presrat_zero_rate(fp_precip, fp_precip_fut, threshold): """Estimate zero_rate within PresRat.run() Use thresholds that gives 0%, 100%, and something between. 
@@ -424,8 +424,8 @@ """ calc = PresRat( FP_NSRDB, - FP_CC, - fp_fut_cc, + fp_precip, + fp_precip_fut, 'ghi', 'rsds', target=TARGET, From b55ba71cee4f11fe8e9bfa8e7379cc44c89d1097 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 8 Jul 2024 09:41:12 -0600 Subject: [PATCH 87/92] test, refact: test_presrat() using new sample data --- tests/bias/test_presrat_bias_correction.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 2a76215af..243911e8a 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -482,7 +482,7 @@ def test_apply_zero_precipitation_rate_2D(): ) -def test_presrat(fp_fut_cc): +def test_presrat(fp_precip, fp_precip_fut): """Test PresRat correction procedure Basic standard run. Using only required arguments. If this fails, @@ -490,8 +490,8 @@ def test_presrat(fp_precip, fp_precip_fut): """ calc = PresRat( FP_NSRDB, - FP_CC, - fp_fut_cc, + fp_precip, + fp_precip_fut, 'ghi', 'rsds', target=TARGET, @@ -509,8 +509,8 @@ # Check possible range for v in out: - assert np.nanmin(out[v]) > 0, f'{v} should be all greater than zero.' - assert np.nanmax(out[v]) < 1300, f'{v} should be all less than 1300.' + assert np.nanmin(out[v]) >= VAR_MIN, f'{v} should be all >= {VAR_MIN}.' + assert np.nanmax(out[v]) < VAR_MAX, f'{v} should be all < {VAR_MAX}.'
# Each location can be all finite or all NaN, but not both for v in (v for v in out if len(out[v].shape) > 2): From 82a4625f91e7af7c0c803da352551516a16bd39e Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 8 Jul 2024 10:11:45 -0600 Subject: [PATCH 88/92] test, refact: presrat_params with new sample data --- tests/bias/test_presrat_bias_correction.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 243911e8a..926d50b32 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -117,13 +117,13 @@ def fut_cc(fp_fut_cc): @pytest.fixture(scope='module') -def fp_fut_cc_notrend(tmpdir_factory): +def fp_fut_cc_notrend(tmpdir_factory, fp_precip): """Sample future CC dataset identical to historical CC This is currently a copy of FP_CC, thus no trend on time. """ fn = tmpdir_factory.mktemp('data').join('test_mf_notrend.nc') - shutil.copyfile(FP_CC, fn) + shutil.copyfile(fp_precip, fn) # DataHandlerNCforCC requires a string fn = str(fn) return fn @@ -171,7 +171,7 @@ def fut_cc_notrend(fp_fut_cc_notrend): @pytest.fixture(scope='module') -def presrat_params(tmpdir_factory, fp_fut_cc): +def presrat_params(tmpdir_factory, fp_precip, fp_precip_fut): """PresRat parameters for standard datasets Use the standard datasets to estimate the distributions and save @@ -179,8 +179,8 @@ def presrat_params(tmpdir_factory, fp_fut_cc): """ calc = PresRat( FP_NSRDB, - FP_CC, - fp_fut_cc, + fp_precip, + fp_precip_fut, 'ghi', 'rsds', target=TARGET, @@ -200,7 +200,7 @@ def presrat_params(tmpdir_factory, fp_fut_cc): @pytest.fixture(scope='module') -def presrat_notrend_params(tmpdir_factory, fp_fut_cc_notrend): +def presrat_notrend_params(tmpdir_factory, fp_precip, fp_fut_cc_notrend): """No change in time The bias_fut distribution is equal to bias (mod_his), so no change in @@ -212,7 +212,7 @@ def 
presrat_notrend_params(tmpdir_factory, fp_fut_cc_notrend): """ calc = PresRat( FP_NSRDB, - FP_CC, + fp_precip, fp_fut_cc_notrend, 'ghi', 'rsds', From 4fb76b74103438f17936397f766e380828179e04 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 8 Jul 2024 11:27:07 -0600 Subject: [PATCH 89/92] clean: Removing functions in development Future implementation for moving windows that are not ready yet. --- sup3r/bias/qdm.py | 38 -------------------------------------- 1 file changed, 38 deletions(-) diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py index 31eb7712a..f04214728 100644 --- a/sup3r/bias/qdm.py +++ b/sup3r/bias/qdm.py @@ -18,7 +18,6 @@ sample_q_log, ) from typing import Optional -import xarray as xr from sup3r.preprocessing.data_handling.base import DataHandler from sup3r.utilities.utilities import expand_paths @@ -768,43 +767,6 @@ def run( return copy.deepcopy(self.out) - @staticmethod - def _window_mask(d, d0, window_size=31): - d_start = d0 - window_size - d_end = d0 + window_size - if d_start < 0: - idx = (d >= 365 + d_start) | (d <= d_end) - elif d_end > 365: - idx = (d >= d_start) | (d <= 365 - d_end) - else: - idx = (d >= d_start) & (d <= d_end) - return idx - - @staticmethod - def _single_quantile(da, d0, window_size, quantiles): - idx = self._window_mask( - da.time.dt.dayofyear, d0, window_size=window_size - ) - q = ( - da.where(idx, drop=True) - .chunk(dict(time=-1)) - .quantile(quantiles, dim=['time']) - ) - return q.expand_dims({'day_of_year': [d0]}) - - @staticmethod - def windowed_quantiles( - da: xr.DataArray, day_of_year, quantiles, window_size=90 - ): - q = ( - _single_quantile( - da, d0, window_size=window_size, quantiles=quantiles - ) - for d0 in day_of_year - ) - return xr.merge(q) - - def write_outputs(self, fp_out: str, out: dict = None, extra_attrs: Optional[dict] = None): From 9e8ce0474fe254c9d68fb7ed8db1756d154ec187 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 8 Jul 2024 11:35:41 -0600 Subject: [PATCH 90/92] Temporary test 
data built on the fly --- tests/bias/test_presrat_bias_correction.py | 76 +++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 926d50b32..0a42a9db3 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -36,7 +36,7 @@ PresRat, ) from sup3r.bias.mixins import ZeroRateMixin -from sup3r.preprocessing.data_handling import DataHandlerNC +from sup3r.preprocessing.data_handling import DataHandlerNC, DataHandlerNCforCC FP_NSRDB = os.path.join(TEST_DATA_DIR, 'test_nsrdb_co_2018.h5') FP_CC = os.path.join(TEST_DATA_DIR, 'rsds_test.nc') @@ -51,6 +51,80 @@ TARGET = (float(MIN_LAT), float(MIN_LON)) SHAPE = (len(fh.lat.values), len(fh.lon.values)) +VAR_MIN = 0 +# Fix this max +VAR_MAX = 1300 + +@pytest.fixture(scope='module') +def precip(): + # lat = np.linspace(13.66, 31.57, 20) + # lat = np.linspace(38.245528, 40.350785, 20) + lat = np.array([38.2455282337738, 38.9472804370071, 39.649032596592, 40.3507847105177]) + # lon = np.linspace(254.53125, 256.640625, 20) + lon = np.array([254.53125, 255.234375, 255.9375, 256.640625]) + t0 = np.datetime64('2015-01-01T12:00:00') + time = t0 + np.linspace(0, 364, 365, dtype='timedelta64[D]') + bnds = (-np.timedelta64(12,'h'), np.timedelta64(12, 'h')) + time_bnds = time[:,np.newaxis] + bnds + rng = np.random.default_rng() + # pr = rng.lognormal(3., 1., (time.size, lat.size, lon.size)) + # pr = rng.uniform(0, 1., (time.size, lat.size, lon.size)) + # Transitioning + pr = rng.normal(210, 87., (time.size, lat.size, lon.size)) + pr = np.where(pr>0, pr, 0) + + ds = xr.Dataset( + data_vars={ + "rsds": (["time", "lat", "lon"], pr) + }, + coords={ + "time": ("time", time), + "time_bnds": (["time", "bnds"], time_bnds), + "lat": ("lat", lat), + "lon": ("lon", lon), + }) + + return ds + +# FP_CC +@pytest.fixture(scope='module') +def fp_precip(tmpdir_factory, precip): + 
"""Precipitation sample filename + + DataHandlerNCforCC requires a string + """ + fn = tmpdir_factory.mktemp('data').join('precip_mh.nc') + precip.to_netcdf(fn) + fn = str(fn) + return fn + +# fut_cc +@pytest.fixture(scope='module') +def precip_fut(tmpdir_factory, precip): + ds = precip.copy(deep=True) + + time = ds['time'] + np.timedelta64(18263,'D') + time.attrs = ds['time'].attrs + ds['time'] = time + # Adding an offset + # ds['pr'] += 1 + ds['rsds'] += 75 + # adding a small noise + # ds['pr'] += 1e-6 * np.random.randn(*ds['pr'].shape) + ds['rsds'] += 1e-4 * np.random.randn(*ds['rsds'].shape) + + return ds['rsds'] + +@pytest.fixture(scope='module') +def fp_precip_fut(tmpdir_factory, precip_fut): + """Future precipitation sample filename + + DataHandlerNCforCC requires a string + """ + fn = tmpdir_factory.mktemp('data').join('precip_mf.nc') + precip_fut.to_netcdf(fn) + fn = str(fn) + return fn @pytest.fixture(scope='module') def fp_fut_cc(tmpdir_factory): From 393b381bf4770b687b72da46c0d35c934e403fc8 Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 8 Jul 2024 15:37:32 -0600 Subject: [PATCH 91/92] test, feat: Replacement for FP_NSRDB on the fly Creating support for custom datasets for training created on the fly. 
--- tests/bias/test_presrat_bias_correction.py | 48 ++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 0a42a9db3..8201ab789 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -27,6 +27,8 @@ import pytest import xarray as xr +from rex import Outputs, Resource + from sup3r import CONFIG_DIR, TEST_DATA_DIR from sup3r.models import Sup3rGan from sup3r.pipeline.forward_pass import ForwardPass, ForwardPassStrategy @@ -55,6 +57,52 @@ # Fix this max VAR_MAX = 1300 + +@pytest.fixture(scope='module') +def fp_resource(tmpdir_factory): + fn = tmpdir_factory.mktemp('data').join('precip_oh.h5') + + time = pd.date_range('2018-01-01 00:00:00+0000', '2018-03-26 23:30:00+0000', freq='30m') + time = pd.DatetimeIndex(np.arange(np.datetime64('2018-01-01 00:00:00+00:00'), np.datetime64('2018-03-26 23:31:00+00:00'), np.timedelta64(30, 'm'))) + lat = np.array([39.01, 39.05, 39.09, 39.13, 39.17, 39.21, 39.25, 39.29, 39.33, 39.37, 39.41, 39.45, 39.49, 39.53, 39.57, 39.61, 39.65, 39.69, 39.73, 39.77]) + lon = np.array([-105.14, -105.1, -105.06, -105.02, -104.98, -104.94, -104.9, -104.86, -104.82, -104.78, -104.74, -104.7, -104.66, -104.62, -104.58, -104.54, -104.5, -104.46, -104.42, -104.38]) + rng = np.random.default_rng() + ghi = rng.normal(210, 87., (time.size, lat.size, lon.size)) + + ds = xr.Dataset( + data_vars={ + "ghi": (["time", "lat", "lon"], ghi) + }, + coords={ + "time": ("time", time), + #"time_bnds": (["time", "bnds"], time_bnds), + "lat": ("lat", lat), + "lon": ("lon", lon), + }) + + + ds = ds.sortby('lat', ascending=False) + lat_2d, lon_2d = xr.broadcast(ds['lat'], ds['lon']) + meta = pd.DataFrame({'latitude': lat_2d.values.flatten(), + 'longitude': lon_2d.values.flatten(), + }) + + shapes = {"ghi": (len(ds.ghi.time), np.prod(ds.ghi.isel(time=0).shape))} + attrs = {"ghi": None} + chunks = {"ghi": None} + 
dtypes = {"ghi": "float32"} + + Outputs.init_h5(fn, ["ghi"], shapes, attrs, chunks, dtypes, + meta=meta, + time_index=pd.DatetimeIndex(ds.time)) + with Outputs(fn, 'a') as out: + out["ghi"] = ds.stack(flat=("lat", "lon"))["ghi"] + + # DataHandlerNCforCC requires a string + fn = str(fn) + return fn + + @pytest.fixture(scope='module') def precip(): # lat = np.linspace(13.66, 31.57, 20) From f6ed01a436afff066246352823d434db9702ae9b Mon Sep 17 00:00:00 2001 From: Gui Castelao Date: Mon, 8 Jul 2024 15:39:46 -0600 Subject: [PATCH 92/92] test, refact: Diverting tests to use custom testing dataset --- tests/bias/test_presrat_bias_correction.py | 26 +++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py index 8201ab789..e2befcfbf 100644 --- a/tests/bias/test_presrat_bias_correction.py +++ b/tests/bias/test_presrat_bias_correction.py @@ -293,14 +293,14 @@ def fut_cc_notrend(fp_fut_cc_notrend): @pytest.fixture(scope='module') -def presrat_params(tmpdir_factory, fp_precip, fp_precip_fut): +def presrat_params(tmpdir_factory, fp_resource, fp_precip, fp_precip_fut): """PresRat parameters for standard datasets Use the standard datasets to estimate the distributions and save in a temporary place to be re-used """ calc = PresRat( - FP_NSRDB, + fp_resource, fp_precip, fp_precip_fut, 'ghi', @@ -322,7 +322,7 @@ def presrat_params(tmpdir_factory, fp_precip, fp_precip_fut): @pytest.fixture(scope='module') -def presrat_notrend_params(tmpdir_factory, fp_precip, fp_fut_cc_notrend): +def presrat_notrend_params(tmpdir_factory, fp_resource, fp_precip, fp_fut_cc_notrend): """No change in time The bias_fut distribution is equal to bias (mod_his), so no change in @@ -333,7 +333,7 @@ def presrat_notrend_params(tmpdir_factory, fp_precip, fp_fut_cc_notrend): in errors. 
""" calc = PresRat( - FP_NSRDB, + fp_resource, fp_precip, fp_fut_cc_notrend, 'ghi', @@ -462,13 +462,13 @@ def test_zero_precipitation_rate_nan(): @pytest.mark.parametrize('threshold', [0, 50, 1e6]) -def test_parallel(fp_precip, fp_precip_fut, threshold): +def test_parallel(fp_resource, fp_precip, fp_precip_fut, threshold): """Running in parallel must not alter results Check with different thresholds, which will result in different zero rates. """ s = PresRat( - FP_NSRDB, + fp_resource, fp_precip, fp_precip_fut, 'ghi', @@ -481,7 +481,7 @@ def test_parallel(fp_precip, fp_precip_fut, threshold): out_s = s.run(max_workers=1, zero_rate_threshold=threshold) p = PresRat( - FP_NSRDB, + fp_resource, fp_precip, fp_precip_fut, 'ghi', @@ -500,7 +500,7 @@ def test_parallel(fp_precip, fp_precip_fut, threshold): ), f'Different results for {k}' -def test_presrat_calc(fp_precip, fp_precip_fut): +def test_presrat_calc(fp_resource, fp_precip, fp_precip_fut): """Standard PresRat (pre) calculation Estimate the required parameters with a standard setup. @@ -508,7 +508,7 @@ def test_presrat_calc(fp_precip, fp_precip_fut): WIP: Just confirm it runs, but not checking much yet. """ calc = PresRat( - FP_NSRDB, + fp_resource, fp_precip, fp_precip_fut, 'ghi', @@ -535,7 +535,7 @@ def test_presrat_calc(fp_precip, fp_precip_fut): @pytest.mark.parametrize('threshold', [0, 50, 1e6]) -def test_presrat_zero_rate(fp_precip, fp_precip_fut, threshold): +def test_presrat_zero_rate(fp_resource, fp_precip, fp_precip_fut, threshold): """Estimate zero_rate within PresRat.run() Use thresholds that gives 0%, 100%, and something between. @@ -545,7 +545,7 @@ def test_presrat_zero_rate(fp_precip, fp_precip_fut, threshold): - Rate should be zero if threshold is zero for this dataset. 
""" calc = PresRat( - FP_NSRDB, + fp_resource, fp_precip, fp_precip_fut, 'ghi', @@ -604,14 +604,14 @@ def test_apply_zero_precipitation_rate_2D(): ) -def test_presrat(fp_precip, fp_precip_fut): +def test_presrat(fp_resource, fp_precip, fp_precip_fut): """Test PresRat correction procedure Basic standard run. Using only required arguments. If this fails, something fundamental is wrong. """ calc = PresRat( - FP_NSRDB, + fp_resource, fp_precip, fp_precip_fut, 'ghi',