diff --git a/pixi.lock b/pixi.lock index 82d2b6519..7da0dc1b9 100644 --- a/pixi.lock +++ b/pixi.lock @@ -624,6 +624,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/distributed-2024.6.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/docutils-0.20.1-py311h38be061_3.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/executing-2.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/flatbuffers-23.5.26-h59595ed_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2 @@ -666,9 +667,11 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/importlib_metadata-8.0.0-hd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ipython-8.24.0-pyh707e725_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jaraco.classes-3.4.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jaraco.context-5.3.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jaraco.functools-4.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/jedi-0.19.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jeepney-0.8.0-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.4-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/keras-2.15.0-pyhd8ed1ab_0.conda @@ -788,10 +791,13 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/pkginfo-1.10.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prompt-toolkit-3.0.42-pyha770c72_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/protobuf-4.24.4-py311h46cbc50_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/psutil-6.0.0-py311h331c9d8_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/ptyprocess-0.7.0-pyhd3deb0d_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-17.0-hb77b528_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/pure_eval-0.2.2-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/pyarrow-15.0.0-py311h39c9aba_0_cpu.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pyarrow-hotfix-0.6-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pyasn1-0.6.0-pyhd8ed1ab_0.conda @@ -843,6 +849,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.7-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/stack_data-0.6.2-pyhd8ed1ab_0.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/tblib-3.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tensorboard-2.15.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/tensorboard-data-server-0.7.0-py311h63ff55d_1.conda @@ -928,6 +935,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aiohttp-3.9.5-py311h05b510d_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/aiosignal-1.3.1-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.16-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/asttokens-2.4.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/astunparse-1.6.3-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/noarch/attrs-23.2.0-pyh71513ae_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/aws-c-auth-0.7.11-ha4ce7b8_1.conda @@ -976,6 +984,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/distributed-2024.6.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/docutils-0.20.1-py311h267d04e_3.conda - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.0-pyhd8ed1ab_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/executing-2.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/flatbuffers-23.5.26-h13dd4ca_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/fonttools-4.53.0-py311hd3f4193_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/freetype-2.12.1-hadb7bae_2.conda @@ -1002,6 +1011,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/importlib_metadata-8.0.0-hd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ipython-8.24.0-pyh707e725_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jaraco.classes-3.4.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jaraco.context-5.3.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/jaraco.functools-4.0.0-pyhd8ed1ab_0.conda @@ -1087,9 +1097,12 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pillow-10.3.0-py311hd7951ec_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pkginfo-1.10.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/prompt-toolkit-3.0.42-pyha770c72_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/protobuf-4.24.4-py311h4d1eceb_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/psutil-6.0.0-py311hd3f4193_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pthread-stubs-0.4-h27ca646_1001.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/ptyprocess-0.7.0-pyhd3deb0d_0.tar.bz2 + - conda: https://conda.anaconda.org/conda-forge/noarch/pure_eval-0.2.2-pyhd8ed1ab_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/pyarrow-15.0.0-py311hd7bc329_0_cpu.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pyarrow-hotfix-0.6-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/pyasn1-0.6.0-pyhd8ed1ab_0.conda @@ -1134,6 +1147,7 @@ environments: - 
conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.7-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/stack_data-0.6.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tblib-3.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tensorboard-2.15.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tensorboard-data-server-0.7.0-py311h5fb2c35_1.conda @@ -2467,6 +2481,24 @@ packages: requires_dist: - typing-extensions>=4.0.0 ; python_version < '3.11' requires_python: '>=3.8.0' +- kind: conda + name: asttokens + version: 2.4.1 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/asttokens-2.4.1-pyhd8ed1ab_0.conda + sha256: 708168f026df19a0344983754d27d1f7b28bb21afc7b97a82f02c4798a3d2111 + md5: 5f25798dcefd8252ce5f9dc494d5f571 + depends: + - python >=3.5 + - six >=1.12.0 + license: Apache-2.0 + license_family: Apache + purls: + - pkg:pypi/asttokens + size: 28922 + timestamp: 1698341257884 - kind: conda name: astunparse version: 1.6.3 @@ -3850,6 +3882,23 @@ packages: purls: [] size: 618596 timestamp: 1640112124844 +- kind: conda + name: decorator + version: 5.1.1 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/decorator-5.1.1-pyhd8ed1ab_0.tar.bz2 + sha256: 328a6a379f9bdfd0230e51de291ce858e6479411ea4b0545fb377c71662ef3e2 + md5: 43afe5ab04e35e17ba28649471dd7364 + depends: + - python >=3.5 + license: BSD-2-Clause + license_family: BSD + purls: + - pkg:pypi/decorator + size: 12072 + timestamp: 1641555714315 - kind: pypi name: dill version: 0.3.8 @@ -3951,6 +4000,23 @@ packages: - pkg:pypi/exceptiongroup?source=conda-forge-mapping size: 20551 timestamp: 1704921321122 +- kind: conda + name: executing + version: 2.0.1 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/executing-2.0.1-pyhd8ed1ab_0.conda + sha256: c738804ab1e6376f8ea63372229a04c8d658dc90fd5a218c6273a2eaf02f4057 + md5: e16be50e378d8a4533b989035b196ab8 + depends: + - python >=2.7 + license: MIT + license_family: MIT + purls: + - pkg:pypi/executing + size: 27689 + timestamp: 1698580072627 - kind: conda name: expat version: 2.6.2 @@ -4971,6 +5037,35 @@ packages: - pkg:pypi/iniconfig?source=conda-forge-mapping size: 11101 timestamp: 1673103208955 +- kind: conda + name: ipython + version: 8.24.0 + build: pyh707e725_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/ipython-8.24.0-pyh707e725_0.conda + sha256: d3ce492dac53a8f1c6cd682a25313f02993a1333b5e4787a15259a6e7cb28562 + md5: 1fb1f1fcbe053a762748dbf0ae4cfd0d + depends: + - __unix + - decorator + - exceptiongroup + - jedi >=0.16 + - matplotlib-inline + - pexpect >4.3 + - pickleshare + - prompt-toolkit >=3.0.41,<3.1.0 + - pygments >=2.4.0 + - python >=3.10 + - stack_data + - traitlets >=5.13.0 + - typing_extensions >=4.6 + license: BSD-3-Clause + license_family: BSD + purls: + - pkg:pypi/ipython + size: 596366 + timestamp: 1715263505659 - kind: pypi name: isort version: 5.13.2 @@ -5035,6 +5130,24 @@ packages: - pkg:pypi/jaraco-functools?source=conda-forge-mapping size: 15192 timestamp: 1701695329516 +- kind: conda + name: jedi + version: 0.19.1 + 
build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/jedi-0.19.1-pyhd8ed1ab_0.conda + sha256: 362f0936ef37dfd1eaa860190e42a6ebf8faa094eaa3be6aa4d9ace95f40047a + md5: 81a3be0b2023e1ea8555781f0ad904a2 + depends: + - parso >=0.8.3,<0.9.0 + - python >=3.6 + license: MIT + license_family: MIT + purls: + - pkg:pypi/jedi + size: 841312 + timestamp: 1696326218364 - kind: conda name: jeepney version: 0.8.0 @@ -8483,6 +8596,23 @@ packages: - pkg:pypi/pandas?source=conda-forge-mapping size: 14742444 timestamp: 1715898315491 +- kind: conda + name: parso + version: 0.8.4 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/parso-0.8.4-pyhd8ed1ab_0.conda + sha256: bfe404eebb930cc41782d34f8fc04c0388ea692eeebe2c5fc28df8ec8d4d61ae + md5: 81534b420deb77da8833f2289b8d47ac + depends: + - python >=3.6 + license: MIT + license_family: MIT + purls: + - pkg:pypi/parso + size: 75191 + timestamp: 1712320447201 - kind: conda name: partd version: 1.4.2 @@ -8537,6 +8667,41 @@ packages: - pkg:pypi/pep517?source=conda-forge-mapping size: 19044 timestamp: 1667916747996 +- kind: conda + name: pexpect + version: 4.9.0 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/pexpect-4.9.0-pyhd8ed1ab_0.conda + sha256: 90a09d134a4a43911b716d4d6eb9d169238aff2349056f7323d9db613812667e + md5: 629f3203c99b32e0988910c93e77f3b6 + depends: + - ptyprocess >=0.5 + - python >=3.7 + license: ISC + purls: + - pkg:pypi/pexpect + size: 53600 + timestamp: 1706113273252 +- kind: conda + name: pickleshare + version: 0.7.5 + build: py_1003 + build_number: 1003 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/pickleshare-0.7.5-py_1003.tar.bz2 + sha256: a1ed1a094dd0d1b94a09ed85c283a0eb28943f2e6f22161fb45e128d35229738 + md5: 415f0ebb6198cc2801c73438a9fb5761 + depends: + - python >=3 + license: MIT + license_family: MIT + purls: + - pkg:pypi/pickleshare + size: 9332 + timestamp: 1602536313357 - kind: conda name: pillow version: 10.3.0 @@ -8689,6 +8854,26 @@ packages: - pyyaml>=5.1 - virtualenv>=20.10.0 requires_python: '>=3.9' +- kind: conda + name: prompt-toolkit + version: 3.0.42 + build: pyha770c72_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/prompt-toolkit-3.0.42-pyha770c72_0.conda + sha256: 58525b2a9305fb154b2b0d43a48b9a6495441b80e4fbea44f2a34a597d2cef16 + md5: 0bf64bf10eee21f46ac83c161917fa86 + depends: + - python >=3.7 + - wcwidth + constrains: + - prompt_toolkit 3.0.42 + license: BSD-3-Clause + license_family: BSD + purls: + - pkg:pypi/prompt-toolkit + size: 270398 + timestamp: 1702399557137 - kind: conda name: protobuf version: 4.24.4 @@ -8803,6 +8988,22 @@ packages: purls: [] size: 5625 timestamp: 1606147468727 +- kind: conda + name: ptyprocess + version: 0.7.0 + build: pyhd3deb0d_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/ptyprocess-0.7.0-pyhd3deb0d_0.tar.bz2 + sha256: fb31e006a25eb2e18f3440eb8d17be44c8ccfae559499199f73584566d0a444a + md5: 359eeb6536da0e687af562ed265ec263 + depends: + - python + license: ISC + purls: + - pkg:pypi/ptyprocess + size: 16546 + timestamp: 1609419417991 - kind: conda name: pulseaudio-client version: '17.0' @@ -8824,6 +9025,23 @@ packages: purls: [] size: 757633 timestamp: 1705690081905 +- kind: conda + name: pure_eval + version: 0.2.2 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: 
https://conda.anaconda.org/conda-forge/noarch/pure_eval-0.2.2-pyhd8ed1ab_0.tar.bz2 + sha256: 72792f9fc2b1820e37cc57f84a27bc819c71088c3002ca6db05a2e56404f9d44 + md5: 6784285c7e55cb7212efabc79e4c2883 + depends: + - python >=3.5 + license: MIT + license_family: MIT + purls: + - pkg:pypi/pure-eval + size: 14551 + timestamp: 1642876055775 - kind: conda name: pyarrow version: 15.0.0 @@ -8986,8 +9204,8 @@ packages: - kind: pypi name: pyjson5 version: 1.6.6 - url: https://files.pythonhosted.org/packages/0c/8b/24c7d8f7785bdaab55481b67e736e892b01e10be2366860d67484f48ad50/pyjson5-1.6.6-cp311-cp311-macosx_11_0_arm64.whl - sha256: 7d1fe850aa1763e8f9945a7120f032f5f707d0372d52c8e0fecda2a79640820e + url: https://files.pythonhosted.org/packages/26/ba/c21b9f0fad649cada64fdfbed8546c74c998f137b487affb1be451b23bb8/pyjson5-1.6.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl + sha256: 0abc60630bb1601ae9005e251d30cef363cb4d6cef2d2fb7a160329569170e38 requires_python: ~=3.5 - kind: pypi name: pyjson5 @@ -10251,6 +10469,26 @@ packages: - pkg:pypi/sphinxcontrib-serializinghtml?source=conda-forge-mapping size: 28776 timestamp: 1705118378942 +- kind: conda + name: stack_data + version: 0.6.2 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/stack_data-0.6.2-pyhd8ed1ab_0.conda + sha256: a58433e75229bec39f3be50c02efbe9b7083e53a1f31d8ee247564f370191eec + md5: e7df0fdd404616638df5ece6e69ba7af + depends: + - asttokens + - executing + - pure_eval + - python >=3.5 + license: MIT + license_family: MIT + purls: + - pkg:pypi/stack-data + size: 26205 + timestamp: 1669632203115 - kind: pypi name: tabulate version: 0.9.0 @@ -10697,6 +10935,23 @@ packages: - pkg:pypi/tornado?source=conda-forge-mapping size: 857353 timestamp: 1717722957594 +- kind: conda + name: traitlets + version: 5.14.3 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/traitlets-5.14.3-pyhd8ed1ab_0.conda + sha256: 8a64fa0f19022828513667c2c7176cfd125001f3f4b9bc00d33732e627dd2592 + md5: 3df84416a021220d8b5700c613af2dc5 + depends: + - python >=3.8 + license: BSD-3-Clause + license_family: BSD + purls: + - pkg:pypi/traitlets + size: 110187 + timestamp: 1713535244513 - kind: conda name: twine version: 5.1.1 @@ -10825,6 +11080,23 @@ packages: - setuptools>=68 ; extra == 'test' - time-machine>=2.10 ; platform_python_implementation == 'CPython' and extra == 'test' requires_python: '>=3.7' +- kind: conda + name: wcwidth + version: 0.2.13 + build: pyhd8ed1ab_0 + subdir: noarch + noarch: python + url: https://conda.anaconda.org/conda-forge/noarch/wcwidth-0.2.13-pyhd8ed1ab_0.conda + sha256: b6cd2fee7e728e620ec736d8dfee29c6c9e2adbd4e695a31f1d8f834a83e57e3 + md5: 68f0738df502a14213624b288c60c9ad + depends: + - python >=3.8 + license: MIT + license_family: MIT + purls: + - pkg:pypi/wcwidth + size: 32709 + timestamp: 1704731373922 - kind: conda name: werkzeug version: 3.0.3 diff --git a/pyproject.toml b/pyproject.toml index 865a26b2a..8dcf3484f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,7 +77,10 @@ exclude = [ ] [tool.ruff.lint] -fixable = [] +fixable = [ + "E303", # too-many-blank-lines + "UP039", # unnecessary-class-parentheses + ] # preview = true # logger-objects = [] task-tags = ["TODO", "FIXME", "XXX"] @@ -253,13 +256,13 @@ channels = ["conda-forge", "anaconda", "main"] platforms = ["osx-arm64", "linux-64"] [tool.pixi.dependencies] -python = "==3.11" +python = "~=3.11.0" cftime = ">=1.6.2" dask = ">=2022.0" h5netcdf = ">=1.1.0" 
pillow = ">=10.0" matplotlib = ">=3.1" -numpy = ">=1.7.0" +numpy = "~=1.7" pandas = ">=2.0" scipy = ">=1.0.0" tensorflow = ">2.4,<2.16" @@ -291,3 +294,4 @@ pytest = ">=5.2" build = ">=0.6" twine = ">=5.0" ruff = ">=0.4" +ipython = ">=8.0" diff --git a/sup3r/bias/__init__.py b/sup3r/bias/__init__.py index 6fd24ddfe..ac5ad9f34 100644 --- a/sup3r/bias/__init__.py +++ b/sup3r/bias/__init__.py @@ -1,19 +1,24 @@ """Bias calculation and correction modules.""" -from .bias_transforms import (global_linear_bc, local_linear_bc, - local_qdm_bc, monthly_local_linear_bc) from .bias_calc import (LinearCorrection, MonthlyLinearCorrection, MonthlyScalarCorrection, SkillAssessment) -from .qdm import QuantileDeltaMappingCorrection +from .bias_transforms import (apply_zero_precipitation_rate, + global_linear_bc, local_linear_bc, + local_qdm_bc, local_presrat_bc, + monthly_local_linear_bc) +from .qdm import PresRat, QuantileDeltaMappingCorrection __all__ = [ + "apply_zero_precipitation_rate", "global_linear_bc", "local_linear_bc", "local_qdm_bc", + "local_presrat_bc", "monthly_local_linear_bc", "LinearCorrection", "MonthlyLinearCorrection", "MonthlyScalarCorrection", + "PresRat", "QuantileDeltaMappingCorrection", "SkillAssessment", ] diff --git a/sup3r/bias/bias_calc.py b/sup3r/bias/bias_calc.py index 05dfd7c7d..5fa77e98d 100644 --- a/sup3r/bias/bias_calc.py +++ b/sup3r/bias/bias_calc.py @@ -984,7 +984,7 @@ def run(self, class MonthlyLinearCorrection(LinearCorrection): """Calculate linear correction *scalar +adder factors to bias correct data - This calculation operates on single bias sites on a montly basis + This calculation operates on single bias sites on a monthly basis """ NT = 12 @@ -1044,7 +1044,7 @@ class MonthlyScalarCorrection(MonthlyLinearCorrection): scalar factors are computed as mean(base_data) / mean(bias_data). Adder factors are still written but are exactly zero. - This calculation operates on single bias sites on a montly basis + This calculation operates on single bias sites on a monthly basis """ @staticmethod diff --git a/sup3r/bias/bias_transforms.py b/sup3r/bias/bias_transforms.py index de20fe28d..06d483ae7 100644 --- a/sup3r/bias/bias_transforms.py +++ b/sup3r/bias/bias_transforms.py @@ -158,18 +158,19 @@ def get_spatial_bc_quantiles(lat_lon: np.array, >>> lat_lon = np.array([ ... [39.649033, -105.46875 ], ... [39.649033, -104.765625]]) - >>> params = get_spatial_bc_quantiles( - ... lat_lon, "ghi", "rsds", "./dist_params.hdf") + >>> params, cfg = get_spatial_bc_quantiles( + ... lat_lon, "ghi", "rsds", "./dist_params.hdf") """ ds = {'base': f'base_{base_dset}_params', 'bias': f'bias_{feature_name}_params', - 'bias_fut': f'bias_fut_{feature_name}_params'} - out = _get_factors(lat_lon, ds, bias_fp, threshold) + 'bias_fut': f'bias_fut_{feature_name}_params', + } + params = _get_factors(lat_lon, ds, bias_fp, threshold) with Resource(bias_fp) as res: cfg = res.global_attrs - return out["base"], out["bias"], out["bias_fut"], cfg + return params, cfg def global_linear_bc(input, scalar, adder, out_range=None): @@ -488,11 +489,15 @@ def local_qdm_bc(data: np.array, >>> unbiased = local_qdm_bc(biased_array, lat_lon_array, "ghi", "rsds", ... 
"./dist_params.hdf") """ - base, bias, bias_fut, cfg = get_spatial_bc_quantiles(lat_lon, - base_dset, - feature_name, - bias_fp, - threshold) + params, cfg = get_spatial_bc_quantiles(lat_lon, + base_dset, + feature_name, + bias_fp, + threshold) + base = params["base"] + bias = params["bias"] + bias_fut = params["bias_fut"] + if lr_padded_slice is not None: spatial_slice = (lr_padded_slice[0], lr_padded_slice[1]) base = base[spatial_slice] @@ -520,3 +525,244 @@ def local_qdm_bc(data: np.array, tmp = QDM(tmp) # Reorgnize array back from (time, space) to (spatial, spatial, temporal) return tmp.T.reshape(data.shape) + + +def apply_zero_precipitation_rate(arr: np.ndarray, rate): + """Enforce the zero precipitation rate + Replace lowest values by zero to satisfy the given rate of zero + precipitation. + + Parameters + ---------- + arr : np.array + An array of values to be analyzed. Usually precipitation but it + rate : float or np.array + Rate of zero, or negligible, days of precipitation. + + Returns + ------- + corrected : np.array + A copy of given array that satisfies the rate of zero precipitation + days, i.e. the lowest values of precipitation are changed to zero + to satisfy that rate. + + Examples + -------- + >>> data = np.array([5, 0.1, np.nan, 0.2, 1]) + >>> apply_zero_precipitation_rate(data, 0.30) + array([5. , 0. , nan, 0.2, 1. ]) + """ + assert arr.ndim == 3 + assert rate.ndim >= 2 + assert arr.shape[:2] == rate.shape[:2] + + for i in range(rate.shape[0]): + for j in range(rate.shape[1]): + r = rate[i, j, 0] + if np.isfinite(r): + a = arr[i, j] + valid = np.isfinite(a) + idx = np.argsort(a)[valid][:round(r * valid.astype('i').sum())] + a[idx] = 0 + arr[i, j] = a + + return arr + + +def get_spatial_bc_presrat(lat_lon: np.array, + base_dset: str, + feature_name: str, + bias_fp: str, + threshold: float = 0.1): + """Statistical distributions previously estimated for given lat/lon points + + Recover the parameters that describe the statistical distribution + previously estimated with + :class:`~sup3r.bias.bias_calc.QuantileDeltaMappingCorrection` for three + datasets: ``base`` (historical reference), ``bias`` (historical biased + reference), and ``bias_fut`` (the future biased dataset, usually the data + to correct). + + Parameters + ---------- + lat_lon : ndarray + Array of latitudes and longitudes for the domain to bias correct + (n_lats, n_lons, 2) + base_dset : str + Name of feature used as historical reference. A Dataset with name + "base_{base_dset}_params" will be retrieved from ``bias_fp``. + feature_name : str + Name of the feature that is being corrected. Datasets with names + "bias_{feature_name}_params" and "bias_fut_{feature_name}_params" will + be retrieved from ``bias_fp``. + bias_fp : str + Filepath to bias correction file from the bias calc module. Must have + datasets "base_{base_dset}_params", "bias_{feature_name}_params", and + "bias_fut_{feature_name}_params" that define the statistical + distributions. + threshold : float + Nearest neighbor euclidean distance threshold. If the coordinates are + more than this value away from the bias correction lat/lon, an error + is raised. + + Returns + ------- + base : np.array + Parameters used to define the statistical distribution estimated for + the ``base_dset``. It has a shape of (I, J, P), where (I, J) are the + same first two dimensions of the given `lat_lon` and P is the number + of parameters and depends on the type of distribution. 
+
+
+def get_spatial_bc_presrat(lat_lon: np.array,
+                           base_dset: str,
+                           feature_name: str,
+                           bias_fp: str,
+                           threshold: float = 0.1):
+    """Statistical distributions previously estimated for given lat/lon points
+
+    Recover the parameters that describe the statistical distribution
+    previously estimated with
+    :class:`~sup3r.bias.bias_calc.QuantileDeltaMappingCorrection` for three
+    datasets: ``base`` (historical reference), ``bias`` (historical biased
+    reference), and ``bias_fut`` (the future biased dataset, usually the data
+    to correct), together with the zero-precipitation rate and the monthly
+    means required by PresRat.
+
+    Parameters
+    ----------
+    lat_lon : ndarray
+        Array of latitudes and longitudes for the domain to bias correct
+        (n_lats, n_lons, 2)
+    base_dset : str
+        Name of feature used as historical reference. A Dataset with name
+        "base_{base_dset}_params" will be retrieved from ``bias_fp``.
+    feature_name : str
+        Name of the feature that is being corrected. Datasets with names
+        "bias_{feature_name}_params" and "bias_fut_{feature_name}_params" will
+        be retrieved from ``bias_fp``.
+    bias_fp : str
+        Filepath to bias correction file from the bias calc module. Must have
+        datasets "base_{base_dset}_params", "bias_{feature_name}_params", and
+        "bias_fut_{feature_name}_params" that define the statistical
+        distributions.
+    threshold : float
+        Nearest neighbor euclidean distance threshold. If the coordinates are
+        more than this value away from the bias correction lat/lon, an error
+        is raised.
+
+    Returns
+    -------
+    params : dict
+        A dictionary with the quantile parameters and the PresRat auxiliary
+        variables for the given coordinates. The keys 'base', 'bias', and
+        'bias_fut' hold the distribution parameters, each with a shape of
+        (I, J, P), where (I, J) are the same first two dimensions of the
+        given `lat_lon` and P is the number of parameters, which depends on
+        the type of distribution. The keys 'base_zero_rate', 'bias_mean_mh',
+        and 'bias_mean_mf' hold the zero-precipitation rate and the monthly
+        means. See
+        :class:`~sup3r.bias.bias_calc.QuantileDeltaMappingCorrection` for
+        more details.
+    cfg : dict
+        Metadata used to guide how to use the previous parameters when
+        reconstructing the statistical distributions. For instance,
+        `cfg['dist']` defines the type of distribution. See
+        :class:`~sup3r.bias.bias_calc.QuantileDeltaMappingCorrection` for
+        more details, including which metadata is saved.
+
+    Warnings
+    --------
+    Be careful selecting which `bias_fp` to use. In particular, whether
+    "bias_fut_{feature_name}_params" is representative of the desired target
+    period.
+
+    See Also
+    --------
+    sup3r.bias.bias_calc.QuantileDeltaMappingCorrection
+        Estimate the statistical distributions loaded here.
+
+    Examples
+    --------
+    >>> lat_lon = np.array([
+    ...              [39.649033, -105.46875 ],
+    ...              [39.649033, -104.765625]])
+    >>> params, cfg = get_spatial_bc_presrat(
+    ...     lat_lon, "ghi", "rsds", "./dist_params.hdf")
+    """
+    ds = {'base': f'base_{base_dset}_params',
+          'base_zero_rate': f'{base_dset}_zero_rate',
+          'bias': f'bias_{feature_name}_params',
+          'bias_fut': f'bias_fut_{feature_name}_params',
+          'bias_mean_mh': f'{feature_name}_mean_mh',
+          'bias_mean_mf': f'{feature_name}_mean_mf',
+          }
+    params = _get_factors(lat_lon, ds, bias_fp, threshold)
+
+    with Resource(bias_fp) as res:
+        cfg = res.global_attrs
+
+    return params, cfg
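A hedged usage sketch, not part of the patch; shapes follow the datasets
documented above, with `fp` a hypothetical parameter file written by
``PresRat.run(fp_out=...)``:

    params, cfg = get_spatial_bc_presrat(lat_lon, 'ghi', 'rsds', fp)
    params['base']            # CDF cut points, shape (n_lats, n_lons, P)
    params['base_zero_rate']  # dry-day fraction, shape (n_lats, n_lons, 1)
    params['bias_mean_mf']    # monthly means, shape (n_lats, n_lons, 12)
    cfg['dist'], cfg['sampling'], cfg['log_base']  # rebuild the QDM operator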
+
+
+def local_presrat_bc(data: np.ndarray,
+                     time_index: np.ndarray,
+                     lat_lon: np.ndarray,
+                     base_dset: str,
+                     feature_name: str,
+                     bias_fp,
+                     lr_padded_slice=None,
+                     threshold=0.1,
+                     relative=True,
+                     no_trend=False):
+    """Bias correction using PresRat
+
+    Parameters
+    ----------
+    data : np.ndarray
+        Sup3r input data to be bias corrected, assumed to be 3D with shape
+        (spatial, spatial, temporal) for a single feature.
+    time_index : pd.DatetimeIndex
+        DatetimeIndex associated with the temporal axis of `data`; its
+        ``month`` attribute is used to apply the monthly correction factors.
+    lat_lon : ndarray
+        Array of latitudes and longitudes for the domain to bias correct
+        (n_lats, n_lons, 2)
+    base_dset : str
+        Name of feature that is used as (historical) reference. Dataset with
+        names "base_{base_dset}_params" will be retrieved.
+    feature_name : str
+        Name of feature that is being corrected. Datasets with names
+        "bias_{feature_name}_params" and "bias_fut_{feature_name}_params" will
+        be retrieved.
+    bias_fp : str
+        Filepath to statistical distributions file from the bias calc module.
+        Must have datasets "bias_{feature_name}_params",
+        "bias_fut_{feature_name}_params", and "base_{base_dset}_params" that
+        are the parameters to define the statistical distributions to be used
+        to correct the given `data`.
+    lr_padded_slice : tuple | None
+        Tuple of length four that slices (spatial_1, spatial_2, temporal,
+        features) where each tuple entry is a slice object for that axis.
+        Note that if this method is called as part of a sup3r forward pass,
+        the lr_padded_slice will be included automatically in the kwargs for
+        the active chunk. If this is None, no slicing will be done and the
+        full bias correction source shape will be used.
+    threshold : float
+        Nearest neighbor euclidean distance threshold. If the coordinates are
+        more than this value away from the bias correction lat/lon, an error
+        is raised.
+    relative : bool, default=True
+        Apply the QDM correction as a relative (multiplicative) factor rather
+        than an absolute (additive) one. See
+        :class:`rex.utilities.bc_utils.QuantileDeltaMapping`.
+    no_trend : bool, default=False
+        An option to ignore the trend component of the correction, thus
+        resulting in an ordinary Quantile Mapping, i.e. corrects the bias by
+        comparing the distributions of the biased dataset with a reference
+        dataset. See
+        ``params_mf`` of :class:`rex.utilities.bc_utils.QuantileDeltaMapping`.
+        Note that this assumes that params_mh is the data distribution
+        representative for the target data.
+
+    Returns
+    -------
+    out : np.ndarray
+        The input data corrected by PresRat, with the same shape as the
+        given `data`.
+    """
+    params, cfg = get_spatial_bc_presrat(lat_lon,
+                                         base_dset,
+                                         feature_name,
+                                         bias_fp,
+                                         threshold)
+    base = params["base"]
+    bias = params["bias"]
+    bias_fut = params["bias_fut"]
+
+    if lr_padded_slice is not None:
+        spatial_slice = (lr_padded_slice[0], lr_padded_slice[1])
+        base = base[spatial_slice]
+        bias = bias[spatial_slice]
+        bias_fut = bias_fut[spatial_slice]
+
+    if no_trend:
+        mf = None
+    else:
+        mf = bias_fut.reshape(-1, bias_fut.shape[-1])
+    # The distributions are 3D (space, space, N-params)
+    # Collapse 3D (space, space, N) into 2D (space**2, N)
+    QDM = QuantileDeltaMapping(base.reshape(-1, base.shape[-1]),
+                               bias.reshape(-1, bias.shape[-1]),
+                               mf,
+                               dist=cfg['dist'],
+                               relative=relative,
+                               sampling=cfg["sampling"],
+                               log_base=cfg["log_base"])
+
+    # input 3D shape (spatial, spatial, temporal)
+    # QDM expects input arr with shape (time, space)
+    tmp = data.reshape(-1, data.shape[-1]).T
+    # Apply QDM correction
+    tmp = QDM(tmp)
+    # Reorganize array back from (time, space) to (spatial, spatial, temporal)
+    tmp = tmp.T.reshape(data.shape)
+
+    tmp = apply_zero_precipitation_rate(tmp, params["base_zero_rate"])
+
+    # K factor: rescale each month so the corrected series preserves the
+    # model-predicted change in the monthly mean
+    month = time_index.month
+    for m in range(12):
+        idx = month == m + 1
+        x_hat = tmp[:, :, idx].mean(axis=-1)
+        k = params["bias_mean_mf"][:, :, m] / x_hat
+        tmp[:, :, idx] *= k[:, :, np.newaxis]
+
+    return tmp
diff --git a/sup3r/bias/mixins.py b/sup3r/bias/mixins.py
index 1d8c8174a..75f5bb6d2 100644
--- a/sup3r/bias/mixins.py
+++ b/sup3r/bias/mixins.py
@@ -92,3 +92,64 @@ def fill_and_smooth(self,
                 out[key][~nan_mask, idt] = arr_smooth_int[~nan_mask]
 
         return out
+
+
+class ZeroRateMixin:
+    """Estimate the zero precipitation rate, following [Pierce2015]_.
+
+    References
+    ----------
+    .. [Pierce2015] Pierce, D. W., Cayan, D. R., Maurer, E. P., Abatzoglou, J.
+       T., & Hegewisch, K. C. (2015). Improved bias correction techniques for
+       hydrological simulations of climate change. Journal of Hydrometeorology,
+       16(6), 2421-2442.
+    """
+    @staticmethod
+    def zero_precipitation_rate(arr: np.ndarray, threshold: float = 0.01):
+        """Rate of (nearly) zero precipitation days
+
+        Estimate the rate of values less than a given ``threshold``. In
+        concept the threshold would be zero (thus the name zero precipitation
+        rate), but a small threshold is often used to truncate negligible
+        values. For instance, [Pierce2015]_ uses 0.01 (mm/day) for PresRat
+        correction.
+
+        Parameters
+        ----------
+        arr : np.ndarray
+            An array of values to be analyzed. Usually precipitation but it
+            could be applied to other quantities.
+        threshold : float
+            Minimum value accepted. Anything less than that is assumed to be
+            zero.
+
+        Returns
+        -------
+        rate : float
+            Rate of days with negligible precipitation (see Z_gf in
+            [Pierce2015]_).
+
+        Notes
+        -----
+        ``NaN`` values are ignored in the rate estimate.
+        Therefore, a large number of ``NaN`` might compromise the confidence
+        of the estimator.
+
+        If the input values are all non-finite, it returns NaN.
+
+        Examples
+        --------
+        >>> ZeroRateMixin().zero_precipitation_rate([2, 3, 4], 1)
+        0.0
+
+        >>> ZeroRateMixin().zero_precipitation_rate([0, 1, 2, 3], 1)
+        0.25
+
+        >>> ZeroRateMixin().zero_precipitation_rate([0, 1, 2, 3, np.nan], 1)
+        0.25
+        """
+        # Cast to ndarray so the boolean indexing below also works for the
+        # plain lists used in the examples
+        arr = np.asarray(arr)
+        idx = np.isfinite(arr)
+        if not idx.any():
+            return np.nan
+
+        return np.nanmean((arr[idx] < threshold).astype('i'))
diff --git a/sup3r/bias/qdm.py b/sup3r/bias/qdm.py
index 259254f68..f04214728 100644
--- a/sup3r/bias/qdm.py
+++ b/sup3r/bias/qdm.py
@@ -17,11 +17,12 @@
     sample_q_linear,
     sample_q_log,
 )
+from typing import Optional
 
 from sup3r.preprocessing.data_handling.base import DataHandler
 from sup3r.utilities.utilities import expand_paths
 
 from .bias_calc import DataRetrievalBase
-from .mixins import FillAndSmoothMixin
+from .mixins import FillAndSmoothMixin, ZeroRateMixin
 
 logger = logging.getLogger(__name__)
@@ -61,9 +62,10 @@ def __init__(self,
                  decimals=None,
                  match_zero_rate=False,
                  n_quantiles=101,
-                 dist="empirical",
-                 sampling="linear",
-                 log_base=10):
+                 dist='empirical',
+                 sampling='linear',
+                 log_base=10,
+                 ):
         """
         Parameters
         ----------
@@ -189,7 +191,8 @@ class to be retrieved from the rex/sup3r library. If a
                          base_handler_kwargs=base_handler_kwargs,
                          bias_handler_kwargs=bias_handler_kwargs,
                          decimals=decimals,
-                         match_zero_rate=match_zero_rate)
+                         match_zero_rate=match_zero_rate,
+                         )
 
         self.bias_fut_fps = bias_fut_fps
 
@@ -200,7 +203,8 @@ class to be retrieved from the rex/sup3r library. If a
                                              target=self.target,
                                              shape=self.shape,
                                              val_split=0.0,
-                                             **self.bias_handler_kwargs)
+                                             **self.bias_handler_kwargs,
+                                             )
 
     def _init_out(self):
         """Initialize output arrays `self.out`
@@ -242,7 +246,8 @@ def _run_single(cls,
                                             base_handler,
                                             daily_reduction=daily_reduction,
                                             decimals=decimals,
-                                            base_dh_inst=base_dh_inst)
+                                            base_dh_inst=base_dh_inst,
+                                            )
 
         out = cls.get_qdm_params(bias_data,
                                  bias_fut_data,
@@ -251,7 +256,9 @@ def _run_single(cls,
                                  base_dset,
                                  sampling,
                                  n_samples,
-                                 log_base)
+                                 log_base,
+                                 )
+
         return out
 
     @staticmethod
@@ -262,7 +269,8 @@ def get_qdm_params(bias_data,
                        base_dset,
                        sampling,
                        n_samples,
-                       log_base):
+                       log_base,
+                       ):
         """Get quantiles' cut point for given datasets
 
         Estimate the quantiles' cut points for each of the three given
@@ -318,7 +326,8 @@ def get_qdm_params(bias_data,
             quantiles = sample_q_invlog(n_samples, log_base)
         else:
             msg = ('sampling option must be linear, log, or invlog, but '
-                   'received: {}'.format(sampling))
+                   'received: {}'.format(sampling)
+                   )
             logger.error(msg)
             raise KeyError(msg)
 
@@ -326,7 +335,8 @@ def get_qdm_params(bias_data,
             f'bias_{bias_feature}_params': np.quantile(bias_data, quantiles),
             f'bias_fut_{bias_feature}_params': np.quantile(bias_fut_data,
                                                            quantiles),
-            f'base_{base_dset}_params': np.quantile(base_data, quantiles)}
+            f'base_{base_dset}_params': np.quantile(base_data, quantiles),
+            }
 
         return out
@@ -359,14 +369,13 @@ def write_outputs(self, fp_out, out=None):
 
             for k, v in self.meta.items():
                 f.attrs[k] = json.dumps(v)
-        f.attrs["dist"] = self.dist
-        f.attrs["sampling"] = self.sampling
-        f.attrs["log_base"] = self.log_base
-        f.attrs["base_fps"] = self.base_fps
-        f.attrs["bias_fps"] = self.bias_fps
-        f.attrs["bias_fut_fps"] = self.bias_fut_fps
-        logger.info(
-            'Wrote quantiles to file: {}'.format(fp_out))
+        f.attrs['dist'] = self.dist
+        f.attrs['sampling'] = self.sampling
+        f.attrs['log_base'] = self.log_base
+        f.attrs['base_fps'] = self.base_fps
+        f.attrs['bias_fps'] = self.bias_fps
+        f.attrs['bias_fut_fps'] = self.bias_fut_fps
+        logger.info('Wrote quantiles to file: {}'.format(fp_out))
 
     def run(self,
             fp_out=None,
@@ -374,7 +383,8 @@ def run(self,
             daily_reduction='avg',
             fill_extend=True,
             smooth_extend=0,
-            smooth_interior=0):
+            smooth_interior=0,
+            ):
         """Estimate the statistical distributions for each location
 
         Parameters
@@ -417,8 +427,8 @@ def run(self,
 
             if not base_gid.any():
                 self.bad_bias_gids.append(bias_gid)
-                logger.debug(f"No base data for bias_gid: {bias_gid}. "
-                             "Adding it to bad_bias_gids")
+                logger.debug(f'No base data for bias_gid: {bias_gid}. '
+                             'Adding it to bad_bias_gids')
             else:
                 bias_data = self.get_bias_data(bias_gid)
                 bias_fut_data = self.get_bias_data(bias_gid,
@@ -495,3 +505,314 @@ def run(self,
 
         self.write_outputs(fp_out, self.out)
         return copy.deepcopy(self.out)
+
+
+class PresRat(ZeroRateMixin, QuantileDeltaMappingCorrection):
+    """PresRat bias correction method (precipitation)
+
+    The PresRat correction [Pierce2015]_ is defined as the combination of
+    three steps:
+    * Use the model-predicted change ratio (with the CDFs);
+    * The treatment of zero-precipitation days (with the fraction of dry
+      days);
+    * The final correction factor (K) to preserve the mean (ratio between
+      both estimated means);
+
+    References
+    ----------
+    .. [Pierce2015] Pierce, D. W., Cayan, D. R., Maurer, E. P., Abatzoglou, J.
+       T., & Hegewisch, K. C. (2015). Improved bias correction techniques for
+       hydrological simulations of climate change. Journal of Hydrometeorology,
+       16(6), 2421-2442.
+
+    # TODO:
+    # - Identify Z_gf (0.01 mm). Also have to save on output params
+    # - Estimate K = <x_mf> / <x_hat>
+    """
+
+    def _init_out(self):
+        super()._init_out()
+
+        shape = (*self.bias_gid_raster.shape, 1)
+        self.out[f'{self.base_dset}_zero_rate'] = np.full(shape,
+                                                          np.nan,
+                                                          np.float32)
+        shape = (*self.bias_gid_raster.shape, 12)
+        self.out[f'{self.bias_feature}_mean_mh'] = np.full(shape,
+                                                           np.nan,
+                                                           np.float32)
+        self.out[f'{self.bias_feature}_mean_mf'] = np.full(shape,
+                                                           np.nan,
+                                                           np.float32)
+
+    # pylint: disable=W0613
+    @classmethod
+    def _run_single(
+        cls,
+        bias_data,
+        bias_fut_data,
+        base_fps,
+        bias_feature,
+        base_dset,
+        base_gid,
+        base_handler,
+        daily_reduction,
+        decimals,
+        sampling,
+        n_samples,
+        log_base,
+        *,
+        bias_ti,
+        bias_fut_ti,
+        zero_rate_threshold,
+        base_dh_inst=None,
+    ):
+        """Estimate probability distributions at a single site
+
+        ATTENTION: This should be refactored. There is too much redundancy in
+        the code. Let's make it work first, and optimize later.
+        """
+        base_data, base_ti = cls.get_base_data(
+            base_fps,
+            base_dset,
+            base_gid,
+            base_handler,
+            daily_reduction=daily_reduction,
+            decimals=decimals,
+            base_dh_inst=base_dh_inst,
+        )
+
+        out = cls.get_qdm_params(
+            bias_data,
+            bias_fut_data,
+            base_data,
+            bias_feature,
+            base_dset,
+            sampling,
+            n_samples,
+            log_base,
+        )
+
+        out[f'{base_dset}_zero_rate'] = cls.zero_precipitation_rate(
+            base_data,
+            zero_rate_threshold,
+        )
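+        # Note: this zero rate is Pierce's Z_gf, the fraction of observed
+        # days below zero_rate_threshold. At transform time,
+        # local_presrat_bc zeroes the same fraction of the lowest corrected
+        # values and uses the monthly means saved below for the K factor,
+        # K = <x_mf> / <x_hat>, which restores the model-predicted mean.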
+
+        # Let's save the means for mh and mf instead of the `x` ratio. It
+        # seems that we should be able to simplify the mh component from
+        # the `K` coefficient.
+        # TODO: For now it is monthly but later it will be modified to a
+        # generic time window.
+        mh = np.full(12, np.nan, np.float32)
+        mf = np.full(12, np.nan, np.float32)
+        for m in range(12):
+            mh[m] = bias_data[bias_ti.month == (m + 1)].mean()
+            mf[m] = bias_fut_data[bias_fut_ti.month == (m + 1)].mean()
+        out[f'{bias_feature}_mean_mh'] = mh
+        out[f'{bias_feature}_mean_mf'] = mf
+
+        return out
+
+    def run(
+        self,
+        fp_out=None,
+        max_workers=None,
+        daily_reduction='avg',
+        fill_extend=True,
+        smooth_extend=0,
+        smooth_interior=0,
+        zero_rate_threshold=0.01,
+    ):
+        """Estimate the required information for PresRat correction
+
+        Parameters
+        ----------
+        fp_out : str | None
+            Optional .h5 output file to write the estimated statistical
+            distributions and PresRat parameters.
+        max_workers : int, optional
+            Number of workers to run in parallel. 1 is serial and None is all
+            available.
+        daily_reduction : None | str
+            Option to do a reduction of the hourly+ source base data to daily
+            data. Can be None (no reduction, keep source time frequency), "avg"
+            (daily average), "max" (daily max), "min" (daily min),
+            "sum" (daily sum/total)
+        zero_rate_threshold : float, default=0.01
+            Threshold used to estimate the zero-precipitation rate from the
+            base (observed) data. Values below it are considered negligible,
+            i.e. effectively zero.
+
+        Returns
+        -------
+        out : dict
+            Dictionary with parameters defining the statistical distributions
+            for each of the three given datasets, plus the zero-precipitation
+            rate and the monthly means required by PresRat. Distribution
+            values have dimensions (lat, lon, n-parameters).
+        """
+        logger.debug('Calculate CDF parameters for QDM')
+
+        logger.info(
+            'Initialized params with shape: {}'.format(
+                self.bias_gid_raster.shape
+            )
+        )
+        self.bad_bias_gids = []
+
+        # sup3r DataHandler opening base files will load all data in parallel
+        # during the init and should not be passed in parallel to workers
+        if isinstance(self.base_dh, DataHandler):
+            max_workers = 1
+
+        if max_workers == 1:
+            logger.debug('Running serial calculation.')
+            for i, bias_gid in enumerate(self.bias_meta.index):
+                raster_loc = np.where(self.bias_gid_raster == bias_gid)
+                _, base_gid = self.get_base_gid(bias_gid)
+
+                if not base_gid.any():
+                    self.bad_bias_gids.append(bias_gid)
+                    logger.debug(
+                        f'No base data for bias_gid: {bias_gid}. '
+                        'Adding it to bad_bias_gids'
+                    )
+                else:
+                    bias_data = self.get_bias_data(bias_gid)
+                    bias_fut_data = self.get_bias_data(
+                        bias_gid, self.bias_fut_dh
+                    )
+                    single_out = self._run_single(
+                        bias_data,
+                        bias_fut_data,
+                        self.base_fps,
+                        self.bias_feature,
+                        self.base_dset,
+                        base_gid,
+                        self.base_handler,
+                        daily_reduction,
+                        self.decimals,
+                        sampling=self.sampling,
+                        n_samples=self.n_quantiles,
+                        log_base=self.log_base,
+                        base_dh_inst=self.base_dh,
+                        zero_rate_threshold=zero_rate_threshold,
+                        bias_ti=self.bias_dh.time_index,
+                        bias_fut_ti=self.bias_fut_dh.time_index,
+                    )
+                    for key, arr in single_out.items():
+                        self.out[key][raster_loc] = arr
+
+                logger.info(
+                    'Completed bias calculations for {} out of {} '
+                    'sites'.format(i + 1, len(self.bias_meta))
+                )
+
+        else:
+            logger.debug(
+                'Running parallel calculation with {} workers.'.format(
+                    max_workers
+                )
+            )
+            with ProcessPoolExecutor(max_workers=max_workers) as exe:
+                futures = {}
+                for bias_gid in self.bias_meta.index:
+                    raster_loc = np.where(self.bias_gid_raster == bias_gid)
+                    _, base_gid = self.get_base_gid(bias_gid)
+
+                    if not base_gid.any():
+                        self.bad_bias_gids.append(bias_gid)
+                    else:
+                        bias_data = self.get_bias_data(bias_gid)
+                        bias_fut_data = self.get_bias_data(
+                            bias_gid, self.bias_fut_dh
+                        )
+                        future = exe.submit(
+                            self._run_single,
+                            bias_data,
+                            bias_fut_data,
+                            self.base_fps,
+                            self.bias_feature,
+                            self.base_dset,
+                            base_gid,
+                            self.base_handler,
+                            daily_reduction,
+                            self.decimals,
+                            sampling=self.sampling,
+                            n_samples=self.n_quantiles,
+                            log_base=self.log_base,
+                            zero_rate_threshold=zero_rate_threshold,
+                            bias_ti=self.bias_dh.time_index,
+                            bias_fut_ti=self.bias_fut_dh.time_index,
+                        )
+                        futures[future] = raster_loc
+
+                logger.debug('Finished launching futures.')
+                for i, future in enumerate(as_completed(futures)):
+                    raster_loc = futures[future]
+                    single_out = future.result()
+                    for key, arr in single_out.items():
+                        self.out[key][raster_loc] = arr
+
+                    logger.info(
+                        'Completed bias calculations for {} out of {} '
+                        'sites'.format(i + 1, len(futures))
+                    )
+
+        logger.info('Finished calculating bias correction factors.')
+
+        self.out = self.fill_and_smooth(
+            self.out, fill_extend, smooth_extend, smooth_interior
+        )
+
+        self.zero_rate_threshold = zero_rate_threshold
+        extra_attrs = {'zero_rate_threshold': zero_rate_threshold}
+        self.write_outputs(fp_out,
+                           self.out,
+                           extra_attrs=extra_attrs,
+                           )
+
+        return copy.deepcopy(self.out)
+
+    def write_outputs(self, fp_out: str,
+                      out: dict = None,
+                      extra_attrs: Optional[dict] = None):
+        """Write outputs to an .h5 file.
+
+        Parameters
+        ----------
+        fp_out : str | None
+            An HDF5 filename to write the estimated statistical distributions.
+        out : dict, optional
+            A dictionary with the statistical distribution parameters and the
+            PresRat auxiliary variables. If not given, it uses :attr:`.out`.
+        extra_attrs : dict, optional
+            Extra attributes to be exported together with the dataset.
+
+        Examples
+        --------
+        >>> mycalc = PresRat(...)
+        >>> mycalc.write_outputs(fp_out="myfile.h5", out=mydictdataset,
+        ...   extra_attrs={'zero_rate_threshold': 80})
+        """
+
+        out = out or self.out
+
+        if fp_out is not None:
+            if not os.path.exists(os.path.dirname(fp_out)):
+                os.makedirs(os.path.dirname(fp_out), exist_ok=True)
+
+            with h5py.File(fp_out, 'w') as f:
+                # pylint: disable=E1136
+                lat = self.bias_dh.lat_lon[..., 0]
+                lon = self.bias_dh.lat_lon[..., 1]
+                f.create_dataset('latitude', data=lat)
+                f.create_dataset('longitude', data=lon)
+                for dset, data in out.items():
+                    f.create_dataset(dset, data=data)
+
+                for k, v in self.meta.items():
+                    f.attrs[k] = json.dumps(v)
+                f.attrs['dist'] = self.dist
+                f.attrs['sampling'] = self.sampling
+                f.attrs['log_base'] = self.log_base
+                f.attrs['base_fps'] = self.base_fps
+                f.attrs['bias_fps'] = self.bias_fps
+                f.attrs['bias_fut_fps'] = self.bias_fut_fps
+                if extra_attrs is not None:
+                    for a, v in extra_attrs.items():
+                        f.attrs[a] = v
+                logger.info('Wrote quantiles to file: {}'.format(fp_out))
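A hedged end-to-end sketch of how the new pieces compose, using only the
signatures shown above; the file names, domain target/shape, and the
'ghi'/'rsds' pairing are placeholders, and the test file below exercises
the same flow with synthetic data:

    import numpy as np
    import pandas as pd

    from sup3r.bias import PresRat, local_presrat_bc
    from sup3r.preprocessing.data_handling import DataHandlerNC

    # 1) Estimate CDFs, the zero-precipitation rate, and monthly means,
    #    and write them to an HDF5 parameters file.
    calc = PresRat('obs_ghi.h5', 'model_hist_rsds.nc', 'model_fut_rsds.nc',
                   'ghi', 'rsds', target=(39.0, -105.1), shape=(4, 4),
                   bias_handler='DataHandlerNCforCC')
    calc.run(max_workers=1, zero_rate_threshold=0.01,
             fp_out='presrat_params.h5')

    # 2) Apply the correction to a (lat, lon, time) array of future data.
    #    lat_lon must match the coordinates saved in the parameters file.
    lat_lon = DataHandlerNC('model_fut_rsds.nc', 'rsds').lat_lon
    data = np.random.rand(*lat_lon.shape[:2], 365)  # placeholder biased data
    time_index = pd.date_range('2050-01-01', periods=365, freq='D')
    corrected = local_presrat_bc(data, time_index, lat_lon,
                                 'ghi', 'rsds', 'presrat_params.h5')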
diff --git a/tests/bias/test_presrat_bias_correction.py b/tests/bias/test_presrat_bias_correction.py
new file mode 100644
index 000000000..e2befcfbf
--- /dev/null
+++ b/tests/bias/test_presrat_bias_correction.py
@@ -0,0 +1,814 @@
+"""Validating PresRat correction procedures
+
+Relevant resources used in the tests:
+- FP_CC: Filename of standard biased dataset.
+- fut_cc: Future dataset sample based on FP_CC + an offset + small noise
+- fp_fut_cc: Filename to `fut_cc`.
+- fut_cc_notrend: Future dataset identical to fp_precip, i.e. no trend.
+- fp_fut_cc_notrend: Filename to fut_cc_notrend.
+- presrat_params: Parameters of reference to test PresRat (using
+  fp_precip_fut).
+- presrat_notrend_params: Quantiles of future (mf) are identical to bias
+  reference (mh). Thus, there is no trend in the model.
+- presrat_identity_params: All distributions (oh & mf) are identical to mh,
+  i.e. observations equal to a model that doesn't change in time.
+- presrat_nochanges_params: Like presrat_identity_params, but also all
+  zero_rate are zeros, i.e. no values should be forced to be zero.
+- presrat_nozeros_params: Same as presrat_params, but no zero_rate, i.e.
+  all zero_rate values are equal to 0 (percent).
+"""
+
+import os
+import shutil
+
+import h5py
+import numpy as np
+import pandas as pd
+import pytest
+import xarray as xr
+
+from rex import Outputs, Resource
+
+from sup3r import CONFIG_DIR, TEST_DATA_DIR
+from sup3r.models import Sup3rGan
+from sup3r.pipeline.forward_pass import ForwardPass, ForwardPassStrategy
+from sup3r.bias import (
+    apply_zero_precipitation_rate,
+    local_presrat_bc,
+    PresRat,
+)
+from sup3r.bias.mixins import ZeroRateMixin
+from sup3r.preprocessing.data_handling import DataHandlerNC, DataHandlerNCforCC
+
+FP_NSRDB = os.path.join(TEST_DATA_DIR, 'test_nsrdb_co_2018.h5')
+FP_CC = os.path.join(TEST_DATA_DIR, 'rsds_test.nc')
+FP_CC_LAT_LON = DataHandlerNC(FP_CC, 'rsds').lat_lon
+# A reference zero rate threshold that might not make sense physically but
+# is used for testing purposes only. This might change in the future to
+# force edge cases.
+ZR_THRESHOLD = 80
+
+with xr.open_dataset(FP_CC) as fh:
+    MIN_LAT = np.min(fh.lat.values.astype(np.float32))
+    MIN_LON = np.min(fh.lon.values.astype(np.float32)) - 360
+    TARGET = (float(MIN_LAT), float(MIN_LON))
+    SHAPE = (len(fh.lat.values), len(fh.lon.values))
+
+VAR_MIN = 0
+# Fix this max
+VAR_MAX = 1300
+
+
+@pytest.fixture(scope='module')
+def fp_resource(tmpdir_factory):
+    fn = tmpdir_factory.mktemp('data').join('precip_oh.h5')
+
+    time = pd.DatetimeIndex(
+        np.arange(np.datetime64('2018-01-01 00:00:00+00:00'),
+                  np.datetime64('2018-03-26 23:31:00+00:00'),
+                  np.timedelta64(30, 'm')))
+    lat = np.array([39.01, 39.05, 39.09, 39.13, 39.17, 39.21, 39.25, 39.29,
+                    39.33, 39.37, 39.41, 39.45, 39.49, 39.53, 39.57, 39.61,
+                    39.65, 39.69, 39.73, 39.77])
+    lon = np.array([-105.14, -105.1, -105.06, -105.02, -104.98, -104.94,
+                    -104.9, -104.86, -104.82, -104.78, -104.74, -104.7,
+                    -104.66, -104.62, -104.58, -104.54, -104.5, -104.46,
+                    -104.42, -104.38])
+    rng = np.random.default_rng()
+    ghi = rng.normal(210, 87., (time.size, lat.size, lon.size))
+
+    ds = xr.Dataset(
+        data_vars={
+            "ghi": (["time", "lat", "lon"], ghi)
+        },
+        coords={
+            "time": ("time", time),
+            "lat": ("lat", lat),
+            "lon": ("lon", lon),
+        })
+
+    ds = ds.sortby('lat', ascending=False)
+    lat_2d, lon_2d = xr.broadcast(ds['lat'], ds['lon'])
+    meta = pd.DataFrame({'latitude': lat_2d.values.flatten(),
+                         'longitude': lon_2d.values.flatten(),
+                         })
+
+    shapes = {"ghi": (len(ds.ghi.time), np.prod(ds.ghi.isel(time=0).shape))}
+    attrs = {"ghi": None}
+    chunks = {"ghi": None}
+    dtypes = {"ghi": "float32"}
+
+    Outputs.init_h5(fn, ["ghi"], shapes, attrs, chunks, dtypes,
+                    meta=meta,
+                    time_index=pd.DatetimeIndex(ds.time))
+    with Outputs(fn, 'a') as out:
+        out["ghi"] = ds.stack(flat=("lat", "lon"))["ghi"]
+
+    # DataHandlerNCforCC requires a string
+    fn = str(fn)
+    return fn
+
+
+@pytest.fixture(scope='module')
+def precip():
+    lat = np.array([38.2455282337738, 38.9472804370071, 39.649032596592,
+                    40.3507847105177])
+    lon = np.array([254.53125, 255.234375, 255.9375, 256.640625])
+    t0 = np.datetime64('2015-01-01T12:00:00')
+    time = t0 + np.linspace(0, 364, 365, dtype='timedelta64[D]')
+    bnds = (-np.timedelta64(12, 'h'), np.timedelta64(12, 'h'))
+    time_bnds = time[:, np.newaxis] + bnds
+    rng = np.random.default_rng()
+    pr = rng.normal(210, 87., (time.size, lat.size, lon.size))
+    pr = np.where(pr > 0, pr, 0)
+
+    ds = xr.Dataset(
+        data_vars={
+            "rsds": (["time", "lat", "lon"], pr)
+        },
+        coords={
+            "time": ("time", time),
+            "time_bnds": (["time", "bnds"], time_bnds),
+            "lat": ("lat", lat),
+            "lon": ("lon", lon),
+        })
+
+    return ds
+
+
+# FP_CC
+@pytest.fixture(scope='module')
+def fp_precip(tmpdir_factory, precip):
+    """Precipitation sample filename
+
+    DataHandlerNCforCC requires a string
+    """
+    fn = tmpdir_factory.mktemp('data').join('precip_mh.nc')
+    precip.to_netcdf(fn)
+    fn = str(fn)
+    return fn
+
+
+# fut_cc
+@pytest.fixture(scope='module')
+def precip_fut(tmpdir_factory, precip):
+    ds = precip.copy(deep=True)
+
+    time = ds['time'] + np.timedelta64(18263, 'D')
+    time.attrs = ds['time'].attrs
+    ds['time'] = time
+    # Adding an offset
+    ds['rsds'] += 75
+    # adding a small noise
+    ds['rsds'] += 1e-4 * np.random.randn(*ds['rsds'].shape)
+
+    return ds['rsds']
+
+
+@pytest.fixture(scope='module')
+def fp_precip_fut(tmpdir_factory, precip_fut):
+    """Future precipitation sample filename
+
+    DataHandlerNCforCC requires a string
+    """
+    fn = tmpdir_factory.mktemp('data').join('precip_mf.nc')
+    precip_fut.to_netcdf(fn)
+    fn = str(fn)
+    return fn
+
+
+@pytest.fixture(scope='module')
+def fp_fut_cc(tmpdir_factory):
+    """Sample future CC dataset
+
+    The same CC but with an offset (75.0) and negligible noise.
+    """
+    fn = tmpdir_factory.mktemp('data').join('test_mf.nc')
+    ds = xr.open_dataset(FP_CC)
+    # Adding an offset
+    ds['rsds'] += 75.0
+    # adding a small noise
+    ds['rsds'] += 1e-4 * np.random.randn(*ds['rsds'].shape)
+    ds.to_netcdf(fn)
+    # DataHandlerNCforCC requires a string
+    fn = str(fn)
+    return fn
+
+
+@pytest.fixture(scope='module')
+def fut_cc(fp_fut_cc):
+    """Gives the dataset itself related to fp_fut_cc
+
+    Providing an object already in memory is more efficient since it avoids
+    repeated file I/O across multiple uses.
+
+    Note that ``Resources`` modifies the dataset so we cannot just load the
+    NetCDF. Here we run a couple of checks to confirm that the output
+    dataset is as expected by sup3r.
+
+    To use time as expected by sup3r: time = pd.to_datetime(da.time)
+    To use latlon as expected by sup3r:
+    latlon = np.stack(xr.broadcast(da["lat"], da["lon"] - 360),
+                      axis=-1).astype('float32')
+    """
+    ds = xr.open_dataset(fp_fut_cc)
+
+    # Operating with numpy arrays imposes a fixed dimension order
+    # This compute is required here.
+    da = ds['rsds'].compute().transpose('lat', 'lon', 'time')
+
+    # The _get_factors() assumes latitude is descending and it will
+    # silently return wrong values otherwise.
+    da = da.sortby('lat', ascending=False)
+
+    latlon = np.stack(
+        xr.broadcast(da['lat'], da['lon'] - 360), axis=-1
+    )
+    # Confirm that dataset order is consistent
+    # Somewhere in the pipeline latlon is downgraded to f32
+    assert np.allclose(latlon.astype('float32'), FP_CC_LAT_LON)
+
+    # Verify data alignment in comparison with expected for FP_CC
+    for ii in range(ds.lat.size):
+        for jj in range(ds.lon.size):
+            assert np.allclose(
+                da.sel(lat=latlon[ii, jj, 0]).sel(
+                    lon=latlon[ii, jj, 1] + 360
+                ),
+                da.data[ii, jj],
+            )
+
+    return da
+
+
+@pytest.fixture(scope='module')
+def fp_fut_cc_notrend(tmpdir_factory, fp_precip):
+    """Sample future CC dataset identical to historical CC
+
+    This is currently a copy of fp_precip, thus no trend in time.
+    """
+    fn = tmpdir_factory.mktemp('data').join('test_mf_notrend.nc')
+    shutil.copyfile(fp_precip, fn)
+    # DataHandlerNCforCC requires a string
+    fn = str(fn)
+    return fn
+
+
+@pytest.fixture(scope='module')
+def fut_cc_notrend(fp_fut_cc_notrend):
+    """Extract the dataset from fp_fut_cc_notrend
+
+    The internal process to read such a dataset is way more complex than
+    just reading it, and there are some transformations. This function must
+    provide a dataset compatible with the one expected from the standard
+    processing.
+    """
+    ds = xr.open_dataset(fp_fut_cc_notrend)
+
+    # Although it is the same file, somewhere in the data reading process
+    # the longitude is transformed to the standard [-180 to 180] and it is
+    # expected to be like that everywhere.
+    ds['lon'] = ds['lon'] - 360
+
+    # Operating with numpy arrays imposes a fixed dimension order
+    # This compute is required here.
+    da = ds['rsds'].compute().transpose('lat', 'lon', 'time')
+
+    # The _get_factors() assumes latitude is descending and it will
+    # silently return wrong values otherwise.
+    da = da.sortby('lat', ascending=False)
+
+    latlon = np.stack(
+        xr.broadcast(da['lat'], da['lon']), axis=-1
+    )
+    # Confirm that dataset order is consistent
+    # Somewhere in the pipeline latlon is downgraded to f32
+    assert np.allclose(latlon.astype('float32'), FP_CC_LAT_LON)
+
+    # Verify data alignment in comparison with expected for FP_CC
+    for ii in range(ds.lat.size):
+        for jj in range(ds.lon.size):
+            assert np.allclose(
+                da.sel(lat=latlon[ii, jj, 0]).sel(lon=latlon[ii, jj, 1]),
+                da.data[ii, jj],
+            )
+
+    return da
+
+
+@pytest.fixture(scope='module')
+def presrat_params(tmpdir_factory, fp_resource, fp_precip, fp_precip_fut):
+    """PresRat parameters for standard datasets
+
+    Use the standard datasets to estimate the distributions and save
+    in a temporary place to be re-used
+    """
+    calc = PresRat(
+        fp_resource,
+        fp_precip,
+        fp_precip_fut,
+        'ghi',
+        'rsds',
+        target=TARGET,
+        shape=SHAPE,
+        distance_upper_bound=0.7,
+        bias_handler='DataHandlerNCforCC',
+    )
+    fn = tmpdir_factory.mktemp('params').join('presrat.h5')
+    # Physically nonsensical threshold chosen to result in grid points with
+    # and without zero rate correction for the given testing dataset.
+    _ = calc.run(max_workers=1, zero_rate_threshold=ZR_THRESHOLD, fp_out=fn)
+
+    # DataHandlerNCforCC requires a string
+    fn = str(fn)
+
+    return fn
+
+
+@pytest.fixture(scope='module')
+def presrat_notrend_params(tmpdir_factory, fp_resource, fp_precip,
+                           fp_fut_cc_notrend):
+    """No change in time
+
+    The bias_fut distribution is equal to bias (mod_his), so no change in
+    time.
+
+    We could save some overhead here copying fp_fut_cc and replacing some
+    values there. That was done before but missed some variables, resulting
+    in errors.
+    """
+    calc = PresRat(
+        fp_resource,
+        fp_precip,
+        fp_fut_cc_notrend,
+        'ghi',
+        'rsds',
+        target=TARGET,
+        shape=SHAPE,
+        distance_upper_bound=0.7,
+        bias_handler='DataHandlerNCforCC',
+    )
+    fn = tmpdir_factory.mktemp('params').join('presrat_notrend.h5')
+    _ = calc.run(zero_rate_threshold=ZR_THRESHOLD, fp_out=fn)
+
+    # DataHandlerNCforCC requires a string
+    fn = str(fn)
+
+    return fn
+
+
+@pytest.fixture(scope='module')
+def presrat_identity_params(tmpdir_factory, presrat_params):
+    """Identical distribution"""
+
+    fn = tmpdir_factory.mktemp('params').join('presrat_identity.h5')
+    shutil.copyfile(presrat_params, fn)
+
+    with h5py.File(fn, 'r+') as f:
+        f['bias_rsds_params'][:] = f['bias_fut_rsds_params'][:]
+        f['base_ghi_params'][:] = f['bias_fut_rsds_params'][:]
+        f.flush()
+
+    return str(fn)
+
+
+@pytest.fixture(scope='module')
+def presrat_nochanges_params(tmpdir_factory, presrat_params):
+    """Identical distribution and no zero rate
+
+    All distributions are identical and zero rate changes are all zero,
+    therefore, the PresRat correction should not change anything.
+
+    Note that distributions are based on bias_fut, so it is assumed that the
+    test cases will be datasets coherent with that bias_fut distribution,
+    otherwise it could lead to differences if out of that scale.
+ """ + fn = tmpdir_factory.mktemp('params').join('presrat_nochanges.h5') + shutil.copyfile(presrat_params, fn) + + with h5py.File(fn, 'r+') as f: + f['bias_fut_rsds_params'][:] = f['bias_rsds_params'][:] + f['base_ghi_params'][:] = f['bias_rsds_params'][:] + f['ghi_zero_rate'][:] *= 0 + f['rsds_mean_mf'][:] = f['rsds_mean_mh'][:] + f.flush() + + return str(fn) + + +@pytest.fixture(scope='module') +def presrat_nozeros_params(tmpdir_factory, presrat_params): + """PresRat parameters without zero rate correction + + The same standard parameters but all zero_rate values are equal to zero, + which means that zero percent, i.e. none, of the values should be forced + to be zero. + """ + fn = tmpdir_factory.mktemp('params').join('presrat_nozeros.h5') + shutil.copyfile(presrat_params, fn) + + with h5py.File(fn, 'r+') as f: + f['ghi_zero_rate'][:] *= 0 + f.flush() + + return str(fn) + + +def test_zero_precipitation_rate(): + """Zero rate estimate with extremme thresholds""" + f = ZeroRateMixin().zero_precipitation_rate + arr = np.random.randn(100) + + rate = f(arr, threshold=np.median(arr)) + assert rate == 0.5 + + +def test_zero_precipitation_rate_extremes(): + """Zero rate estimate with extremme thresholds""" + f = ZeroRateMixin().zero_precipitation_rate + arr = np.arange(10) + + rate = f(arr, threshold=-1) + assert rate == 0 + + rate = f(arr, threshold=0) + assert rate == 0 + + # Remember, 9 is the last value, i.e. the 10th value + rate = f(arr, threshold=9) + assert rate == 0.9 + + rate = f(arr, threshold=100) + assert rate == 1 + + +def test_zero_precipitation_rate_nanonly(): + """Zero rate estimate with only NaNs gives NaN""" + f = ZeroRateMixin().zero_precipitation_rate + arr = np.nan * np.zeros(10) + + # All NaN gives NaN rate + rate = f(arr) + assert np.isnan(rate) + + +def test_zero_precipitation_rate_nan(): + """Zero rate estimate with NaNs + + NaN shouldn't be counted to find the rate. Thus an array with NaNs should + give the same results if the NaN were removed before the calculation. + """ + f = ZeroRateMixin().zero_precipitation_rate + arr = np.arange(10) + + r1 = f(arr, threshold=5) + r2 = f(np.concatenate([5 * [np.nan], arr]), threshold=5) + assert r1 == r2 + + +@pytest.mark.parametrize('threshold', [0, 50, 1e6]) +def test_parallel(fp_resource, fp_precip, fp_precip_fut, threshold): + """Running in parallel must not alter results + + Check with different thresholds, which will result in different zero rates. + """ + s = PresRat( + fp_resource, + fp_precip, + fp_precip_fut, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + bias_handler='DataHandlerNCforCC', + ) + + out_s = s.run(max_workers=1, zero_rate_threshold=threshold) + + p = PresRat( + fp_resource, + fp_precip, + fp_precip_fut, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + bias_handler='DataHandlerNCforCC', + ) + + out_p = p.run(max_workers=2, zero_rate_threshold=threshold) + + for k in out_s.keys(): + assert k in out_p, f'Missing {k} in parallel run' + assert np.allclose( + out_s[k], out_p[k], equal_nan=True + ), f'Different results for {k}' + + +def test_presrat_calc(fp_resource, fp_precip, fp_precip_fut): + """Standard PresRat (pre) calculation + + Estimate the required parameters with a standard setup. + + WIP: Just confirm it runs, but not checking much yet. 
+ """ + calc = PresRat( + fp_resource, + fp_precip, + fp_precip_fut, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + bias_handler='DataHandlerNCforCC', + ) + + out = calc.run(zero_rate_threshold=ZR_THRESHOLD) + + expected_vars = ['bias_rsds_params', 'bias_fut_rsds_params', + 'base_ghi_params', 'ghi_zero_rate', 'rsds_mean_mh', + 'rsds_mean_mf'] + sref = FP_CC_LAT_LON.shape[:2] + for v in expected_vars: + assert v in out, f"Missing {v} in the calculated output" + assert out[v].shape[:2] == sref, "Doesn't match expected spatial shape" + # This is only true because fill and extend are applied by default. + assert np.all(np.isfinite(out[v])), f"Unexpected NaN for {v}" + + zero_rate = out['ghi_zero_rate'] + assert np.all((zero_rate >= 0) & (zero_rate <= 1)), 'Out of range [0, 1]' + + +@pytest.mark.parametrize('threshold', [0, 50, 1e6]) +def test_presrat_zero_rate(fp_resource, fp_precip, fp_precip_fut, threshold): + """Estimate zero_rate within PresRat.run() + + Use thresholds that gives 0%, 100%, and something between. + + Notes + ----- + - Rate should be zero if threshold is zero for this dataset. + """ + calc = PresRat( + fp_resource, + fp_precip, + fp_precip_fut, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + bias_handler='DataHandlerNCforCC', + ) + + out = calc.run(zero_rate_threshold=threshold) + + assert 'ghi_zero_rate' in out, 'Missing ghi_zero_rate in calc output' + zero_rate = out['ghi_zero_rate'] + # True for this dataset because fill and extend are applied by default. + assert np.all(np.isfinite(zero_rate)), 'Unexpected NaN for ghi_zero_rate' + assert np.all((zero_rate >= 0) & (zero_rate <= 1)), 'Out of range [0, 1]' + + if threshold <= 0: + assert np.all(zero_rate == 0), 'It should be rate 0 for threshold==0' + elif threshold >= 1e4: + assert np.all(zero_rate == 1), 'It should be rate 1 for threshold>=1e4' + + +def test_apply_zero_precipitation_rate(): + """Reinforce the zero precipitation rate, standard run""" + data = np.array([[[5, 0.1, 3, 0.2, 1]]]) + out = apply_zero_precipitation_rate(data, np.array([[[0.25]]])) + + assert np.allclose([5.0, 0.0, 3, 0.2, 1.0], out, equal_nan=True) + + +def test_apply_zero_precipitation_rate_nan(): + """Validate with NaN in the input""" + data = np.array([[[5, 0.1, np.nan, 0.2, 1]]]) + out = apply_zero_precipitation_rate(data, np.array([[[0.25]]])) + + assert np.allclose([5.0, 0.0, np.nan, 0.2, 1.0], out, equal_nan=True) + + +def test_apply_zero_precipitation_rate_2D(): + """Validate a 2D input""" + data = np.array( + [ + [ + [5, 0.1, np.nan, 0.2, 1], + [5, 0.1, 3, 0.2, 1], + ] + ] + ) + out = apply_zero_precipitation_rate(data, np.array([[[0.25], [0.41]]])) + + assert np.allclose( + [[5.0, 0.0, np.nan, 0.2, 1.0], [5.0, 0.0, 3, 0.0, 1.0]], + out, + equal_nan=True, + ) + + +def test_presrat(fp_resource, fp_precip, fp_precip_fut): + """Test PresRat correction procedure + + Basic standard run. Using only required arguments. If this fails, + something fundamental is wrong. + """ + calc = PresRat( + fp_resource, + fp_precip, + fp_precip_fut, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + bias_handler='DataHandlerNCforCC', + ) + + # A high zero_rate_threshold to gets at least something. + out = calc.run(zero_rate_threshold=50) + + # Guarantee that we have some actual values, otherwise most of the + # remaining tests would be useless + for v in out: + assert np.isfinite(out[v]).any(), 'Something wrong, all CDFs are NaN.' 
+
+    # Check the possible range
+    for v in out:
+        assert np.nanmin(out[v]) >= VAR_MIN, f'{v} should be all >= {VAR_MIN}.'
+        assert np.nanmax(out[v]) < VAR_MAX, f'{v} should be all < {VAR_MAX}.'
+
+    # Each location can be all finite or all NaN, but not both
+    for v in (v for v in out if len(out[v].shape) > 2):
+        tmp = np.isfinite(out[v].reshape(-1, *out[v].shape[2:]))
+        assert np.all(
+            np.all(tmp, axis=1) == ~np.all(~tmp, axis=1)
+        ), f'Each location of {v} should be either all finite or all NaN'
+
+
+def test_presrat_transform(presrat_params, fut_cc):
+    """A standard run with local_presrat_bc
+
+    WIP: only confirms it runs.
+    """
+    data = fut_cc.values
+    time = pd.to_datetime(fut_cc.time)
+    latlon = np.stack(
+        xr.broadcast(fut_cc['lat'], fut_cc['lon'] - 360), axis=-1
+    ).astype('float32')
+
+    corrected = local_presrat_bc(
+        data, time, latlon, 'ghi', 'rsds', presrat_params
+    )
+
+    assert np.isfinite(corrected).any(), "Can't compare if only NaN"
+    # Confirm that there were changes, but stop there for now.
+    assert not np.allclose(data, corrected, equal_nan=False)
+
+
+def test_presrat_transform_nochanges(presrat_nochanges_params, fut_cc_notrend):
+    """The correction should result in no changes at all
+
+    Note that there are a lot of implicit transformations, so we cannot
+    expect to easily compare all gridpoints.
+
+    The corrected output must be the same if:
+    - The three CDFs are the same, so there is no change due to QDM. One
+      caveat: the data to be corrected is compared with the mf CDF, so
+      out-of-distribution values could lead to differences;
+    - All zero rates are set to zero percent, so no value is forced to
+      zero;
+    - The value to be corrected is the same one used to estimate the means
+      for the K factor.
+    """
+    data = fut_cc_notrend.values
+    time = pd.to_datetime(fut_cc_notrend.time)
+    latlon = np.stack(
+        xr.broadcast(fut_cc_notrend['lat'], fut_cc_notrend['lon']),
+        axis=-1,
+    ).astype('float32')
+
+    corrected = local_presrat_bc(
+        data, time, latlon, 'ghi', 'rsds', presrat_nochanges_params
+    )
+
+    assert np.isfinite(corrected).any(), "Can't compare if only NaN"
+
+    # The calculations are set up so that `run()` only applies to
+    # gridpoints where both historical reference and biased data exist.
+    # This is hidden by an implicit interpolation procedure, which results
+    # in values that can't be easily reproduced for validation. One
+    # solution is to allow the implicit interpolation but compare only
+    # where non-interpolated values are available. Let's call it the
+    # 'magic index'.
+    idx = (slice(1, 3), slice(0, 3))
+    assert np.allclose(
+        data[idx], corrected[idx], equal_nan=False
+    ), "This case shouldn't modify the data"
+
+
+def test_presrat_transform_nozerochanges(presrat_nozeros_params, fut_cc):
+    """No adjustment to zero
+
+    The correction procedure results in some changes, but since the
+    zero_rate correction is all set to zero percent, no values are
+    adjusted to zero.
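+
+    In other words, no value that is nonzero in the input should end up
+    exactly zero in the corrected output (the last assert below).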
+ """ + data = fut_cc.values + time = pd.to_datetime(fut_cc.time) + latlon = np.stack( + xr.broadcast(fut_cc['lat'], fut_cc['lon'] - 360), axis=-1 + ).astype('float32') + + corrected = local_presrat_bc( + data, + time, + latlon, + 'ghi', + 'rsds', + presrat_nozeros_params, + ) + + assert np.isfinite(data).any(), "Can't compare if only NaN" + assert not np.allclose( + data, corrected, equal_nan=False + ), 'Expected changes due to bias correction' + assert not ( + (data != 0) & (corrected == 0) + ).any(), 'Unexpected value corrected (zero_rate) to zero (dry day)' + + +def precip_sample(tmpdir_factory): + t_start = np.datetime64('2015-01-01') + t_end = np.datetime64('2015-01-20') + nt = 20 + + lat = np.linspace(13.66, 31.57, 20) + long = np.linspace(125.0, 148.75, 20) + t0 = np.datetime64('2015-01-01') + time = t0 + np.linspace(0, 19, 20, dtype='timedelta64[D]') + + ds = xr.Dataset() + + +def test_fwp_integration(tmp_path, fp_fut_cc, presrat_params): + """Integration of the PresRat correction method into the forward pass + + Validate two aspects: + - We should be able to run a forward pass with unbiased data. + - The bias trend should be observed in the predicted output. + """ + fp_gen = os.path.join(CONFIG_DIR, 'spatiotemporal/gen_3x_4x_2f.json') + fp_disc = os.path.join(CONFIG_DIR, 'spatiotemporal/disc.json') + features = ['rsds'] + target = (39.0, -104.5) + # shape = (8, 8) + shape = (2, 2) + temporal_slice = slice(None, None, 1) + fwp_chunk_shape = (4, 4, 150) + input_files = [fp_fut_cc] + + Sup3rGan.seed() + model = Sup3rGan(fp_gen, fp_disc, learning_rate=1e-4) + _ = model.generate(np.ones((4, 10, 10, 6, len(features)))) + model.meta['lr_features'] = features + model.meta['hr_out_features'] = features + model.meta['s_enhance'] = 3 + model.meta['t_enhance'] = 4 + + out_dir = os.path.join(tmp_path, 'st_gan') + model.save(out_dir) + + bias_correct_kwargs = { + 'rsds': {'feature_name': 'rsds', + 'base_dset': 'ghi', + 'bias_fp': presrat_params}} + + strat = ForwardPassStrategy( + input_files, + model_kwargs={'model_dir': out_dir}, + fwp_chunk_shape=fwp_chunk_shape, + spatial_pad=0, temporal_pad=0, + input_handler_kwargs=dict(target=target, shape=shape, + temporal_slice=temporal_slice, + worker_kwargs=dict(max_workers=1)), + out_pattern=os.path.join(tmp_path, 'out_{file_id}.nc'), + worker_kwargs=dict(max_workers=1), + input_handler='DataHandlerNCforCC', + ) + bc_strat = ForwardPassStrategy( + input_files, + model_kwargs={'model_dir': out_dir}, + fwp_chunk_shape=fwp_chunk_shape, + spatial_pad=0, temporal_pad=0, + input_handler_kwargs=dict(target=target, shape=shape, + temporal_slice=temporal_slice, + worker_kwargs=dict(max_workers=1)), + out_pattern=os.path.join(tmp_path, 'out_{file_id}.nc'), + worker_kwargs=dict(max_workers=1), + input_handler='DataHandlerNCforCC', + bias_correct_method='local_presrat_bc', + bias_correct_kwargs=bias_correct_kwargs, + ) + + for ichunk in range(strat.chunks): + fwp = ForwardPass(strat, chunk_index=ichunk) + bc_fwp = ForwardPass(bc_strat, chunk_index=ichunk) + + delta = bc_fwp.input_data - fwp.input_data + + delta = bc_fwp.run_chunk() - fwp.run_chunk() diff --git a/tests/bias/test_qdm_bias_correction.py b/tests/bias/test_qdm_bias_correction.py index 457b5411a..96df52503 100644 --- a/tests/bias/test_qdm_bias_correction.py +++ b/tests/bias/test_qdm_bias_correction.py @@ -11,12 +11,15 @@ from sup3r import CONFIG_DIR, TEST_DATA_DIR from sup3r.models import Sup3rGan from sup3r.pipeline.forward_pass import ForwardPass, ForwardPassStrategy -from sup3r.bias import 
local_qdm_bc, QuantileDeltaMappingCorrection +from sup3r.bias import ( + local_qdm_bc, + QuantileDeltaMappingCorrection, +) from sup3r.preprocessing.data_handling import DataHandlerNC, DataHandlerNCforCC -FP_NSRDB = os.path.join(TEST_DATA_DIR, "test_nsrdb_co_2018.h5") -FP_CC = os.path.join(TEST_DATA_DIR, "rsds_test.nc") -FP_CC_LAT_LON = DataHandlerNC(FP_CC, "rsds").lat_lon +FP_NSRDB = os.path.join(TEST_DATA_DIR, 'test_nsrdb_co_2018.h5') +FP_CC = os.path.join(TEST_DATA_DIR, 'rsds_test.nc') +FP_CC_LAT_LON = DataHandlerNC(FP_CC, 'rsds').lat_lon with xr.open_dataset(FP_CC) as fh: MIN_LAT = np.min(fh.lat.values.astype(np.float32)) @@ -25,13 +28,13 @@ SHAPE = (len(fh.lat.values), len(fh.lon.values)) -@pytest.fixture(scope="module") +@pytest.fixture(scope='module') def fp_fut_cc(tmpdir_factory): """Sample future CC dataset The same CC but with an offset (75.0) and negligible noise. """ - fn = tmpdir_factory.mktemp("data").join("test_mf.nc") + fn = tmpdir_factory.mktemp('data').join('test_mf.nc') ds = xr.open_dataset(FP_CC) # Adding an offset ds['rsds'] += 75.0 @@ -43,39 +46,38 @@ def fp_fut_cc(tmpdir_factory): return fn -@pytest.fixture(scope="module") +@pytest.fixture(scope='module') def fp_fut_cc_notrend(tmpdir_factory): """Sample future CC dataset identical to historical CC This is currently a copy of FP_CC, thus no trend on time. """ - fn = tmpdir_factory.mktemp("data").join("test_mf_notrend.nc") + fn = tmpdir_factory.mktemp('data').join('test_mf_notrend.nc') shutil.copyfile(FP_CC, fn) # DataHandlerNCforCC requires a string fn = str(fn) return fn -@pytest.fixture(scope="module") +@pytest.fixture(scope='module') def dist_params(tmpdir_factory, fp_fut_cc): """Distribution parameters for standard datasets Use the standard datasets to estimate the distributions and save in a temporary place to be re-used """ - calc = QuantileDeltaMappingCorrection( - FP_NSRDB, - FP_CC, - fp_fut_cc, - "ghi", - "rsds", - target=TARGET, - shape=SHAPE, - distance_upper_bound=0.7, - bias_handler="DataHandlerNCforCC", - ) - fn = tmpdir_factory.mktemp("params").join("standard.h5") - _ = calc.run(max_workers=1, fp_out=fn) + calc = QuantileDeltaMappingCorrection(FP_NSRDB, + FP_CC, + fp_fut_cc, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + distance_upper_bound=0.7, + bias_handler='DataHandlerNCforCC', + ) + fn = tmpdir_factory.mktemp('params').join('standard.h5') + _ = calc.run(fp_out=fn) # DataHandlerNCforCC requires a string fn = str(fn) @@ -90,29 +92,34 @@ def test_qdm_bc(fp_fut_cc): something fundamental is wrong. """ - calc = QuantileDeltaMappingCorrection(FP_NSRDB, FP_CC, fp_fut_cc, - 'ghi', 'rsds', - target=TARGET, shape=SHAPE, - bias_handler='DataHandlerNCforCC') + calc = QuantileDeltaMappingCorrection(FP_NSRDB, + FP_CC, + fp_fut_cc, + 'ghi', + 'rsds', + target=TARGET, + shape=SHAPE, + bias_handler='DataHandlerNCforCC', + ) out = calc.run() # Guarantee that we have some actual values, otherwise most of the # remaining tests would be useless for v in out: - assert np.isfinite(out[v]).any(), "Something wrong, all CDFs are NaN." + assert np.isfinite(out[v]).any(), 'Something wrong, all CDFs are NaN.' # Check possible range for v in out: - assert np.nanmin(out[v]) > 0, f"{v} should be all greater than zero." - assert np.nanmax(out[v]) < 1300, f"{v} should be all less than 1300." + assert np.nanmin(out[v]) > 0, f'{v} should be all greater than zero.' + assert np.nanmax(out[v]) < 1300, f'{v} should be all less than 1300.' 
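+        # (1300 W/m2 is a loose physical ceiling for surface irradiance,
+        # on the order of the ~1361 W/m2 solar constant.)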
 # Each location can be all finite or all NaN, but not both
     for v in out:
         tmp = np.isfinite(out[v].reshape(-1, out[v].shape[-1]))
         assert np.all(
             np.all(tmp, axis=1) == ~np.all(~tmp, axis=1)
-        ), f"For each location of {v} it should be all finite or nonte"
+        ), f'Each location of {v} should be either all finite or all NaN'


 def test_parallel(fp_fut_cc):
@@ -136,10 +143,10 @@
     out_p = p.run(max_workers=2)

     for k in out_s.keys():
-        assert k in out_p, f"Missing {k} in parallel run"
+        assert k in out_p, f'Missing {k} in parallel run'
         assert np.allclose(
             out_s[k], out_p[k], equal_nan=True
-        ), f"Different results for {k}"
+        ), f'Different results for {k}'


 def test_fill_nan(fp_fut_cc):
@@ -153,14 +160,14 @@
     # Without filling, at least one NaN or this test is useless.
     out = c.run(fill_extend=False)
-    assert np.all([np.isnan(v).any() for v in out.values()]), (
-        "Assume at least one NaN value for each param"
-    )
+    assert np.all(
+        [np.isnan(v).any() for v in out.values()]
+    ), 'Assume at least one NaN value for each param'

     out = c.run()
-    assert ~np.any([np.isnan(v) for v in out.values()]), (
-        "All NaN values where supposed to be filled"
-    )
+    assert ~np.any(
+        [np.isnan(v) for v in out.values()]
+    ), 'All NaN values were supposed to be filled'


 def test_save_file(tmp_path, fp_fut_cc):
@@ -175,14 +182,14 @@
                                           distance_upper_bound=0.7,
                                           bias_handler='DataHandlerNCforCC')

-    filename = os.path.join(tmp_path, "test_saving.hdf")
+    filename = os.path.join(tmp_path, 'test_saving.hdf')
     _ = calc.run(filename)

     # File was created
     os.path.isfile(filename)
     # A valid HDF5, can open and read
-    with h5py.File(filename, "r") as f:
-        assert "latitude" in f.keys()
+    with h5py.File(filename, 'r') as f:
+        assert 'latitude' in f.keys()


 def test_qdm_transform(dist_params):
@@ -190,7 +197,7 @@
     WIP: Confirm it runs, but don't verify anything yet.
     """
     data = np.ones((*FP_CC_LAT_LON.shape[:-1], 2))
-    corrected = local_qdm_bc(data, FP_CC_LAT_LON, "ghi", "rsds", dist_params)
+    corrected = local_qdm_bc(data, FP_CC_LAT_LON, 'ghi', 'rsds', dist_params)

     assert not np.isnan(corrected).all(), "Can't compare if only NaN"
     assert not np.allclose(data, corrected, equal_nan=False)
@@ -213,7 +220,7 @@
                               no_trend=True)

     # Creates a new distribution with mo == mf
-    notrend_params = os.path.join(tmp_path, "notrend.hdf")
+    notrend_params = os.path.join(tmp_path, 'notrend.hdf')
     shutil.copyfile(dist_params, notrend_params)
     with h5py.File(notrend_params, 'r+') as f:
         f['bias_fut_rsds_params'][:] = f['bias_rsds_params'][:]
@@ -250,7 +257,7 @@
     anything. Note that NaNs in any component, i.e. any dataset, would
     propagate into a NaN transformation.
     """
-    ident_params = os.path.join(tmp_path, "identity.hdf")
+    ident_params = os.path.join(tmp_path, 'identity.hdf')
     shutil.copyfile(dist_params, ident_params)
     with h5py.File(ident_params, 'r+') as f:
         f['base_ghi_params'][:] = f['bias_fut_rsds_params'][:]
@@ -274,7 +281,7 @@
     anything. Note that NaNs in any component, i.e. any dataset, would
     propagate into a NaN transformation.
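+
+    For instance, a NaN in base_ghi_params at one gridpoint yields NaN
+    corrected values at that gridpoint.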
""" - ident_params = os.path.join(tmp_path, "identity.hdf") + ident_params = os.path.join(tmp_path, 'identity.hdf') shutil.copyfile(dist_params, ident_params) with h5py.File(ident_params, 'r+') as f: f['base_ghi_params'][:] = f['bias_fut_rsds_params'][:] @@ -298,7 +305,7 @@ def test_bc_model_constant(tmp_path, fp_fut_cc, dist_params): has an offset with historical observed, that same offset should be corrected in the target (future modeled). """ - offset_params = os.path.join(tmp_path, "offset.hdf") + offset_params = os.path.join(tmp_path, 'offset.hdf') shutil.copyfile(dist_params, offset_params) with h5py.File(offset_params, 'r+') as f: f['base_ghi_params'][:] = f['bias_fut_rsds_params'][:] - 10 @@ -322,7 +329,7 @@ def test_bc_trend(tmp_path, fp_fut_cc, dist_params): is a trend between modeled historical vs future, that same trend should be applied to correct """ - offset_params = os.path.join(tmp_path, "offset.hdf") + offset_params = os.path.join(tmp_path, 'offset.hdf') shutil.copyfile(dist_params, offset_params) with h5py.File(offset_params, 'r+') as f: f['base_ghi_params'][:] = f['bias_fut_rsds_params'][:] @@ -345,7 +352,7 @@ def test_bc_trend_same_hist(tmp_path, fp_fut_cc, dist_params): If there was no bias in historical (obs vs mod), there is nothing to correct, but trust the forecast. """ - offset_params = os.path.join(tmp_path, "offset.hdf") + offset_params = os.path.join(tmp_path, 'offset.hdf') shutil.copyfile(dist_params, offset_params) with h5py.File(offset_params, 'r+') as f: f['base_ghi_params'][:] = f['bias_fut_rsds_params'][:] - 10 @@ -379,7 +386,8 @@ def test_fwp_integration(tmp_path): input_files = [os.path.join(TEST_DATA_DIR, 'ua_test.nc'), os.path.join(TEST_DATA_DIR, 'va_test.nc'), os.path.join(TEST_DATA_DIR, 'orog_test.nc'), - os.path.join(TEST_DATA_DIR, 'zg_test.nc')] + os.path.join(TEST_DATA_DIR, 'zg_test.nc'), + ] n_samples = 101 quantiles = np.linspace(0, 1, n_samples) @@ -409,9 +417,9 @@ def test_fwp_integration(tmp_path): out_dir = os.path.join(tmp_path, 'st_gan') model.save(out_dir) - with h5py.File(bias_fp, "w") as f: - f.create_dataset("latitude", data=lat_lon[..., 0]) - f.create_dataset("longitude", data=lat_lon[..., 1]) + with h5py.File(bias_fp, 'w') as f: + f.create_dataset('latitude', data=lat_lon[..., 0]) + f.create_dataset('longitude', data=lat_lon[..., 1]) s = lat_lon.shape[:2] for k, v in params.items(): @@ -437,7 +445,8 @@ def test_fwp_integration(tmp_path): worker_kwargs=dict(max_workers=1)), out_pattern=os.path.join(tmp_path, 'out_{file_id}.nc'), worker_kwargs=dict(max_workers=1), - input_handler='DataHandlerNCforCC') + input_handler='DataHandlerNCforCC', + ) bc_strat = ForwardPassStrategy( input_files, model_kwargs={'model_dir': out_dir}, @@ -450,7 +459,8 @@ def test_fwp_integration(tmp_path): worker_kwargs=dict(max_workers=1), input_handler='DataHandlerNCforCC', bias_correct_method='local_qdm_bc', - bias_correct_kwargs=bias_correct_kwargs) + bias_correct_kwargs=bias_correct_kwargs, + ) for ichunk in range(strat.chunks): fwp = ForwardPass(strat, chunk_index=ichunk) @@ -465,5 +475,5 @@ def test_fwp_integration(tmp_path): ), "V reference offset is 1" delta = bc_fwp.run_chunk() - fwp.run_chunk() - assert delta[..., 0].mean() < 0, "Predicted U should trend <0" - assert delta[..., 1].mean() > 0, "Predicted V should trend >0" + assert delta[..., 0].mean() < 0, 'Predicted U should trend <0' + assert delta[..., 1].mean() > 0, 'Predicted V should trend >0'