diff --git a/pcmdi_metrics/io/__init__.py b/pcmdi_metrics/io/__init__.py
index 9822a2637..a81b7b8b6 100644
--- a/pcmdi_metrics/io/__init__.py
+++ b/pcmdi_metrics/io/__init__.py
@@ -4,3 +4,20 @@
 from .base import MV2Json  # noqa
 from .default_regions_define import load_regions_specs  # noqa
 from .default_regions_define import region_subset  # noqa
+from .xcdat_dataset_io import (  # noqa
+    get_axis_list,
+    get_data_list,
+    get_latitude_bounds_key,
+    get_latitude_key,
+    get_latitude,
+    get_latitude_bounds,
+    get_longitude_bounds_key,
+    get_longitude_key,
+    get_longitude,
+    get_longitude_bounds,
+    get_time,
+    get_time_bounds,
+    get_time_bounds_key,
+    get_time_key,
+    select_subset,
+)
diff --git a/pcmdi_metrics/io/xcdat_dataset_io.py b/pcmdi_metrics/io/xcdat_dataset_io.py
new file mode 100644
index 000000000..72af41e4e
--- /dev/null
+++ b/pcmdi_metrics/io/xcdat_dataset_io.py
@@ -0,0 +1,169 @@
+from typing import Union
+
+import xarray as xr
+import xcdat as xc
+
+# Internal function
+
+
+def _find_key(
+    ds: Union[xr.Dataset, xr.DataArray], axis: str, potential_names: list
+) -> str:
+    try:
+        key = xc.get_dim_keys(ds, axis)
+    except Exception as e:
+        axes = get_axis_list(ds)
+        key_candidates = [k for k in axes if k.lower() in potential_names]
+        if len(key_candidates) > 0:
+            key = key_candidates[0]
+        else:
+            data_keys = get_data_list(ds)
+            key_candidates = [k for k in data_keys if k.lower() in potential_names]
+            if len(key_candidates) > 0:
+                key = key_candidates[0]
+            else:
+                all_keys = ", ".join(axes + data_keys)
+                raise KeyError(
+                    f"Cannot find a proper key name for {axis} from keys: {all_keys}"
+                ) from e
+    return key
+
+
+# Retrieve coordinate key names
+
+
+def get_axis_list(ds: Union[xr.Dataset, xr.DataArray]) -> list[str]:
+    axes = list(ds.coords.keys())
+    return axes
+
+
+def get_data_list(ds: Union[xr.Dataset, xr.DataArray]) -> list[str]:
+    return list(ds.data_vars.keys())
+
+
+def get_time_key(ds: Union[xr.Dataset, xr.DataArray]) -> str:
+    axis = "T"
+    potential_names = ["time", "t"]
+    return _find_key(ds, axis, potential_names)
+
+
+def get_latitude_key(ds: Union[xr.Dataset, xr.DataArray]) -> str:
+    axis = "Y"
+    potential_names = ["lat", "latitude"]
+    return _find_key(ds, axis, potential_names)
+
+
+def get_longitude_key(ds: Union[xr.Dataset, xr.DataArray]) -> str:
+    axis = "X"
+    potential_names = ["lon", "longitude"]
+    return _find_key(ds, axis, potential_names)
+
+
+# Retrieve bounds key names
+
+
+def get_time_bounds_key(ds: Union[xr.Dataset, xr.DataArray]) -> str:
+    time_key = get_time_key(ds)
+    return ds[time_key].attrs["bounds"]
+
+
+def get_latitude_bounds_key(ds: Union[xr.Dataset, xr.DataArray]) -> str:
+    lat_key = get_latitude_key(ds)
+    return ds[lat_key].attrs["bounds"]
+
+
+def get_longitude_bounds_key(ds: Union[xr.Dataset, xr.DataArray]) -> str:
+    lon_key = get_longitude_key(ds)
+    return ds[lon_key].attrs["bounds"]
+
+
+# Extract coordinate data
+
+
+def get_time(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray:
+    time_key = get_time_key(ds)
+    time = ds[time_key]
+    return time
+
+
+def get_longitude(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray:
+    lon_key = get_longitude_key(ds)
+    lon = ds[lon_key]
+    return lon
+
+
+def get_latitude(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray:
+    lat_key = get_latitude_key(ds)
+    lat = ds[lat_key]
+    return lat
+
+
+# Extract coordinate bounds data
+
+
+def get_time_bounds(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray:
+    time_bounds_key = get_time_bounds_key(ds)
+    time_bounds = ds[time_bounds_key]
+    return time_bounds
+
+
+def get_longitude_bounds(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray:
+    lon_bounds_key = get_longitude_bounds_key(ds)
+    lon_bounds = ds[lon_bounds_key]
+    return lon_bounds
+
+
+def get_latitude_bounds(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray:
+    lat_bounds_key = get_latitude_bounds_key(ds)
+    lat_bounds = ds[lat_bounds_key]
+    return lat_bounds
+
+
+# Select subset
+
+
+def select_subset(
+    ds: xr.Dataset, lat: tuple = None, lon: tuple = None, time: tuple = None
+) -> xr.Dataset:
+    """
+    Selects a subset of the given xarray dataset based on specified latitude, longitude, and time ranges.
+
+    Parameters:
+    - ds (xr.Dataset): The input xarray dataset.
+    - lat (tuple, optional): Latitude range in the form of (min, max).
+    - lon (tuple, optional): Longitude range in the form of (min, max).
+    - time (tuple, optional): Time range. If time is specified, it should be in the form of (start_time, end_time),
+      where start_time and end_time can be integers, floats, or cftime.DatetimeProlepticGregorian objects.
+
+    Returns:
+    - xr.Dataset: Subset of the input dataset based on the specified latitude, longitude, and time ranges.
+
+    Example Usage:
+    ```
+    import cftime
+
+    # Define latitude, longitude, and time ranges
+    lat_tuple = (30, 50)  # Latitude range
+    lon_tuple = (110, 130)  # Longitude range
+    time_tuple = (cftime.datetime(1850, 1, 1, 0, 0, 0, 0),
+                  cftime.datetime(1851, 12, 31, 23, 59, 59, 0))  # Time range
+
+    # Load your xarray dataset (ds) here
+
+    # Select subset based on specified ranges
+    ds_subset = select_subset(ds, lat=lat_tuple, lon=lon_tuple, time=time_tuple)
+    ```
+    """
+    sel_keys = {}
+    if lat is not None:
+        lat_key = get_latitude_key(ds)
+        sel_keys[lat_key] = slice(*lat)
+    if lon is not None:
+        lon_key = get_longitude_key(ds)
+        sel_keys[lon_key] = slice(*lon)
+    if time is not None:
+        time_key = get_time_key(ds)
+        sel_keys[time_key] = slice(*time)
+
+    ds = ds.sel(**sel_keys)
+    return ds
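Usage sketch for the new I/O helpers above. The dataset, coordinate names, and the "ts" variable are made up for illustration; the key getters fall back to name matching when xcdat cannot identify an axis, and select_subset resolves the slice keys the same way.

```python
import numpy as np
import xarray as xr

from pcmdi_metrics.io import get_latitude, get_latitude_key, select_subset

# Toy dataset with spelled-out coordinate names and no CF "axis" attributes
ds = xr.Dataset(
    {"ts": (("latitude", "longitude"), np.zeros((4, 8)))},
    coords={
        "latitude": np.linspace(-60, 60, 4),
        "longitude": np.linspace(0, 315, 8),
    },
)

print(get_latitude_key(ds))   # "latitude" (via xcdat, or the name fallback)
print(get_latitude(ds).size)  # 4

# Slice by value ranges; coordinate keys are looked up internally
ds_sub = select_subset(ds, lat=(-30, 30), lon=(90, 180))
print(ds_sub["ts"].shape)     # (2, 3)
```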
diff --git a/pcmdi_metrics/mean_climate/lib/compute_metrics.py b/pcmdi_metrics/mean_climate/lib/compute_metrics.py
index 33080f379..169db1233 100644
--- a/pcmdi_metrics/mean_climate/lib/compute_metrics.py
+++ b/pcmdi_metrics/mean_climate/lib/compute_metrics.py
@@ -1,6 +1,6 @@
 from collections import OrderedDict
 
-import pcmdi_metrics
+from pcmdi_metrics import stats
 
 
 def compute_metrics(Var, dm, do, debug=False, time_dim_sync=False):
@@ -9,25 +9,19 @@ def compute_metrics(Var, dm, do, debug=False, time_dim_sync=False):
     # Did we send data? Or do we just want the info?
if dm is None and do is None: metrics_defs = OrderedDict() - metrics_defs["rms_xyt"] = pcmdi_metrics.mean_climate.lib.rms_xyt(None, None) - metrics_defs["rms_xy"] = pcmdi_metrics.mean_climate.lib.rms_xy(None, None) - metrics_defs["rmsc_xy"] = pcmdi_metrics.mean_climate.lib.rmsc_xy(None, None) - metrics_defs["bias_xy"] = pcmdi_metrics.mean_climate.lib.bias_xy(None, None) - metrics_defs["mae_xy"] = pcmdi_metrics.mean_climate.lib.meanabs_xy(None, None) - metrics_defs["cor_xy"] = pcmdi_metrics.mean_climate.lib.cor_xy(None, None) - metrics_defs["mean_xy"] = pcmdi_metrics.mean_climate.lib.mean_xy(None) - metrics_defs["std_xy"] = pcmdi_metrics.mean_climate.lib.std_xy(None) - metrics_defs["std_xyt"] = pcmdi_metrics.mean_climate.lib.std_xyt(None) - - metrics_defs["seasonal_mean"] = pcmdi_metrics.mean_climate.lib.seasonal_mean( - None, None - ) - metrics_defs["annual_mean"] = pcmdi_metrics.mean_climate.lib.annual_mean( - None, None - ) - metrics_defs["zonal_mean"] = pcmdi_metrics.mean_climate.lib.zonal_mean( - None, None - ) + metrics_defs["rms_xyt"] = stats.rms_xyt(None, None) + metrics_defs["rms_xy"] = stats.rms_xy(None, None) + metrics_defs["rmsc_xy"] = stats.rmsc_xy(None, None) + metrics_defs["bias_xy"] = stats.bias_xy(None, None) + metrics_defs["mae_xy"] = stats.meanabs_xy(None, None) + metrics_defs["cor_xy"] = stats.cor_xy(None, None) + metrics_defs["mean_xy"] = stats.mean_xy(None) + metrics_defs["std_xy"] = stats.std_xy(None) + metrics_defs["std_xyt"] = stats.std_xyt(None) + + metrics_defs["seasonal_mean"] = stats.seasonal_mean(None, None) + metrics_defs["annual_mean"] = stats.annual_mean(None, None) + metrics_defs["zonal_mean"] = stats.zonal_mean(None, None) return metrics_defs # cdms.setAutoBounds("on") @@ -59,80 +53,80 @@ def compute_metrics(Var, dm, do, debug=False, time_dim_sync=False): sig_digits = ".3f" # CALCULATE ANNUAL CYCLE SPACE-TIME RMS, CORRELATIONS and STD - print("compute_metrics-CALCULATE ANNUAL CYCLE SPACE-TIME RMS, CORRELATIONS and STD") + print("metrics-CALCULATE ANNUAL CYCLE SPACE-TIME RMS, CORRELATIONS and STD") - print("compute_metrics, rms_xyt") - rms_xyt = pcmdi_metrics.mean_climate.lib.rms_xyt(dm, do, var) - print("compute_metrics, rms_xyt:", rms_xyt) + print("metrics, rms_xyt") + rms_xyt = stats.rms_xyt(dm, do, var) + print("metrics, rms_xyt:", rms_xyt) - print("compute_metrics, stdObs_xyt") - stdObs_xyt = pcmdi_metrics.mean_climate.lib.std_xyt(do, var) - print("compute_metrics, stdObs_xyt:", stdObs_xyt) + print("metrics, stdObs_xyt") + stdObs_xyt = stats.std_xyt(do, var) + print("metrics, stdObs_xyt:", stdObs_xyt) - print("compute_metrics, std_xyt") - std_xyt = pcmdi_metrics.mean_climate.lib.std_xyt(dm, var) - print("compute_metrics, std_xyt:", std_xyt) + print("metrics, std_xyt") + std_xyt = stats.std_xyt(dm, var) + print("metrics, std_xyt:", std_xyt) # CALCULATE ANNUAL MEANS - print("compute_metrics-CALCULATE ANNUAL MEANS") - dm_am, do_am = pcmdi_metrics.mean_climate.lib.annual_mean(dm, do, var) + print("metrics-CALCULATE ANNUAL MEANS") + dm_am, do_am = stats.annual_mean(dm, do, var) # CALCULATE ANNUAL MEAN BIAS - print("compute_metrics-CALCULATE ANNUAL MEAN BIAS") - bias_xy = pcmdi_metrics.mean_climate.lib.bias_xy(dm_am, do_am, var) - print("compute_metrics-CALCULATE ANNUAL MEAN BIAS, bias_xy:", bias_xy) + print("metrics-CALCULATE ANNUAL MEAN BIAS") + bias_xy = stats.bias_xy(dm_am, do_am, var) + print("metrics-CALCULATE ANNUAL MEAN BIAS, bias_xy:", bias_xy) # CALCULATE MEAN ABSOLUTE ERROR - print("compute_metrics-CALCULATE MSE") - mae_xy = 
pcmdi_metrics.mean_climate.lib.meanabs_xy(dm_am, do_am, var) - print("compute_metrics-CALCULATE MSE, mae_xy:", mae_xy) + print("metrics-CALCULATE MSE") + mae_xy = stats.meanabs_xy(dm_am, do_am, var) + print("metrics-CALCULATE MSE, mae_xy:", mae_xy) # CALCULATE ANNUAL MEAN RMS (centered and uncentered) - print("compute_metrics-CALCULATE MEAN RMS") - rms_xy = pcmdi_metrics.mean_climate.lib.rms_xy(dm_am, do_am, var) - rmsc_xy = pcmdi_metrics.mean_climate.lib.rmsc_xy(dm_am, do_am, var) - print("compute_metrics-CALCULATE MEAN RMS: rms_xy, rmsc_xy: ", rms_xy, rmsc_xy) + print("metrics-CALCULATE MEAN RMS") + rms_xy = stats.rms_xy(dm_am, do_am, var) + rmsc_xy = stats.rmsc_xy(dm_am, do_am, var) + print("metrics-CALCULATE MEAN RMS: rms_xy, rmsc_xy: ", rms_xy, rmsc_xy) # CALCULATE ANNUAL MEAN CORRELATION - print("compute_metrics-CALCULATE MEAN CORR") - cor_xy = pcmdi_metrics.mean_climate.lib.cor_xy(dm_am, do_am, var) - print("compute_metrics-CALCULATE MEAN CORR: cor_xy:", cor_xy) + print("metrics-CALCULATE MEAN CORR") + cor_xy = stats.cor_xy(dm_am, do_am, var) + print("metrics-CALCULATE MEAN CORR: cor_xy:", cor_xy) # CALCULATE ANNUAL OBS and MOD STD - print("compute_metrics-CALCULATE ANNUAL OBS AND MOD STD") - stdObs_xy = pcmdi_metrics.mean_climate.lib.std_xy(do_am, var) - std_xy = pcmdi_metrics.mean_climate.lib.std_xy(dm_am, var) + print("metrics-CALCULATE ANNUAL OBS AND MOD STD") + stdObs_xy = stats.std_xy(do_am, var) + std_xy = stats.std_xy(dm_am, var) # CALCULATE ANNUAL OBS and MOD MEAN - print("compute_metrics-CALCULATE ANNUAL OBS AND MOD MEAN") - meanObs_xy = pcmdi_metrics.mean_climate.lib.mean_xy(do_am, var) - mean_xy = pcmdi_metrics.mean_climate.lib.mean_xy(dm_am, var) + print("metrics-CALCULATE ANNUAL OBS AND MOD MEAN") + meanObs_xy = stats.mean_xy(do_am, var) + mean_xy = stats.mean_xy(dm_am, var) # ZONAL MEANS ###### # CALCULATE ANNUAL MEANS - print("compute_metrics-CALCULATE ANNUAL MEANS") - dm_amzm, do_amzm = pcmdi_metrics.mean_climate.lib.zonal_mean(dm_am, do_am, var) + print("metrics-CALCULATE ANNUAL MEANS") + dm_amzm, do_amzm = stats.zonal_mean(dm_am, do_am, var) # CALCULATE ANNUAL AND ZONAL MEAN RMS - print("compute_metrics-CALCULATE ANNUAL AND ZONAL MEAN RMS") - rms_y = pcmdi_metrics.mean_climate.lib.rms_0(dm_amzm, do_amzm, var) + print("metrics-CALCULATE ANNUAL AND ZONAL MEAN RMS") + rms_y = stats.rms_0(dm_amzm, do_amzm, var) # CALCULATE ANNUAL MEAN DEVIATION FROM ZONAL MEAN RMS - print("compute_metrics-CALCULATE ANNUAL MEAN DEVIATION FROM ZONAL MEAN RMS") + print("metrics-CALCULATE ANNUAL MEAN DEVIATION FROM ZONAL MEAN RMS") dm_am_devzm = dm_am - dm_amzm do_am_devzm = do_am - do_amzm - rms_xy_devzm = pcmdi_metrics.mean_climate.lib.rms_xy( + rms_xy_devzm = stats.rms_xy( dm_am_devzm, do_am_devzm, var, weights=dm.spatial.get_weights(axis=["X", "Y"]) ) # CALCULATE ANNUAL AND ZONAL MEAN STD # CALCULATE ANNUAL MEAN DEVIATION FROM ZONAL MEAN STD - print("compute_metrics-CALCULATE ANNUAL MEAN DEVIATION FROM ZONAL MEAN STD") - stdObs_xy_devzm = pcmdi_metrics.mean_climate.lib.std_xy( + print("metrics-CALCULATE ANNUAL MEAN DEVIATION FROM ZONAL MEAN STD") + stdObs_xy_devzm = stats.std_xy( do_am_devzm, var, weights=do.spatial.get_weights(axis=["X", "Y"]) ) - std_xy_devzm = pcmdi_metrics.mean_climate.lib.std_xy( + std_xy_devzm = stats.std_xy( dm_am_devzm, var, weights=dm.spatial.get_weights(axis=["X", "Y"]) ) @@ -178,23 +172,23 @@ def compute_metrics(Var, dm, do, debug=False, time_dim_sync=False): # CALCULATE SEASONAL MEANS for sea in ["djf", "mam", "jja", "son"]: - dm_sea = 
pcmdi_metrics.mean_climate.lib.seasonal_mean(dm, sea, var) - do_sea = pcmdi_metrics.mean_climate.lib.seasonal_mean(do, sea, var) + dm_sea = stats.seasonal_mean(dm, sea, var) + do_sea = stats.seasonal_mean(do, sea, var) # CALCULATE SEASONAL RMS AND CORRELATION - rms_sea = pcmdi_metrics.mean_climate.lib.rms_xy(dm_sea, do_sea, var) - rmsc_sea = pcmdi_metrics.mean_climate.lib.rmsc_xy(dm_sea, do_sea, var) - cor_sea = pcmdi_metrics.mean_climate.lib.cor_xy(dm_sea, do_sea, var) - mae_sea = pcmdi_metrics.mean_climate.lib.meanabs_xy(dm_sea, do_sea, var) - bias_sea = pcmdi_metrics.mean_climate.lib.bias_xy(dm_sea, do_sea, var) + rms_sea = stats.rms_xy(dm_sea, do_sea, var) + rmsc_sea = stats.rmsc_xy(dm_sea, do_sea, var) + cor_sea = stats.cor_xy(dm_sea, do_sea, var) + mae_sea = stats.meanabs_xy(dm_sea, do_sea, var) + bias_sea = stats.bias_xy(dm_sea, do_sea, var) # CALCULATE SEASONAL OBS and MOD STD - stdObs_xy_sea = pcmdi_metrics.mean_climate.lib.std_xy(do_sea, var) - std_xy_sea = pcmdi_metrics.mean_climate.lib.std_xy(dm_sea, var) + stdObs_xy_sea = stats.std_xy(do_sea, var) + std_xy_sea = stats.std_xy(dm_sea, var) # CALCULATE SEASONAL OBS and MOD MEAN - meanObs_xy_sea = pcmdi_metrics.mean_climate.lib.mean_xy(do_sea, var) - mean_xy_sea = pcmdi_metrics.mean_climate.lib.mean_xy(dm_sea, var) + meanObs_xy_sea = stats.mean_xy(do_sea, var) + mean_xy_sea = stats.mean_xy(dm_sea, var) metrics_dictionary["bias_xy"][sea] = format(bias_sea, sig_digits) metrics_dictionary["rms_xy"][sea] = format(rms_sea, sig_digits) @@ -236,19 +230,19 @@ def compute_metrics(Var, dm, do, debug=False, time_dim_sync=False): do_mo = do.isel(time=n) # CALCULATE MONTHLY RMS AND CORRELATION - rms_mo = pcmdi_metrics.mean_climate.lib.rms_xy(dm_mo, do_mo, var) - rmsc_mo = pcmdi_metrics.mean_climate.lib.rmsc_xy(dm_mo, do_mo, var) - cor_mo = pcmdi_metrics.mean_climate.lib.cor_xy(dm_mo, do_mo, var) - mae_mo = pcmdi_metrics.mean_climate.lib.meanabs_xy(dm_mo, do_mo, var) - bias_mo = pcmdi_metrics.mean_climate.lib.bias_xy(dm_mo, do_mo, var) + rms_mo = stats.rms_xy(dm_mo, do_mo, var) + rmsc_mo = stats.rmsc_xy(dm_mo, do_mo, var) + cor_mo = stats.cor_xy(dm_mo, do_mo, var) + mae_mo = stats.meanabs_xy(dm_mo, do_mo, var) + bias_mo = stats.bias_xy(dm_mo, do_mo, var) # CALCULATE MONTHLY OBS and MOD STD - stdObs_xy_mo = pcmdi_metrics.mean_climate.lib.std_xy(do_mo, var) - std_xy_mo = pcmdi_metrics.mean_climate.lib.std_xy(dm_mo, var) + stdObs_xy_mo = stats.std_xy(do_mo, var) + std_xy_mo = stats.std_xy(dm_mo, var) # CALCULATE MONTHLY OBS and MOD MEAN - meanObs_xy_mo = pcmdi_metrics.mean_climate.lib.mean_xy(do_mo, var) - mean_xy_mo = pcmdi_metrics.mean_climate.lib.mean_xy(dm_mo, var) + meanObs_xy_mo = stats.mean_xy(do_mo, var) + mean_xy_mo = stats.mean_xy(dm_mo, var) rms_mo_l.append(format(rms_mo, sig_digits)) rmsc_mo_l.append(format(rmsc_mo, sig_digits)) diff --git a/pcmdi_metrics/mean_climate/lib/compute_statistics.py b/pcmdi_metrics/mean_climate/lib/compute_statistics.py index 4ef809b1f..9501e1044 100644 --- a/pcmdi_metrics/mean_climate/lib/compute_statistics.py +++ b/pcmdi_metrics/mean_climate/lib/compute_statistics.py @@ -1,10 +1,14 @@ import math +import warnings import numpy as np def annual_mean(dm, do, var=None): """Computes ANNUAL MEAN""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.annual_mean is deprecated. 
Please consider use pcmdi_metrics.stats.annual_mean, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Annual Mean", @@ -19,6 +23,9 @@ def annual_mean(dm, do, var=None): def seasonal_mean(d, season, var=None): """Computes SEASONAL MEAN""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.seasonal_mean is deprecated. Please consider use pcmdi_metrics.stats.seasonal_mean, instead" + ) if d is None and season is None: # just want the doc return { "Name": "Seasonal Mean", @@ -57,6 +64,9 @@ def seasonal_mean(d, season, var=None): def bias_xy(dm, do, var=None, weights=None): """Computes bias""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.bias_xy is deprecated. Please consider use pcmdi_metrics.stats.bias_xy, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Bias", @@ -72,6 +82,9 @@ def bias_xy(dm, do, var=None, weights=None): def bias_xyt(dm, do, var=None): """Computes bias""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.bias_xyt is deprecated. Please consider use pcmdi_metrics.stats.bias_xyt, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Bias", @@ -88,6 +101,9 @@ def bias_xyt(dm, do, var=None): def cor_xy(dm, do, var=None, weights=None): """Computes correlation""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.cor_xy is deprecated. Please consider use pcmdi_metrics.stats.cor_xy, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Spatial Correlation", @@ -115,6 +131,9 @@ def cor_xy(dm, do, var=None, weights=None): def mean_xy(d, var=None, weights=None): """Computes bias""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.mean_xy is deprecated. Please consider use pcmdi_metrics.stats.mean_xy, instead" + ) if d is None: # just want the doc return { "Name": "Mean", @@ -130,6 +149,9 @@ def mean_xy(d, var=None, weights=None): def meanabs_xy(dm, do, var=None, weights=None): """Computes Mean Absolute Error""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.meanabs_xy is deprecated. Please consider use pcmdi_metrics.stats.meanabs_xy, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Mean Absolute Error", @@ -146,6 +168,9 @@ def meanabs_xy(dm, do, var=None, weights=None): def meanabs_xyt(dm, do, var=None): """Computes Mean Absolute Error""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.meanabs_xyt is deprecated. Please consider use pcmdi_metrics.stats.meanabs_xyt, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Mean Absolute Error", @@ -165,6 +190,9 @@ def meanabs_xyt(dm, do, var=None): def rms_0(dm, do, var=None, weighted=True): """Computes rms over first axis -- compare two zonal mean fields""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.rms_0 is deprecated. Please consider use pcmdi_metrics.stats.rms_0, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Root Mean Square over First Axis", @@ -182,6 +210,9 @@ def rms_0(dm, do, var=None, weighted=True): def rms_xy(dm, do, var=None, weights=None): """Computes rms""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.rms_xy is deprecated. Please consider use pcmdi_metrics.stats.rms_xy, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Spatial Root Mean Square", @@ -197,6 +228,9 @@ def rms_xy(dm, do, var=None, weights=None): def rms_xyt(dm, do, var=None): """Computes rms""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.rms_xyt is deprecated. 
Please consider use pcmdi_metrics.stats.rms_xyt, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Spatio-Temporal Root Mean Square", @@ -214,6 +248,9 @@ def rms_xyt(dm, do, var=None): def rmsc_xy(dm, do, var=None, weights=None): """Computes centered rms""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.rmsc_xy is deprecated. Please consider use pcmdi_metrics.stats.rmsc_xy, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Spatial Root Mean Square", @@ -233,6 +270,9 @@ def rmsc_xy(dm, do, var=None, weights=None): def std_xy(d, var=None, weights=None): """Computes std""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.std_xy is deprecated. Please consider use pcmdi_metrics.stats.std_xy, instead" + ) if d is None: # just want the doc return { "Name": "Spatial Standard Deviation", @@ -250,6 +290,9 @@ def std_xy(d, var=None, weights=None): def std_xyt(d, var=None): """Computes std""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.std_xyt is deprecated. Please consider use pcmdi_metrics.stats.std_xyt, instead" + ) if d is None: # just want the doc return { "Name": "Spatial-temporal Standard Deviation", @@ -268,6 +311,9 @@ def std_xyt(d, var=None): def zonal_mean(dm, do, var=None): """Computes ZONAL MEAN assumes rectilinear/regular grid""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.zonal_mean is deprecated. Please consider use pcmdi_metrics.stats.zonal_mean, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Zonal Mean", diff --git a/pcmdi_metrics/mean_climate/mean_climate_driver.py b/pcmdi_metrics/mean_climate/mean_climate_driver.py index 382b574e3..68340e429 100755 --- a/pcmdi_metrics/mean_climate/mean_climate_driver.py +++ b/pcmdi_metrics/mean_climate/mean_climate_driver.py @@ -14,8 +14,13 @@ load_and_regrid, mean_climate_metrics_to_json, ) -from pcmdi_metrics.utils import apply_landmask, create_land_sea_mask, create_target_grid -from pcmdi_metrics.variability_mode.lib import sort_human, tree +from pcmdi_metrics.utils import ( + apply_landmask, + create_land_sea_mask, + create_target_grid, + sort_human, + tree, +) print("--- prepare mean climate metrics calculation ---") diff --git a/pcmdi_metrics/stats/__init__.py b/pcmdi_metrics/stats/__init__.py new file mode 100644 index 000000000..a9244dfdd --- /dev/null +++ b/pcmdi_metrics/stats/__init__.py @@ -0,0 +1,17 @@ +from .compute_statistics import ( + annual_mean, + bias_xy, + bias_xyt, + cor_xy, + mean_xy, + meanabs_xy, + meanabs_xyt, + rms_0, + rms_xy, + rms_xyt, + rmsc_xy, + seasonal_mean, + std_xy, + std_xyt, + zonal_mean, +) diff --git a/pcmdi_metrics/stats/compute_statistics.py b/pcmdi_metrics/stats/compute_statistics.py new file mode 100644 index 000000000..c356a0ac5 --- /dev/null +++ b/pcmdi_metrics/stats/compute_statistics.py @@ -0,0 +1,301 @@ +import math + +import numpy as np +import xcdat as xc + + +def annual_mean(dm, do, var=None): + """Computes ANNUAL MEAN""" + if dm is None and do is None: # just want the doc + return { + "Name": "Annual Mean", + "Abstract": "Compute Annual Mean", + "Contact": "pcmdi-metrics@llnl.gov", + "Comments": "Assumes input are 12 months climatology", + } + dm_am = dm.temporal.average(var) + do_am = do.temporal.average(var) + return dm_am, do_am # DataSets + + +def seasonal_mean(d, season, var=None): + """Computes SEASONAL MEAN""" + if d is None and season is None: # just want the doc + return { + "Name": "Seasonal Mean", + "Abstract": "Compute Seasonal Mean", + "Contact": 
"pcmdi-metrics@llnl.gov", + "Comments": "Assumes input are 12 months climatology", + } + + mo_wts = [31, 31, 28.25, 31, 30, 31, 30, 31, 31, 30, 31, 30] + + if season == "djf": + indx = [11, 0, 1] + if season == "mam": + indx = [2, 3, 4] + if season == "jja": + indx = [5, 6, 7] + if season == "son": + indx = [8, 9, 10] + + season_num_days = mo_wts[indx[0]] + mo_wts[indx[1]] + mo_wts[indx[2]] + + d_season = ( + d.isel(time=indx[0])[var] * mo_wts[indx[0]] + + d.isel(time=indx[1])[var] * mo_wts[indx[1]] + + d.isel(time=indx[2])[var] * mo_wts[indx[2]] + ) / season_num_days + + ds_new = d.isel(time=0).copy(deep=True) + ds_new[var] = d_season + + return ds_new + + +# Metrics calculations + + +def bias_xy(dm, do, var=None, weights=None): + """Computes bias""" + if dm is None and do is None: # just want the doc + return { + "Name": "Bias", + "Abstract": "Compute Full Average of Model - Observation", + "Contact": "pcmdi-metrics@llnl.gov", + } + dif = dm[var] - do[var] + if weights is None: + weights = dm.spatial.get_weights(axis=["X", "Y"]) + stat = float(dif.weighted(weights).mean(("lon", "lat"))) + return float(stat) + + +def bias_xyt(dm, do, var=None): + """Computes bias""" + if dm is None and do is None: # just want the doc + return { + "Name": "Bias", + "Abstract": "Compute Full Average of Model - Observation", + "Contact": "pcmdi-metrics@llnl.gov", + } + ds = dm.copy(deep=True) + ds["dif"] = dm[var] - do[var] + stat = ( + ds.spatial.average("dif", axis=["X", "Y"]).temporal.average("dif")["dif"].values + ) + return float(stat) + + +def cor_xy(dm, do, var=None, weights=None): + """Computes correlation""" + if dm is None and do is None: # just want the doc + return { + "Name": "Spatial Correlation", + "Abstract": "Compute Spatial Correlation", + "Contact": "pcmdi-metrics@llnl.gov", + } + if weights is None: + weights = dm.spatial.get_weights(axis=["X", "Y"]) + + dm_avg = dm.spatial.average(var, axis=["X", "Y"], weights=weights)[var].values + do_avg = do.spatial.average(var, axis=["X", "Y"], weights=weights)[var].values + + covariance = ( + ((dm[var] - dm_avg) * (do[var] - do_avg)) + .weighted(weights) + .mean(dim=["lon", "lat"]) + .values + ) + std_dm = std_xy(dm, var) + std_do = std_xy(do, var) + stat = covariance / (std_dm * std_do) + + return float(stat) + + +def mean_xy(d, var=None, weights=None): + """Computes bias""" + if d is None: # just want the doc + return { + "Name": "Mean", + "Abstract": "Area Mean (area weighted)", + "Contact": "pcmdi-metrics@llnl.gov", + } + + if weights is None: + weights = d.spatial.get_weights(axis=["X", "Y"]) + stat = float(d[var].weighted(weights).mean(("lon", "lat"))) + return float(stat) + + +def meanabs_xy(dm, do, var=None, weights=None): + """Computes Mean Absolute Error""" + if dm is None and do is None: # just want the doc + return { + "Name": "Mean Absolute Error", + "Abstract": "Compute Full Average of " + + "Absolute Difference Between Model And Observation", + "Contact": "pcmdi-metrics@llnl.gov", + } + if weights is None: + weights = dm.spatial.get_weights(axis=["X", "Y"]) + dif = abs(dm[var] - do[var]) + stat = dif.weighted(weights).mean(("lon", "lat")) + return float(stat) + + +def meanabs_xyt(dm, do, var=None): + """Computes Mean Absolute Error""" + if dm is None and do is None: # just want the doc + return { + "Name": "Mean Absolute Error", + "Abstract": "Compute Full Average of " + + "Absolute Difference Between Model And Observation", + "Contact": "pcmdi-metrics@llnl.gov", + } + ds = dm.copy(deep=True) + ds["absdif"] = abs(dm[var] - do[var]) + 
stat = ( + ds.spatial.average("absdif", axis=["X", "Y"]) + .temporal.average("absdif")["absdif"] + .values + ) + return float(stat) + + +def rms_0(dm, do, var=None, weighted=True): + """Computes rms over first axis -- compare two zonal mean fields""" + if dm is None and do is None: # just want the doc + return { + "Name": "Root Mean Square over First Axis", + "Abstract": "Compute Root Mean Square over the first axis", + "Contact": "pcmdi-metrics@llnl.gov", + } + dif_square = (dm[var] - do[var]) ** 2 + if weighted: + weights = dm.spatial.get_weights(axis=["Y"]) + stat = math.sqrt(dif_square.weighted(weights).mean(("lat"))) + else: + stat = math.sqrt(dif_square.mean(("lat"))) + return float(stat) + + +def rms_xy(dm, do, var=None, weights=None): + """Computes rms""" + if dm is None and do is None: # just want the doc + return { + "Name": "Spatial Root Mean Square", + "Abstract": "Compute Spatial Root Mean Square", + "Contact": "pcmdi-metrics@llnl.gov", + } + dif_square = (dm[var] - do[var]) ** 2 + if weights is None: + weights = dm.spatial.get_weights(axis=["X", "Y"]) + stat = math.sqrt(dif_square.weighted(weights).mean(("lon", "lat"))) + return float(stat) + + +def rms_xyt(dm, do, var=None): + """Computes rms""" + if dm is None and do is None: # just want the doc + return { + "Name": "Spatio-Temporal Root Mean Square", + "Abstract": "Compute Spatial and Temporal Root Mean Square", + "Contact": "pcmdi-metrics@llnl.gov", + } + ds = dm.copy(deep=True) + ds["diff_square"] = (dm[var] - do[var]) ** 2 + ds["diff_square_sqrt"] = np.sqrt( + ds.spatial.average("diff_square", axis=["X", "Y"])["diff_square"] + ) + stat = ds.temporal.average("diff_square_sqrt")["diff_square_sqrt"].values + return float(stat) + + +def rmsc_xy(dm, do, var=None, weights=None): + """Computes centered rms""" + if dm is None and do is None: # just want the doc + return { + "Name": "Spatial Root Mean Square", + "Abstract": "Compute Centered Spatial Root Mean Square", + "Contact": "pcmdi-metrics@llnl.gov", + } + if weights is None: + weights = dm.spatial.get_weights(axis=["X", "Y"]) + + lat_key_dm = xc.axis.get_dim_keys(dm, axis="Y") + lon_key_dm = xc.axis.get_dim_keys(dm, axis="X") + + lat_key_do = xc.axis.get_dim_keys(do, axis="Y") + lon_key_do = xc.axis.get_dim_keys(do, axis="X") + + if lat_key_dm == lat_key_do: + lat_key = lat_key_dm + else: + print("Different key names: lat_key_dm, lat_key_do: ", lat_key_dm, lat_key_do) + + if lon_key_dm == lon_key_do: + lon_key = lon_key_dm + else: + print("Different key names: lon_key_dm, lon_key_do: ", lon_key_dm, lon_key_do) + + dm_anomaly = dm[var] - dm[var].weighted(weights).mean((lat_key_dm, lon_key_dm)) + do_anomaly = do[var] - do[var].weighted(weights).mean((lat_key_do, lon_key_do)) + diff_square = (dm_anomaly - do_anomaly) ** 2 + + stat = math.sqrt(diff_square.weighted(weights).mean((lat_key, lon_key))) + return float(stat) + + +def std_xy(ds, var=None, weights=None): + """Computes std""" + if ds is None: # just want the doc + return { + "Name": "Spatial Standard Deviation", + "Abstract": "Compute Spatial Standard Deviation", + "Contact": "pcmdi-metrics@llnl.gov", + } + if weights is None: + weights = ds.spatial.get_weights(axis=["X", "Y"]) + + lat_key = xc.axis.get_dim_keys(ds, axis="Y") + lon_key = xc.axis.get_dim_keys(ds, axis="X") + + average = float(ds[var].weighted(weights).mean((lat_key, lon_key))) + anomaly = (ds[var] - average) ** 2 + variance = float(anomaly.weighted(weights).mean((lat_key, lon_key))) + std = math.sqrt(variance) + return float(std) + + +def std_xyt(d, 
var=None): + """Computes std""" + if d is None: # just want the doc + return { + "Name": "Spatial-temporal Standard Deviation", + "Abstract": "Compute Space-Time Standard Deviation", + "Contact": "pcmdi-metrics@llnl.gov", + } + ds = d.copy(deep=True) + average = d.spatial.average(var, axis=["X", "Y"]).temporal.average(var)[var] + ds["anomaly"] = (d[var] - average) ** 2 + variance = ( + ds.spatial.average("anomaly").temporal.average("anomaly")["anomaly"].values + ) + std = math.sqrt(variance) + return std + + +def zonal_mean(dm, do, var=None): + """Computes ZONAL MEAN assumes rectilinear/regular grid""" + if dm is None and do is None: # just want the doc + return { + "Name": "Zonal Mean", + "Abstract": "Compute Zonal Mean", + "Contact": "pcmdi-metrics@llnl.gov", + "Comments": "", + } + dm_zm = dm.spatial.average(var, axis=["X"]) + do_zm = do.spatial.average(var, axis=["X"]) + return dm_zm, do_zm # DataSets
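A quick sanity check of the relocated spatial statistics. Everything below is illustrative (a hypothetical "ts" variable on a 3x4 grid); weights are passed explicitly so the snippet needs no axis bounds, whereas omitting them makes the functions derive weights with dm.spatial.get_weights(axis=["X", "Y"]) as the diff shows.

```python
import numpy as np
import xarray as xr

from pcmdi_metrics import stats

lat = [-30.0, 0.0, 30.0]
lon = [0.0, 90.0, 180.0, 270.0]

# Model is uniformly 2 K warmer than the observations
dm = xr.Dataset({"ts": (("lat", "lon"), np.full((3, 4), 290.0))}, coords={"lat": lat, "lon": lon})
do = xr.Dataset({"ts": (("lat", "lon"), np.full((3, 4), 288.0))}, coords={"lat": lat, "lon": lon})

# Simple cos(lat) proxy for area weights, broadcast over the grid
weights = np.cos(np.deg2rad(dm["lat"])).broadcast_like(dm["ts"])

print(stats.bias_xy(dm, do, var="ts", weights=weights))     # 2.0
print(stats.rms_xy(dm, do, var="ts", weights=weights))      # 2.0
print(stats.meanabs_xy(dm, do, var="ts", weights=weights))  # 2.0
```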
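stats.seasonal_mean operates on a 12-step monthly climatology with plain, day-weighted index selection, so a minimal run only needs a Dataset whose time dimension is literally named "time". The "pr" variable and grid below are hypothetical.

```python
import numpy as np
import xarray as xr

from pcmdi_metrics import stats

# 12-month climatology on a 2x2 grid; each month holds its own index value
clim = xr.Dataset(
    {
        "pr": (
            ("time", "lat", "lon"),
            np.repeat(np.arange(12.0), 4).reshape(12, 2, 2),
        )
    },
    coords={"time": np.arange(12), "lat": [0.0, 10.0], "lon": [0.0, 10.0]},
)

djf = stats.seasonal_mean(clim, "djf", var="pr")  # day-weighted Dec/Jan/Feb average
print(float(djf["pr"].mean()))
```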
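Migration note: the old entry points under pcmdi_metrics.mean_climate.lib keep working but now emit a warning; new code should import from pcmdi_metrics.stats, mirroring the change made in compute_metrics.py. A small sketch:

```python
import warnings

from pcmdi_metrics import stats
from pcmdi_metrics.mean_climate.lib import rms_xy as old_rms_xy

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    old_rms_xy(None, None)      # still returns the metric's doc dict
    print(caught[0].message)    # points users to pcmdi_metrics.stats.rms_xy

doc = stats.rms_xy(None, None)  # new, warning-free home of the same metric
print(doc["Name"])              # "Spatial Root Mean Square"
```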