From 909505a34f286cb8a9edf561bd62dd60e330924e Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Sat, 23 Dec 2023 12:06:08 -0800 Subject: [PATCH 01/16] initial commit --- pcmdi_metrics/stats/__init__.py | 17 ++ pcmdi_metrics/stats/compute_statistics.py | 301 ++++++++++++++++++++++ 2 files changed, 318 insertions(+) create mode 100644 pcmdi_metrics/stats/__init__.py create mode 100644 pcmdi_metrics/stats/compute_statistics.py diff --git a/pcmdi_metrics/stats/__init__.py b/pcmdi_metrics/stats/__init__.py new file mode 100644 index 000000000..a9244dfdd --- /dev/null +++ b/pcmdi_metrics/stats/__init__.py @@ -0,0 +1,17 @@ +from .compute_statistics import ( + annual_mean, + bias_xy, + bias_xyt, + cor_xy, + mean_xy, + meanabs_xy, + meanabs_xyt, + rms_0, + rms_xy, + rms_xyt, + rmsc_xy, + seasonal_mean, + std_xy, + std_xyt, + zonal_mean, +) diff --git a/pcmdi_metrics/stats/compute_statistics.py b/pcmdi_metrics/stats/compute_statistics.py new file mode 100644 index 000000000..c356a0ac5 --- /dev/null +++ b/pcmdi_metrics/stats/compute_statistics.py @@ -0,0 +1,301 @@ +import math + +import numpy as np +import xcdat as xc + + +def annual_mean(dm, do, var=None): + """Computes ANNUAL MEAN""" + if dm is None and do is None: # just want the doc + return { + "Name": "Annual Mean", + "Abstract": "Compute Annual Mean", + "Contact": "pcmdi-metrics@llnl.gov", + "Comments": "Assumes input are 12 months climatology", + } + dm_am = dm.temporal.average(var) + do_am = do.temporal.average(var) + return dm_am, do_am # DataSets + + +def seasonal_mean(d, season, var=None): + """Computes SEASONAL MEAN""" + if d is None and season is None: # just want the doc + return { + "Name": "Seasonal Mean", + "Abstract": "Compute Seasonal Mean", + "Contact": "pcmdi-metrics@llnl.gov", + "Comments": "Assumes input are 12 months climatology", + } + + mo_wts = [31, 31, 28.25, 31, 30, 31, 30, 31, 31, 30, 31, 30] + + if season == "djf": + indx = [11, 0, 1] + if season == "mam": + indx = [2, 3, 4] + if season == "jja": + indx = [5, 6, 7] + if season == "son": + indx = [8, 9, 10] + + season_num_days = mo_wts[indx[0]] + mo_wts[indx[1]] + mo_wts[indx[2]] + + d_season = ( + d.isel(time=indx[0])[var] * mo_wts[indx[0]] + + d.isel(time=indx[1])[var] * mo_wts[indx[1]] + + d.isel(time=indx[2])[var] * mo_wts[indx[2]] + ) / season_num_days + + ds_new = d.isel(time=0).copy(deep=True) + ds_new[var] = d_season + + return ds_new + + +# Metrics calculations + + +def bias_xy(dm, do, var=None, weights=None): + """Computes bias""" + if dm is None and do is None: # just want the doc + return { + "Name": "Bias", + "Abstract": "Compute Full Average of Model - Observation", + "Contact": "pcmdi-metrics@llnl.gov", + } + dif = dm[var] - do[var] + if weights is None: + weights = dm.spatial.get_weights(axis=["X", "Y"]) + stat = float(dif.weighted(weights).mean(("lon", "lat"))) + return float(stat) + + +def bias_xyt(dm, do, var=None): + """Computes bias""" + if dm is None and do is None: # just want the doc + return { + "Name": "Bias", + "Abstract": "Compute Full Average of Model - Observation", + "Contact": "pcmdi-metrics@llnl.gov", + } + ds = dm.copy(deep=True) + ds["dif"] = dm[var] - do[var] + stat = ( + ds.spatial.average("dif", axis=["X", "Y"]).temporal.average("dif")["dif"].values + ) + return float(stat) + + +def cor_xy(dm, do, var=None, weights=None): + """Computes correlation""" + if dm is None and do is None: # just want the doc + return { + "Name": "Spatial Correlation", + "Abstract": "Compute Spatial Correlation", + "Contact": "pcmdi-metrics@llnl.gov", + } + if 
weights is None: + weights = dm.spatial.get_weights(axis=["X", "Y"]) + + dm_avg = dm.spatial.average(var, axis=["X", "Y"], weights=weights)[var].values + do_avg = do.spatial.average(var, axis=["X", "Y"], weights=weights)[var].values + + covariance = ( + ((dm[var] - dm_avg) * (do[var] - do_avg)) + .weighted(weights) + .mean(dim=["lon", "lat"]) + .values + ) + std_dm = std_xy(dm, var) + std_do = std_xy(do, var) + stat = covariance / (std_dm * std_do) + + return float(stat) + + +def mean_xy(d, var=None, weights=None): + """Computes bias""" + if d is None: # just want the doc + return { + "Name": "Mean", + "Abstract": "Area Mean (area weighted)", + "Contact": "pcmdi-metrics@llnl.gov", + } + + if weights is None: + weights = d.spatial.get_weights(axis=["X", "Y"]) + stat = float(d[var].weighted(weights).mean(("lon", "lat"))) + return float(stat) + + +def meanabs_xy(dm, do, var=None, weights=None): + """Computes Mean Absolute Error""" + if dm is None and do is None: # just want the doc + return { + "Name": "Mean Absolute Error", + "Abstract": "Compute Full Average of " + + "Absolute Difference Between Model And Observation", + "Contact": "pcmdi-metrics@llnl.gov", + } + if weights is None: + weights = dm.spatial.get_weights(axis=["X", "Y"]) + dif = abs(dm[var] - do[var]) + stat = dif.weighted(weights).mean(("lon", "lat")) + return float(stat) + + +def meanabs_xyt(dm, do, var=None): + """Computes Mean Absolute Error""" + if dm is None and do is None: # just want the doc + return { + "Name": "Mean Absolute Error", + "Abstract": "Compute Full Average of " + + "Absolute Difference Between Model And Observation", + "Contact": "pcmdi-metrics@llnl.gov", + } + ds = dm.copy(deep=True) + ds["absdif"] = abs(dm[var] - do[var]) + stat = ( + ds.spatial.average("absdif", axis=["X", "Y"]) + .temporal.average("absdif")["absdif"] + .values + ) + return float(stat) + + +def rms_0(dm, do, var=None, weighted=True): + """Computes rms over first axis -- compare two zonal mean fields""" + if dm is None and do is None: # just want the doc + return { + "Name": "Root Mean Square over First Axis", + "Abstract": "Compute Root Mean Square over the first axis", + "Contact": "pcmdi-metrics@llnl.gov", + } + dif_square = (dm[var] - do[var]) ** 2 + if weighted: + weights = dm.spatial.get_weights(axis=["Y"]) + stat = math.sqrt(dif_square.weighted(weights).mean(("lat"))) + else: + stat = math.sqrt(dif_square.mean(("lat"))) + return float(stat) + + +def rms_xy(dm, do, var=None, weights=None): + """Computes rms""" + if dm is None and do is None: # just want the doc + return { + "Name": "Spatial Root Mean Square", + "Abstract": "Compute Spatial Root Mean Square", + "Contact": "pcmdi-metrics@llnl.gov", + } + dif_square = (dm[var] - do[var]) ** 2 + if weights is None: + weights = dm.spatial.get_weights(axis=["X", "Y"]) + stat = math.sqrt(dif_square.weighted(weights).mean(("lon", "lat"))) + return float(stat) + + +def rms_xyt(dm, do, var=None): + """Computes rms""" + if dm is None and do is None: # just want the doc + return { + "Name": "Spatio-Temporal Root Mean Square", + "Abstract": "Compute Spatial and Temporal Root Mean Square", + "Contact": "pcmdi-metrics@llnl.gov", + } + ds = dm.copy(deep=True) + ds["diff_square"] = (dm[var] - do[var]) ** 2 + ds["diff_square_sqrt"] = np.sqrt( + ds.spatial.average("diff_square", axis=["X", "Y"])["diff_square"] + ) + stat = ds.temporal.average("diff_square_sqrt")["diff_square_sqrt"].values + return float(stat) + + +def rmsc_xy(dm, do, var=None, weights=None): + """Computes centered rms""" + if dm is 
None and do is None: # just want the doc + return { + "Name": "Spatial Root Mean Square", + "Abstract": "Compute Centered Spatial Root Mean Square", + "Contact": "pcmdi-metrics@llnl.gov", + } + if weights is None: + weights = dm.spatial.get_weights(axis=["X", "Y"]) + + lat_key_dm = xc.axis.get_dim_keys(dm, axis="Y") + lon_key_dm = xc.axis.get_dim_keys(dm, axis="X") + + lat_key_do = xc.axis.get_dim_keys(do, axis="Y") + lon_key_do = xc.axis.get_dim_keys(do, axis="X") + + if lat_key_dm == lat_key_do: + lat_key = lat_key_dm + else: + print("Different key names: lat_key_dm, lat_key_do: ", lat_key_dm, lat_key_do) + + if lon_key_dm == lon_key_do: + lon_key = lon_key_dm + else: + print("Different key names: lon_key_dm, lon_key_do: ", lon_key_dm, lon_key_do) + + dm_anomaly = dm[var] - dm[var].weighted(weights).mean((lat_key_dm, lon_key_dm)) + do_anomaly = do[var] - do[var].weighted(weights).mean((lat_key_do, lon_key_do)) + diff_square = (dm_anomaly - do_anomaly) ** 2 + + stat = math.sqrt(diff_square.weighted(weights).mean((lat_key, lon_key))) + return float(stat) + + +def std_xy(ds, var=None, weights=None): + """Computes std""" + if ds is None: # just want the doc + return { + "Name": "Spatial Standard Deviation", + "Abstract": "Compute Spatial Standard Deviation", + "Contact": "pcmdi-metrics@llnl.gov", + } + if weights is None: + weights = ds.spatial.get_weights(axis=["X", "Y"]) + + lat_key = xc.axis.get_dim_keys(ds, axis="Y") + lon_key = xc.axis.get_dim_keys(ds, axis="X") + + average = float(ds[var].weighted(weights).mean((lat_key, lon_key))) + anomaly = (ds[var] - average) ** 2 + variance = float(anomaly.weighted(weights).mean((lat_key, lon_key))) + std = math.sqrt(variance) + return float(std) + + +def std_xyt(d, var=None): + """Computes std""" + if d is None: # just want the doc + return { + "Name": "Spatial-temporal Standard Deviation", + "Abstract": "Compute Space-Time Standard Deviation", + "Contact": "pcmdi-metrics@llnl.gov", + } + ds = d.copy(deep=True) + average = d.spatial.average(var, axis=["X", "Y"]).temporal.average(var)[var] + ds["anomaly"] = (d[var] - average) ** 2 + variance = ( + ds.spatial.average("anomaly").temporal.average("anomaly")["anomaly"].values + ) + std = math.sqrt(variance) + return std + + +def zonal_mean(dm, do, var=None): + """Computes ZONAL MEAN assumes rectilinear/regular grid""" + if dm is None and do is None: # just want the doc + return { + "Name": "Zonal Mean", + "Abstract": "Compute Zonal Mean", + "Contact": "pcmdi-metrics@llnl.gov", + "Comments": "", + } + dm_zm = dm.spatial.average(var, axis=["X"]) + do_zm = do.spatial.average(var, axis=["X"]) + return dm_zm, do_zm # DataSets From c63cd9e72537b56918d94b0b12d9605360b8f2d9 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Sat, 23 Dec 2023 12:23:35 -0800 Subject: [PATCH 02/16] clean up --- .../mean_climate/lib/compute_metrics.py | 156 +++++++++--------- pcmdi_metrics/stats/__init__.py | 2 +- 2 files changed, 76 insertions(+), 82 deletions(-) diff --git a/pcmdi_metrics/mean_climate/lib/compute_metrics.py b/pcmdi_metrics/mean_climate/lib/compute_metrics.py index 33080f379..757d0e76b 100644 --- a/pcmdi_metrics/mean_climate/lib/compute_metrics.py +++ b/pcmdi_metrics/mean_climate/lib/compute_metrics.py @@ -1,33 +1,27 @@ from collections import OrderedDict -import pcmdi_metrics +from pcmdi_metrics import stats -def compute_metrics(Var, dm, do, debug=False, time_dim_sync=False): +def metrics(Var, dm, do, debug=False, time_dim_sync=False): # Var is sometimes sent with level associated var = Var.split("_")[0] # Did we 
send data? Or do we just want the info? if dm is None and do is None: metrics_defs = OrderedDict() - metrics_defs["rms_xyt"] = pcmdi_metrics.mean_climate.lib.rms_xyt(None, None) - metrics_defs["rms_xy"] = pcmdi_metrics.mean_climate.lib.rms_xy(None, None) - metrics_defs["rmsc_xy"] = pcmdi_metrics.mean_climate.lib.rmsc_xy(None, None) - metrics_defs["bias_xy"] = pcmdi_metrics.mean_climate.lib.bias_xy(None, None) - metrics_defs["mae_xy"] = pcmdi_metrics.mean_climate.lib.meanabs_xy(None, None) - metrics_defs["cor_xy"] = pcmdi_metrics.mean_climate.lib.cor_xy(None, None) - metrics_defs["mean_xy"] = pcmdi_metrics.mean_climate.lib.mean_xy(None) - metrics_defs["std_xy"] = pcmdi_metrics.mean_climate.lib.std_xy(None) - metrics_defs["std_xyt"] = pcmdi_metrics.mean_climate.lib.std_xyt(None) - - metrics_defs["seasonal_mean"] = pcmdi_metrics.mean_climate.lib.seasonal_mean( - None, None - ) - metrics_defs["annual_mean"] = pcmdi_metrics.mean_climate.lib.annual_mean( - None, None - ) - metrics_defs["zonal_mean"] = pcmdi_metrics.mean_climate.lib.zonal_mean( - None, None - ) + metrics_defs["rms_xyt"] = stats.rms_xyt(None, None) + metrics_defs["rms_xy"] = stats.rms_xy(None, None) + metrics_defs["rmsc_xy"] = stats.rmsc_xy(None, None) + metrics_defs["bias_xy"] = stats.bias_xy(None, None) + metrics_defs["mae_xy"] = stats.meanabs_xy(None, None) + metrics_defs["cor_xy"] = stats.cor_xy(None, None) + metrics_defs["mean_xy"] = stats.mean_xy(None) + metrics_defs["std_xy"] = stats.std_xy(None) + metrics_defs["std_xyt"] = stats.std_xyt(None) + + metrics_defs["seasonal_mean"] = stats.seasonal_mean(None, None) + metrics_defs["annual_mean"] = stats.annual_mean(None, None) + metrics_defs["zonal_mean"] = stats.zonal_mean(None, None) return metrics_defs # cdms.setAutoBounds("on") @@ -59,80 +53,80 @@ def compute_metrics(Var, dm, do, debug=False, time_dim_sync=False): sig_digits = ".3f" # CALCULATE ANNUAL CYCLE SPACE-TIME RMS, CORRELATIONS and STD - print("compute_metrics-CALCULATE ANNUAL CYCLE SPACE-TIME RMS, CORRELATIONS and STD") + print("metrics-CALCULATE ANNUAL CYCLE SPACE-TIME RMS, CORRELATIONS and STD") - print("compute_metrics, rms_xyt") - rms_xyt = pcmdi_metrics.mean_climate.lib.rms_xyt(dm, do, var) - print("compute_metrics, rms_xyt:", rms_xyt) + print("metrics, rms_xyt") + rms_xyt = stats.rms_xyt(dm, do, var) + print("metrics, rms_xyt:", rms_xyt) - print("compute_metrics, stdObs_xyt") - stdObs_xyt = pcmdi_metrics.mean_climate.lib.std_xyt(do, var) - print("compute_metrics, stdObs_xyt:", stdObs_xyt) + print("metrics, stdObs_xyt") + stdObs_xyt = stats.std_xyt(do, var) + print("metrics, stdObs_xyt:", stdObs_xyt) - print("compute_metrics, std_xyt") - std_xyt = pcmdi_metrics.mean_climate.lib.std_xyt(dm, var) - print("compute_metrics, std_xyt:", std_xyt) + print("metrics, std_xyt") + std_xyt = stats.std_xyt(dm, var) + print("metrics, std_xyt:", std_xyt) # CALCULATE ANNUAL MEANS - print("compute_metrics-CALCULATE ANNUAL MEANS") - dm_am, do_am = pcmdi_metrics.mean_climate.lib.annual_mean(dm, do, var) + print("metrics-CALCULATE ANNUAL MEANS") + dm_am, do_am = stats.annual_mean(dm, do, var) # CALCULATE ANNUAL MEAN BIAS - print("compute_metrics-CALCULATE ANNUAL MEAN BIAS") - bias_xy = pcmdi_metrics.mean_climate.lib.bias_xy(dm_am, do_am, var) - print("compute_metrics-CALCULATE ANNUAL MEAN BIAS, bias_xy:", bias_xy) + print("metrics-CALCULATE ANNUAL MEAN BIAS") + bias_xy = stats.bias_xy(dm_am, do_am, var) + print("metrics-CALCULATE ANNUAL MEAN BIAS, bias_xy:", bias_xy) # CALCULATE MEAN ABSOLUTE ERROR - print("compute_metrics-CALCULATE 
MSE") - mae_xy = pcmdi_metrics.mean_climate.lib.meanabs_xy(dm_am, do_am, var) - print("compute_metrics-CALCULATE MSE, mae_xy:", mae_xy) + print("metrics-CALCULATE MSE") + mae_xy = stats.meanabs_xy(dm_am, do_am, var) + print("metrics-CALCULATE MSE, mae_xy:", mae_xy) # CALCULATE ANNUAL MEAN RMS (centered and uncentered) - print("compute_metrics-CALCULATE MEAN RMS") - rms_xy = pcmdi_metrics.mean_climate.lib.rms_xy(dm_am, do_am, var) - rmsc_xy = pcmdi_metrics.mean_climate.lib.rmsc_xy(dm_am, do_am, var) - print("compute_metrics-CALCULATE MEAN RMS: rms_xy, rmsc_xy: ", rms_xy, rmsc_xy) + print("metrics-CALCULATE MEAN RMS") + rms_xy = stats.rms_xy(dm_am, do_am, var) + rmsc_xy = stats.rmsc_xy(dm_am, do_am, var) + print("metrics-CALCULATE MEAN RMS: rms_xy, rmsc_xy: ", rms_xy, rmsc_xy) # CALCULATE ANNUAL MEAN CORRELATION - print("compute_metrics-CALCULATE MEAN CORR") - cor_xy = pcmdi_metrics.mean_climate.lib.cor_xy(dm_am, do_am, var) - print("compute_metrics-CALCULATE MEAN CORR: cor_xy:", cor_xy) + print("metrics-CALCULATE MEAN CORR") + cor_xy = stats.cor_xy(dm_am, do_am, var) + print("metrics-CALCULATE MEAN CORR: cor_xy:", cor_xy) # CALCULATE ANNUAL OBS and MOD STD - print("compute_metrics-CALCULATE ANNUAL OBS AND MOD STD") - stdObs_xy = pcmdi_metrics.mean_climate.lib.std_xy(do_am, var) - std_xy = pcmdi_metrics.mean_climate.lib.std_xy(dm_am, var) + print("metrics-CALCULATE ANNUAL OBS AND MOD STD") + stdObs_xy = stats.std_xy(do_am, var) + std_xy = stats.std_xy(dm_am, var) # CALCULATE ANNUAL OBS and MOD MEAN - print("compute_metrics-CALCULATE ANNUAL OBS AND MOD MEAN") - meanObs_xy = pcmdi_metrics.mean_climate.lib.mean_xy(do_am, var) - mean_xy = pcmdi_metrics.mean_climate.lib.mean_xy(dm_am, var) + print("metrics-CALCULATE ANNUAL OBS AND MOD MEAN") + meanObs_xy = stats.mean_xy(do_am, var) + mean_xy = stats.mean_xy(dm_am, var) # ZONAL MEANS ###### # CALCULATE ANNUAL MEANS - print("compute_metrics-CALCULATE ANNUAL MEANS") - dm_amzm, do_amzm = pcmdi_metrics.mean_climate.lib.zonal_mean(dm_am, do_am, var) + print("metrics-CALCULATE ANNUAL MEANS") + dm_amzm, do_amzm = stats.zonal_mean(dm_am, do_am, var) # CALCULATE ANNUAL AND ZONAL MEAN RMS - print("compute_metrics-CALCULATE ANNUAL AND ZONAL MEAN RMS") - rms_y = pcmdi_metrics.mean_climate.lib.rms_0(dm_amzm, do_amzm, var) + print("metrics-CALCULATE ANNUAL AND ZONAL MEAN RMS") + rms_y = stats.rms_0(dm_amzm, do_amzm, var) # CALCULATE ANNUAL MEAN DEVIATION FROM ZONAL MEAN RMS - print("compute_metrics-CALCULATE ANNUAL MEAN DEVIATION FROM ZONAL MEAN RMS") + print("metrics-CALCULATE ANNUAL MEAN DEVIATION FROM ZONAL MEAN RMS") dm_am_devzm = dm_am - dm_amzm do_am_devzm = do_am - do_amzm - rms_xy_devzm = pcmdi_metrics.mean_climate.lib.rms_xy( + rms_xy_devzm = stats.rms_xy( dm_am_devzm, do_am_devzm, var, weights=dm.spatial.get_weights(axis=["X", "Y"]) ) # CALCULATE ANNUAL AND ZONAL MEAN STD # CALCULATE ANNUAL MEAN DEVIATION FROM ZONAL MEAN STD - print("compute_metrics-CALCULATE ANNUAL MEAN DEVIATION FROM ZONAL MEAN STD") - stdObs_xy_devzm = pcmdi_metrics.mean_climate.lib.std_xy( + print("metrics-CALCULATE ANNUAL MEAN DEVIATION FROM ZONAL MEAN STD") + stdObs_xy_devzm = stats.std_xy( do_am_devzm, var, weights=do.spatial.get_weights(axis=["X", "Y"]) ) - std_xy_devzm = pcmdi_metrics.mean_climate.lib.std_xy( + std_xy_devzm = stats.std_xy( dm_am_devzm, var, weights=dm.spatial.get_weights(axis=["X", "Y"]) ) @@ -178,23 +172,23 @@ def compute_metrics(Var, dm, do, debug=False, time_dim_sync=False): # CALCULATE SEASONAL MEANS for sea in ["djf", "mam", "jja", "son"]: - dm_sea = 
pcmdi_metrics.mean_climate.lib.seasonal_mean(dm, sea, var) - do_sea = pcmdi_metrics.mean_climate.lib.seasonal_mean(do, sea, var) + dm_sea = stats.seasonal_mean(dm, sea, var) + do_sea = stats.seasonal_mean(do, sea, var) # CALCULATE SEASONAL RMS AND CORRELATION - rms_sea = pcmdi_metrics.mean_climate.lib.rms_xy(dm_sea, do_sea, var) - rmsc_sea = pcmdi_metrics.mean_climate.lib.rmsc_xy(dm_sea, do_sea, var) - cor_sea = pcmdi_metrics.mean_climate.lib.cor_xy(dm_sea, do_sea, var) - mae_sea = pcmdi_metrics.mean_climate.lib.meanabs_xy(dm_sea, do_sea, var) - bias_sea = pcmdi_metrics.mean_climate.lib.bias_xy(dm_sea, do_sea, var) + rms_sea = stats.rms_xy(dm_sea, do_sea, var) + rmsc_sea = stats.rmsc_xy(dm_sea, do_sea, var) + cor_sea = stats.cor_xy(dm_sea, do_sea, var) + mae_sea = stats.meanabs_xy(dm_sea, do_sea, var) + bias_sea = stats.bias_xy(dm_sea, do_sea, var) # CALCULATE SEASONAL OBS and MOD STD - stdObs_xy_sea = pcmdi_metrics.mean_climate.lib.std_xy(do_sea, var) - std_xy_sea = pcmdi_metrics.mean_climate.lib.std_xy(dm_sea, var) + stdObs_xy_sea = stats.std_xy(do_sea, var) + std_xy_sea = stats.std_xy(dm_sea, var) # CALCULATE SEASONAL OBS and MOD MEAN - meanObs_xy_sea = pcmdi_metrics.mean_climate.lib.mean_xy(do_sea, var) - mean_xy_sea = pcmdi_metrics.mean_climate.lib.mean_xy(dm_sea, var) + meanObs_xy_sea = stats.mean_xy(do_sea, var) + mean_xy_sea = stats.mean_xy(dm_sea, var) metrics_dictionary["bias_xy"][sea] = format(bias_sea, sig_digits) metrics_dictionary["rms_xy"][sea] = format(rms_sea, sig_digits) @@ -236,19 +230,19 @@ def compute_metrics(Var, dm, do, debug=False, time_dim_sync=False): do_mo = do.isel(time=n) # CALCULATE MONTHLY RMS AND CORRELATION - rms_mo = pcmdi_metrics.mean_climate.lib.rms_xy(dm_mo, do_mo, var) - rmsc_mo = pcmdi_metrics.mean_climate.lib.rmsc_xy(dm_mo, do_mo, var) - cor_mo = pcmdi_metrics.mean_climate.lib.cor_xy(dm_mo, do_mo, var) - mae_mo = pcmdi_metrics.mean_climate.lib.meanabs_xy(dm_mo, do_mo, var) - bias_mo = pcmdi_metrics.mean_climate.lib.bias_xy(dm_mo, do_mo, var) + rms_mo = stats.rms_xy(dm_mo, do_mo, var) + rmsc_mo = stats.rmsc_xy(dm_mo, do_mo, var) + cor_mo = stats.cor_xy(dm_mo, do_mo, var) + mae_mo = stats.meanabs_xy(dm_mo, do_mo, var) + bias_mo = stats.bias_xy(dm_mo, do_mo, var) # CALCULATE MONTHLY OBS and MOD STD - stdObs_xy_mo = pcmdi_metrics.mean_climate.lib.std_xy(do_mo, var) - std_xy_mo = pcmdi_metrics.mean_climate.lib.std_xy(dm_mo, var) + stdObs_xy_mo = stats.std_xy(do_mo, var) + std_xy_mo = stats.std_xy(dm_mo, var) # CALCULATE MONTHLY OBS and MOD MEAN - meanObs_xy_mo = pcmdi_metrics.mean_climate.lib.mean_xy(do_mo, var) - mean_xy_mo = pcmdi_metrics.mean_climate.lib.mean_xy(dm_mo, var) + meanObs_xy_mo = stats.mean_xy(do_mo, var) + mean_xy_mo = stats.mean_xy(dm_mo, var) rms_mo_l.append(format(rms_mo, sig_digits)) rmsc_mo_l.append(format(rmsc_mo, sig_digits)) diff --git a/pcmdi_metrics/stats/__init__.py b/pcmdi_metrics/stats/__init__.py index a9244dfdd..78dc1dd54 100644 --- a/pcmdi_metrics/stats/__init__.py +++ b/pcmdi_metrics/stats/__init__.py @@ -1,4 +1,4 @@ -from .compute_statistics import ( +from .statistics import ( annual_mean, bias_xy, bias_xyt, From d315690c77e61fa546bcc24b8c0cbe2664b3cce0 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Sat, 23 Dec 2023 12:32:58 -0800 Subject: [PATCH 03/16] add warning message for deprecation and pointer to newer place --- .../mean_climate/lib/compute_statistics.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/pcmdi_metrics/mean_climate/lib/compute_statistics.py 
b/pcmdi_metrics/mean_climate/lib/compute_statistics.py index 4ef809b1f..9501e1044 100644 --- a/pcmdi_metrics/mean_climate/lib/compute_statistics.py +++ b/pcmdi_metrics/mean_climate/lib/compute_statistics.py @@ -1,10 +1,14 @@ import math +import warnings import numpy as np def annual_mean(dm, do, var=None): """Computes ANNUAL MEAN""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.annual_mean is deprecated. Please consider use pcmdi_metrics.stats.annual_mean, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Annual Mean", @@ -19,6 +23,9 @@ def annual_mean(dm, do, var=None): def seasonal_mean(d, season, var=None): """Computes SEASONAL MEAN""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.seasonal_mean is deprecated. Please consider use pcmdi_metrics.stats.seasonal_mean, instead" + ) if d is None and season is None: # just want the doc return { "Name": "Seasonal Mean", @@ -57,6 +64,9 @@ def seasonal_mean(d, season, var=None): def bias_xy(dm, do, var=None, weights=None): """Computes bias""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.bias_xy is deprecated. Please consider use pcmdi_metrics.stats.bias_xy, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Bias", @@ -72,6 +82,9 @@ def bias_xy(dm, do, var=None, weights=None): def bias_xyt(dm, do, var=None): """Computes bias""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.bias_xyt is deprecated. Please consider use pcmdi_metrics.stats.bias_xyt, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Bias", @@ -88,6 +101,9 @@ def bias_xyt(dm, do, var=None): def cor_xy(dm, do, var=None, weights=None): """Computes correlation""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.cor_xy is deprecated. Please consider use pcmdi_metrics.stats.cor_xy, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Spatial Correlation", @@ -115,6 +131,9 @@ def cor_xy(dm, do, var=None, weights=None): def mean_xy(d, var=None, weights=None): """Computes bias""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.mean_xy is deprecated. Please consider use pcmdi_metrics.stats.mean_xy, instead" + ) if d is None: # just want the doc return { "Name": "Mean", @@ -130,6 +149,9 @@ def mean_xy(d, var=None, weights=None): def meanabs_xy(dm, do, var=None, weights=None): """Computes Mean Absolute Error""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.meanabs_xy is deprecated. Please consider use pcmdi_metrics.stats.meanabs_xy, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Mean Absolute Error", @@ -146,6 +168,9 @@ def meanabs_xy(dm, do, var=None, weights=None): def meanabs_xyt(dm, do, var=None): """Computes Mean Absolute Error""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.meanabs_xyt is deprecated. Please consider use pcmdi_metrics.stats.meanabs_xyt, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Mean Absolute Error", @@ -165,6 +190,9 @@ def meanabs_xyt(dm, do, var=None): def rms_0(dm, do, var=None, weighted=True): """Computes rms over first axis -- compare two zonal mean fields""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.rms_0 is deprecated. 
Please consider use pcmdi_metrics.stats.rms_0, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Root Mean Square over First Axis", @@ -182,6 +210,9 @@ def rms_0(dm, do, var=None, weighted=True): def rms_xy(dm, do, var=None, weights=None): """Computes rms""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.rms_xy is deprecated. Please consider use pcmdi_metrics.stats.rms_xy, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Spatial Root Mean Square", @@ -197,6 +228,9 @@ def rms_xy(dm, do, var=None, weights=None): def rms_xyt(dm, do, var=None): """Computes rms""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.rms_xyt is deprecated. Please consider use pcmdi_metrics.stats.rms_xyt, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Spatio-Temporal Root Mean Square", @@ -214,6 +248,9 @@ def rms_xyt(dm, do, var=None): def rmsc_xy(dm, do, var=None, weights=None): """Computes centered rms""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.rmsc_xy is deprecated. Please consider use pcmdi_metrics.stats.rmsc_xy, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Spatial Root Mean Square", @@ -233,6 +270,9 @@ def rmsc_xy(dm, do, var=None, weights=None): def std_xy(d, var=None, weights=None): """Computes std""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.std_xy is deprecated. Please consider use pcmdi_metrics.stats.std_xy, instead" + ) if d is None: # just want the doc return { "Name": "Spatial Standard Deviation", @@ -250,6 +290,9 @@ def std_xy(d, var=None, weights=None): def std_xyt(d, var=None): """Computes std""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.std_xyt is deprecated. Please consider use pcmdi_metrics.stats.std_xyt, instead" + ) if d is None: # just want the doc return { "Name": "Spatial-temporal Standard Deviation", @@ -268,6 +311,9 @@ def std_xyt(d, var=None): def zonal_mean(dm, do, var=None): """Computes ZONAL MEAN assumes rectilinear/regular grid""" + warnings.warn( + "pcmdi_metrics.mean_climate.lib.zonal_mean is deprecated. Please consider use pcmdi_metrics.stats.zonal_mean, instead" + ) if dm is None and do is None: # just want the doc return { "Name": "Zonal Mean", From 233c41f47e84d74db39a9a2fac6597e8a77c0129 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Sat, 23 Dec 2023 12:36:38 -0800 Subject: [PATCH 04/16] bug fix --- pcmdi_metrics/mean_climate/lib/compute_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcmdi_metrics/mean_climate/lib/compute_metrics.py b/pcmdi_metrics/mean_climate/lib/compute_metrics.py index 757d0e76b..169db1233 100644 --- a/pcmdi_metrics/mean_climate/lib/compute_metrics.py +++ b/pcmdi_metrics/mean_climate/lib/compute_metrics.py @@ -3,7 +3,7 @@ from pcmdi_metrics import stats -def metrics(Var, dm, do, debug=False, time_dim_sync=False): +def compute_metrics(Var, dm, do, debug=False, time_dim_sync=False): # Var is sometimes sent with level associated var = Var.split("_")[0] # Did we send data? Or do we just want the info? 
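Patches 01–04 above move the mean-climate statistics into the new `pcmdi_metrics.stats` module, point `compute_metrics` at it, and leave deprecation shims behind in `pcmdi_metrics.mean_climate.lib`. A minimal usage sketch of the relocated API follows; the file paths and the variable name "ts" are placeholders (not part of the patch series), and both inputs are assumed to be 12-month climatologies on a shared grid, as the functions expect.

```python
# Sketch only -- not part of the patch series. Paths and the variable name
# "ts" are hypothetical; both inputs are assumed to be 12-month climatologies
# on the same lat/lon grid, opened as xcdat-compatible xarray Datasets.
import xcdat as xc

from pcmdi_metrics import stats

dm = xc.open_dataset("model_ts_climatology.nc")  # model (hypothetical path)
do = xc.open_dataset("obs_ts_climatology.nc")    # reference (hypothetical path)

# Annual means are returned as Datasets; the scalar metrics return floats.
dm_am, do_am = stats.annual_mean(dm, do, var="ts")
print("bias_xy:", stats.bias_xy(dm_am, do_am, var="ts"))
print("rms_xy :", stats.rms_xy(dm_am, do_am, var="ts"))
print("cor_xy :", stats.cor_xy(dm_am, do_am, var="ts"))

# Calling any of these with None inputs returns its metadata dictionary
# (Name/Abstract/Contact) instead of a number, as compute_metrics does when
# it only wants the metric definitions.
print(stats.bias_xy(None, None))
```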
From 071279bab9e3539b790327b2b0f5eb296521d21e Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Sat, 23 Dec 2023 16:24:52 -0800 Subject: [PATCH 05/16] clean up --- pcmdi_metrics/mean_climate/mean_climate_driver.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pcmdi_metrics/mean_climate/mean_climate_driver.py b/pcmdi_metrics/mean_climate/mean_climate_driver.py index 382b574e3..68340e429 100755 --- a/pcmdi_metrics/mean_climate/mean_climate_driver.py +++ b/pcmdi_metrics/mean_climate/mean_climate_driver.py @@ -14,8 +14,13 @@ load_and_regrid, mean_climate_metrics_to_json, ) -from pcmdi_metrics.utils import apply_landmask, create_land_sea_mask, create_target_grid -from pcmdi_metrics.variability_mode.lib import sort_human, tree +from pcmdi_metrics.utils import ( + apply_landmask, + create_land_sea_mask, + create_target_grid, + sort_human, + tree, +) print("--- prepare mean climate metrics calculation ---") From ea6d3d75ad962a409901304e6f05285ffd242c7f Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Thu, 4 Jan 2024 11:05:50 -0800 Subject: [PATCH 06/16] select_subset function added --- pcmdi_metrics/io/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pcmdi_metrics/io/__init__.py b/pcmdi_metrics/io/__init__.py index 9822a2637..58f574091 100644 --- a/pcmdi_metrics/io/__init__.py +++ b/pcmdi_metrics/io/__init__.py @@ -4,3 +4,4 @@ from .base import MV2Json # noqa from .default_regions_define import load_regions_specs # noqa from .default_regions_define import region_subset # noqa +from .select_subset import select_subset # noqa From 82ec3f65dd1d1c6f2519aa07457950c17b77c095 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Thu, 4 Jan 2024 11:07:35 -0800 Subject: [PATCH 07/16] add select_subset function --- pcmdi_metrics/io/select_subset.py | 43 +++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 pcmdi_metrics/io/select_subset.py diff --git a/pcmdi_metrics/io/select_subset.py b/pcmdi_metrics/io/select_subset.py new file mode 100644 index 000000000..78d5eccd6 --- /dev/null +++ b/pcmdi_metrics/io/select_subset.py @@ -0,0 +1,43 @@ +def select_subset(ds: xr.Dataset, lat: tuple = None, lon: tuple = None, time: tuple = None) -> xr.Dataset: + """ + Selects a subset of the given xarray dataset based on specified latitude, longitude, and time ranges. + + Parameters: + - ds (xr.Dataset): The input xarray dataset. + - lat (tuple, optional): Latitude range in the form of (min, max). + - lon (tuple, optional): Longitude range in the form of (min, max). + - time (tuple, optional): Time range. If time is specified, it should be in the form of (start_time, end_time), + where start_time and end_time can be integers, floats, or cftime.DatetimeProlepticGregorian objects. + + Returns: + - xr.Dataset: Subset of the input dataset based on the specified latitude, longitude, and time ranges. 
+ + Example Usage: + ``` + import cftime + + # Define latitude, longitude, and time ranges + lat_tuple = (30, 50) # Latitude range + lon_tuple = (110, 130) # Longitude range + time_tuple = (cftime.datetime(1850, 1, 1, 0, 0, 0, 0), + cftime.datetime(1851, 12, 31, 23, 59, 59, 0)) # Time range + + # Load your xarray dataset (ds) here + + # Select subset based on specified ranges + ds_subset = select_subset(ds, lat=lat_tuple, lon=lon_tuple, time=time_tuple) + ``` + """ + sel_keys = {} + if lat is not None: + lat_key = xc.axis.get_dim_keys(ds, axis="Y") + sel_keys[lat_key] = slice(*lat) + if lon is not None: + lon_key = xc.axis.get_dim_keys(ds, axis="X") + sel_keys[lon_key] = slice(*lon) + if time is not None: + time_key = xc.axis.get_dim_keys(ds, axis="T") + sel_keys[time_key] = slice(*time) + + ds = ds.sel(**sel_keys) + return ds From 8b44ec4a4de57b9bfe6a56edf8c1b77275be3698 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Thu, 4 Jan 2024 11:14:44 -0800 Subject: [PATCH 08/16] update --- pcmdi_metrics/io/select_subset.py | 12 +- pcmdi_metrics/io/xarray_datasets_io.py | 148 +++++++++++++++++++++++++ 2 files changed, 157 insertions(+), 3 deletions(-) create mode 100644 pcmdi_metrics/io/xarray_datasets_io.py diff --git a/pcmdi_metrics/io/select_subset.py b/pcmdi_metrics/io/select_subset.py index 78d5eccd6..b4308a68f 100644 --- a/pcmdi_metrics/io/select_subset.py +++ b/pcmdi_metrics/io/select_subset.py @@ -1,4 +1,10 @@ -def select_subset(ds: xr.Dataset, lat: tuple = None, lon: tuple = None, time: tuple = None) -> xr.Dataset: +import xarray as xr +import xcdat as xc + + +def select_subset( + ds: xr.Dataset, lat: tuple = None, lon: tuple = None, time: tuple = None +) -> xr.Dataset: """ Selects a subset of the given xarray dataset based on specified latitude, longitude, and time ranges. 
@@ -21,13 +27,13 @@ def select_subset(ds: xr.Dataset, lat: tuple = None, lon: tuple = None, time: tu lon_tuple = (110, 130) # Longitude range time_tuple = (cftime.datetime(1850, 1, 1, 0, 0, 0, 0), cftime.datetime(1851, 12, 31, 23, 59, 59, 0)) # Time range - + # Load your xarray dataset (ds) here # Select subset based on specified ranges ds_subset = select_subset(ds, lat=lat_tuple, lon=lon_tuple, time=time_tuple) ``` - """ + """ sel_keys = {} if lat is not None: lat_key = xc.axis.get_dim_keys(ds, axis="Y") diff --git a/pcmdi_metrics/io/xarray_datasets_io.py b/pcmdi_metrics/io/xarray_datasets_io.py new file mode 100644 index 000000000..5cdae924a --- /dev/null +++ b/pcmdi_metrics/io/xarray_datasets_io.py @@ -0,0 +1,148 @@ +from typing import Union + +import xarray as xr +import xcdat as xc + +# Retrieve coordinate key names + + +def get_axis_list(ds: Union[xr.Dataset, xr.DataArray]) -> list[str]: + axes = list(ds.coords.keys()) + return axes + + +def get_time_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: + try: + time_key = xc.get_dim_keys(ds, "T") + except Exception: + axes = get_axis_list(ds) + time_key = [k for k in axes if k.lower() in ["time"]][0] + return time_key + + +def get_lat_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: + try: + lat_key = xc.get_dim_keys(ds, "Y") + except Exception: + axes = get_axis_list(ds) + lat_key = [k for k in axes if k.lower() in ["lat", "latitude"]][0] + return lat_key + + +def get_lon_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: + try: + lon_key = xc.get_dim_keys(ds, "X") + except Exception: + axes = get_axis_list(ds) + lon_key = [k for k in axes if k.lower() in ["lon", "longitude"]][0] + return lon_key + + +# Retrieve bounds key names + + +def get_time_bounds_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: + lat_key = get_time_key(ds) + return ds[lat_key].attrs["bounds"] + + +def get_lat_bounds_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: + lat_key = get_lat_key(ds) + return ds[lat_key].attrs["bounds"] + + +def get_lon_bounds_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: + lon_key = get_lon_key(ds) + return ds[lon_key].attrs["bounds"] + + +# Extract coordinate data + + +def get_time(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray: + time_key = get_time_key(ds) + time = ds[time_key] + return time + + +def get_longitude(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray: + lon_key = get_lon_key(ds) + lon = ds[lon_key] + return lon + + +def get_latitude(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray: + lat_key = get_lat_key(ds) + lat = ds[lat_key] + return lat + + +# Extract coordinate bounds data + + +def get_time_bounds(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray: + time_bounds_key = get_time_bounds_key(ds) + time_bounds = ds[time_bounds_key] + return time_bounds + + +def get_longitude_bounds(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray: + lon_bounds_key = get_lon_bounds_key(ds) + lon_bounds = ds[lon_bounds_key] + return lon_bounds + + +def get_latitude_bounds(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray: + lat_bounds_key = get_lat_bounds_key(ds) + lat_bounds = ds[lat_bounds_key] + return lat_bounds + + +# Select subset + + +def select_subset( + ds: xr.Dataset, lat: tuple = None, lon: tuple = None, time: tuple = None +) -> xr.Dataset: + """ + Selects a subset of the given xarray dataset based on specified latitude, longitude, and time ranges. + + Parameters: + - ds (xr.Dataset): The input xarray dataset. + - lat (tuple, optional): Latitude range in the form of (min, max). 
+ - lon (tuple, optional): Longitude range in the form of (min, max). + - time (tuple, optional): Time range. If time is specified, it should be in the form of (start_time, end_time), + where start_time and end_time can be integers, floats, or cftime.DatetimeProlepticGregorian objects. + + Returns: + - xr.Dataset: Subset of the input dataset based on the specified latitude, longitude, and time ranges. + + Example Usage: + ``` + import cftime + + # Define latitude, longitude, and time ranges + lat_tuple = (30, 50) # Latitude range + lon_tuple = (110, 130) # Longitude range + time_tuple = (cftime.datetime(1850, 1, 1, 0, 0, 0, 0), + cftime.datetime(1851, 12, 31, 23, 59, 59, 0)) # Time range + + # Load your xarray dataset (ds) here + + # Select subset based on specified ranges + ds_subset = select_subset(ds, lat=lat_tuple, lon=lon_tuple, time=time_tuple) + ``` + """ + sel_keys = {} + if lat is not None: + lat_key = get_lat_key(ds) + sel_keys[lat_key] = slice(*lat) + if lon is not None: + lon_key = get_lon_key(ds) + sel_keys[lon_key] = slice(*lon) + if time is not None: + time_key = get_time_key(ds) + sel_keys[time_key] = slice(*time) + + ds = ds.sel(**sel_keys) + return ds From fe4c6ca4507cb72a50842a6df56ba99a2776cb00 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Thu, 4 Jan 2024 11:15:09 -0800 Subject: [PATCH 09/16] select_subset function moved to xarray_dataset_io --- pcmdi_metrics/io/select_subset.py | 49 ------------------------------- 1 file changed, 49 deletions(-) delete mode 100644 pcmdi_metrics/io/select_subset.py diff --git a/pcmdi_metrics/io/select_subset.py b/pcmdi_metrics/io/select_subset.py deleted file mode 100644 index b4308a68f..000000000 --- a/pcmdi_metrics/io/select_subset.py +++ /dev/null @@ -1,49 +0,0 @@ -import xarray as xr -import xcdat as xc - - -def select_subset( - ds: xr.Dataset, lat: tuple = None, lon: tuple = None, time: tuple = None -) -> xr.Dataset: - """ - Selects a subset of the given xarray dataset based on specified latitude, longitude, and time ranges. - - Parameters: - - ds (xr.Dataset): The input xarray dataset. - - lat (tuple, optional): Latitude range in the form of (min, max). - - lon (tuple, optional): Longitude range in the form of (min, max). - - time (tuple, optional): Time range. If time is specified, it should be in the form of (start_time, end_time), - where start_time and end_time can be integers, floats, or cftime.DatetimeProlepticGregorian objects. - - Returns: - - xr.Dataset: Subset of the input dataset based on the specified latitude, longitude, and time ranges. 
- - Example Usage: - ``` - import cftime - - # Define latitude, longitude, and time ranges - lat_tuple = (30, 50) # Latitude range - lon_tuple = (110, 130) # Longitude range - time_tuple = (cftime.datetime(1850, 1, 1, 0, 0, 0, 0), - cftime.datetime(1851, 12, 31, 23, 59, 59, 0)) # Time range - - # Load your xarray dataset (ds) here - - # Select subset based on specified ranges - ds_subset = select_subset(ds, lat=lat_tuple, lon=lon_tuple, time=time_tuple) - ``` - """ - sel_keys = {} - if lat is not None: - lat_key = xc.axis.get_dim_keys(ds, axis="Y") - sel_keys[lat_key] = slice(*lat) - if lon is not None: - lon_key = xc.axis.get_dim_keys(ds, axis="X") - sel_keys[lon_key] = slice(*lon) - if time is not None: - time_key = xc.axis.get_dim_keys(ds, axis="T") - sel_keys[time_key] = slice(*time) - - ds = ds.sel(**sel_keys) - return ds From e5eaecdf6c874bd3d548cfcab0bc7b5ea79416c4 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Thu, 4 Jan 2024 11:19:04 -0800 Subject: [PATCH 10/16] update --- .../io/{xarray_datasets_io.py => xcdat_xarray_dataset_io.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename pcmdi_metrics/io/{xarray_datasets_io.py => xcdat_xarray_dataset_io.py} (98%) diff --git a/pcmdi_metrics/io/xarray_datasets_io.py b/pcmdi_metrics/io/xcdat_xarray_dataset_io.py similarity index 98% rename from pcmdi_metrics/io/xarray_datasets_io.py rename to pcmdi_metrics/io/xcdat_xarray_dataset_io.py index 5cdae924a..e730bb780 100644 --- a/pcmdi_metrics/io/xarray_datasets_io.py +++ b/pcmdi_metrics/io/xcdat_xarray_dataset_io.py @@ -135,10 +135,10 @@ def select_subset( """ sel_keys = {} if lat is not None: - lat_key = get_lat_key(ds) + lat_key = get_lat_key(ds) sel_keys[lat_key] = slice(*lat) if lon is not None: - lon_key = get_lon_key(ds) + lon_key = get_lon_key(ds) sel_keys[lon_key] = slice(*lon) if time is not None: time_key = get_time_key(ds) From 20be92e26763ca5f1ac8c69764580c326b928c04 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Thu, 4 Jan 2024 11:21:00 -0800 Subject: [PATCH 11/16] update --- pcmdi_metrics/io/__init__.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pcmdi_metrics/io/__init__.py b/pcmdi_metrics/io/__init__.py index 58f574091..11ec769f8 100644 --- a/pcmdi_metrics/io/__init__.py +++ b/pcmdi_metrics/io/__init__.py @@ -4,4 +4,19 @@ from .base import MV2Json # noqa from .default_regions_define import load_regions_specs # noqa from .default_regions_define import region_subset # noqa -from .select_subset import select_subset # noqa +from .xcdat_xarray_dataset_io import ( # noqa + get_axis_list, + get_lat_bounds_key, + get_lat_key, + get_latitude, + get_latitude_bounds, + get_lon_bounds_key, + get_lon_key, + get_longitude, + get_longitude_bounds, + get_time, + get_time_bound, + get_time_bounds_key, + get_time_key, + select_subset, +) From 427d95409cba1ccf4cf4a424936c079c9471192e Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Thu, 4 Jan 2024 11:49:27 -0800 Subject: [PATCH 12/16] edit function names to be more consistent --- pcmdi_metrics/io/__init__.py | 10 ++++----- pcmdi_metrics/io/xcdat_xarray_dataset_io.py | 24 ++++++++++----------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/pcmdi_metrics/io/__init__.py b/pcmdi_metrics/io/__init__.py index 11ec769f8..7a9768fe6 100644 --- a/pcmdi_metrics/io/__init__.py +++ b/pcmdi_metrics/io/__init__.py @@ -6,16 +6,16 @@ from .default_regions_define import region_subset # noqa from .xcdat_xarray_dataset_io import ( # noqa get_axis_list, - get_lat_bounds_key, - get_lat_key, + 
get_latitude_bounds_key, + get_latitude_key, get_latitude, get_latitude_bounds, - get_lon_bounds_key, - get_lon_key, + get_longitude_bounds_key, + get_longitude_key, get_longitude, get_longitude_bounds, get_time, - get_time_bound, + get_time_bounds, get_time_bounds_key, get_time_key, select_subset, diff --git a/pcmdi_metrics/io/xcdat_xarray_dataset_io.py b/pcmdi_metrics/io/xcdat_xarray_dataset_io.py index e730bb780..0953b97f4 100644 --- a/pcmdi_metrics/io/xcdat_xarray_dataset_io.py +++ b/pcmdi_metrics/io/xcdat_xarray_dataset_io.py @@ -20,7 +20,7 @@ def get_time_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: return time_key -def get_lat_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: +def get_latitude_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: try: lat_key = xc.get_dim_keys(ds, "Y") except Exception: @@ -29,7 +29,7 @@ def get_lat_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: return lat_key -def get_lon_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: +def get_longitude_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: try: lon_key = xc.get_dim_keys(ds, "X") except Exception: @@ -46,13 +46,13 @@ def get_time_bounds_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: return ds[lat_key].attrs["bounds"] -def get_lat_bounds_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: - lat_key = get_lat_key(ds) +def get_latitude_bounds_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: + lat_key = get_latitude_key(ds) return ds[lat_key].attrs["bounds"] -def get_lon_bounds_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: - lon_key = get_lon_key(ds) +def get_longitude_bounds_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: + lon_key = get_longitude_key(ds) return ds[lon_key].attrs["bounds"] @@ -66,13 +66,13 @@ def get_time(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray: def get_longitude(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray: - lon_key = get_lon_key(ds) + lon_key = get_longitude_key(ds) lon = ds[lon_key] return lon def get_latitude(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray: - lat_key = get_lat_key(ds) + lat_key = get_latitude_key(ds) lat = ds[lat_key] return lat @@ -87,13 +87,13 @@ def get_time_bounds(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray: def get_longitude_bounds(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray: - lon_bounds_key = get_lon_bounds_key(ds) + lon_bounds_key = get_longitude_bounds_key(ds) lon_bounds = ds[lon_bounds_key] return lon_bounds def get_latitude_bounds(ds: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray: - lat_bounds_key = get_lat_bounds_key(ds) + lat_bounds_key = get_latitude_bounds_key(ds) lat_bounds = ds[lat_bounds_key] return lat_bounds @@ -135,10 +135,10 @@ def select_subset( """ sel_keys = {} if lat is not None: - lat_key = get_lat_key(ds) + lat_key = get_latitude_key(ds) sel_keys[lat_key] = slice(*lat) if lon is not None: - lon_key = get_lon_key(ds) + lon_key = get_longitude_key(ds) sel_keys[lon_key] = slice(*lon) if time is not None: time_key = get_time_key(ds) From 932b33d3d802faedfab7b5a11d301f124936c637 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Thu, 4 Jan 2024 11:56:30 -0800 Subject: [PATCH 13/16] typo fix --- pcmdi_metrics/stats/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcmdi_metrics/stats/__init__.py b/pcmdi_metrics/stats/__init__.py index 78dc1dd54..a9244dfdd 100644 --- a/pcmdi_metrics/stats/__init__.py +++ b/pcmdi_metrics/stats/__init__.py @@ -1,4 +1,4 @@ -from .statistics import ( +from .compute_statistics import ( annual_mean, bias_xy, bias_xyt, From 
e25b49ec8c3f1527be9d78de3a10aaf6cfb18e55 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Thu, 4 Jan 2024 13:16:20 -0800 Subject: [PATCH 14/16] update --- pcmdi_metrics/io/__init__.py | 2 +- ...rray_dataset_io.py => xcdat_dataset_io.py} | 51 ++++++++++++------- 2 files changed, 34 insertions(+), 19 deletions(-) rename pcmdi_metrics/io/{xcdat_xarray_dataset_io.py => xcdat_dataset_io.py} (80%) diff --git a/pcmdi_metrics/io/__init__.py b/pcmdi_metrics/io/__init__.py index 7a9768fe6..6ffb95084 100644 --- a/pcmdi_metrics/io/__init__.py +++ b/pcmdi_metrics/io/__init__.py @@ -4,7 +4,7 @@ from .base import MV2Json # noqa from .default_regions_define import load_regions_specs # noqa from .default_regions_define import region_subset # noqa -from .xcdat_xarray_dataset_io import ( # noqa +from .xcdat_dataset_io import ( # noqa get_axis_list, get_latitude_bounds_key, get_latitude_key, diff --git a/pcmdi_metrics/io/xcdat_xarray_dataset_io.py b/pcmdi_metrics/io/xcdat_dataset_io.py similarity index 80% rename from pcmdi_metrics/io/xcdat_xarray_dataset_io.py rename to pcmdi_metrics/io/xcdat_dataset_io.py index 0953b97f4..570a1f66a 100644 --- a/pcmdi_metrics/io/xcdat_xarray_dataset_io.py +++ b/pcmdi_metrics/io/xcdat_dataset_io.py @@ -3,6 +3,26 @@ import xarray as xr import xcdat as xc +# Internal function + + +def _find_key( + ds: Union[xr.Dataset, xr.DataArray], axis: str, potential_names: list +) -> str: + try: + key = xc.get_dim_keys(ds, axis) + except Exception: + axes = get_axis_list(ds) + key_candidates = [k for k in axes if k.lower() in potential_names] + if len(key_candidates) > 0: + key = key_candidates[0] + else: + key_candidates = [k for k in axes if k.lower() in potential_names] + if len(key_candidates) > 0: + key = key_candidates[0] + return key + + # Retrieve coordinate key names @@ -11,31 +31,26 @@ def get_axis_list(ds: Union[xr.Dataset, xr.DataArray]) -> list[str]: return axes +def get_data_list(ds: Union[xr.Dataset, xr.DataArray]) -> list[str]: + return list(ds.data_vars.keys()) + + def get_time_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: - try: - time_key = xc.get_dim_keys(ds, "T") - except Exception: - axes = get_axis_list(ds) - time_key = [k for k in axes if k.lower() in ["time"]][0] - return time_key + axis = "T" + potential_names = ["time", "t"] + return _find_key(ds, axis, potential_names) def get_latitude_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: - try: - lat_key = xc.get_dim_keys(ds, "Y") - except Exception: - axes = get_axis_list(ds) - lat_key = [k for k in axes if k.lower() in ["lat", "latitude"]][0] - return lat_key + axis = "Y" + potential_names = ["lat", "latitude"] + return _find_key(ds, axis, potential_names) def get_longitude_key(ds: Union[xr.Dataset, xr.DataArray]) -> str: - try: - lon_key = xc.get_dim_keys(ds, "X") - except Exception: - axes = get_axis_list(ds) - lon_key = [k for k in axes if k.lower() in ["lon", "longitude"]][0] - return lon_key + axis = "X" + potential_names = ["lon", "longitude"] + return _find_key(ds, axis, potential_names) # Retrieve bounds key names From 71365b15fa438b6d29109780dbe7205a6a318c08 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Thu, 4 Jan 2024 13:16:44 -0800 Subject: [PATCH 15/16] update --- pcmdi_metrics/io/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pcmdi_metrics/io/__init__.py b/pcmdi_metrics/io/__init__.py index 6ffb95084..a81b7b8b6 100644 --- a/pcmdi_metrics/io/__init__.py +++ b/pcmdi_metrics/io/__init__.py @@ -6,6 +6,7 @@ from .default_regions_define import region_subset # noqa from .xcdat_dataset_io 
import ( # noqa get_axis_list, + get_data_list, get_latitude_bounds_key, get_latitude_key, get_latitude, From d0c98dcbd5e399aab7669f4b68ff816aaecf68da Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Thu, 4 Jan 2024 13:30:19 -0800 Subject: [PATCH 16/16] update --- pcmdi_metrics/io/xcdat_dataset_io.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pcmdi_metrics/io/xcdat_dataset_io.py b/pcmdi_metrics/io/xcdat_dataset_io.py index 570a1f66a..72af41e4e 100644 --- a/pcmdi_metrics/io/xcdat_dataset_io.py +++ b/pcmdi_metrics/io/xcdat_dataset_io.py @@ -11,15 +11,21 @@ def _find_key( ) -> str: try: key = xc.get_dim_keys(ds, axis) - except Exception: + except Exception as e: axes = get_axis_list(ds) key_candidates = [k for k in axes if k.lower() in potential_names] if len(key_candidates) > 0: key = key_candidates[0] else: - key_candidates = [k for k in axes if k.lower() in potential_names] + data_keys = get_data_list(ds) + key_candidates = [k for k in data_keys if k.lower() in potential_names] if len(key_candidates) > 0: key = key_candidates[0] + else: + all_keys = ", ".join(axes + data_keys) + print( + f"Error: Cannot find a proper key name for {axis} from keys:{all_keys} {e}" + ) return key
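Patches 06–16 add axis-aware helpers to `pcmdi_metrics.io` (coordinate-key lookup, coordinate and bounds retrieval, and `select_subset`). The sketch below illustrates the intended usage; the input path is a placeholder, and the helpers fall back to common coordinate names (`lat`/`latitude`, `lon`/`longitude`, `time`) when xcdat cannot identify the axis from metadata.

```python
# Sketch only -- not part of the patch series. The file path is hypothetical.
import xcdat as xc

from pcmdi_metrics.io import (
    get_latitude_key,
    get_longitude_key,
    get_time_key,
    select_subset,
)

ds = xc.open_dataset("model_output.nc")  # hypothetical path

# Resolve coordinate names whether the dataset uses "lat"/"lon" or
# "latitude"/"longitude".
print(get_time_key(ds), get_latitude_key(ds), get_longitude_key(ds))

# Subset to 30-50N, 110-130E; a (start, end) tuple of cftime datetimes can be
# passed as time=... in the same way, per the select_subset docstring.
ds_subset = select_subset(ds, lat=(30, 50), lon=(110, 130))
```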