From 65b69c45415c4fab2d4891b8f4fffcdaf8ef3f32 Mon Sep 17 00:00:00 2001 From: lee1043 Date: Mon, 26 Feb 2024 12:02:52 -0800 Subject: [PATCH 1/2] bug fix --- pcmdi_metrics/variability_mode/lib/lib_variability_mode.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pcmdi_metrics/variability_mode/lib/lib_variability_mode.py b/pcmdi_metrics/variability_mode/lib/lib_variability_mode.py index 2a7b40b2b..1d05f61a8 100644 --- a/pcmdi_metrics/variability_mode/lib/lib_variability_mode.py +++ b/pcmdi_metrics/variability_mode/lib/lib_variability_mode.py @@ -138,8 +138,8 @@ def subset_time( if not isinstance(eyear, int): eyear = int(eyear) - time1 = cftime.datetime(syear, 1, 1, 0, 0, 0, 0) - time2 = cftime.datetime(eyear, 12, eday, 23, 59, 59, 0) + time1 = f"{syear}-01-01 00:00:00" + time2 = f"{eyear}-12-{eday} 23:59:59" time_tuple = (time1, time2) # First trimming @@ -237,6 +237,9 @@ def pick_year_last_day(ds): if "calendar" in ds[time_key].attrs.keys(): if "360" in ds[time_key]["calendar"]: eday = 30 + else: + if "360" in ds[time_key][0].values.item().calendar: + eday = 30 except Exception: pass return eday From cf6fb8a6bc8321475eefab0ab8c1aaf5bc9ecbd0 Mon Sep 17 00:00:00 2001 From: lee1043 Date: Mon, 26 Feb 2024 12:26:59 -0800 Subject: [PATCH 2/2] move timeseries adjustment in a new separate file --- .../variability_mode/lib/__init__.py | 8 +- .../variability_mode/lib/adjust_timeseries.py | 147 ++++++++++++++++++ .../variability_mode/lib/eof_analysis.py | 142 ----------------- 3 files changed, 152 insertions(+), 145 deletions(-) create mode 100644 pcmdi_metrics/variability_mode/lib/adjust_timeseries.py diff --git a/pcmdi_metrics/variability_mode/lib/__init__.py b/pcmdi_metrics/variability_mode/lib/__init__.py index 2261226f7..0ddd9f843 100644 --- a/pcmdi_metrics/variability_mode/lib/__init__.py +++ b/pcmdi_metrics/variability_mode/lib/__init__.py @@ -1,3 +1,8 @@ +from .adjust_timeseries import ( # noqa + adjust_timeseries, + get_anomaly_timeseries, + get_residual_timeseries, +) from .argparse_functions import ( # noqa AddParserArgument, VariabilityModeCheck, @@ -9,13 +14,10 @@ ) from .dict_merge import dict_merge # noqa from .eof_analysis import ( # noqa - adjust_timeseries, arbitrary_checking, eof_analysis_get_variance_mode, gain_pcs_fraction, gain_pseudo_pcs, - get_anomaly_timeseries, - get_residual_timeseries, linear_regression, linear_regression_on_globe_for_teleconnection, ) diff --git a/pcmdi_metrics/variability_mode/lib/adjust_timeseries.py b/pcmdi_metrics/variability_mode/lib/adjust_timeseries.py new file mode 100644 index 000000000..50da9a77b --- /dev/null +++ b/pcmdi_metrics/variability_mode/lib/adjust_timeseries.py @@ -0,0 +1,147 @@ +import xarray as xr + +from pcmdi_metrics.io import ( + get_latitude_bounds, + get_latitude_bounds_key, + get_longitude_bounds, + get_longitude_bounds_key, + get_time_key, + region_subset, + select_subset, +) + + +def adjust_timeseries( + ds: xr.Dataset, + data_var: str, + mode: str, + season: str, + regions_specs: dict = None, + RmDomainMean: bool = True, +) -> xr.Dataset: + """ + Remove annual cycle (for all modes) and get its seasonal mean time series if + needed. Then calculate residual by subtraction domain (or global) average. + Input + - ds: array (t, y, x) + Output + - timeseries_season: array (t, y, x) + """ + if not isinstance(ds, xr.Dataset): + raise TypeError( + "The first parameter of adjust_timeseries must be an xarray Dataset" + ) + # Reomove annual cycle (for all modes) and get its seasonal mean time series if needed + ds_anomaly = get_anomaly_timeseries(ds, data_var, season) + # Calculate residual by subtracting domain (or global) average + ds_residual = get_residual_timeseries( + ds_anomaly, data_var, mode, regions_specs, RmDomainMean=RmDomainMean + ) + # return result + return ds_residual + + +def get_anomaly_timeseries(ds: xr.Dataset, data_var: str, season: str) -> xr.Dataset: + """ + Get anomaly time series by removing annual cycle + Input + - timeseries: variable + - season: string + Output + - timeseries_ano: variable + """ + if not isinstance(ds, xr.Dataset): + raise TypeError( + "The first parameter of get_anomaly_timeseries must be an xarray Dataset" + ) + # Get anomaly field by removing annual cycle + ds_anomaly = ds.temporal.departures(data_var, freq="month", weighted=True) + # Get temporal average if needed + if season == "yearly": + # yearly time series + ds_anomaly = ds_anomaly.temporal.group_average( + data_var, freq="year", weighted=True + ) + # restore bounds (especially time bounds) + ds_anomaly = ds_anomaly.bounds.add_missing_bounds() + # get overall average + ds_ave = ds_anomaly.temporal.average(data_var) + # anomaly + ds_anomaly[data_var] = ds_anomaly[data_var] - ds_ave[data_var] + elif season.upper() in ["DJF", "MAM", "JJA", "SON"]: + ds_anomaly_all_seasons = ds_anomaly.temporal.departures( + data_var, + freq="season", + weighted=True, + season_config={"dec_mode": "DJF", "drop_incomplete_djf": True}, + ) + ds_anomaly = select_by_season(ds_anomaly_all_seasons, season) + # return result + return ds_anomaly + + +def select_by_season(ds: xr.Dataset, season: str) -> xr.Dataset: + time_key = get_time_key(ds) + ds_subset = ds.where(ds[time_key].dt.season == season, drop=True) + # Preserve original spatial bounds info + # Extract original bounds + lat_bnds_key = get_latitude_bounds_key(ds) + lon_bnds_key = get_longitude_bounds_key(ds) + # Assign back to the new dataset + ds_subset[lat_bnds_key] = get_latitude_bounds(ds) + ds_subset[lon_bnds_key] = get_longitude_bounds(ds) + return ds_subset + + +def get_residual_timeseries( + ds_anomaly: xr.Dataset, + data_var: str, + mode: str, + regions_specs: dict = None, + RmDomainMean: bool = True, +) -> xr.Dataset: + """ + Calculate residual by subtracting domain average (or global mean) + Input + - ds_anomaly: anomaly time series, array, 3d (t, y, x) + - mode: string, mode name, must be defined in regions_specs + - RmDomainMean: bool (True or False). + If True, remove domain mean of each time step. + Ref: + Bonfils and Santer (2011) + https://doi.org/10.1007/s00382-010-0920-1 + Bonfils et al. (2015) + https://doi.org/10.1175/JCLI-D-15-0341.1 + If False, remove global mean of each time step for PDO, or + do nothing for other modes + Default is True for this function. + - region_subdomain: lat lon range of sub domain for given mode, which was + extracted from regions_specs -- that is a dict contains domain + lat lon ragne for given mode + Output + - ds_residual: array, 3d (t, y, x) + """ + if not isinstance(ds_anomaly, xr.Dataset): + raise TypeError( + "The first parameter of get_residual_timeseries must be an xarray Dataset" + ) + ds_residual = ds_anomaly.copy() + if RmDomainMean: + # Get domain mean + ds_anomaly_region = region_subset( + ds_anomaly, mode, data_var=data_var, regions_specs=regions_specs + ) + ds_anomaly_mean = ds_anomaly_region.spatial.average(data_var) + # Subtract domain mean + ds_residual[data_var] = ds_anomaly[data_var] - ds_anomaly_mean[data_var] + else: + if mode in ["PDO", "NPGO", "AMO"]: + # Get global mean (latitude -60 to 70) + ds_anomaly_subset = select_subset(ds_anomaly, lat=(-60, 70)) + ds_anomaly_subset_mean = ds_anomaly_subset.spatial.average(data_var) + # Subtract global mean + ds_residual[data_var] = ( + ds_anomaly[data_var] - ds_anomaly_subset_mean[data_var] + ) + # return result + return ds_residual diff --git a/pcmdi_metrics/variability_mode/lib/eof_analysis.py b/pcmdi_metrics/variability_mode/lib/eof_analysis.py index ccd8124f0..c35cad3e3 100644 --- a/pcmdi_metrics/variability_mode/lib/eof_analysis.py +++ b/pcmdi_metrics/variability_mode/lib/eof_analysis.py @@ -6,16 +6,10 @@ from pcmdi_metrics.io import ( get_latitude, - get_latitude_bounds, - get_latitude_bounds_key, get_latitude_key, get_longitude, - get_longitude_bounds, - get_longitude_bounds_key, get_longitude_key, get_time_key, - region_subset, - select_subset, ) from pcmdi_metrics.utils import calculate_area_weights, calculate_grid_area @@ -353,142 +347,6 @@ def gain_pcs_fraction( return fraction -def adjust_timeseries( - ds: xr.Dataset, - data_var: str, - mode: str, - season: str, - regions_specs: dict = None, - RmDomainMean: bool = True, -) -> xr.Dataset: - """ - Remove annual cycle (for all modes) and get its seasonal mean time series if - needed. Then calculate residual by subtraction domain (or global) average. - Input - - ds: array (t, y, x) - Output - - timeseries_season: array (t, y, x) - """ - if not isinstance(ds, xr.Dataset): - raise TypeError( - "The first parameter of adjust_timeseries must be an xarray Dataset" - ) - # Reomove annual cycle (for all modes) and get its seasonal mean time series if needed - ds_anomaly = get_anomaly_timeseries(ds, data_var, season) - # Calculate residual by subtracting domain (or global) average - ds_residual = get_residual_timeseries( - ds_anomaly, data_var, mode, regions_specs, RmDomainMean=RmDomainMean - ) - # return result - return ds_residual - - -def get_anomaly_timeseries(ds: xr.Dataset, data_var: str, season: str) -> xr.Dataset: - """ - Get anomaly time series by removing annual cycle - Input - - timeseries: variable - - season: string - Output - - timeseries_ano: variable - """ - if not isinstance(ds, xr.Dataset): - raise TypeError( - "The first parameter of get_anomaly_timeseries must be an xarray Dataset" - ) - # Get anomaly field by removing annual cycle - ds_anomaly = ds.temporal.departures(data_var, freq="month", weighted=True) - # Get temporal average if needed - if season == "yearly": - # yearly time series - ds_anomaly = ds_anomaly.temporal.group_average( - data_var, freq="year", weighted=True - ) - # restore bounds (especially time bounds) - ds_anomaly = ds_anomaly.bounds.add_missing_bounds() - # get overall average - ds_ave = ds_anomaly.temporal.average(data_var) - # anomaly - ds_anomaly[data_var] = ds_anomaly[data_var] - ds_ave[data_var] - elif season.upper() in ["DJF", "MAM", "JJA", "SON"]: - ds_anomaly_all_seasons = ds_anomaly.temporal.departures( - data_var, - freq="season", - weighted=True, - season_config={"dec_mode": "DJF", "drop_incomplete_djf": True}, - ) - ds_anomaly = select_by_season(ds_anomaly_all_seasons, season) - # return result - return ds_anomaly - - -def select_by_season(ds: xr.Dataset, season: str) -> xr.Dataset: - time_key = get_time_key(ds) - ds_subset = ds.where(ds[time_key].dt.season == season, drop=True) - # Preserve original spatial bounds info - # Extract original bounds - lat_bnds_key = get_latitude_bounds_key(ds) - lon_bnds_key = get_longitude_bounds_key(ds) - # Assign back to the new dataset - ds_subset[lat_bnds_key] = get_latitude_bounds(ds) - ds_subset[lon_bnds_key] = get_longitude_bounds(ds) - return ds_subset - - -def get_residual_timeseries( - ds_anomaly: xr.Dataset, - data_var: str, - mode: str, - regions_specs: dict = None, - RmDomainMean: bool = True, -) -> xr.Dataset: - """ - Calculate residual by subtracting domain average (or global mean) - Input - - ds_anomaly: anomaly time series, array, 3d (t, y, x) - - mode: string, mode name, must be defined in regions_specs - - RmDomainMean: bool (True or False). - If True, remove domain mean of each time step. - Ref: - Bonfils and Santer (2011) - https://doi.org/10.1007/s00382-010-0920-1 - Bonfils et al. (2015) - https://doi.org/10.1175/JCLI-D-15-0341.1 - If False, remove global mean of each time step for PDO, or - do nothing for other modes - Default is True for this function. - - region_subdomain: lat lon range of sub domain for given mode, which was - extracted from regions_specs -- that is a dict contains domain - lat lon ragne for given mode - Output - - ds_residual: array, 3d (t, y, x) - """ - if not isinstance(ds_anomaly, xr.Dataset): - raise TypeError( - "The first parameter of get_residual_timeseries must be an xarray Dataset" - ) - ds_residual = ds_anomaly.copy() - if RmDomainMean: - # Get domain mean - ds_anomaly_region = region_subset( - ds_anomaly, mode, data_var=data_var, regions_specs=regions_specs - ) - ds_anomaly_mean = ds_anomaly_region.spatial.average(data_var) - # Subtract domain mean - ds_residual[data_var] = ds_anomaly[data_var] - ds_anomaly_mean[data_var] - else: - if mode in ["PDO", "NPGO", "AMO"]: - # Get global mean (latitude -60 to 70) - ds_anomaly_subset = select_subset(ds_anomaly, lat=(-60, 70)) - ds_anomaly_subset_mean = ds_anomaly_subset.spatial.average(data_var) - # Subtract global mean - ds_residual[data_var] = ( - ds_anomaly[data_var] - ds_anomaly_subset_mean[data_var] - ) - # return result - return ds_residual - - def debug_print(string, debug): if debug: nowtime = strftime("%Y-%m-%d %H:%M:%S", gmtime())