Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/lee1043 mov modularize #1062

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
move timeseries adjustment in a new separate file
  • Loading branch information
lee1043 committed Feb 26, 2024
commit cf6fb8a6bc8321475eefab0ab8c1aaf5bc9ecbd0
8 changes: 5 additions & 3 deletions pcmdi_metrics/variability_mode/lib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
from .adjust_timeseries import ( # noqa
adjust_timeseries,
get_anomaly_timeseries,
get_residual_timeseries,
)
from .argparse_functions import ( # noqa
AddParserArgument,
VariabilityModeCheck,
Expand All @@ -9,13 +14,10 @@
)
from .dict_merge import dict_merge # noqa
from .eof_analysis import ( # noqa
adjust_timeseries,
arbitrary_checking,
eof_analysis_get_variance_mode,
gain_pcs_fraction,
gain_pseudo_pcs,
get_anomaly_timeseries,
get_residual_timeseries,
linear_regression,
linear_regression_on_globe_for_teleconnection,
)
Expand Down
147 changes: 147 additions & 0 deletions pcmdi_metrics/variability_mode/lib/adjust_timeseries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import xarray as xr

from pcmdi_metrics.io import (
get_latitude_bounds,
get_latitude_bounds_key,
get_longitude_bounds,
get_longitude_bounds_key,
get_time_key,
region_subset,
select_subset,
)


def adjust_timeseries(
ds: xr.Dataset,
data_var: str,
mode: str,
season: str,
regions_specs: dict = None,
RmDomainMean: bool = True,
) -> xr.Dataset:
"""
Remove annual cycle (for all modes) and get its seasonal mean time series if
needed. Then calculate residual by subtraction domain (or global) average.
Input
- ds: array (t, y, x)
Output
- timeseries_season: array (t, y, x)
"""
if not isinstance(ds, xr.Dataset):
raise TypeError(
"The first parameter of adjust_timeseries must be an xarray Dataset"
)
# Reomove annual cycle (for all modes) and get its seasonal mean time series if needed
ds_anomaly = get_anomaly_timeseries(ds, data_var, season)
# Calculate residual by subtracting domain (or global) average
ds_residual = get_residual_timeseries(
ds_anomaly, data_var, mode, regions_specs, RmDomainMean=RmDomainMean
)
# return result
return ds_residual


def get_anomaly_timeseries(ds: xr.Dataset, data_var: str, season: str) -> xr.Dataset:
"""
Get anomaly time series by removing annual cycle
Input
- timeseries: variable
- season: string
Output
- timeseries_ano: variable
"""
if not isinstance(ds, xr.Dataset):
raise TypeError(
"The first parameter of get_anomaly_timeseries must be an xarray Dataset"
)
# Get anomaly field by removing annual cycle
ds_anomaly = ds.temporal.departures(data_var, freq="month", weighted=True)
# Get temporal average if needed
if season == "yearly":
# yearly time series
ds_anomaly = ds_anomaly.temporal.group_average(
data_var, freq="year", weighted=True
)
# restore bounds (especially time bounds)
ds_anomaly = ds_anomaly.bounds.add_missing_bounds()
# get overall average
ds_ave = ds_anomaly.temporal.average(data_var)
# anomaly
ds_anomaly[data_var] = ds_anomaly[data_var] - ds_ave[data_var]
elif season.upper() in ["DJF", "MAM", "JJA", "SON"]:
ds_anomaly_all_seasons = ds_anomaly.temporal.departures(
data_var,
freq="season",
weighted=True,
season_config={"dec_mode": "DJF", "drop_incomplete_djf": True},
)
ds_anomaly = select_by_season(ds_anomaly_all_seasons, season)
# return result
return ds_anomaly


def select_by_season(ds: xr.Dataset, season: str) -> xr.Dataset:
time_key = get_time_key(ds)
ds_subset = ds.where(ds[time_key].dt.season == season, drop=True)
# Preserve original spatial bounds info
# Extract original bounds
lat_bnds_key = get_latitude_bounds_key(ds)
lon_bnds_key = get_longitude_bounds_key(ds)
# Assign back to the new dataset
ds_subset[lat_bnds_key] = get_latitude_bounds(ds)
ds_subset[lon_bnds_key] = get_longitude_bounds(ds)
return ds_subset


def get_residual_timeseries(
ds_anomaly: xr.Dataset,
data_var: str,
mode: str,
regions_specs: dict = None,
RmDomainMean: bool = True,
) -> xr.Dataset:
"""
Calculate residual by subtracting domain average (or global mean)
Input
- ds_anomaly: anomaly time series, array, 3d (t, y, x)
- mode: string, mode name, must be defined in regions_specs
- RmDomainMean: bool (True or False).
If True, remove domain mean of each time step.
Ref:
Bonfils and Santer (2011)
https://doi.org/10.1007/s00382-010-0920-1
Bonfils et al. (2015)
https://doi.org/10.1175/JCLI-D-15-0341.1
If False, remove global mean of each time step for PDO, or
do nothing for other modes
Default is True for this function.
- region_subdomain: lat lon range of sub domain for given mode, which was
extracted from regions_specs -- that is a dict contains domain
lat lon ragne for given mode
Output
- ds_residual: array, 3d (t, y, x)
"""
if not isinstance(ds_anomaly, xr.Dataset):
raise TypeError(
"The first parameter of get_residual_timeseries must be an xarray Dataset"
)
ds_residual = ds_anomaly.copy()
if RmDomainMean:
# Get domain mean
ds_anomaly_region = region_subset(
ds_anomaly, mode, data_var=data_var, regions_specs=regions_specs
)
ds_anomaly_mean = ds_anomaly_region.spatial.average(data_var)
# Subtract domain mean
ds_residual[data_var] = ds_anomaly[data_var] - ds_anomaly_mean[data_var]
else:
if mode in ["PDO", "NPGO", "AMO"]:
# Get global mean (latitude -60 to 70)
ds_anomaly_subset = select_subset(ds_anomaly, lat=(-60, 70))
ds_anomaly_subset_mean = ds_anomaly_subset.spatial.average(data_var)
# Subtract global mean
ds_residual[data_var] = (
ds_anomaly[data_var] - ds_anomaly_subset_mean[data_var]
)
# return result
return ds_residual
142 changes: 0 additions & 142 deletions pcmdi_metrics/variability_mode/lib/eof_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,10 @@

from pcmdi_metrics.io import (
get_latitude,
get_latitude_bounds,
get_latitude_bounds_key,
get_latitude_key,
get_longitude,
get_longitude_bounds,
get_longitude_bounds_key,
get_longitude_key,
get_time_key,
region_subset,
select_subset,
)
from pcmdi_metrics.utils import calculate_area_weights, calculate_grid_area

Expand Down Expand Up @@ -353,142 +347,6 @@ def gain_pcs_fraction(
return fraction


def adjust_timeseries(
ds: xr.Dataset,
data_var: str,
mode: str,
season: str,
regions_specs: dict = None,
RmDomainMean: bool = True,
) -> xr.Dataset:
"""
Remove annual cycle (for all modes) and get its seasonal mean time series if
needed. Then calculate residual by subtraction domain (or global) average.
Input
- ds: array (t, y, x)
Output
- timeseries_season: array (t, y, x)
"""
if not isinstance(ds, xr.Dataset):
raise TypeError(
"The first parameter of adjust_timeseries must be an xarray Dataset"
)
# Reomove annual cycle (for all modes) and get its seasonal mean time series if needed
ds_anomaly = get_anomaly_timeseries(ds, data_var, season)
# Calculate residual by subtracting domain (or global) average
ds_residual = get_residual_timeseries(
ds_anomaly, data_var, mode, regions_specs, RmDomainMean=RmDomainMean
)
# return result
return ds_residual


def get_anomaly_timeseries(ds: xr.Dataset, data_var: str, season: str) -> xr.Dataset:
"""
Get anomaly time series by removing annual cycle
Input
- timeseries: variable
- season: string
Output
- timeseries_ano: variable
"""
if not isinstance(ds, xr.Dataset):
raise TypeError(
"The first parameter of get_anomaly_timeseries must be an xarray Dataset"
)
# Get anomaly field by removing annual cycle
ds_anomaly = ds.temporal.departures(data_var, freq="month", weighted=True)
# Get temporal average if needed
if season == "yearly":
# yearly time series
ds_anomaly = ds_anomaly.temporal.group_average(
data_var, freq="year", weighted=True
)
# restore bounds (especially time bounds)
ds_anomaly = ds_anomaly.bounds.add_missing_bounds()
# get overall average
ds_ave = ds_anomaly.temporal.average(data_var)
# anomaly
ds_anomaly[data_var] = ds_anomaly[data_var] - ds_ave[data_var]
elif season.upper() in ["DJF", "MAM", "JJA", "SON"]:
ds_anomaly_all_seasons = ds_anomaly.temporal.departures(
data_var,
freq="season",
weighted=True,
season_config={"dec_mode": "DJF", "drop_incomplete_djf": True},
)
ds_anomaly = select_by_season(ds_anomaly_all_seasons, season)
# return result
return ds_anomaly


def select_by_season(ds: xr.Dataset, season: str) -> xr.Dataset:
time_key = get_time_key(ds)
ds_subset = ds.where(ds[time_key].dt.season == season, drop=True)
# Preserve original spatial bounds info
# Extract original bounds
lat_bnds_key = get_latitude_bounds_key(ds)
lon_bnds_key = get_longitude_bounds_key(ds)
# Assign back to the new dataset
ds_subset[lat_bnds_key] = get_latitude_bounds(ds)
ds_subset[lon_bnds_key] = get_longitude_bounds(ds)
return ds_subset


def get_residual_timeseries(
ds_anomaly: xr.Dataset,
data_var: str,
mode: str,
regions_specs: dict = None,
RmDomainMean: bool = True,
) -> xr.Dataset:
"""
Calculate residual by subtracting domain average (or global mean)
Input
- ds_anomaly: anomaly time series, array, 3d (t, y, x)
- mode: string, mode name, must be defined in regions_specs
- RmDomainMean: bool (True or False).
If True, remove domain mean of each time step.
Ref:
Bonfils and Santer (2011)
https://doi.org/10.1007/s00382-010-0920-1
Bonfils et al. (2015)
https://doi.org/10.1175/JCLI-D-15-0341.1
If False, remove global mean of each time step for PDO, or
do nothing for other modes
Default is True for this function.
- region_subdomain: lat lon range of sub domain for given mode, which was
extracted from regions_specs -- that is a dict contains domain
lat lon ragne for given mode
Output
- ds_residual: array, 3d (t, y, x)
"""
if not isinstance(ds_anomaly, xr.Dataset):
raise TypeError(
"The first parameter of get_residual_timeseries must be an xarray Dataset"
)
ds_residual = ds_anomaly.copy()
if RmDomainMean:
# Get domain mean
ds_anomaly_region = region_subset(
ds_anomaly, mode, data_var=data_var, regions_specs=regions_specs
)
ds_anomaly_mean = ds_anomaly_region.spatial.average(data_var)
# Subtract domain mean
ds_residual[data_var] = ds_anomaly[data_var] - ds_anomaly_mean[data_var]
else:
if mode in ["PDO", "NPGO", "AMO"]:
# Get global mean (latitude -60 to 70)
ds_anomaly_subset = select_subset(ds_anomaly, lat=(-60, 70))
ds_anomaly_subset_mean = ds_anomaly_subset.spatial.average(data_var)
# Subtract global mean
ds_residual[data_var] = (
ds_anomaly[data_var] - ds_anomaly_subset_mean[data_var]
)
# return result
return ds_residual


def debug_print(string, debug):
if debug:
nowtime = strftime("%Y-%m-%d %H:%M:%S", gmtime())
Expand Down
Loading