Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add figure preprocessing scripts to precip variability metrics #1069

Merged
merged 10 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion pcmdi_metrics/precip_variability/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Reference: Ahn, M.-S., P. J. Gleckler, J. Lee, A. G. Pendergrass, and C. Jakob, 2022: Benchmarking Simulated Precipitation Variability Amplitude across Timescales. Journal of Climate. https://doi.org/10.1175/JCLI-D-21-0542.1

Demo: [precipitation variability across timescales](https://github.com/PCMDI/pcmdi_metrics/blob/93c30ce485719ecf6a531a4ee47886160ebb73e4/doc/jupyter/Demo/Demo_7_precip_variability.ipynb)
Demo: [precipitation variability across timescales](https://github.com/PCMDI/pcmdi_metrics/blob/main/doc/jupyter/Demo/Demo_7_precip_variability.ipynb)

## Driver code:
- `variability_across_timescales_PS_driver.py`
Expand All @@ -19,3 +19,12 @@ Demo: [precipitation variability across timescales](https://github.com/PCMDI/pcm
- `scripts_pcmdi/`
- `run_obs.bash`
- `run_parallel.wait.bash`

## Figure scripts:

These scripts can be modified by the user to postprocess the output from `variability_across_timescales_PS_driver.py` as a step needed to create Figure 6. The `*_regional.py` scripts are used for the custom region output case, not the default region set.

- `scripts_pcmdi/`
- `calc_ps_area_freq_mean_regional.py`
- `calc_ps_area_mean_regional.py`
- `calc_ps_freq_mean_regional.py`
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import copy
import datetime
import json
import os
import pickle

import numpy as np


def prdday_to_frqidx(prdday, frequency, ntd):
"""
Find frequency index from input period
Input
- prdday: period (day)
- frequency: frequency
- ntd: number of time steps per day (daily: 1, 3-hourly: 8)
Output
- idx: frequency index
"""
frq = 1.0 / (float(prdday) * ntd)
idx = (np.abs(frequency - frq)).argmin()
return int(idx)


# --------------------
# User settings here
# --------------------
# "3hr" or "day"
hr = ""
# Input file name (pickle .pkl output from calc_ps_area.mean.py)
fname = ""
# Output directory (not including version)
outdir = ""

# -----------------------
# The rest of the script
# -----------------------
ver = datetime.datetime.now().strftime("v%Y%m%d")

if hr == "day":
acordonez marked this conversation as resolved.
Show resolved Hide resolved
frqs_forced = ["semi-annual", "annual"]
frqs_unforced = ["synoptic", "sub-seasonal", "seasonal-annual", "interannual"]
ntd = 1
elif hr == "3hr":
frqs_forced = ["semi-diurnal", "diurnal", "semi-annual", "annual"]
frqs_unforced = [
"sub-daily",
"synoptic",
"sub-seasonal",
"seasonal-annual",
"interannual",
]
ntd = 8

infile = open(fname, "rb")
psdm = pickle.load(infile)

psdmfm = copy.deepcopy(psdm)
for frc in psdm.keys():
if frc == "forced":
frqs = frqs_forced
elif frc == "unforced":
frqs = frqs_unforced
print(frc)
for mip in psdm[frc].keys():
print(mip)
for dat in psdm[frc][mip].keys():
frequency = np.array(psdm[frc][mip][dat]["freqs"])
del psdm[frc][mip][dat]["freqs"]
del psdmfm[frc][mip][dat]["freqs"]

for var in psdm[frc][mip][dat].keys():
print(dat, var)
for idm, dom in enumerate(psdm[frc][mip][dat][var].keys()):
am = np.array(psdm[frc][mip][dat][var][dom])
del psdmfm[frc][mip][dat][var][dom]
psdmfm[frc][mip][dat][var][dom] = {}
print(dom)
for frq in frqs:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@msahn In this loop, lines 74-130, I would like to double-check that all the time scales are being correctly sliced. If you could review this section I would greatly appreciate it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For semi-annual and annual, I use np.amax instead of np.nanmean to select maximum power within their range as below. This is to handle CMIP models that have different periods for semi-annual and annual cycles because they use different calendars (e.g., 360-day, 365-day, and Gregorian).

elif frq == "semi-annual": # 180day=<pr=<183day
idx2 = prdday_to_frqidx(180, frequency, ntd)
idx1 = prdday_to_frqidx(183, frequency, ntd)
amfm = np.amax(am[idx1 : idx2 + 1])
elif frq == "annual": # 360day=<pr=<366day
idx2 = prdday_to_frqidx(360, frequency, ntd)
idx1 = prdday_to_frqidx(366, frequency, ntd)
amfm = np.amax(am[idx1 : idx2 + 1])

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@msahn Thank you for pointing this out! I have another question about the sub-daily timescale indices. If I'm interpreting this correctly, are we getting the average of data at frequencies equal to or larger than the sub-daily frequency?

amfm = np.nanmean(am[idx1 + 1 :])

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the sub-daily timescale, we average frequencies larger than 1 day (pr<1day). The frequency of 1day is included in the synoptic timescale (1day=<pr<20day). This information is written as comments in the code.

print(frq)
if frq == "semi-diurnal": # pr=0.5day
idx = prdday_to_frqidx(0.5, frequency, ntd)
amfm = am[idx]
elif frq == "diurnal": # pr=1day
idx = prdday_to_frqidx(1, frequency, ntd)
amfm = am[idx]
if frq == "semi-annual": # 180day=<pr=<183day
idx2 = prdday_to_frqidx(180, frequency, ntd)
idx1 = prdday_to_frqidx(183, frequency, ntd)
amfm = np.amax(am[idx1 : idx2 + 1])
elif frq == "annual": # 360day=<pr=<366day
idx2 = prdday_to_frqidx(360, frequency, ntd)
idx1 = prdday_to_frqidx(366, frequency, ntd)
amfm = np.amax(am[idx1 : idx2 + 1])
elif frq == "sub-daily": # pr<1day
idx1 = prdday_to_frqidx(1, frequency, ntd)
amfm = np.nanmean(am[idx1 + 1 :])
elif frq == "synoptic": # 1day=<pr<20day
idx2 = prdday_to_frqidx(1, frequency, ntd)
idx1 = prdday_to_frqidx(20, frequency, ntd)
amfm = np.nanmean(am[idx1 + 1 : idx2 + 1])
elif frq == "sub-seasonal": # 20day=<pr<90day
idx2 = prdday_to_frqidx(20, frequency, ntd)
idx1 = prdday_to_frqidx(90, frequency, ntd)
amfm = np.nanmean(am[idx1 + 1 : idx2 + 1])
elif frq == "seasonal-annual": # 90day=<pr<365day
idx2 = prdday_to_frqidx(90, frequency, ntd)
idx1 = prdday_to_frqidx(365, frequency, ntd)
amfm = np.nanmean(am[idx1 + 1 : idx2 + 1])
elif frq == "interannual": # 365day=<pr
idx2 = prdday_to_frqidx(365, frequency, ntd)
amfm = np.nanmean(am[: idx2 + 1])

psdmfm[frc][mip][dat][var][dom][frq] = amfm.tolist()

res = os.path.basename(fname).split("_")[2].split(".")[1]

outdir = os.path.join(outdir, ver) # add version to outdir
if not (os.path.isdir(outdir)):
os.makedirs(outdir)

# Save as pickle for figure creation
outfile = open(
outdir + "/PS_pr.{0}_regrid.{1}_area.freq.mean.forced.pkl".format(hr, res), "wb"
)
pickle.dump(psdmfm, outfile)
outfile.close()

# Write to JSON file
outfile = open(
outdir + "/PS_pr.{0}_regrid.{1}_area.freq.mean.forced.json".format(hr, res), "w"
)
json.dump(psdmfm, outfile, sort_keys=True, indent=4, separators=(",", ": "))
outfile.close()
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import datetime
import glob
import os
import pickle

import xcdat as xc

# --------------------
# User settings here
# --------------------
# List of domain names
# e.g. ['CONUS']
domains = []
# Which ensembles are included
# e.g. ['cmip6','obs']
mips = []
# Input directories. Replace model, realization,
# and other variables with wildcard as needed
# Replace domain name references with '%(domain)'
model_root = ""
obs_root = ""
# Output directory (not including version)
outdir = ""

# -----------------------
# The rest of the script
# -----------------------
ver = datetime.datetime.now().strftime("v%Y%m%d")

frcs = ["forced", "unforced"]

vars = ["power", "rednoise", "sig95"]

psdm = {}

for ifc, frc in enumerate(frcs):
psdm[frc] = {}
for dom in domains:
for im, mip in enumerate(mips):
if mip not in psdm[frc]:
psdm[frc][mip] = {}
dir = "/"
if mip == "cmip6":
root = model_root.replace("%(domain)", dom)
elif mip == "obs":
root = obs_root.replace("%(domain)", dom)
print(root)
if frc == "forced":
file_list = sorted(
set(glob.glob(os.path.join(root, "PS*")))
- set(glob.glob(os.path.join(root, "PS*_unforced.nc")))
- set(glob.glob(os.path.join(root, "PS*.json")))
)
elif frc == "unforced":
file_list = sorted(
set(glob.glob(os.path.join(root, "PS*_unforced.nc")))
)

data = []
for file in file_list:
print("File (first loop): ", file)
if mip == "obs":
tmp = file.split("/")[-1].split("_")[3]
model = tmp.split(".")[0]
data.append(model)
else:
tmp = file.split("/")[-1].split("_")[3]
model = tmp.split(".")[0]
ens = tmp.split(".")[1]
data.append(model + "." + ens)
print(mip, "# of data:", len(data))
print("DATA: ", data)

for id, dat in enumerate(data):
# freqs = f[id]['freqs']
print("File (second loop): ", file_list[id])
frds = xc.open_dataset(file_list[id])
if dat not in psdm[frc][mip]:
psdm[frc][mip][dat] = {}
if "freqs" not in psdm[frc][mip][dat]:
psdm[frc][mip][dat]["freqs"] = list(frds["freqs"])
for iv, var in enumerate(vars):
print(frc, mip, dat, var)
if var not in psdm[frc][mip][dat]:
psdm[frc][mip][dat][var] = {}
am = frds.spatial.average(var, axis=["X", "Y"], weights="generate")[
var
]
psdm[frc][mip][dat][var][dom] = list(am.data)
frds.close()

res = os.path.basename(file_list[0]).split("_")[2].split(".")[1]
hr = os.path.basename(file_list[0]).split("_")[1].split(".")[1]

outdir = os.path.join(outdir, ver) # add version to outdir
if not (os.path.isdir(outdir)):
os.makedirs(outdir)

outfile = open(
os.path.join(outdir, "PS_pr.{0}_regrid.{1}_area.mean.pkl".format(hr, res)), "wb"
)
pickle.dump(psdm, outfile)
outfile.close()
Loading
Loading