PCMDI · lee1043 · Apr 16, 2023 · Mar 30, 2023 · Mar 30, 2023 · Mar 30, 2023
diff --git a/pcmdi_metrics/graphics/parallel_coordinate_plot/parallel_coordinate_plot_lib.py b/pcmdi_metrics/graphics/parallel_coordinate_plot/parallel_coordinate_plot_lib.py
@@ -37,8 +37,11 @@ def parallel_coordinate_plot(
  comparing_models=None,
  fill_between_lines=False,
  fill_between_lines_colors=("green", "red"),
- median_centered=False,
- median_line=False,
+ vertical_center=None,
+ vertical_center_line=False,
+ vertical_center_line_label=None,
+ ymax=None,
+ ymin=None,
 ):
  """
  Parameters
@@ -72,8 +75,11 @@ def parallel_coordinate_plot(
  - `comparing_models`: tuple or list containing two strings for models to compare with colors filled between the two lines.
  - `fill_between_lines`: bool, default=False, fill color between lines for models in comparing_models
  - `fill_between_lines_colors`: tuple or list containing two strings for colors filled between the two lines. Default=('green', 'red')
- - `median_centered`: bool, default=False, adjust range of vertical axis to set center of vertical axis as median
- - `median_line`: bool, default=False, show median as line
+ - `vertical_center`: string ("median", "mean")/float/integer, default=None, adjust range of vertical axis to set center of vertical axis as median, mean, or given number
+ - `vertical_center_line`: bool, default=False, show median as line
+ - `vertical_center_line_label`: str, default=None, label in legend for the horizontal vertical center line. If not given, it will be automatically assigned. It can be turned off by "off"
+ - `ymax`: int or float, default=None, specify value of vertical axis top
+ - `ymin`: int or float, default=None, specify value of vertical axis bottom
 
  Return
  ------
@@ -82,9 +88,10 @@ def parallel_coordinate_plot(
 
  Author: Jiwoo Lee @ LLNL (2021. 7)
  Update history: 
+ 2021-07 Plotting code created. Inspired by https://stackoverflow.com/questions/8230638/parallel-coordinates-plot-in-matplotlib
  2022-09 violin plots added
  2023-03 median centered option added
- Inspired by https://stackoverflow.com/questions/8230638/parallel-coordinates-plot-in-matplotlib
+ 2023-04 vertical center option diversified (median, mean, or given number)
  """
  params = {
  "legend.fontsize": "large",
@@ -99,14 +106,16 @@ def parallel_coordinate_plot(
  _quick_qc(data, model_names, metric_names, model_names2=model_names2)
 
  # Transform data for plotting
- zs, zs_meds, N, ymins, ymaxs, df_stacked, df2_stacked = _data_transform(
+ zs, zs_middle, N, ymins, ymaxs, df_stacked, df2_stacked = _data_transform(
  data,
  metric_names,
  model_names,
  model_names2=model_names2,
  group1_name=group1_name,
  group2_name=group2_name,
- median_centered=median_centered,
+ vertical_center=vertical_center,
+ ymax=ymax,
+ ymin=ymin,
  )
 
  # Prepare plot
@@ -226,8 +235,12 @@ def parallel_coordinate_plot(
  clip_on=False,
  )
 
- if median_line: 
- ax.plot(range(N), zs_meds, "-", c="k", label="median", lw=1)
+ if vertical_center_line:
+ if vertical_center_line_label is None:
+ vertical_center_line_label = str(vertical_center)
+ elif vertical_center_line_label == "off":
+ vertical_center_line_label = None 
+ ax.plot(range(N), zs_middle, "-", c="k", label=vertical_center_line_label, lw=1)
 
  # Fill between lines
  if fill_between_lines and (comparing_models is not None):
@@ -309,30 +322,54 @@ def _data_transform(
  model_names2=None,
  group1_name="group1",
  group2_name="group2",
- median_centered=False,
+ vertical_center=None,
+ ymax=None,
+ ymin=None,
 ):
  # Data to plot
  ys = data # stacked y-axis values
  N = ys.shape[1] # number of vertical axis (i.e., =len(metric_names))
- ymins = np.nanmin(ys, axis=0) # minimum (ignore nan value)
- ymaxs = np.nanmax(ys, axis=0) # maximum (ignore nan value)
+ if ymax is None:
+ ymaxs = np.nanmax(ys, axis=0) # maximum (ignore nan value)
+ else:
+ ymaxs = np.repeat(ymax, N)
+
+ if ymin is None:
+ ymins = np.nanmin(ys, axis=0) # minimum (ignore nan value)
+ else:
+ ymins = np.repeat(ymin, N)
+
  ymeds = np.nanmedian(ys, axis=0) # median
- if median_centered:
+ ymean = np.nanmean(ys, axis=0) # mean
+
+ if vertical_center is not None:
+ if vertical_center == "median":
+ ymids = ymeds
+ elif vertical_center == "mean":
+ ymids = ymean
+ else:
+ ymids = np.repeat(vertical_center, N)
  for i in range(0, N):
- max_distance_from_median = max(abs(ymaxs[i] - ymeds[i]), abs(ymeds[i] - ymins[i]))
- ymaxs[i] = ymeds[i] + max_distance_from_median
- ymins[i] = ymeds[i] - max_distance_from_median
+ max_distance_from_middle = max(abs(ymaxs[i] - ymids[i]), abs(ymids[i] - ymins[i]))
+ ymaxs[i] = ymids[i] + max_distance_from_middle
+ ymins[i] = ymids[i] - max_distance_from_middle
+
  dys = ymaxs - ymins
- ymins -= dys * 0.05 # add 5% padding below and above
- ymaxs += dys * 0.05
+ if ymin is None:
+ ymins -= dys * 0.05 # add 5% padding below and above
+ if ymax is None:
+ ymaxs += dys * 0.05
  dys = ymaxs - ymins
 
  # Transform all data to be compatible with the main axis
  zs = np.zeros_like(ys)
  zs[:, 0] = ys[:, 0]
  zs[:, 1:] = (ys[:, 1:] - ymins[1:]) / dys[1:] * dys[0] + ymins[0]
 
- zs_meds = (ymeds[:] - ymins[:]) / dys[:] * dys[0] + ymins[0]
+ if vertical_center is not None:
+ zs_middle = (ymids[:] - ymins[:]) / dys[:] * dys[0] + ymins[0]
+ else:
+ zs_middle = (ymaxs[:] - ymins[:]) / 2 / dys[:] * dys[0] + ymins[0]
 
  if model_names2 is not None:
  print("Models in the second group:", model_names2)
@@ -355,7 +392,7 @@ def _data_transform(
  group2_name=group2_name,
  )
 
- return zs, zs_meds, N, ymins, ymaxs, df_stacked, df2_stacked
+ return zs, zs_middle, N, ymins, ymaxs, df_stacked, df2_stacked
 
 
 def _to_pd_dataframe(

diff --git a/pcmdi_metrics/mean_climate/lib/load_and_regrid.py b/pcmdi_metrics/mean_climate/lib/load_and_regrid.py
@@ -64,6 +64,16 @@ def load_and_regrid(data_path, varname, varname_in_file=None, level=None, t_grid
  if varname != varname_in_file:
  ds_regridded[varname] = ds_regridded[varname_in_file]
 
+ # preserve units
+ try:
+ units = ds[varname].units
+ except Exception as e:
+ print(e)
+ units = "" 
+ print('units:', units)
+
+ ds_regridded[varname] = ds_regridded[varname].assign_attrs({'units': units})
+
  if debug:
  print('ds_regridded:', ds_regridded)
  return ds_regridded
diff --git a/pcmdi_metrics/mean_climate/mean_climate_driver.py b/pcmdi_metrics/mean_climate/mean_climate_driver.py
@@ -162,6 +162,8 @@
  if level is not None:
  result_dict["Variable"]["level"] = level*100 # hPa to Pa
 
+ result_dict['References'] = dict()
+
  # ----------------
  # observation loop
  # ----------------
@@ -180,6 +182,8 @@
  # load data and regrid
  ds_ref = load_and_regrid(data_path=ref_data_full_path, varname=varname, level=level, t_grid=t_grid, decode_times=False, regrid_tool=regrid_tool, debug=debug)
  ds_ref_dict = OrderedDict()
+ # for record in output json
+ result_dict['References'][ref] = obs_dict[varname][ref_dataset_name]
 
  # ----------
  # model loop
@@ -189,6 +193,8 @@
  print('=================================')
  print('model, runs, find_all_realizations:', model, realizations, find_all_realizations)
 
+ result_dict["RESULTS"][model][ref]["source"] = ref_dataset_name 
+
  if find_all_realizations:
  test_data_full_path = os.path.join(
  test_data_path,
@@ -215,6 +221,8 @@
  # load data and regrid
  ds_test = load_and_regrid(data_path=test_data_full_path, varname=varname, varname_in_file=varname_testdata, level=level, t_grid=t_grid, decode_times=True, regrid_tool=regrid_tool, debug=debug)
  print('load and regrid done')
+ result_dict["RESULTS"][model]["units"] = ds_test[varname].units
+ result_dict["RESULTS"][model][ref][run]["InputClimatologyFileName"] = test_data_full_path.split('/')[-1] 
 
  # -----------
  # region loop
@@ -267,7 +275,6 @@
  # compute metrics
  print('compute metrics start')
  result_dict["RESULTS"][model][ref][run][region] = compute_metrics(varname, ds_test_dict[region], ds_ref_dict[region], debug=debug)
- result_dict["RESULTS"][model][ref]["source"] = ref_dataset_name 
 
  # write individual JSON
  # --- single simulation, obs (need to accumulate later) / single variable

diff --git a/pcmdi_metrics/mean_climate/param/pcmdi_MIP_EXP_pmp_parameterfile.py b/pcmdi_metrics/mean_climate/param/pcmdi_MIP_EXP_pmp_parameterfile.py
@@ -10,10 +10,10 @@
 # ###############################################################################
 case_id = ver
 
-# MIP = 'cmip6' # 'CMIP6'
-MIP = 'cmip5' # 'CMIP6'
-exp = 'historical'
-# exp = 'amip'
+MIP = 'cmip6' # 'CMIP6'
+# MIP = 'cmip5' # 'CMIP6'
+# exp = 'historical'
+exp = 'amip'
 # exp = 'picontrol'
 
 user_notes = "Provenance and results"
@@ -24,9 +24,11 @@
 # ################################################################
 
 if MIP == 'cmip6':
- modver = 'v20230202'
+ # modver = 'v20230202'
+ modver = 'v20230324'
 if MIP == 'cmip5':
- modver = 'v20230208'
+ # modver = 'v20230208'
+ modver = 'v20230324'
 
 # LIST OF MODEL VERSIONS TO BE TESTED - WHICH ARE EXPECTED TO BE PART OF CLIMATOLOGY FILENAME
 

diff --git a/pcmdi_metrics/mean_climate/scripts/allvars_parallel_mod_clims.py b/pcmdi_metrics/mean_climate/scripts/allvars_parallel_mod_clims.py
@@ -12,35 +12,26 @@ def find_latest(path):
 
 mip = 'cmip5'
 # mip = 'cmip6'
-exp = 'historical'
-# exp = 'amip'
-# verin = 'v20230201'
+# exp = 'historical'
+exp = 'amip'
+
 data_path = find_latest("/p/user_pub/pmp/pmp_results/pmp_v1.1.2/additional_xmls/latest")
+pathout_base = '/p/user_pub/pmp/pmp_results/pmp_v1.1.2/diagnostic_results/CMIP_CLIMS/' + mip + '/' + exp + '/'
 start = '1981-01'
 end = '2005-12'
 numw = 20 # number of workers in parallel processing
 verout = datetime.datetime.now().strftime('v%Y%m%d')
 
-# vars = ['rlut', 'tas', 'pr']
-# vars = ['ts', 'tas', 'uas', 'vas', 'huss', 'hurs', 'psl', 'prw', 'sfcWind', 'tauu', 'tauv', 'pr', 'rlut', 'rsut', 'rlutcs', 'rsutcs', 'rsdt', 'rsus', 'rsds', 'rlds', 'rlus', 'rldscs', 'rsdscs']
-# vars = ['ta', 'ua', 'va', 'zg', 'hur', 'hus']
-# vars = ['ts', 'tas', 'uas', 'vas', 'huss', 'hurs', 'psl', 'prw', 'sfcWind', 'tauu', 'tauv', 'pr', 'rlut', 'rsut', 'rlutcs', 'rsutcs', 'rsdt', 'rsus', 'rsds', 'rlds', 'rlus', 'rldscs', 'rsdscs', 'ta', 'ua', 'va', 'zg', 'hur', 'hus']
-# vars = ['ts', 'pr', 'tas', 'uas', 'vas', 'huss', 'hurs', 'psl', 'prw', 'sfcWind', 'tauu', 'tauv', 'rlut', 'rsut', 'rlutcs', 'rsutcs', 'rsdt', 'rsus', 'rsds', 'rlds', 'rlus', 'rldscs', 'rsdscs', 'ta', 'ua', 'va', 'zg', 'hur']
-vars = ['hur', 'hurs', 'huss', 'pr', 'prw', 'psl', 'rlds', 'rldscs', 'rlus', 'rlut', 'rlutcs', 'rsds', 'rsdscs', 'rsdt', 'rsus', 'rsut', 'rsutcs', 'sfcWind', 'ta', 'tas', 'tauu', 'tauv', 'ts', 'ua', 'uas', 'va', 'vas', 'zg']
-# vars = ['ts', 'pr']
+vars = ['hur', 'hurs', 'huss', 'pr', 'prw', 'psl', 'rlds', 'rldscs', 'rlus', 'rlut', 'rlutcs', 'rsds', 'rsdscs', 'rsdt', 'rsus', 'rsut', 'rsutcs', 'sfcWind', 'ta', 'tas', 'tauu', 'tauv', 'ts', 'ua', 'uas', 'va', 'vas', 'wap', 'zg']
 
 lst1 = []
 listlog = []
 
 for var in vars:
- # pin = '/p/user_pub/pmp/pmp_results/pmp_v1.1.2/additional_xmls/latest/' + verin + '/' + mip + '/' + exp + '/atmos/mon/' + var + '/'
  pin = os.path.join(data_path, mip, exp,'atmos', 'mon', var)
-
  lst = sorted(glob.glob(os.path.join(pin, '*r1i1p1*.xml')))
 
- pathout_base = '/p/user_pub/pmp/pmp_results/pmp_v1.1.2/diagnostic_results/CMIP_CLIMS/' + mip + '/' + exp + '/'
  pathoutdir = os.path.join(pathout_base, verout, var)
-
  os.makedirs(pathoutdir, exist_ok=True)
 
  for li in lst:

diff --git a/pcmdi_metrics/mean_climate/scripts/get_all_MIP_mods_from_CLIMS.py b/pcmdi_metrics/mean_climate/scripts/get_all_MIP_mods_from_CLIMS.py
@@ -1,15 +1,13 @@
 import glob
 import json
 
-ver = 'v20230208'
+ver = 'v20230324'
 
 pin = '/p/user_pub/pmp/pmp_results/pmp_v1.1.2/diagnostic_results/CMIP_CLIMS/%(MIP)/%(EXP)/' + ver + '/ts/'
 
-# MIPS = ['cmip6', 'cmip5']
+MIPS = ['cmip6', 'cmip5']
 # exps = ['historical', 'amip']
-
-MIPS = ['cmip5']
-exps = ['historical']
+exps = ['amip']
 
 mod_dic = {}
 

diff --git a/pcmdi_metrics/mean_climate/scripts/mk_CRF_clims.py b/pcmdi_metrics/mean_climate/scripts/mk_CRF_clims.py
@@ -12,20 +12,20 @@
 cdms.setNetcdfDeflateFlag(0)
 cdms.setNetcdfDeflateLevelFlag(0)
 
-exp = 'historical'
-# exp = 'amip'
+#exp = 'historical'
+exp = 'amip'
 
-# MIP = 'cmip6' # 'CMIP6'
-MIP = 'cmip5' # 'CMIP5'
+MIP = 'cmip6' # 'CMIP6'
+# MIP = 'cmip5' # 'CMIP5'
 
 if MIP == 'cmip6':
- ver = 'v20230202'
+ ver = 'v20230324'
 if MIP == 'cmip5':
- ver = 'v20230208'
+ ver = 'v20230324'
 
 # NEED TO RUN SEPERATELY FOR LW AND SW (i.e., rsut and rlut)
-radvar = 'rsut'
-# radvar = 'rlut'
+# radvar = 'rsut'
+radvar = 'rlut'
 
 pit = '/p/user_pub/pmp/pmp_results/pmp_v1.1.2/diagnostic_results/CMIP_CLIMS/' + MIP + '/' + exp + '/' + ver + '/'
 pi = pit + radvar + 'cs/'

diff --git a/pcmdi_metrics/version.py b/pcmdi_metrics/version.py
@@ -1,3 +1,3 @@
 __version__ = 'v3.0.2'
-__git_tag_describe__ = 'v3.0.2-4-g110dcf50'
-__git_sha1__ = '110dcf50d933e099ee063c8b0a64644635d0af12'
+__git_tag_describe__ = 'v3.0.2-11-g06b151f'
+__git_sha1__ = '06b151f46b182259277ddc6b9e25ea9ff08b1f50'