From 140353d94ebf1c568eb1f7c4efe7a6abf7d8cc37 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Thu, 1 Feb 2024 11:40:15 -0800 Subject: [PATCH 1/6] Use open_dataset if single file Update xcdat_openxml.py to use `open_dataset` instead of `open_mfdataset` to save memory, considering discussions at https://github.com/PCMDI/pcmdi_metrics/pull/1041#issuecomment-1922035850 --- pcmdi_metrics/io/xcdat_openxml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcmdi_metrics/io/xcdat_openxml.py b/pcmdi_metrics/io/xcdat_openxml.py index fe245eac8..622a0e9bc 100644 --- a/pcmdi_metrics/io/xcdat_openxml.py +++ b/pcmdi_metrics/io/xcdat_openxml.py @@ -27,7 +27,7 @@ def xcdat_open(infile, data_var=None, decode_times=True): if infile.split(".")[-1].lower() == "xml": ds = xcdat_openxml(infile, data_var=data_var, decode_times=decode_times) else: - ds = xcdat.open_mfdataset( + ds = xcdat.open_dataset( infile, data_var=data_var, decode_times=decode_times ) From cc5683be4be8f732ea7f0f67fb3300f1e4adad5c Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Thu, 1 Feb 2024 11:55:39 -0800 Subject: [PATCH 2/6] improve doc --- pcmdi_metrics/io/xcdat_openxml.py | 85 ++++++++++++++++++------------- 1 file changed, 50 insertions(+), 35 deletions(-) diff --git a/pcmdi_metrics/io/xcdat_openxml.py b/pcmdi_metrics/io/xcdat_openxml.py index 622a0e9bc..e665efa5e 100644 --- a/pcmdi_metrics/io/xcdat_openxml.py +++ b/pcmdi_metrics/io/xcdat_openxml.py @@ -1,60 +1,75 @@ import glob import os import sys +from typing import Union -import xcdat +import xarray as xr +import xcdat as xc import xmltodict -def xcdat_open(infile, data_var=None, decode_times=True): - """ - Parameter - --------- - infile: - list of string, or string - File(s) to open using xcdat - data_var: - (Optional[str], optional) – The key of the non-bounds data variable to keep in the Dataset, alongside any existing bounds data variables, by default None. 
- - Output - ------ - ds: - xcdat dataset +def xcdat_open( + infile: Union[str, list], data_var: str = None, decode_times: bool = True +) -> xr.Dataset: + """Open input file (netCDF, or xml generated by cdscan) + + Parameters + ---------- + infile : Union[str, list] + list of string, or string, for path of file(s) to open using xcdat + data_var : str, optional + key of the non-bounds data variable to keep in the Dataset, alongside any existing bounds data variables, by default None, which loads all data variables + decode_times : bool, optional + If True, attempt to decode times encoded in the standard NetCDF datetime format into cftime.datetime objects. Otherwise, leave them encoded as numbers. This keyword may not be supported by all the backends, by default True + + Returns + ------- + xr.Dataset + xarray dataset opened via xcdat """ if isinstance(infile, list): - ds = xcdat.open_mfdataset(infile, data_var=data_var, decode_times=decode_times) + ds = xc.open_mfdataset(infile, data_var=data_var, decode_times=decode_times) else: if infile.split(".")[-1].lower() == "xml": ds = xcdat_openxml(infile, data_var=data_var, decode_times=decode_times) else: - ds = xcdat.open_dataset( - infile, data_var=data_var, decode_times=decode_times - ) + ds = xc.open_dataset(infile, data_var=data_var, decode_times=decode_times) return ds -def xcdat_openxml(xmlfile, data_var=None, decode_times=True): - """ - Parameter - --------- - infile: - xml file to open using xcdat - data_var: - (Optional[str], optional) – The key of the non-bounds data variable to keep in the Dataset, alongside any existing bounds data variables, by default None. 
- - Output - ------ - ds: - xcdat dataset +def xcdat_openxml( + xmlfile: str, data_var: str = None, decode_times: bool = True +) -> xr.Dataset: + """Open input file (xml generated by cdscan) + + Parameters + ---------- + xmlfile : str + path of xml file to open using xcdat + data_var : str, optional + key of the non-bounds data variable to keep in the Dataset, alongside any existing bounds data variables, by default None, which loads all data variables + decode_times : bool, optional + If True, attempt to decode times encoded in the standard NetCDF datetime format into cftime.datetime objects. Otherwise, leave them encoded as numbers. This keyword may not be supported by all the backends, by default True + + Returns + ------- + xr.Dataset + xarray dataset opened via xcdat """ if not os.path.exists(xmlfile): - sys.exit("ERROR: File not exist: {}".format(xmlfile)) + sys.exit(f"ERROR: File not exist: {xmlfile}") - with open(xmlfile) as fd: + with open(xmlfile, encoding="utf-8") as fd: doc = xmltodict.parse(fd.read()) ncfile_list = glob.glob(os.path.join(doc["dataset"]["@directory"], "*.nc")) - ds = xcdat.open_mfdataset(ncfile_list, data_var=data_var, decode_times=decode_times) + + if len(ncfile_list) > 1: + ds = xc.open_mfdataset( + ncfile_list, data_var=data_var, decode_times=decode_times + ) + else: + ds = xc.open_dataset(ncfile_list, data_var=data_var, decode_times=decode_times) return ds From d856a62a1080a3b6d15db3860e640999d5fea95f Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Thu, 1 Feb 2024 11:59:51 -0800 Subject: [PATCH 3/6] add usage example to inline doc --- pcmdi_metrics/io/xcdat_openxml.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pcmdi_metrics/io/xcdat_openxml.py b/pcmdi_metrics/io/xcdat_openxml.py index e665efa5e..977b4559d 100644 --- a/pcmdi_metrics/io/xcdat_openxml.py +++ b/pcmdi_metrics/io/xcdat_openxml.py @@ -26,6 +26,18 @@ def xcdat_open( ------- xr.Dataset xarray dataset opened via xcdat + + Usage + ----- + >>> from 
pcmdi_metrics.io import xcdat_open + # Open a single netCDF file + >>> ds = xcdat_open('mydata.nc') + # Open multiple files + >>> ds2 = xcdat_open(['mydata1.nc', 'mydata2.nc']) # Open multiple netCDF files + # Open with specifying the variable 'ts' + >>> ds3 = xcdat_open(['mydata1.nc', 'mydata2.nc'], data_var='ts') + # Open an xml file + >>> ds = xcdat_open('mydata.xml') """ if isinstance(infile, list): ds = xc.open_mfdataset(infile, data_var=data_var, decode_times=decode_times) From 04a5d5c2d13a61840e6f4da08c7eb71991864fca Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Thu, 1 Feb 2024 12:10:40 -0800 Subject: [PATCH 4/6] pre-commit clean up --- pcmdi_metrics/io/xcdat_openxml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pcmdi_metrics/io/xcdat_openxml.py b/pcmdi_metrics/io/xcdat_openxml.py index 977b4559d..8809f370b 100644 --- a/pcmdi_metrics/io/xcdat_openxml.py +++ b/pcmdi_metrics/io/xcdat_openxml.py @@ -26,7 +26,7 @@ def xcdat_open( ------- xr.Dataset xarray dataset opened via xcdat - + Usage ----- >>> from pcmdi_metrics.io import xcdat_open @@ -35,7 +35,7 @@ def xcdat_open( # Open multiple files >>> ds2 = xcdat_open(['mydata1.nc', 'mydata2.nc']) # Open multiple netCDF files # Open with specifying the variable 'ts' - >>> ds3 = xcdat_open(['mydata1.nc', 'mydata2.nc'], data_var='ts') + >>> ds3 = xcdat_open(['mydata1.nc', 'mydata2.nc'], data_var='ts') # Open an xml file >>> ds = xcdat_open('mydata.xml') """ From d1e41978f86a22fe2a9ccb2c32d85694d7e0cf00 Mon Sep 17 00:00:00 2001 From: lee1043 Date: Thu, 1 Feb 2024 12:26:04 -0800 Subject: [PATCH 5/6] bug fix --- pcmdi_metrics/io/xcdat_openxml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcmdi_metrics/io/xcdat_openxml.py b/pcmdi_metrics/io/xcdat_openxml.py index 8809f370b..b4d6a4c3d 100644 --- a/pcmdi_metrics/io/xcdat_openxml.py +++ b/pcmdi_metrics/io/xcdat_openxml.py @@ -82,6 +82,6 @@ def xcdat_openxml( ncfile_list, data_var=data_var, decode_times=decode_times ) else: - ds = 
xc.open_dataset(ncfile_list, data_var=data_var, decode_times=decode_times) + ds = xc.open_dataset(ncfile_list[0], data_var=data_var, decode_times=decode_times) return ds From 194992811e3634778b1df099522dafc886a96256 Mon Sep 17 00:00:00 2001 From: lee1043 Date: Thu, 1 Feb 2024 12:30:15 -0800 Subject: [PATCH 6/6] pre-commit clean up --- pcmdi_metrics/io/xcdat_openxml.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pcmdi_metrics/io/xcdat_openxml.py b/pcmdi_metrics/io/xcdat_openxml.py index b4d6a4c3d..8b42b964b 100644 --- a/pcmdi_metrics/io/xcdat_openxml.py +++ b/pcmdi_metrics/io/xcdat_openxml.py @@ -82,6 +82,8 @@ def xcdat_openxml( ncfile_list, data_var=data_var, decode_times=decode_times ) else: - ds = xc.open_dataset(ncfile_list[0], data_var=data_var, decode_times=decode_times) + ds = xc.open_dataset( + ncfile_list[0], data_var=data_var, decode_times=decode_times + ) return ds