Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use open_dataset if single file #1042

Merged
merged 7 commits into from
Feb 2, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 64 additions & 35 deletions pcmdi_metrics/io/xcdat_openxml.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,89 @@
import glob
import os
import sys
from typing import Union

import xcdat
import xarray as xr
import xcdat as xc
import xmltodict


def xcdat_open(infile, data_var=None, decode_times=True):
"""
Parameter
---------
infile:
list of string, or string
File(s) to open using xcdat
data_var:
(Optional[str], optional) – The key of the non-bounds data variable to keep in the Dataset, alongside any existing bounds data variables, by default None.

Output
------
ds:
xcdat dataset
def xcdat_open(
infile: Union[str, list], data_var: str = None, decode_times: bool = True
) -> xr.Dataset:
"""Open input file (netCDF, or xml generated by cdscan)

Parameters
----------
infile : Union[str, list]
list of string, or string, for path of file(s) to open using xcdat
data_var : str, optional
key of the non-bounds data variable to keep in the Dataset, alongside any existing bounds data variables, by default None, which loads all data variables
decode_times : bool, optional
If True, attempt to decode times encoded in the standard NetCDF datetime format into cftime.datetime objects. Otherwise, leave them encoded as numbers. This keyword may not be supported by all the backends, by default True

Returns
-------
xr.Dataset
xarray dataset opened via xcdat

Usage
-----
>>> from pcmdi_metrics.io import xcdat_open
# Open a single netCDF file
>>> ds = xcdat_open('mydata.nc')
# Open multiple files
>>> ds2 = xcdat_open(['mydata1.nc', 'mydata2.nc'])  # Open multiple netCDF files
# Open while specifying the variable 'ts'
>>> ds3 = xcdat_open(['mydata1.nc', 'mydata2.nc'], data_var='ts')
# Open an xml file
>>> ds = xcdat_open('mydata.xml')
"""
if isinstance(infile, list):
ds = xcdat.open_mfdataset(infile, data_var=data_var, decode_times=decode_times)
ds = xc.open_mfdataset(infile, data_var=data_var, decode_times=decode_times)
else:
if infile.split(".")[-1].lower() == "xml":
ds = xcdat_openxml(infile, data_var=data_var, decode_times=decode_times)
else:
ds = xcdat.open_mfdataset(
infile, data_var=data_var, decode_times=decode_times
)
ds = xc.open_dataset(infile, data_var=data_var, decode_times=decode_times)

return ds


def xcdat_openxml(xmlfile, data_var=None, decode_times=True):
"""
Parameter
---------
infile:
xml file to open using xcdat
data_var:
(Optional[str], optional) – The key of the non-bounds data variable to keep in the Dataset, alongside any existing bounds data variables, by default None.

Output
------
ds:
xcdat dataset
def xcdat_openxml(
xmlfile: str, data_var: str = None, decode_times: bool = True
) -> xr.Dataset:
"""Open input file (xml generated by cdscan)

Parameters
----------
xmlfile : str
path of xml file to open using xcdat
data_var : str, optional
key of the non-bounds data variable to keep in the Dataset, alongside any existing bounds data variables, by default None, which loads all data variables
decode_times : bool, optional
If True, attempt to decode times encoded in the standard NetCDF datetime format into cftime.datetime objects. Otherwise, leave them encoded as numbers. This keyword may not be supported by all the backends, by default True

Returns
-------
xr.Dataset
xarray dataset opened via xcdat
"""
if not os.path.exists(xmlfile):
sys.exit("ERROR: File not exist: {}".format(xmlfile))
sys.exit(f"ERROR: File not exist: {xmlfile}")

with open(xmlfile) as fd:
with open(xmlfile, encoding="utf-8") as fd:
doc = xmltodict.parse(fd.read())

ncfile_list = glob.glob(os.path.join(doc["dataset"]["@directory"], "*.nc"))
ds = xcdat.open_mfdataset(ncfile_list, data_var=data_var, decode_times=decode_times)

if len(ncfile_list) > 1:
ds = xc.open_mfdataset(
ncfile_list, data_var=data_var, decode_times=decode_times
)
else:
ds = xc.open_dataset(
ncfile_list[0], data_var=data_var, decode_times=decode_times
)

return ds
Loading