Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Load+save things other than filepaths, and specifically support netCDF4.Dataset-s #5024

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions lib/iris/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,11 @@ def callback(cube, field, filename):

"""

from collections.abc import Iterable
import contextlib
import glob
import itertools
import os.path
import pathlib
import threading

import iris._constraints
Expand Down Expand Up @@ -251,7 +251,8 @@ def context(self, **kwargs):

def _generate_cubes(uris, callback, constraints):
"""Returns a generator of cubes given the URIs and a callback."""
if isinstance(uris, (str, pathlib.PurePath)):
if isinstance(uris, str) or not isinstance(uris, Iterable):
# Make a string, or other single item, into an iterable.
uris = [uris]

# Group collections of uris by their iris handler
Expand All @@ -268,6 +269,10 @@ def _generate_cubes(uris, callback, constraints):
urls = [":".join(x) for x in groups]
for cube in iris.io.load_http(urls, callback):
yield cube
elif scheme == "data":
data_objects = [x[1] for x in groups]
for cube in iris.io.load_data_objects(data_objects, callback):
yield cube
else:
raise ValueError("Iris cannot handle the URI scheme: %s" % scheme)

Expand Down
35 changes: 26 additions & 9 deletions lib/iris/fileformats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"""

from iris.io.format_picker import (
DataSourceObjectProtocol,
FileExtension,
FormatAgent,
FormatSpecification,
Expand Down Expand Up @@ -125,16 +126,32 @@ def _load_grib(*args, **kwargs):
)


_nc_dap = FormatSpecification(
"NetCDF OPeNDAP",
UriProtocol(),
lambda protocol: protocol in ["http", "https"],
netcdf.load_cubes,
priority=6,
constraint_aware_handler=True,
FORMAT_AGENT.add_spec(
FormatSpecification(
"NetCDF OPeNDAP",
UriProtocol(),
lambda protocol: protocol in ["http", "https"],
netcdf.load_cubes,
priority=6,
constraint_aware_handler=True,
)
)
FORMAT_AGENT.add_spec(_nc_dap)
del _nc_dap

# NetCDF file presented as an open, readable netCDF4 dataset (or mimic).
FORMAT_AGENT.add_spec(
FormatSpecification(
"NetCDF dataset",
DataSourceObjectProtocol(),
lambda object: all(
hasattr(object, x)
for x in ("variables", "dimensions", "groups", "ncattrs")
),
        netcdf.load_cubes,  # reuses the file loader : it must distinguish filepaths from open datasets.
priority=4,
constraint_aware_handler=True,
)
)


#
# UM Fieldsfiles.
Expand Down
20 changes: 14 additions & 6 deletions lib/iris/fileformats/cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1043,15 +1043,23 @@ class CFReader:
# TODO: remove once iris.experimental.ugrid.CFUGridReader is folded in.
CFGroup = CFGroup

def __init__(self, filename, warn=False, monotonic=False):
self._dataset = None
self._filename = os.path.expanduser(filename)
def __init__(self, file_source, warn=False, monotonic=False):
# Ensure safe operation for destructor, should init fail.
self._own_file = False
if isinstance(file_source, str):
# Create from filepath : open it + own it (=close when we die).
self._filename = os.path.expanduser(file_source)
self._dataset = netCDF4.Dataset(self._filename, mode="r")
self._own_file = True
else:
# We have been passed an open dataset.
# We use it but don't own it (don't close it).
self._dataset = file_source
self._filename = self._dataset.filepath()

#: Collection of CF-netCDF variables associated with this netCDF file
self.cf_group = self.CFGroup()

self._dataset = netCDF4.Dataset(self._filename, mode="r")

# Issue load optimisation warning.
if warn and self._dataset.file_format in [
"NETCDF3_CLASSIC",
Expand Down Expand Up @@ -1296,7 +1304,7 @@ def _reset(self):

def __del__(self):
# Explicitly close dataset to prevent file remaining open.
if self._dataset is not None:
if self._own_file and self._dataset is not None:
self._dataset.close()


Expand Down
26 changes: 14 additions & 12 deletions lib/iris/fileformats/netcdf/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
Also : `CF Conventions <https://cfconventions.org/>`_.

"""
from collections.abc import Iterable
import warnings

import netCDF4
Expand Down Expand Up @@ -494,14 +495,15 @@ def inner(cf_datavar):
return result


def load_cubes(filenames, callback=None, constraints=None):
def load_cubes(file_sources, callback=None, constraints=None):
"""
Loads cubes from a list of NetCDF filenames/OPeNDAP URLs.
Loads cubes from a list of NetCDF file_sources/OPeNDAP URLs.

Args:

* filenames (string/list):
One or more NetCDF filenames/OPeNDAP URLs to load from.
* file_sources (string/list):
One or more NetCDF file_sources/OPeNDAP URLs to load from.
OR open datasets.

Kwargs:

Expand Down Expand Up @@ -529,17 +531,17 @@ def load_cubes(filenames, callback=None, constraints=None):
# Create an actions engine.
engine = _actions_engine()

if isinstance(filenames, str):
filenames = [filenames]
if isinstance(file_sources, str) or not isinstance(file_sources, Iterable):
file_sources = [file_sources]

for filename in filenames:
# Ingest the netCDF file.
for file_source in file_sources:
# Ingest the file. At present may be a filepath or an open netCDF4.Dataset.
meshes = {}
if PARSE_UGRID_ON_LOAD:
cf = CFUGridReader(filename)
cf = CFUGridReader(file_source)
meshes = _meshes_from_cf(cf)
else:
cf = iris.fileformats.cf.CFReader(filename)
cf = iris.fileformats.cf.CFReader(file_source)

# Process each CF data variable.
data_variables = list(cf.cf_group.data_variables.values()) + list(
Expand Down Expand Up @@ -571,7 +573,7 @@ def load_cubes(filenames, callback=None, constraints=None):
if mesh is not None:
mesh_coords, mesh_dim = _build_mesh_coords(mesh, cf_var)

cube = _load_cube(engine, cf, cf_var, filename)
cube = _load_cube(engine, cf, cf_var, cf.filename)

# Attach the mesh (if present) to the cube.
for mesh_coord in mesh_coords:
Expand All @@ -585,7 +587,7 @@ def load_cubes(filenames, callback=None, constraints=None):
warnings.warn("{}".format(e))

# Perform any user registered callback function.
cube = run_callback(callback, cube, cf_var, filename)
cube = run_callback(callback, cube, cf_var, file_source)
Copy link
Member Author

@pp-mo pp-mo Oct 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this makes more sense.
I believe it is consistent with what we do for UM "structured loading"
see "Use of load callbacks" in structured_um_loading api docs
-- though in this case it is the filename arg that is different to usual, rather than the field one.


# Callback mechanism may return None, which must not be yielded
if cube is None:
Expand Down
41 changes: 25 additions & 16 deletions lib/iris/fileformats/netcdf/saver.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,7 @@ def __init__(self, filename, netcdf_format):

* filename (string):
Name of the netCDF file to save the cube.
OR a writeable object supporting the netCDF4.Dataset api.

* netcdf_format (string):
Underlying netCDF file format, one of 'NETCDF4', 'NETCDF4_CLASSIC',
Expand Down Expand Up @@ -543,29 +544,37 @@ def __init__(self, filename, netcdf_format):
#: A dictionary, mapping formula terms to owner cf variable name
self._formula_terms_cache = {}
#: NetCDF dataset
try:
self._dataset = netCDF4.Dataset(
filename, mode="w", format=netcdf_format
)
except RuntimeError:
dir_name = os.path.dirname(filename)
if not os.path.isdir(dir_name):
msg = "No such file or directory: {}".format(dir_name)
raise IOError(msg)
if not os.access(dir_name, os.R_OK | os.W_OK):
msg = "Permission denied: {}".format(filename)
raise IOError(msg)
else:
raise
self._dataset = None # this line just for the API page

# Detect if we were passed a pre-opened dataset
self._to_open_dataset = hasattr(filename, "createVariable")
if self._to_open_dataset:
self._dataset = filename
else:
try:
self._dataset = netCDF4.Dataset(
filename, mode="w", format=netcdf_format
)
except RuntimeError:
dir_name = os.path.dirname(filename)
if not os.path.isdir(dir_name):
msg = "No such file or directory: {}".format(dir_name)
raise IOError(msg)
if not os.access(dir_name, os.R_OK | os.W_OK):
msg = "Permission denied: {}".format(filename)
raise IOError(msg)
else:
raise

def __enter__(self):
return self

def __exit__(self, type, value, traceback):
"""Flush any buffered data to the CF-netCDF file before closing."""

self._dataset.sync()
self._dataset.close()
if not self._to_open_dataset:
# Only close if the Saver created it.
self._dataset.close()

def write(
self,
Expand Down
50 changes: 41 additions & 9 deletions lib/iris/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ def decode_uri(uri, default="file"):
In addition to well-formed URIs, it also supports bare file paths as strings
or :class:`pathlib.PurePath`. Both Windows and UNIX style paths are
accepted.
It also supports 'bare objects', i.e. anything which is not a string.
These are identified with a scheme of 'data', and returned unchanged.

.. testsetup::

Expand All @@ -114,20 +116,31 @@ def decode_uri(uri, default="file"):
>>> print(decode_uri('dataZoo/...'))
('file', 'dataZoo/...')

>>> print(decode_uri({}))
('data', {})

"""
if isinstance(uri, pathlib.PurePath):
uri = str(uri)
# make sure scheme has at least 2 letters to avoid windows drives
# put - last in the brackets so it refers to the character, not a range
# reference on valid schemes: https://tools.ietf.org/html/std66#section-3.1
match = re.match(r"^([a-zA-Z][a-zA-Z0-9+.-]+):(.+)", uri)
if match:
scheme = match.group(1)
part = match.group(2)

if isinstance(uri, str):
# make sure scheme has at least 2 letters to avoid windows drives
# put - last in the brackets so it refers to the character, not a range
# reference on valid schemes: https://tools.ietf.org/html/std66#section-3.1
match = re.match(r"^([a-zA-Z][a-zA-Z0-9+.-]+):(.+)", uri)
if match:
scheme = match.group(1)
part = match.group(2)
else:
# Catch bare UNIX and Windows paths
scheme = default
part = uri
else:
# Catch bare UNIX and Windows paths
scheme = default
# We can pass things other than strings, like open files.
# These are simply identified as 'data objects'.
scheme = "data"
part = uri

return scheme, part


Expand Down Expand Up @@ -250,6 +263,25 @@ def load_http(urls, callback):
yield cube


def load_data_objects(urls, callback):
    """
    Take a list of data-source objects plus a callback function, and return
    a generator of Cubes.

    Here, the 'objects' stand in for the 'uris' of an ordinary load call.
    Each handler is expected to recognise the appropriate types of
    data-source object : as usual, this is done by passing the context to
    the format picker, which selects a handler for each object.

    .. note::

        Typically, this function should not be called directly; instead, the
        intended interface for loading is :func:`iris.load`.

    """
    # NOTE: at present, this behaves *exactly* like the http loader.
    for cube in load_http(urls, callback):
        yield cube


def _dot_save(cube, target):
# A simple wrapper for `iris.fileformats.dot.save` which allows the
# saver to be registered without triggering the import of
Expand Down
19 changes: 19 additions & 0 deletions lib/iris/io/format_picker.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,3 +330,22 @@ def get_element(self, basename, file_handle):
from iris.io import decode_uri

return decode_uri(basename)[0]


class DataSourceObjectProtocol(FileElement):
    """
    A :class:`FileElement` which simply hands back the URI entry unchanged.

    This allows an arbitrary non-string data object to be passed through,
    relying on the handler's own subsequent checks (specified there) to
    validate the object itself.

    """

    def __init__(self):
        # No file handle is required : the "element" is the object itself.
        super().__init__(requires_fh=False)

    def get_element(self, basename, file_handle):
        # In this context, no file should have been opened by the handler.
        # Here 'basename' is not actually a name -- nor even a string --
        # but the passed 'data object', which is returned unaltered.
        return basename
Loading