
Lazy netcdf saves #5191

Merged: 92 commits, Apr 21, 2023
Changes from 15 commits

Commits (92)
cd7fa42
Basic functional lazy saving.
pp-mo Oct 21, 2022
1f32800
Simplify function signature which upsets Sphinx.
pp-mo Oct 21, 2022
e0f980f
Non-lazy saves return nothing.
pp-mo Oct 21, 2022
67b96cf
Now fixed to enable use with process/distributed scheduling.
pp-mo Oct 23, 2022
8cdbc9b
Remove dask.utils.SerializableLock, which I think was a mistake.
pp-mo Mar 3, 2023
8723f24
Make DefferedSaveWrapper use _thread_safe_nc.
pp-mo Mar 8, 2023
d19a87f
Fixes for non-lazy save.
pp-mo Mar 9, 2023
45e0e60
Avoid saver error when no deferred writes.
pp-mo Mar 10, 2023
6a83200
Reorganise locking code, ready for shareable locks.
pp-mo Mar 10, 2023
78a9346
Remove optional usage of 'filelock' for lazy saves.
pp-mo Mar 10, 2023
31b12f7
Document dask-specific locking; implement differently for threads or …
pp-mo Mar 10, 2023
99b4f41
Minor fix for unit-tests.
pp-mo Mar 10, 2023
5d0a707
Pin libnetcdf to avoid problems -- see #5187.
pp-mo Mar 10, 2023
431036f
Minor test fix.
pp-mo Mar 10, 2023
dc368d9
Move DeferredSaveWrapper into _thread_safe_nc; replicate the NetCDFDa…
pp-mo Mar 13, 2023
6756a46
Update lib/iris/fileformats/netcdf/saver.py
pp-mo Mar 16, 2023
80b4b6c
Update lib/iris/fileformats/netcdf/_dask_locks.py
pp-mo Mar 16, 2023
78a8716
Update lib/iris/fileformats/netcdf/saver.py
pp-mo Mar 16, 2023
47bb08b
Small rename + reformat.
pp-mo Mar 17, 2023
0ece09a
Remove Saver lazy option; all lazy saves are delayed; factor out fill…
pp-mo Mar 18, 2023
940f544
Merge branch 'main' into lazy_save_2
pp-mo Mar 18, 2023
eb97130
Merge remote-tracking branch 'upstream/main' into lazy_save_2
pp-mo Mar 20, 2023
4596081
Repurposed 'test__FillValueMaskCheckAndStoreTarget' to 'test__data_fi…
pp-mo Mar 20, 2023
ad49fbe
Disable (temporary) saver debug printouts.
pp-mo Mar 20, 2023
b29c927
Fix test problems; Saver automatically completes to preserve existing…
pp-mo Mar 20, 2023
8f10281
Fix docstring error.
pp-mo Mar 20, 2023
6a564d9
Fix spurious error in old saver test.
pp-mo Mar 20, 2023
2fb4d6c
Fix Saver docstring.
pp-mo Mar 20, 2023
c84bfdc
More robust exit for NetCDFWriteProxy operation.
pp-mo Mar 20, 2023
5b78085
Fix doctests by making the Saver example functional.
pp-mo Mar 21, 2023
478332e
Improve docstrings; unify terminology; simplify non-lazy save call.
pp-mo Mar 23, 2023
34f154c
Moved netcdf cell-method handling into nc_load_rules.helpers, and var…
pp-mo Mar 27, 2023
d3744ba
Merge branch 'latest' into lazy_save_2
pp-mo Mar 27, 2023
9673ea0
Fix lockfiles and Makefile process.
pp-mo Mar 27, 2023
bcbcbc8
Add unit tests for routine _fillvalue_report().
pp-mo Mar 27, 2023
05c04a1
Remove debug-only code.
pp-mo Mar 27, 2023
679ea47
Added tests for what the save function does with the 'compute' keyword.
pp-mo Mar 28, 2023
70ec9dd
Fix mock-specific problems, small tidy.
pp-mo Mar 28, 2023
28a4674
Restructure hierarchy of tests.unit.fileformats.netcdf
pp-mo Mar 29, 2023
67f4b2b
Tidy test docstrings.
pp-mo Mar 29, 2023
ebec72f
Correct test import.
pp-mo Mar 29, 2023
1f5b904
Avoid incorrect checking of byte data, and a numpy deprecation warning.
pp-mo Mar 29, 2023
5045c9f
Alter parameter names to make test reports clearer.
pp-mo Mar 29, 2023
393407a
Test basic behaviour of _lazy_stream_data; make 'Saver._delayed_write…
pp-mo Mar 29, 2023
518360b
Add integration tests, and distributed dependency.
pp-mo Mar 30, 2023
5c9931f
Docstring fixes.
pp-mo Mar 31, 2023
7daee68
Documentation section and whatsnew entry.
pp-mo Apr 4, 2023
97474f9
Merge branch 'main' into lazy_save_2
pp-mo Apr 4, 2023
64c7251
Various fixes to whatsnew, docstrings and docs.
pp-mo Apr 4, 2023
75043f9
Minor review changes, fix doctest.
pp-mo Apr 11, 2023
445fbe2
Arrange tests + results to organise by package-name alone.
pp-mo Apr 11, 2023
09cb22e
Review changes.
pp-mo Apr 11, 2023
3445f58
Review changes.
pp-mo Apr 12, 2023
cb1e1f7
Enhance tests + debug.
pp-mo Apr 12, 2023
1c81cee
Support scheduler type 'single-threaded'; allow retries on delayed-sa…
pp-mo Apr 13, 2023
370837b
Improve test.
pp-mo Apr 13, 2023
2f5f3c2
Adding a whatsnew entry for 5224 (#5234)
HGWright Apr 4, 2023
a55c6f2
Replacing numpy legacy printing with array2string and remaking result…
HGWright Apr 4, 2023
4914e99
adding a whatsnew entry
HGWright Apr 4, 2023
bd642cd
configure codecov
HGWright Apr 4, 2023
bc5bdd1
remove results creation commit from blame
HGWright Apr 4, 2023
301e59e
fixing whatsnew entry
HGWright Apr 4, 2023
7b3044d
Bump scitools/workflows from 2023.04.1 to 2023.04.2 (#5236)
dependabot[bot] Apr 5, 2023
02f2b66
Use real array for data of of small netCDF variables. (#5229)
pp-mo Apr 6, 2023
a7e0689
Handle derived coordinates correctly in `concatenate` (#5096)
schlunma Apr 12, 2023
c4e8bbb
clarity on whatsnew entry contributors (#5240)
bjlittle Apr 12, 2023
e6661b8
Modernize and simplify iris.analysis._Groupby (#5015)
bouweandela Apr 12, 2023
afbdbbd
Finalises Lazy Data documentation (#5137)
ESadek-MO Apr 12, 2023
b8bb753
Fixes to _discontiguity_in_bounds (attempt 2) (#4975)
stephenworsley Apr 12, 2023
97cc149
update ci locks location (#5228)
bjlittle Apr 13, 2023
f14a321
Updated environment lockfiles (#5211)
scitools-ci[bot] Apr 13, 2023
f7a0b87
Increase retries.
pp-mo Apr 13, 2023
69ddd9d
Change debug to show which elements failed.
pp-mo Apr 13, 2023
8235d60
update cf standard units (#5244)
ESadek-MO Apr 13, 2023
724c6d2
libnetcdf <4.9 pin (#5242)
trexfeathers Apr 13, 2023
4f50dc7
Avoid possible same-file crossover between tests.
pp-mo Apr 13, 2023
0da68cf
Ensure all-different testfiles; load all vars lazy.
pp-mo Apr 13, 2023
e8b7bfd
Revert changes to testing framework.
pp-mo Apr 13, 2023
ad48caf
Remove repeated line from requirements/py*.yml (?merge error), and re…
pp-mo Apr 13, 2023
291b587
Revert some more debug changes.
pp-mo Apr 13, 2023
b2260ef
Merge branch 'latest' into lazy_save_2
pp-mo Apr 13, 2023
33a7d86
Reorganise test for better code clarity.
pp-mo Apr 14, 2023
db6932d
Use public 'Dataset.isopen()' instead of '._isopen'.
pp-mo Apr 14, 2023
631e001
Create output files in unique temporary directories.
pp-mo Apr 14, 2023
2869f97
Tests for fileformats.netcdf._dask_locks.
pp-mo Apr 14, 2023
419727b
Merge branch 'latest' into lazy_save_2
pp-mo Apr 21, 2023
2f4458b
Fix attribution names.
pp-mo Apr 21, 2023
88b7a2a
Merge branch 'latest' into lazy_save_2
pp-mo Apr 21, 2023
98a20e7
Fixed new py311 lockfile.
pp-mo Apr 21, 2023
bbc1167
Fix typos spotted by codespell.
pp-mo Apr 21, 2023
ed38e43
Add distributed test dep for python 3.11
pp-mo Apr 21, 2023
54ec0f8
Fix lockfile for python 3.11
pp-mo Apr 21, 2023
131 changes: 131 additions & 0 deletions lib/iris/fileformats/netcdf/_dask_locks.py
@@ -0,0 +1,131 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
Module containing code to create locks enabling dask workers to co-operate.

This matter is complicated by needing different solutions for different dask scheduler
types, i.e. local 'threads' scheduler, local 'processes' or distributed.

In any case, an "iris.fileformats.netcdf.saver.Saver" object contains a netCDF4.Dataset
targeting an output file, and creates a Saver.lock object to serialise write-accesses
to the file from dask tasks: all dask-task file writes go via an
"iris.fileformats.netcdf.saver.DeferredSaveWrapper" object, which also holds a reference
to the Saver.lock, and uses it to prevent workers from fouling each other.
For each chunk written, the DeferredSaveWrapper acquires the common per-file lock;
opens a Dataset on the file; performs a write to the relevant variable; closes the
Dataset and then releases the lock.

For a threaded scheduler, the Saver.lock is a simple threading.Lock(). The workers
(threads) execute tasks which contain a DeferredSaveWrapper, as above. All of those
contain the common lock, and this is simply **the same object** for all workers, since
they share an address space.

For a distributed scheduler, the Saver.lock is a `distributed.Lock()` which is
identified with the output filepath. This is distributed to the workers by
serialising the task function arguments, which will include the DeferredSaveWrapper.
A worker behaves like a process, though it may execute on a remote machine. When a
distributed.Lock is deserialised to reconstruct the worker task, this creates an object
that communicates with the scheduler. These objects behave as a single common lock,
as they all have the same string 'identity', and the scheduler provides the
inter-process communication needed to make them mutually exclude each other.

It is also *conceivable* that multiple processes could write to the same file in
parallel, if the operating system supports it. However, this also requires that the
libnetcdf C library is built with the parallel-access option, which is not common.
With an "ordinary" libnetcdf build, a process which attempts to open for writing a file
which is _already_ open for writing simply raises an access error.
In any case, the Iris netcdf saver does not support this mode of operation at present.

We don't currently support a local "processes" type scheduler. If we did, the
behaviour should be very similar to a distributed scheduler. It would need to use some
other serialisable shared-lock solution in place of 'distributed.Lock', which requires
a distributed scheduler to function.

"""
import threading

import dask.array
import dask.base
import dask.multiprocessing
import dask.threaded


def dask_scheduler_is_distributed():
"""Return whether a distributed.Client is active."""
# NOTE: this replicates logic in `dask.base.get_scheduler` : if a distributed client
# has been created + is still active, then the default scheduler will always be
# "distributed".
is_distributed = False
# NOTE: must still work when 'distributed' is not available.
try:
import distributed

client = distributed.get_client()
is_distributed = client is not None
except (ImportError, ValueError):
pass
return is_distributed


def get_dask_array_scheduler_type():
"""
Work out what type of scheduler an array.compute*() will use.

Returns one of 'distributed', 'threads' or 'processes'.
The return value is a valid argument for dask.config.set(scheduler=<type>).
This cannot distinguish between distributed local and remote clusters -- both of
those simply return 'distributed'.

NOTE: this takes account of how dask is *currently* configured. It will be wrong
if the config changes before the compute actually occurs.

"""
if dask_scheduler_is_distributed():
result = "distributed"
else:
# Call 'get_scheduler', which respects the config settings, but pass an array
# so we default to the default scheduler for that type of object.
trial_dask_array = dask.array.zeros(1)
get_function = dask.base.get_scheduler(collections=[trial_dask_array])
# Detect the ones which we recognise.
if get_function == dask.threaded.get:
result = "threads"
elif get_function == dask.multiprocessing.get:
result = "processes"
else:
msg = f"Dask default scheduler for arrays is unrecognised : {get_function}"
raise ValueError(msg)

return result


def get_worker_lock(identity: str):
"""
Return a mutex Lock which can be shared by multiple Dask workers.

The type of Lock generated depends on the dask scheduler type, which must therefore
be set up before this is called.

"""
scheduler_type = get_dask_array_scheduler_type()
if scheduler_type == "threads":
# N.B. the "identity" string is never used in this case, as the same actual
# lock object is used by all workers.
lock = threading.Lock()
elif scheduler_type == "distributed":
from dask.distributed import Lock as DistributedLock

lock = DistributedLock(identity)
else:
msg = (
"The configured dask array scheduler type is "
f'"{scheduler_type}", '
"which is not supported by the Iris netcdf saver."
)
raise ValueError(msg)

# NOTE: not supporting 'processes' scheduler, for now.
return lock
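
For orientation, here is a minimal usage sketch of the two helpers above (hypothetical, not part of the diff; the module is private and the filepath is invented):

```python
# A minimal sketch: query the active dask scheduler type and obtain the
# matching per-file lock, as Saver.__init__ does for its output file.
from iris.fileformats.netcdf import _dask_locks

scheduler = _dask_locks.get_dask_array_scheduler_type()
# -> "threads" by default, or "distributed" when a distributed.Client is active.

lock = _dask_locks.get_worker_lock("/tmp/output.nc")
# -> a threading.Lock, or a distributed.Lock keyed on the filepath string.
print(scheduler, type(lock))
```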
38 changes: 38 additions & 0 deletions lib/iris/fileformats/netcdf/_thread_safe_nc.py
@@ -340,3 +340,41 @@ def __getstate__(self):
def __setstate__(self, state):
for key, value in state.items():
setattr(self, key, value)


class DeferredSaveWrapper:
"""
The "opposite" of a NetCDFDataProxy: an object mimicking the data access of a
netCDF4.Variable, but to which data is to be *written*.

It encapsulates the netcdf file and variable which are actually to be written to.
The file is re-opened for each data chunk written, then closed again afterwards.
TODO: could be improved with a caching scheme, but this just about works.
"""

def __init__(self, filepath, cf_var, file_write_lock):
self.path = filepath
self.varname = cf_var.name
self.lock = file_write_lock

def __setitem__(self, keys, array_data):
# Write to the variable.
# First acquire a file-specific lock for all workers writing to this file.
self.lock.acquire()
# Open the file for writing + write to the specific file variable.
# Exactly as above in NetCDFDataProxy: a DatasetWrapper causes problems with
# invalid IDs and the netCDF4 library, for so-far unknown reasons.
# Instead, use _GLOBAL_NETCDF4_LOCK, and netCDF4 _directly_.
with _GLOBAL_NETCDF4_LOCK:
dataset = None
try:
dataset = netCDF4.Dataset(self.path, "r+")
var = dataset.variables[self.varname]
var[keys] = array_data
finally:
if dataset:
dataset.close()
self.lock.release()

def __repr__(self):
return f"<{self.__class__.__name__} path={self.path!r} var={self.varname!r}>"
112 changes: 97 additions & 15 deletions lib/iris/fileformats/netcdf/saver.py
@@ -23,6 +23,7 @@
import warnings

import cf_units
import dask
import dask.array as da
import numpy as np
import numpy.ma as ma
@@ -44,7 +45,7 @@
from iris.coords import AncillaryVariable, AuxCoord, CellMeasure, DimCoord
import iris.exceptions
import iris.fileformats.cf
from iris.fileformats.netcdf import _thread_safe_nc
from iris.fileformats.netcdf import _dask_locks, _thread_safe_nc
import iris.io
import iris.util

@@ -499,7 +500,7 @@ def __setitem__(self, keys, arr):
class Saver:
"""A manager for saving netcdf files."""

def __init__(self, filename, netcdf_format):
def __init__(self, filename, netcdf_format, compute=True):
"""
A manager for saving netcdf files.

@@ -512,6 +513,15 @@ def __init__(self, filename, netcdf_format):
Underlying netCDF file format, one of 'NETCDF4', 'NETCDF4_CLASSIC',
'NETCDF3_CLASSIC' or 'NETCDF3_64BIT'. Default is 'NETCDF4' format.

* compute (bool):
If True, the Saver performs normal 'synchronous' data writes, where data
is streamed directly into file variables during the save operation.
If False, the file is created as normal, but computation and streaming of
any lazy array content is instead deferred to :class:`dask.delayed` objects,
which are held in a list in the saver's 'deferred_writes' property.
The relevant file variables are created empty, and the write can
subsequently be completed later by computing these deferred writes.

Returns:
None.

@@ -548,18 +558,28 @@ def __init__(self, filename, netcdf_format):
self._mesh_dims = {}
#: A dictionary, mapping formula terms to owner cf variable name
self._formula_terms_cache = {}
#: Target filepath
self.filepath = os.path.abspath(filename)
#: Whether lazy saving.
self.lazy_saves = not compute
#: A list of deferred writes for lazy saving : each is a (source, target) pair
self.deferred_writes = []
# N.B. the file-write-lock *type* actually depends on the dask scheduler type.
#: A per-file write lock to prevent dask attempting overlapping writes.
self.file_write_lock = _dask_locks.get_worker_lock(self.filepath)
#: NetCDF dataset
self._dataset = None
try:
self._dataset = _thread_safe_nc.DatasetWrapper(
filename, mode="w", format=netcdf_format
self.filepath, mode="w", format=netcdf_format
)
except RuntimeError:
dir_name = os.path.dirname(filename)
dir_name = os.path.dirname(self.filepath)
if not os.path.isdir(dir_name):
msg = "No such file or directory: {}".format(dir_name)
raise IOError(msg)
if not os.access(dir_name, os.R_OK | os.W_OK):
msg = "Permission denied: {}".format(filename)
msg = "Permission denied: {}".format(self.filepath)
raise IOError(msg)
else:
raise
@@ -2444,8 +2464,7 @@ def _increment_name(self, varname):

return "{}_{}".format(varname, num)

@staticmethod
def _lazy_stream_data(data, fill_value, fill_warn, cf_var):
def _lazy_stream_data(self, data, fill_value, fill_warn, cf_var):
if hasattr(data, "shape") and data.shape == (1,) + cf_var.shape:
# (Don't do this check for string data).
# Reduce dimensionality where the data array has an extra dimension
@@ -2455,16 +2474,37 @@ def _lazy_stream_data(data, fill_value, fill_warn, cf_var):
data = data.squeeze(axis=0)

if is_lazy_data(data):
if self.lazy_saves:
# deferred lazy streaming
def store(data, cf_var, fill_value):
# Create a data-writeable object that we can stream into, which
# encapsulates the file to be opened + variable to be written.
writeable_var_wrapper = (
_thread_safe_nc.DeferredSaveWrapper(
self.filepath, cf_var, self.file_write_lock
)
)
# Add to the list of deferred writes, used in _deferred_save().
self.deferred_writes.append((data, writeable_var_wrapper))
# NOTE: in this case, no checking of fill-value violations so just
# return dummy values for this.
# TODO: just for now -- can probably make this work later
is_masked, contains_value = False, False
return is_masked, contains_value

def store(data, cf_var, fill_value):
# Store lazy data and check whether it is masked and contains
# the fill value
target = _FillValueMaskCheckAndStoreTarget(cf_var, fill_value)
da.store([data], [target])
return target.is_masked, target.contains_value
else:
# Immediate streaming store : check mask+fill as we go.
def store(data, cf_var, fill_value):
# Store lazy data and check whether it is masked and contains
# the fill value
target = _FillValueMaskCheckAndStoreTarget(
cf_var, fill_value
)
da.store([data], [target], lock=False)
return target.is_masked, target.contains_value

else:

# Real data is always written directly, i.e. not via lazy save.
def store(data, cf_var, fill_value):
cf_var[:] = data
is_masked = np.ma.is_masked(data)
@@ -2513,6 +2553,28 @@ def store(data, cf_var, fill_value):
)
warnings.warn(msg.format(cf_var.name, fill_value))

def _deferred_save(self):
"""
Create a 'delayed' to trigger file completion for lazy saves.

This contains all the deferred writes, which complete the file by filling out
the data of variables initially created empty.

"""
if self.deferred_writes:
# Create a single delayed da.store operation to complete the file.
sources, targets = zip(*self.deferred_writes)
result = da.store(sources, targets, compute=False, lock=False)
else:
# Return a delayed anyway, just for usage consistency.
@dask.delayed
def no_op():
return None

result = no_op()

return result


def save(
cube,
@@ -2530,6 +2592,7 @@ def save(
least_significant_digit=None,
packing=None,
fill_value=None,
compute=True,
):
"""
Save cube(s) to a netCDF file, given the cube and the filename.
@@ -2652,6 +2715,14 @@ def save(
`:class:`iris.cube.CubeList`, or a single element, and each element of
this argument will be applied to each cube separately.

* compute (bool):
When False, create the output file but defer writing any lazy array content to
its variables, such as (lazy) data and aux-coord points and bounds.
Instead return a :class:`dask.delayed` which, when computed, will compute all
the lazy content and stream it to complete the file.
Several such data saves can be performed in parallel, by passing a list of them
into a :func:`dask.compute` call.

Returns:
None.

@@ -2752,7 +2823,9 @@ def is_valid_packspec(p):
raise ValueError(msg)

# Initialise Manager for saving
with Saver(filename, netcdf_format) as sman:
# N.B. FOR NOW -- we are cheating and making all saves compute=False, as otherwise
# non-lazy saves do *not* work with the distributed scheduler.
with Saver(filename, netcdf_format, compute=False) as sman:
# Iterate through the cubelist.
for cube, packspec, fill_value in zip(cubes, packspecs, fill_values):
sman.write(
Expand Down Expand Up @@ -2797,3 +2870,12 @@ def is_valid_packspec(p):

# Add conventions attribute.
sman.update_global_attributes(Conventions=conventions)

# For now, not using Saver(compute=True) as it doesn't work with distributed or
# process workers (only threaded).
result = sman._deferred_save()
if compute:
result = result.compute()
result = None

return result
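
As described in the new 'compute' docstring, the user-facing pattern is a save call that returns a dask delayed, several of which can be completed in one parallel computation. A hypothetical sketch (filenames invented, assuming iris.save forwards the keyword to the netCDF saver as usual):

```python
# A minimal sketch of the lazy-save workflow enabled by compute=False.
import dask
import iris

cubes_a = iris.load("input_a.nc")
cubes_b = iris.load("input_b.nc")

# Each save creates its output file immediately, but defers streaming of any
# lazy data, returning a dask delayed object instead of None.
delayed_a = iris.save(cubes_a, "output_a.nc", compute=False)
delayed_b = iris.save(cubes_b, "output_b.nc", compute=False)

# Complete both files in a single parallel computation.
dask.compute(delayed_a, delayed_b)
```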