Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Data.mask: migrate to Dask #301

41 changes: 10 additions & 31 deletions cf/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6351,9 +6351,9 @@ def datetime_array(self):

@property
def mask(self):
"""The boolean missing data mask of the data array.
"""The Boolean missing data mask of the data array.

The boolean mask has True where the data array has missing data
The Boolean mask has True where the data array has missing data
and False otherwise.

:Returns:
Expand All @@ -6368,40 +6368,19 @@ def mask(self):
>>> m.dtype
dtype('bool')
>>> m.shape
(12, 73, 96])
(12, 73, 96)

"""
mask = self.copy()

config = mask.partition_configuration(readonly=False)

for partition in mask.partitions.matrix.flat:
partition.open(config)
array = partition.array

if partition.masked:
# Array is masked
partition.subarray = array.mask.copy()
else:
# Array is not masked
partition.subarray = FilledArray(
shape=array.shape,
size=array.size,
ndim=array.ndim,
dtype=_dtype_bool,
fill_value=0,
)

partition.Units = _units_None
mask_data_obj = self.copy()

partition.close()

mask._Units = _units_None
mask.dtype = _dtype_bool
dx = self._get_dask()
mask = da.ma.getmaskarray(dx)

mask._hardmask = True
mask_data_obj._set_dask(mask, reset_mask_hardness=True)
mask_data_obj.override_units(_units_None, inplace=True)
mask_data_obj.hardmask = True

return mask
return mask_data_obj
sadielbartholomew marked this conversation as resolved.
Show resolved Hide resolved

@staticmethod
def mask_fpe(*arg):
Expand Down
46 changes: 40 additions & 6 deletions cf/test/test_Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@

ones = np.ones(a.shape, dtype=float)

# TODODASK: these can be moved into the lone tests that use them now
ma = np.ma.arange(-100, 200.0, dtype=float).reshape(3, 4, 5, 5)
ma[:, 1, 4, 4] = np.ma.masked
ma[0, :, 2, 3] = np.ma.masked
Expand Down Expand Up @@ -77,7 +76,6 @@ class DataTest(unittest.TestCase):
os.path.dirname(os.path.abspath(__file__)), "test_file2.nc"
)

# TODODASK: these can be moved into the lone tests that use them now
a = a
w = w
ma = ma
Expand Down Expand Up @@ -535,17 +533,53 @@ def test_Data_halo(self):
# [ 8 8 9 10 -- --]
# [ 8 8 9 10 -- --]]

@unittest.skipIf(TEST_DASKIFIED_ONLY, "no attr. 'partition_configuration'")
def test_Data_apply_masking(self):
def test_Data_mask(self):
    """Check `Data.mask` for masked, unmasked and string-valued data.

    In each case the mask is expected to have the same shape as the
    data, a Boolean data type, null units and a hard mask.

    """
    if self.test_only and inspect.stack()[0][3] not in self.test_only:
        return

    # TODODASK: once test_Data_apply_masking is passing after daskification
    # of apply_masking, might make sense to combine this test with that?

    # Test for a masked Data object (having some masked points)
    a = self.ma
    d = cf.Data(a, units="m")

    self.assertTrue((a == d.array).all())
    self.assertTrue((a.mask == d.mask.array).all())
    self.assertEqual(d.mask.shape, d.shape)
    self.assertEqual(d.mask.dtype, bool)
    self.assertEqual(d.mask.Units, cf.Units(None))
    self.assertTrue(d.mask.hardmask)
    self.assertIn(True, d.mask.array)

    # Test for a non-masked Data object
    a2 = np.arange(-100, 200.0, dtype=float).reshape(3, 4, 5, 5)
    d2 = cf.Data(a2, units="m")
    d2[...] = a2
    self.assertTrue((a2 == d2.array).all())
    self.assertEqual(d2.shape, d2.mask.shape)
    self.assertEqual(d2.mask.dtype, bool)
    self.assertEqual(d2.mask.Units, cf.Units(None))
    self.assertTrue(d2.mask.hardmask)
    self.assertNotIn(True, d2.mask.array)

    # Test for a masked Data object of string type, including chunking
    a3 = np.ma.array(["one", "two", "four"], dtype="S4")
    a3[1] = np.ma.masked
    d3 = cf.Data(a3, "m", chunks=(3,))
    self.assertTrue((a3 == d3.array).all())
    self.assertEqual(d3.shape, d3.mask.shape)
    self.assertEqual(d3.mask.dtype, bool)
    self.assertEqual(d3.mask.Units, cf.Units(None))
    self.assertTrue(d3.mask.hardmask)

    # Only the element that was explicitly masked may be flagged.
    # (The second positional argument of assertTrue is the failure
    # message, not an expected value, so it must not be passed here.)
    mask3 = d3.mask.array
    self.assertTrue(mask3[1])
    self.assertFalse(mask3[0])
    self.assertFalse(mask3[2])

@unittest.skipIf(TEST_DASKIFIED_ONLY, "no attr. 'partition_configuration'")
def test_Data_apply_masking(self):
if self.test_only and inspect.stack()[0][3] not in self.test_only:
return

a = self.ma
d = cf.Data(a, units="m")

b = a.copy()
e = d.apply_masking()
Expand Down