Skip to content

Commit

Permalink
Merge pull request #301 from sadielbartholomew/daskify-property-mask
Browse files (browse the repository at this point in the history)
`Data.mask`: migrate to Dask
  • Loading branch information
sadielbartholomew committed Feb 17, 2022
2 parents 9f7afbf + 29d4215 commit 4001dd5
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 37 deletions.
41 changes: 10 additions & 31 deletions cf/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6418,9 +6418,9 @@ def datetime_array(self):

@property
def mask(self):
    """The Boolean missing data mask of the data array.

    The Boolean mask has True where the data array has missing data
    and False otherwise.

    :Returns:

        `Data`
            The Boolean mask, as a new `Data` object with no units
            and with a hard mask.

    **Examples**

    >>> d.shape
    (12, 73, 96)
    >>> m = d.mask
    >>> m.dtype
    dtype('bool')
    >>> m.shape
    (12, 73, 96)

    """
    # Work on a copy so that the original data is left untouched
    mask_data_obj = self.copy()

    # `getmaskarray` always gives a full Boolean array: the mask of
    # a masked array, or all False when there is no mask
    dx = self._get_dask()
    mask = da.ma.getmaskarray(dx)

    mask_data_obj._set_dask(mask, reset_mask_hardness=True)

    # A mask is a plain Boolean array, so it carries no units
    mask_data_obj.override_units(_units_None, inplace=True)
    mask_data_obj.hardmask = True

    return mask_data_obj

@staticmethod
def mask_fpe(*arg):
Expand Down
46 changes: 40 additions & 6 deletions cf/test/test_Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@

ones = np.ones(a.shape, dtype=float)

# TODODASK: these can be moved into the lone tests that use them now
ma = np.ma.arange(-100, 200.0, dtype=float).reshape(3, 4, 5, 5)
ma[:, 1, 4, 4] = np.ma.masked
ma[0, :, 2, 3] = np.ma.masked
Expand Down Expand Up @@ -78,7 +77,6 @@ class DataTest(unittest.TestCase):
os.path.dirname(os.path.abspath(__file__)), "test_file2.nc"
)

# TODODASK: these can be moved into the lone tests that use them now
a = a
w = w
ma = ma
Expand Down Expand Up @@ -505,17 +503,53 @@ def test_Data_halo(self):
# [ 8 8 9 10 -- --]
# [ 8 8 9 10 -- --]]

def test_Data_mask(self):
    """Test the `Data.mask` property.

    Covers data with some masked points, data with no masked
    points, and masked string-typed data with explicit chunking.
    In each case the mask must have the shape of the data, a
    Boolean data type, no units, and a hard mask.

    """
    if self.test_only and inspect.stack()[0][3] not in self.test_only:
        return

    # TODODASK: once test_Data_apply_masking is passing after daskification
    # of apply_masking, might make sense to combine this test with that?

    # Test for a masked Data object (having some masked points)
    a = self.ma
    d = cf.Data(a, units="m")

    self.assertTrue((a == d.array).all())
    self.assertTrue((a.mask == d.mask.array).all())
    self.assertEqual(d.mask.shape, d.shape)
    self.assertEqual(d.mask.dtype, bool)
    self.assertEqual(d.mask.Units, cf.Units(None))
    self.assertTrue(d.mask.hardmask)
    self.assertIn(True, d.mask.array)

    # Test for a non-masked Data object
    a2 = np.arange(-100, 200.0, dtype=float).reshape(3, 4, 5, 5)
    d2 = cf.Data(a2, units="m")
    d2[...] = a2
    self.assertTrue((a2 == d2.array).all())
    self.assertEqual(d2.shape, d2.mask.shape)
    self.assertEqual(d2.mask.dtype, bool)
    self.assertEqual(d2.mask.Units, cf.Units(None))
    self.assertTrue(d2.mask.hardmask)
    self.assertNotIn(True, d2.mask.array)

    # Test for a masked Data object of string type, including chunking
    a3 = np.ma.array(["one", "two", "four"], dtype="S4")
    a3[1] = np.ma.masked
    d3 = cf.Data(a3, "m", chunks=(3,))
    self.assertTrue((a3 == d3.array).all())
    self.assertEqual(d3.shape, d3.mask.shape)
    self.assertEqual(d3.mask.dtype, bool)
    self.assertEqual(d3.mask.Units, cf.Units(None))
    self.assertTrue(d3.mask.hardmask)
    # The second positional argument of assertTrue is the failure
    # message, not an expected value, so only the expression is given
    self.assertTrue(d3.mask.array[1])

@unittest.skipIf(TEST_DASKIFIED_ONLY, "no attr. 'partition_configuration'")
def test_Data_apply_masking(self):
if self.test_only and inspect.stack()[0][3] not in self.test_only:
return

a = self.ma
d = cf.Data(a, units="m")

b = a.copy()
e = d.apply_masking()
Expand Down

0 comments on commit 4001dd5

Please sign in to comment.