diff --git a/act/qc/qcfilter.py b/act/qc/qcfilter.py index ab4b252306..a3f99a4dc3 100644 --- a/act/qc/qcfilter.py +++ b/act/qc/qcfilter.py @@ -942,9 +942,9 @@ def datafilter( ds.qcfilter.datafilter(rm_assessments="Bad") ds_2 = ds.mean() - print("All_data=", ds_1[var_name].values) + print("All_data =", ds_1[var_name].values) All_data = 98.86098 - print("Bad_Removed=", ds_2[var_name].values) + print("Bad_Removed =", ds_2[var_name].values) Bad_Removed = 99.15148 """ @@ -958,10 +958,18 @@ def datafilter( qc_var_name = self.check_for_ancillary_qc(var_name, add_if_missing=False, cleanup=False) if qc_var_name is None: if verbose: - print( - f'No quality control variable for {var_name} found ' - f'in call to .qcfilter.datafilter()' - ) + if var_name in ['base_time', 'time_offset']: + continue + + try: + if self._obj[var_name].attrs['standard_name'] == 'quality_flag': + continue + except KeyError: + pass + + print(f'No quality control variable for {var_name} found ' + f'in call to .qcfilter.datafilter()') + continue data = self.get_masked_data( @@ -971,8 +979,16 @@ def datafilter( ma_fill_value=np_ma, ) - self._obj[var_name].values = data + # If data was originally stored as Dask array return values to Dataset as Dask array + # else set as Numpy array. 
+ try: + self._obj[var_name].data = dask.array.from_array( + data, chunks=self._obj[var_name].data.chunksize) + + except AttributeError: + self._obj[var_name].values = data + # If requested delete quality control variable if del_qc_var: del self._obj[qc_var_name] if verbose: diff --git a/act/tests/test_qc.py b/act/tests/test_qc.py index 8011704a7e..677ab02a98 100644 --- a/act/tests/test_qc.py +++ b/act/tests/test_qc.py @@ -718,21 +718,40 @@ def test_qctests_dos(): def test_datafilter(): - ds = read_netcdf(EXAMPLE_MET1) + ds = read_netcdf(EXAMPLE_MET1, drop_variables=['base_time', 'time_offset']) ds.clean.cleanup() + data_var_names = list(ds.data_vars) + qc_var_names = [var_name for var_name in ds.data_vars if var_name.startswith('qc_')] + data_var_names = list(set(data_var_names) - set(qc_var_names)) + data_var_names.sort() + qc_var_names.sort() + var_name = 'atmos_pressure' ds_1 = ds.mean() ds.qcfilter.add_less_test(var_name, 99, test_assessment='Bad') - ds.qcfilter.datafilter(rm_assessments='Bad') - ds_2 = ds.mean() - + ds_filtered = copy.deepcopy(ds) + ds_filtered.qcfilter.datafilter(rm_assessments='Bad', del_qc_var=False) + ds_2 = ds_filtered.mean() assert np.isclose(ds_1[var_name].values, 98.86, atol=0.01) assert np.isclose(ds_2[var_name].values, 99.15, atol=0.01) + assert isinstance(ds_1[var_name].data, da.core.Array) + + ds_filtered = copy.deepcopy(ds) + ds_filtered.qcfilter.datafilter(rm_assessments='Bad', variables=var_name) + ds_2 = ds_filtered.mean() + assert np.isclose(ds_2[var_name].values, 99.15, atol=0.01) + expected_var_names = sorted(list(set(data_var_names + qc_var_names) - set(['qc_' + var_name]))) + assert sorted(list(ds_filtered.data_vars)) == expected_var_names + + ds_filtered = copy.deepcopy(ds) + ds_filtered.qcfilter.datafilter(rm_assessments='Bad', del_qc_var=True) + assert sorted(list(ds_filtered.data_vars)) == data_var_names ds.close() + del ds def test_qc_remainder():