Source code for act.qc.qcfilter

"""
Functions and methods for creating ancillary quality control variables
and filters (masks) which can be used with various corrections
routines in ACT.

"""

import dask
import numpy as np
import xarray as xr

from act.qc import comparison_tests, qctests, bsrn_tests, qc_summary
from act.utils.data_utils import get_missing_value


[docs]@xr.register_dataset_accessor('qcfilter') class QCFilter(qctests.QCTests, comparison_tests.QCTests, bsrn_tests.QCTests, qc_summary.QCSummary): """ A class for building quality control variables containing arrays for filtering data based on a set of test condition typically based on the values in the data fields. These filters can be used in various algorithms and calculations within ACT. """ def __init__(self, ds): """initialize""" self._ds = ds
[docs] def check_for_ancillary_qc(self, var_name, add_if_missing=True, cleanup=False, flag_type=False): """ Method to check if a quality control variable exist in the dataset and return the quality control varible name. Will call create_qc_variable() to make variable if does not exist and update_ancillary_variable() to ensure linkage between data and quality control variable. Can also be used just to get the corresponding quality control variable name with adding if it is missing. Parameters ---------- var_name : str Data variable name. add_if_missing : boolean Add quality control variable if missing from teh dataset. Will raise and exception if the var_name does not exist in Dataset. Set to False to not raise exception. cleanup : boolean Option to run qc.clean.cleanup() method on the dataset to ensure the dataset was updated from ARM QC to the correct standardized QC. flag_type : boolean Indicating the QC variable uses flag_values instead of flag_masks. Returns ------- qc_var_name : str or None Name of existing or new quality control variable. Returns None if no existing quality control variable is found and add_if_missing is set to False. Examples -------- .. code-block:: python from act.tests import EXAMPLE_METE40 from act.io.arm import read_arm_netcdf ds = read_arm_netcdf(EXAMPLE_METE40, cleanup_qc=True) qc_var_name = ds.qcfilter.check_for_ancillary_qc('atmos_pressure') print(f'qc_var_name: {qc_var_name}') qc_var_name = ds.qcfilter.check_for_ancillary_qc('the_greatest_variable_ever', add_if_missing=False) print(f'qc_var_name: {qc_var_name}') """ qc_var_name = None try: ancillary_variables = self._ds[var_name].attrs['ancillary_variables'] if isinstance(ancillary_variables, str): ancillary_variables = ancillary_variables.split() for var in ancillary_variables: for attr, value in self._ds[var].attrs.items(): if attr == 'standard_name' and 'quality_flag' in value: qc_var_name = var if add_if_missing and qc_var_name is None: qc_var_name = self._ds.qcfilter.create_qc_variable(var_name, flag_type=flag_type) except KeyError: # Since no ancillary_variables exist look for ARM style of QC # variable name. If it exists use it else create new # QC varaible. if add_if_missing: try: self._ds['qc_' + var_name] qc_var_name = 'qc_' + var_name except KeyError: qc_var_name = self._ds.qcfilter.create_qc_variable( var_name, flag_type=flag_type ) # Make sure data varaible has a variable attribute linking # data variable to QC variable. if add_if_missing: self._ds.qcfilter.update_ancillary_variable(var_name, qc_var_name) # Clean up quality control variables to the requried standard in the # xarray dataset. if cleanup: self._ds.clean.cleanup(handle_missing_value=True, link_qc_variables=False) return qc_var_name
[docs] def create_qc_variable( self, var_name, flag_type=False, flag_values_set_value=0, qc_var_name=None ): """ Method to create a quality control variable in the dataset. Will try not to destroy the qc variable by appending numbers to the variable name if needed. Parameters ---------- var_name : str Data variable name. flag_type : boolean If an integer flag type should be created instead of bitpacked mask type. Will create flag_values instead of flag_masks. flag_values_set_value : int Initial flag value to use when initializing array. qc_var_name : str Optional new quality control variable name. If not set will create one using \\"qc\\_\\" prepended to the data variable name. If the name given or created is taken will append a number that does not have a conflict. Returns ------- qc_var_name : str Name of new quality control variable created. Examples -------- .. code-block:: python from act.tests import EXAMPLE_AOSMET from act.io.arm import read_arm_netcdf ds = read_arm_netcdf(EXAMPLE_AOSMET) qc_var_name = ds.qcfilter.create_qc_variable('temperature_ambient') print(qc_var_name) print(ds[qc_var_name]) """ # Make QC variable long name. The variable long_name attribute # may not exist so catch that error and set to default. try: qc_variable_long_name = ( 'Quality check results on field: ' + self._ds[var_name].attrs['long_name'] ) except KeyError: qc_variable_long_name = 'Quality check results for ' + var_name # Make a new quality control variable name. Check if exists in the # dataset. If so loop through creation of new name until one is # found that will not replace existing variable. if qc_var_name is None: qc_var_name = 'qc_' + var_name variable_names = list(self._ds.data_vars) if qc_var_name in variable_names: for ii in range(1, 100): temp_qc_var_name = '_'.join([qc_var_name, str(ii)]) if temp_qc_var_name not in variable_names: qc_var_name = temp_qc_var_name break # Create the QC variable filled with 0 values matching the # shape of data variable. try: qc_data = dask.array.from_array( np.zeros_like(self._ds[var_name].values, dtype=np.int32), chunks=self._ds[var_name].data.chunksize, ) except AttributeError: qc_data = np.zeros_like(self._ds[var_name].values, dtype=np.int32) # Updating to use coords instead of dim, which caused a loss of # attribuets as noted in Issue 347 self._ds[qc_var_name] = xr.DataArray( data=qc_data, coords=self._ds[var_name].coords, attrs={'long_name': qc_variable_long_name, 'units': '1'}, ) # Update if using flag_values and don't want 0 to be default value. if flag_type and flag_values_set_value != 0: self._ds[qc_var_name].values = self._ds[qc_var_name].values + int(flag_values_set_value) # Add requried variable attributes. if flag_type: self._ds[qc_var_name].attrs['flag_values'] = [] else: self._ds[qc_var_name].attrs['flag_masks'] = [] self._ds[qc_var_name].attrs['flag_meanings'] = [] self._ds[qc_var_name].attrs['flag_assessments'] = [] self._ds[qc_var_name].attrs['standard_name'] = 'quality_flag' self.update_ancillary_variable(var_name, qc_var_name=qc_var_name) return qc_var_name
[docs] def update_ancillary_variable(self, var_name, qc_var_name=None): """ Method to check if ancillary_variables variable attribute is set with quality control variable name. Parameters ---------- var_name : str Data variable name. qc_var_name : str quality control variable name. If not given will attempt to get the name from data variable ancillary_variables attribute. Examples -------- .. code-block:: python from act.tests import EXAMPLE_AOSMET from act.io.arm import read_arm_netcdf ds = read_arm_netcdf(EXAMPLE_AOSMET) var_name = 'temperature_ambient' qc_var_name = ds.qcfilter.create_qc_variable(var_name) del ds[var_name].attrs['ancillary_variables'] ds.qcfilter.update_ancillary_variable(var_name, qc_var_name) print(ds[var_name].attrs['ancillary_variables']) """ if qc_var_name is None: qc_var_name = self._ds.qcfilter.check_for_ancillary_qc(var_name, add_if_missing=False) if qc_var_name is None: return try: ancillary_variables = self._ds[var_name].attrs['ancillary_variables'] if qc_var_name not in ancillary_variables: ancillary_variables = ' '.join([ancillary_variables, qc_var_name]) except KeyError: ancillary_variables = qc_var_name self._ds[var_name].attrs['ancillary_variables'] = ancillary_variables
[docs] def add_test( self, var_name, index=None, test_number=None, test_meaning=None, test_assessment='Bad', flag_value=False, recycle=False, ): """ Method to add a new test/filter to a quality control variable. Parameters ---------- var_name : str data variable name index : int, bool, list of int or bool, numpy array, tuple of numpy arrays, None Indexes into quality control array to set the test bit. If not set or set to None will not set the test on any element of the quality control variable but will still add the test to the flag_masks, flag_meanings and flag_assessments attributes. test_number : int Test number to use. If keyword is not set will use first available test bit/test number. test_meaning : str String describing the test. Will be added to flag_meanings variable attribute. test_assessment : str String describing the test assessment. If not set will use "Bad" as the string to append to flag_assessments. Will update to be capitalized. flag_value : boolean Switch to use flag_values integer quality control. recyle : boolean Option to use number less than next highest test if available. For example tests 1, 2, 4, 5 are set. Set to true the next test chosen will be 3, else will be 6. Returns ------- test_dict : dict A dictionary containing information added to the QC variable. Examples -------- .. code-block:: python result = ds.qcfilter.add_test(var_name, test_meaning='Birds!') """ test_dict = {} if test_meaning is None: raise ValueError( 'You need to provide a value for test_meaning ' 'keyword when calling the add_test method' ) # This ensures the indexing will work even if given float values. # Preserves tuples from np.where() or boolean arrays for standard # python indexing. if index is not None and not isinstance(index, (np.ndarray, tuple)): index = np.array(index) if index.dtype.kind not in np.typecodes['AllInteger']: index = index.astype(int) # Ensure assessment is capitalized to be consistent test_assessment = test_assessment.capitalize() qc_var_name = self._ds.qcfilter.check_for_ancillary_qc(var_name, flag_type=flag_value) if test_number is None: test_number = self._ds.qcfilter.available_bit(qc_var_name, recycle=recycle) self._ds.qcfilter.set_test(var_name, index, test_number, flag_value) if flag_value: try: self._ds[qc_var_name].attrs['flag_values'].append(test_number) except KeyError: self._ds[qc_var_name].attrs['flag_values'] = [test_number] else: # Determine if flag_masks test number is too large for current data type. # If so up convert data type. flag_masks = np.array(self._ds[qc_var_name].attrs['flag_masks']) mask_dtype = flag_masks.dtype if not np.issubdtype(mask_dtype, np.integer): mask_dtype = np.uint32 if np.iinfo(mask_dtype).max - set_bit(0, test_number) <= -1: if mask_dtype == np.int8 or mask_dtype == np.uint8: mask_dtype = np.uint16 elif mask_dtype == np.int16 or mask_dtype == np.uint16: mask_dtype = np.uint32 elif mask_dtype == np.int32 or mask_dtype == np.uint32: mask_dtype = np.uint64 flag_masks = flag_masks.astype(mask_dtype) flag_masks = np.append(flag_masks, np.array(set_bit(0, test_number), dtype=mask_dtype)) self._ds[qc_var_name].attrs['flag_masks'] = list(flag_masks) try: self._ds[qc_var_name].attrs['flag_meanings'].append(test_meaning) except KeyError: self._ds[qc_var_name].attrs['flag_meanings'] = [test_meaning] try: self._ds[qc_var_name].attrs['flag_assessments'].append(test_assessment) except KeyError: self._ds[qc_var_name].attrs['flag_assessments'] = [test_assessment] test_dict['test_number'] = test_number test_dict['test_meaning'] = test_meaning test_dict['test_assessment'] = test_assessment test_dict['qc_variable_name'] = qc_var_name test_dict['variable_name'] = var_name return test_dict
[docs] def remove_test( self, var_name=None, test_number=None, qc_var_name=None, flag_value=False, flag_values_reset_value=0, ): """ Method to remove a test/filter from a quality control variable. Must set var_name or qc_var_name. Parameters ---------- var_name : str or None Data variable name. test_number : int Test number to remove. qc_var_name : str or None Quality control variable name. Ignored if var_name is set. flag_value : boolean Switch to use flag_values integer quality control. flag_values_reset_value : int Value to use when resetting a flag_values value to not be set. Examples -------- .. code-block:: python ds.qcfilter.remove_test(var_name, test_number=3) """ if test_number is None: raise ValueError( 'You need to provide a value for test_number ' 'keyword when calling the remove_test() method' ) if var_name is None and qc_var_name is None: raise ValueError( 'You need to provide a value for var_name or qc_var_name ' 'keyword when calling the remove_test() method' ) if var_name is not None: qc_var_name = self._ds.qcfilter.check_for_ancillary_qc(var_name) # Determine which index is using the test number index = None if flag_value: flag_values = self._ds[qc_var_name].attrs['flag_values'] for ii, flag_num in enumerate(flag_values): if flag_num == test_number: index = ii break else: flag_masks = self._ds[qc_var_name].attrs['flag_masks'] for ii, bit_num in enumerate(flag_masks): if parse_bit(bit_num)[0] == test_number: index = ii break # If can't find the index of test return before doing anything. if index is None: return if flag_value: remove_index = self._ds.qcfilter.get_qc_test_mask( var_name=var_name, qc_var_name=qc_var_name, test_number=test_number, return_index=True, flag_value=True, ) self._ds.qcfilter.unset_test( var_name=var_name, qc_var_name=qc_var_name, index=remove_index, test_number=test_number, flag_value=flag_value, flag_values_reset_value=flag_values_reset_value, ) del flag_values[index] self._ds[qc_var_name].attrs['flag_values'] = flag_values else: remove_index = self._ds.qcfilter.get_qc_test_mask( var_name=var_name, qc_var_name=qc_var_name, test_number=test_number, return_index=True, ) self._ds.qcfilter.unset_test( var_name=var_name, qc_var_name=qc_var_name, index=remove_index, test_number=test_number, flag_value=flag_value, ) if isinstance(flag_masks, list): del flag_masks[index] else: flag_masks = np.delete(flag_masks, index) self._ds[qc_var_name].attrs['flag_masks'] = flag_masks flag_meanings = self._ds[qc_var_name].attrs['flag_meanings'] del flag_meanings[index] self._ds[qc_var_name].attrs['flag_meanings'] = flag_meanings flag_assessments = self._ds[qc_var_name].attrs['flag_assessments'] del flag_assessments[index] self._ds[qc_var_name].attrs['flag_assessments'] = flag_assessments
[docs] def set_test(self, var_name, index=None, test_number=None, flag_value=False): """ Method to set a test/filter in a quality control variable. Parameters ---------- var_name : str Data variable name. index : int or list or numpy array Index to set test in quality control array. If want to unset all values will need to pass in index of all values. test_number : int Test number to set. flag_value : boolean Switch to use flag_values integer quality control. Examples -------- .. code-block:: python index = [0, 1, 2, 30] ds.qcfilter.set_test(var_name, index=index, test_number=2) """ qc_var_name = self._ds.qcfilter.check_for_ancillary_qc(var_name) qc_variable = np.array(self._ds[qc_var_name].values) # Ensure the qc_variable data type is integer. This ensures bitwise comparison # will not cause an error. if qc_variable.dtype.kind not in np.typecodes['AllInteger']: qc_variable = qc_variable.astype(int) # Determine if test number is too large for current data type. If so # up convert data type. dtype = qc_variable.dtype if np.iinfo(dtype).max - set_bit(0, test_number) < -1: if dtype == np.int8: dtype = np.int16 elif dtype == np.int16: dtype = np.int32 elif dtype == np.int32: dtype = np.int64 qc_variable = qc_variable.astype(dtype) if index is not None: if flag_value: if len(qc_variable.shape) == 0: qc_variable = test_number else: qc_variable[index] = test_number else: if bool(np.shape(index)): qc_variable[index] = set_bit(qc_variable[index], test_number) elif index == 0: qc_variable = set_bit(qc_variable, test_number) self._ds[qc_var_name].values = qc_variable
[docs] def unset_test( self, var_name=None, qc_var_name=None, index=None, test_number=None, flag_value=False, flag_values_reset_value=0, ): """ Method to unset a test/filter from a quality control variable. Parameters ---------- var_name : str or None Data variable name. qc_var_name : str or None Quality control variable name. Ignored if var_name is set. index : int or list or numpy array Index to unset test in quality control array. If want to unset all values will need to pass in index of all values. test_number : int Test number to remove. flag_value : boolean Switch to use flag_values integer quality control. flag_values_reset_value : int Value to use when resetting a flag_values value to not be set. Examples -------- .. code-block:: python ds.qcfilter.unset_test(var_name, index=range(10, 100), test_number=2) """ if index is None: return if var_name is None and qc_var_name is None: raise ValueError( 'You need to provide a value for var_name or qc_var_name ' 'keyword when calling the unset_test() method' ) if var_name is not None: qc_var_name = self._ds.qcfilter.check_for_ancillary_qc(var_name) # Get QC variable qc_variable = self._ds[qc_var_name].values # Ensure the qc_variable data type is integer. This ensures bitwise comparison # will not cause an error. if qc_variable.dtype.kind not in np.typecodes['AllInteger']: qc_variable = qc_variable.astype(int) if flag_value: qc_variable[index] = flag_values_reset_value else: qc_variable[index] = unset_bit(qc_variable[index], test_number) self._ds[qc_var_name].values = qc_variable
[docs] def available_bit(self, qc_var_name, recycle=False): """ Method to determine next available bit or flag to use with a QC test. Will check for flag_masks first and if not found will check for flag_values. This will drive how the next value is chosen. Parameters ---------- qc_var_name : str Quality control variable name. recycle : boolean Option to look for a bit (test) not in use starting from 1. If a test is not defined will return the lowest number, else will just use next highest number. Returns ------- test_num : int Next available test number. Examples -------- .. code-block:: python from act.tests import EXAMPLE_METE40 from act.io.arm import read_arm_netcdf ds = read_arm_netcdf(EXAMPLE_METE40, cleanup_qc=True) test_number = ds.qcfilter.available_bit('qc_atmos_pressure') print(test_number) """ try: flag_masks = self._ds[qc_var_name].attrs['flag_masks'] flag_value = False except KeyError: try: flag_masks = self._ds[qc_var_name].attrs['flag_values'] flag_value = True except KeyError: try: self._ds[qc_var_name].attrs['flag_values'] flag_masks = self._ds[qc_var_name].attrs['flag_masks'] flag_value = False except KeyError: raise ValueError( 'Problem getting next value from ' 'available_bit(). flag_values and ' 'flag_masks not set as expected' ) if flag_masks == []: next_bit = 1 else: if flag_value: if recycle: next_bit = min(set(range(1, 100000)) - set(flag_masks)) else: next_bit = max(flag_masks) + 1 else: if recycle: tests = [parse_bit(mask)[0] for mask in flag_masks] next_bit = min(set(range(1, 63)) - set(tests)) else: next_bit = parse_bit(max(flag_masks))[0] + 1 return int(next_bit)
[docs] def get_qc_test_mask( self, var_name=None, test_number=None, qc_var_name=None, flag_value=False, return_index=False, ): """ Returns a numpy array of False or True where a particular flag or bit is set in a numpy array. Must set var_name or qc_var_name when calling. Parameters ---------- var_name : str or None Data variable name. test_number : int Test number to return array where test is set. qc_var_name : str or None Quality control variable name. Ignored if var_name is set. flag_value : boolean Switch to use flag_values integer quality control. return_index : boolean Return a numpy array of index numbers into QC array where the test is set instead of False or True mask. Returns ------- test_mask : numpy bool array or numpy integer array A numpy boolean array with False or True where the test number or bit was set, or numpy integer array of indexes where test is True. Examples -------- .. code-block:: python from act.io.arm import read_arm_netcdf from act.tests import EXAMPLE_IRT25m20s ds = read_arm_netcdf(EXAMPLE_IRT25m20s) var_name = "inst_up_long_dome_resist" result = ds.qcfilter.add_test( var_name, index=[0, 1, 2], test_meaning="Birds!" ) qc_var_name = result["qc_variable_name"] mask = ds.qcfilter.get_qc_test_mask( var_name, result["test_number"], return_index=True ) print(mask) array([0, 1, 2]) mask = ds.qcfilter.get_qc_test_mask(var_name, result["test_number"]) print(mask) array([True, True, True, ..., False, False, False]) data = ds[var_name].values print(data[mask]) array([7.84, 7.8777, 7.8965], dtype=float32) import numpy as np data[mask] = np.nan print(data) array([nan, nan, nan, ..., 7.6705, 7.6892, 7.6892], dtype=float32) """ if var_name is None and qc_var_name is None: raise ValueError( 'You need to provide a value for var_name or qc_var_name ' 'keyword when calling the get_qc_test_mask() method' ) if test_number is None: raise ValueError( 'You need to provide a value for test_number ' 'keyword when calling the get_qc_test_mask() method' ) if var_name is not None: qc_var_name = self._ds.qcfilter.check_for_ancillary_qc(var_name) qc_variable = self._ds[qc_var_name].values # Ensure the qc_variable data type is integer. This ensures bitwise comparison # will not cause an error. if qc_variable.dtype.kind not in np.typecodes['AllInteger']: qc_variable = qc_variable.astype(int) if flag_value: tripped = qc_variable == test_number else: check_bit = set_bit(0, test_number) & qc_variable tripped = check_bit > 0 test_mask = np.full(qc_variable.shape, False, dtype='bool') # Make sure test_mask is an array. If qc_variable is scalar will # be retuned from np.zeros as scalar. test_mask = np.atleast_1d(test_mask) test_mask[tripped] = True test_mask = np.ma.make_mask(test_mask, shrink=False) if return_index: test_mask = np.where(test_mask)[0] return test_mask
[docs] def get_masked_data( self, var_name, rm_assessments=None, rm_tests=None, return_nan_array=False, ma_fill_value=None, return_inverse=False, return_mask_only=False, ): """ Returns a numpy masked array containing data and mask or a numpy float array with masked values set to NaN. Parameters ---------- var_name : str Data variable name. rm_assessments : str or list of str Assessment name to exclude from returned data. rm_tests : int or list of int Test numbers to exclude from returned data. This is the test number (or bit position number) not the mask number. return_nan_array : boolean Return a numpy array with filtered ( or masked) values set to numpy NaN value. If the data is type int will upconvert to numpy float to allow setting NaN value. ma_fill_value : int or float (or str?) The numpy masked array fill_value used in creation of the the masked array. If the datatype needs to be upconverted to allow the fill value to be used, data will be upconverted. return_inverse : boolean Invert the masked array mask or return data array where mask is set to False instead of True set to NaN. Useful for overplotting where failing. return_mask_only : boolean Return the boolean mask only as a numpy array. Returns ------- variable : numpy masked array or numpy float array Default is to return a numpy masked array with the mask set to True where the test with requested assessment or test number was found set. If return_nan_array is True will return numpy array upconverted to float with locations where the test with requested assessment or test number was found set converted to NaN. Examples -------- .. code-block:: python from act.io.arm import read_arm_netcdf from act.tests import EXAMPLE_IRT25m20s ds = read_arm_netcdf(EXAMPLE_IRT25m20s) var_name = "inst_up_long_dome_resist" result = ds.qcfilter.add_test( var_name, index=[0, 1, 2], test_meaning="Birds!" ) data = ds.qcfilter.get_masked_data( var_name, rm_assessments=["Bad", "Indeterminate"] ) print(data) masked_array( data=[..., 7.670499801635742, 7.689199924468994, 7.689199924468994], mask=[..., False, False, False], fill_value=1e20, dtype=float32, ) """ qc_var_name = self._ds.qcfilter.check_for_ancillary_qc(var_name, add_if_missing=False) flag_value = False flag_values = None flag_masks = None flag_assessments = None try: flag_assessments = self._ds[qc_var_name].attrs['flag_assessments'] flag_masks = self._ds[qc_var_name].attrs['flag_masks'] except KeyError: pass try: flag_values = self._ds[qc_var_name].attrs['flag_values'] flag_value = True except KeyError: pass test_numbers = [] if rm_tests is not None: if isinstance(rm_tests, (int, float, str)): rm_tests = [int(rm_tests)] test_numbers.extend(rm_tests) if rm_assessments is not None: if isinstance(rm_assessments, str): rm_assessments = [rm_assessments] if flag_masks is not None: test_nums = [parse_bit(mask)[0] for mask in flag_masks] if flag_values is not None: test_nums = flag_values rm_assessments = [x.lower() for x in rm_assessments] if flag_assessments is not None: for ii, assessment in enumerate(flag_assessments): if assessment.lower() in rm_assessments: test_numbers.append(test_nums[ii]) # Make the list of test numbers to mask unique test_numbers = list(set(test_numbers)) # Create mask of indexes by looking where each test is set variable = self._ds[var_name].values nan_dtype = np.float32 if variable.dtype in (np.float64, np.int64): nan_dtype = np.float64 try: # Get shape of mask from QC variable since there is a chance it will # be a different shape than data variable. mask = np.zeros(self._ds[qc_var_name].shape, dtype=bool) except KeyError: # If there is no QC variable make mask from shape of data variable. mask = np.zeros(self._ds[var_name].shape, dtype=bool) for test in test_numbers: qc_test_mask = self._ds.qcfilter.get_qc_test_mask(var_name, test, flag_value=flag_value) mask = mask | qc_test_mask # If requested only return the mask. if return_mask_only: return mask # Convert data numpy array into masked array try: variable = np.ma.array(variable, mask=mask, fill_value=ma_fill_value) except TypeError: variable = np.ma.array( variable, mask=mask, fill_value=ma_fill_value, dtype=np.array(ma_fill_value).dtype, ) # If requested switch array from where data is not failing tests # to where data is failing tests. This can be used when over plotting # where the data if failing the tests. if return_inverse: mask = variable.mask mask = np.invert(mask) variable.mask = mask # If asked to return numpy array with values set to NaN if return_nan_array: variable = variable.astype(nan_dtype) variable = variable.filled(fill_value=np.nan) return variable
[docs] def datafilter( self, variables=None, rm_assessments=None, rm_tests=None, verbose=False, del_qc_var=False, no_NaN=False, ): """ Method to apply quality control variables to data variables by changing the data values in the dataset using quality control variables. The data is updated with failing data set to NaN. This can be used to update the data variable in the xarray dataset for use with xarray methods to perform analysis on the data since those methods don't read the quality control variables. Parameters ---------- variables : None or str or list of str Data variable names to process. If set to None will update all data variables. rm_assessments : str or list of str Assessment names listed under quality control varible flag_assessments to exclude from returned data. Examples include ['Bad', 'Incorrect', 'Indeterminate', 'Suspect'] rm_tests : int or list of int Test numbers listed under quality control variable to exclude from returned data. This is the test number (or bit position number) not the mask number. verbose : boolean Print processing information. del_qc_var : boolean Option to delete quality control variable after processing. Since the data values can not be determined after they are set to NaN and xarray method processing would also process the quality control variables, the default is to remove the quality control data variables. Defaults to False. no_NaN : boolean Should the returned Xarray Dataset use NaN as the missing value indicator. If Xarray did not convert the _FillValue or missing_value to NaN upon reading the same missing value indicator should be used. If set to true will try to determine the current missing_value or _FillValue set in the file and use that value. If neither are set as a variable attribute it will use the default value (most likely -9999). Examples -------- .. code-block:: python from act.io.arm import read_arm_netcdf from act.tests import EXAMPLE_MET1 ds = read_arm_netcdf(EXAMPLE_MET1) ds.clean.cleanup() var_name = "atmos_pressure" ds_1 = ds.nanmean() ds.qcfilter.add_less_test(var_name, 99, test_assessment="Bad") ds.qcfilter.datafilter(rm_assessments="Bad") ds_2 = ds.nanmean() print("All_data =", ds_1[var_name].values) All_data = 98.86098 print("Bad_Removed =", ds_2[var_name].values) Bad_Removed = 99.15148 """ if rm_assessments is None and rm_tests is None: raise ValueError('Need to set rm_assessments or rm_tests option') if variables is not None and isinstance(variables, str): variables = [variables] if variables is None: variables = list(self._ds.data_vars) for var_name in variables: qc_var_name = self.check_for_ancillary_qc(var_name, add_if_missing=False, cleanup=False) if qc_var_name is None: if verbose: if var_name in ['base_time', 'time_offset']: continue try: if self._ds[var_name].attrs['standard_name'] == 'quality_flag': continue except KeyError: pass print( f'No quality control variable for {var_name} found ' f'in call to .qcfilter.datafilter()' ) continue # Need to return data as Numpy array with NaN values. Setting the Dask array # to Numpy masked array does not work with other tools. data = self.get_masked_data( var_name, rm_assessments=rm_assessments, rm_tests=rm_tests, return_nan_array=True ) if no_NaN: missing_value = get_missing_value(self._ds, var_name, add_if_missing_in_ds=True) index = np.isnan(data) if np.any(index): data[index] = missing_value # If data was orginally stored as Dask array return values to Dataset as Dask array # else set as Numpy array. try: self._ds[var_name].data = dask.array.from_array( data, chunks=self._ds[var_name].data.chunksize ) except AttributeError: self._ds[var_name].values = data # Adding information on filtering to history attribute flag_masks = None flag_assessments = None flag_meanings = None try: flag_assessments = list(self._ds[qc_var_name].attrs['flag_assessments']) flag_masks = list(self._ds[qc_var_name].attrs['flag_masks']) flag_meanings = list(self._ds[qc_var_name].attrs['flag_meanings']) except KeyError: pass # Add comment to history for each test that's filtered out if rm_tests is not None: if isinstance(rm_tests, int): rm_tests = [rm_tests] for test in rm_tests: try: index = flag_masks.index(set_bit(0, test)) except ValueError: continue comment = f'act.qc.datafilter: {flag_meanings[index]}' if 'history' in self._ds[var_name].attrs.keys(): self._ds[var_name].attrs['history'] += f'\n {comment}' else: self._ds[var_name].attrs['history'] = comment if rm_assessments is not None: if isinstance(rm_assessments, str): rm_assessments = [rm_assessments] for assessment in rm_assessments: if assessment in flag_assessments: index = [i for i, e in enumerate(flag_assessments) if e == assessment] for ind in index: comment = f'act.qc.datafilter: {flag_meanings[ind]}' if 'history' in self._ds[var_name].attrs.keys(): self._ds[var_name].attrs['history'] += f'\n {comment}' else: self._ds[var_name].attrs['history'] = comment # If requested delete quality control variable if del_qc_var: del self._ds[qc_var_name] try: if self._ds[var_name].attrs['ancillary_variables'] == qc_var_name: del self._ds[var_name].attrs['ancillary_variables'] except KeyError: pass if verbose: print(f'Deleting {qc_var_name} from dataset')
[docs]def set_bit(array, bit_number): """ Function to set a quality control bit given a scalar or array of values and a bit number. Parameters ---------- array : int list of int or numpy array of int The bitpacked array to set the bit number. bit_number : int The bit (or test) number to set starting at 1. Returns ------- array : int, numpy array, tuple, list Integer or numpy array with bit set for each element of the array. Returned in same type. Examples -------- Example use setting bit 2 to an array called data: .. code-block:: python from act.qc.qcfilter import set_bit data = np.array(range(0, 7)) data = set_bit(data, 2) print(data) array([2, 3, 2, 3, 6, 7, 6]) """ was_list = False was_tuple = False if isinstance(array, list): array = np.array(array) was_list = True if isinstance(array, tuple): array = np.array(array) was_tuple = True if bit_number > 0: array |= 1 << bit_number - 1 if was_list: array = list(array) if was_tuple: array = tuple(array) return array
[docs]def unset_bit(array, bit_number): """ Function to remove a quality control bit given a scalar or array of values and a bit number. Parameters ---------- array : int list of int or numpy array Array of integers containing bit packed numbers. bit_number : int Bit number to remove starting at 1. Returns ------- array : int or numpy array Returns same data type as array entered with bit removed. Will fail gracefully if the bit requested to be removed was not set. Examples -------- .. code-block:: python from act.qc.qcfilter import set_bit, unset_bit data = set_bit([0, 1, 2, 3, 4], 2) data = set_bit(data, 3) print(data) [6, 7, 6, 7, 6] data = unset_bit(data, 2) print(data) [4, 5, 4, 5, 4] """ was_list = False was_tuple = False if isinstance(array, list): array = np.array(array) was_list = True if isinstance(array, tuple): array = np.array(array) was_tuple = True if bit_number > 0: array &= ~(1 << bit_number - 1) if was_list: array = list(array) if was_tuple: array = tuple(array) return array
[docs]def parse_bit(qc_bit): """ Given a single integer value, return bit positions. Parameters ---------- qc_bit : int or numpy int Bit packed integer number to be parsed. Returns ------- bit_number : numpy.int32 array Array containing all bit numbers of the bit packed number. If no bits set returns empty array. Examples -------- .. code-block:: python from act.qc.qcfilter import parse_bit parse_bit(7) array([1, 2, 3], dtype=int32) """ if isinstance(qc_bit, (list, tuple, np.ndarray)): if len(qc_bit) > 1: raise ValueError('Must be a single value.') qc_bit = qc_bit[0] if qc_bit < 0: raise ValueError('Must be a positive integer.') # Convert integer value to single element numpy array of type unsigned integer 64 value = np.array([qc_bit]).astype(">u8") # Convert value to view containing only unsigned integer 8 data type. This # is required for the numpy unpackbits function which only works with # unsigned integer 8 bit data type. value = value.view("u1") # Unpack bits using numpy into array of 1 where bit is set and convert into boolean array index = np.unpackbits(value).astype(bool) # Create range of numbers from 64 to 1 and subset where unpackbits found a bit set. bit_number = np.arange(index.size, 0, -1)[index] # Flip the array to increasing numbers to match historical method bit_number = np.flip(bit_number) # bit_number = [] # qc_bit = int(qc_bit) # counter = 0 # while qc_bit > 0: # temp_value = qc_bit % 2 # qc_bit = qc_bit >> 1 # counter += 1 # if temp_value == 1: # bit_number.append(counter) # Convert data type into expected type bit_number = np.asarray(bit_number, dtype=np.int32) return bit_number