{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "# Working with embedded quality control variables\n",
    "\n",
    "This is an example of how to use existing or create new quality\n",
    "control variables. All the tests are located in the act/qc/qctests.py\n",
    "file but are called under the qcfilter method.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from arm_test_data import DATASETS\n",
    "\n",
    "from act.io.arm import read_arm_netcdf\n",
    "from act.qc.qcfilter import parse_bit\n",
    "\n",
    "# Read a data file that does not have any embedded quality control\n",
    "# variables. This data comes from the example dataset within ACT.\n",
    "# Can also read data that has existing quality control variables\n",
    "# and add, manipulate or use those variables in the same way.\n",
    "filename_irt = DATASETS.fetch('sgpirt25m20sC1.a0.20190601.000000.cdf')\n",
    "ds = read_arm_netcdf(filename_irt)\n",
    "\n",
    "# The name of the data variable we wish to work with\n",
    "var_name = 'inst_up_long_dome_resist'\n",
    "\n",
    "# Since there is no embedded quality control variable one will be\n",
    "# created for us.\n",
    "# We can start with adding where the data are set to missing value.\n",
    "# First we will change the first value to NaN to simulate where\n",
    "# a missing value exists in the data file.\n",
    "data = ds[var_name].values\n",
    "data[0] = np.nan\n",
    "ds[var_name].values = data\n",
    "\n",
    "# Add a test for where the data are set to missing value.\n",
    "# Since a quality control variable does not exist in the file\n",
    "# one will be created as part of adding this test.\n",
    "result = ds.qcfilter.add_missing_value_test(var_name)\n",
    "\n",
    "# The returned dictionary will contain the information added to the\n",
    "# quality control variable for direct use now. Or the information\n",
    "# can be looked up later for use.\n",
    "print('\\nresult =', result)\n",
    "\n",
    "# We can add a second test where data is less than a specified value.\n",
    "# The assigned test number is saved so this test can be referenced\n",
    "# by number at the end of the example.\n",
    "result = ds.qcfilter.add_less_test(var_name, 7.8)\n",
    "less_test_number = result['test_number']\n",
    "\n",
    "# Next we add a test to indicate where a value is greater than\n",
    "# or equal to a specified number. We also set the assessment\n",
    "# to a user defined word. The default assessment is \"Bad\".\n",
    "result = ds.qcfilter.add_greater_equal_test(var_name, 12, test_assessment='Suspect')\n",
    "\n",
    "# We can now get the data as a numpy masked array with a mask set\n",
    "# where the third test we added (greater than or equal to) is set, using\n",
    "# the result dictionary to get the test number created for us.\n",
    "data = ds.qcfilter.get_masked_data(var_name, rm_tests=result['test_number'])\n",
    "print('\\nData type =', type(data))\n",
    "\n",
    "# Or we can get the masked array for all tests that use the assessment\n",
    "# set to \"Bad\".\n",
    "data = ds.qcfilter.get_masked_data(var_name, rm_assessments=['Bad'])\n",
    "\n",
    "# If we prefer to mask all data for both Bad or Suspect we can list\n",
    "# as many assessments as needed\n",
    "data = ds.qcfilter.get_masked_data(var_name, rm_assessments=['Suspect', 'Bad'])\n",
    "print('\\ndata =', data)\n",
    "\n",
    "# We can convert the masked array into numpy array and choose the fill value.\n",
    "data = data.filled(fill_value=np.nan)\n",
    "print('\\ndata filled with masked array fill_value =', data)\n",
    "\n",
    "# We can create our own test by creating an array of indexes of where\n",
    "# we want the test to be set and call the method to create our own test.\n",
    "# We can allow the method to pick the test number (next available)\n",
    "# or set the test number we want to use. This example uses test number\n",
    "# 5 to demonstrate how not all tests need to be used in order.\n",
    "data = ds.qcfilter.get_masked_data(var_name)\n",
    "diff = np.diff(data)\n",
    "max_difference = 0.04\n",
    "data = np.ma.masked_greater(diff, max_difference)\n",
    "# getmaskarray() always returns a full boolean array, so the index\n",
    "# lookup also works if no difference exceeds the limit (the plain\n",
    "# .mask attribute may then be a scalar instead of an array).\n",
    "index = np.where(np.ma.getmaskarray(data))[0]\n",
    "result = ds.qcfilter.add_test(\n",
    "    var_name,\n",
    "    index=index,\n",
    "    test_meaning=f'Difference is greater than {max_difference}',\n",
    "    test_assessment='Suspect',\n",
    "    test_number=5,\n",
    ")\n",
    "# Save the test number of the difference test for use at the end\n",
    "# of the example.\n",
    "difference_test_number = result['test_number']\n",
    "\n",
    "# If we prefer to work with numpy arrays directly we can return the\n",
    "# data array converted to a numpy array with masked values set\n",
    "# to NaN. Here we are requesting both Suspect and Bad data be masked.\n",
    "data = ds.qcfilter.get_masked_data(\n",
    "    var_name, rm_assessments=['Suspect', 'Bad'], return_nan_array=True\n",
    ")\n",
    "print('\\nData type =', type(data))\n",
    "print('data =', data)\n",
    "\n",
    "# We can see how the quality control data is stored and what assessments,\n",
    "# or test descriptions are set. Some of the tests have also added attributes to\n",
    "# store the test limit values.\n",
    "qc_variable = ds[result['qc_variable_name']]\n",
    "print('\\nQC Variable =', qc_variable)\n",
    "\n",
    "# The test numbers are not the flag_masks numbers. The flag masks numbers\n",
    "# are bit-packed numbers used to store what bit is set. To see the test\n",
    "# numbers we can unpack the bits.\n",
    "print('\\nmask : test')\n",
    "print('-' * 11)\n",
    "for mask in qc_variable.attrs['flag_masks']:\n",
    "    print(mask, ' : ', parse_bit(mask))\n",
    "\n",
    "# We can also just use the get_masked_data() method to get data\n",
    "# the same as using \".values\" method on the xarray dataset. If we don't\n",
    "# request any tests or assessments to mask the returned masked array\n",
    "# will not have any mask set. The returned value is a numpy masked array\n",
    "# where the raw numpy array is accessible with .data property.\n",
    "data = ds.qcfilter.get_masked_data(var_name)\n",
    "print('\\nNormal numpy array data values:', data.data)\n",
    "print('Mask associated with values:', data.mask)\n",
    "\n",
    "# We can use the get_masked_data() method to return a masked array\n",
    "# where the test is set in the quality control variable, and use the\n",
    "# masked array method to see if any of the values have the test set.\n",
    "# The test numbers saved when the tests were added are used here so\n",
    "# that each message reports on the test it names.\n",
    "data = ds.qcfilter.get_masked_data(var_name, rm_tests=less_test_number)\n",
    "print('\\nAt least one less than test set =', data.mask.any())\n",
    "data = ds.qcfilter.get_masked_data(var_name, rm_tests=difference_test_number)\n",
    "print('At least one difference test set =', data.mask.any())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.14.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}