{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Working with netCDF groups\n\nThis is an example about how to work with netCDF\ngroups. Xarray does not natively read netCDF group\nfiles, but it does have the ability to read the\ndata with a few independent calls.\n\nAuthor: Ken Kehoe\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import matplotlib.pyplot as plt\nimport numpy as np\nimport xarray as xr\nfrom arm_test_data import DATASETS\n\nfrom act.io.arm import read_arm_netcdf\nfrom act.plotting import TimeSeriesDisplay\n\n# This data file is a bit complicated with the group organization. Each group\n# will need to be treated as a different netCDF file for reading. We can read each group\n# independently and merge into a single Dataset to use with standard Xarray or ACT methods.\n\n# Top Level:\n# data\n#\n# Groups:\n# - data\n#   - light_absorption\n#     - instrument\n#   - particle_concentration\n#     - instrument\n#   - light_scattering\n#     - instrument\n\nfilename = DATASETS.fetch(\n    'ESRL-GMD-AEROSOL_v1.0_HOUR_MLO_s20200101T000000Z_e20210101T000000Z_c20210214T053835Z.nc'\n)\n\n# We start by reading the location information from the top level of the netCDF file.\n# This is a standard Xarary call without a group keyword. Only the top level data is read.\n# The returned Dataset will contain all top level variables and top level global attributes.\n# We will use the direct Xarray call to read data since there is no time dimension at the top\n# level so we don't need ACT to do anything for us.\nds_location = xr.open_mfdataset(filename)\n\n# Second, we read the 'data' group. This has a time dimension so we can use ACT to manage\n# correctly reading and formatting that information. We need to specify the 'data' group\n# to be read. This will read the data group but not the sub-group.\nds_data = read_arm_netcdf(filename, group='data')\n\n# Third, we read the 'light_scattering' sub-group. We can read sub-groups\n# by using standard Linux directory path notation. Notice the light_scattering group is a\n# sub-group to 'data'.\nds_ls = xr.open_mfdataset(filename, group='data/light_scattering')\n\n# We can also read the third group 'instrument'. This only contains a small amount of information\n# about the instrument used to collect the data for 'light_scattering' and has no\n# dimensions, only scalar values.\nds = xr.open_mfdataset(filename, group='data/light_scattering/instrument')\n\n# Since the dimensionality aligns we can merge the three Datasets into a single Dataset.\nds = xr.merge([ds_data, ds_ls, ds_location])\n\n# Since the data file contains data for a full year we can subset the Dataset to be easier\n# to work with and process faster.\nds = ds.sel(time=slice('2020-01-01T00:00:00', '2020-01-04T23:59:59'))\n\n# The data is written with dimensionality in reverse order from what ACT expects. We need to\n# reverse the dimensionality order.\nds = ds.transpose()\n\n# Since we want to only plot the data from one of the cut sizes we can subset the Dataset\n# to only have data where the cut_size variable is set to a single value. This will reduce\n# the dimensionality from 3 to 2 or 2 to 1 dimensions on the variables. Take note that\n# the value of cut size is not the um value, it is the index. Since the index values\n# are 0 or 1, 0 = 1 um and 1 = 10 um. We are subsetting for less than 10 um.\nds = ds.isel(cut_size=1)\n\n# netCDF has a default _FillValue that is used to indicate when the values are missing. There\n# is a known issue with float precision that does not correctly set the _FillValue to NaN\n# when reading. We need to set the obviously incorrect values to NaN for the data variables\n# that can have this problem.\nfor var_name in ds.data_vars:\n    try:\n        data = ds[var_name].values\n        data[ds[var_name].values >= 9e36] = np.nan\n        ds[var_name].values = data\n    except np._core._exceptions._UFuncNoLoopError:\n        pass\n\n# This is a querk of reading the data. If we want to plot the day/night background correctly\n# we need to delete these global attribute.\ndel ds.attrs['_file_dates']\ndel ds.attrs['_file_times']\n\n# Since the data variable has a second dimension of wavelength and we want to plot each one as a\n# line plot, we will pass force_line_plot=True to force the second dimension to be removed from\n# the time series plot. We will want to set the labels to view on the plot by extracting the values\n# from the wavelenght dimension and pass into the plotting call.\nlabels = [f\"{int(wl)} {ds['wavelength'].attrs['units']}\" for wl in ds['wavelength'].values]\n\ndisplay = TimeSeriesDisplay({'ESRL ML': ds})\ndisplay.plot(\n    'scattering_coefficient', day_night_background=True, force_line_plot=True, labels=labels\n)\nplt.show()\n\n# A second option is to extract the wavelength dimension form the variable and create a new variable\n# to plot. We are selecting the wavelength by using index so a value of 1 = 550 um. We need to\n# provide a new variable name not already in use and correctly describes the data.\nds['scattering_coefficient_450'] = ds['scattering_coefficient'].isel(indexers={'wavelength': 0})\nds['scattering_coefficient_550'] = ds['scattering_coefficient'].isel(indexers={'wavelength': 1})\nds['scattering_coefficient_700'] = ds['scattering_coefficient'].isel(indexers={'wavelength': 2})\n\ndisplay = TimeSeriesDisplay({'ESRL ML': ds})\ntitle = 'ESRL ML Scatter Coefficient from 2020-01-01 to 2020-01-04'\ndisplay.plot('scattering_coefficient_450', label=labels[0], day_night_background=True)\ndisplay.plot('scattering_coefficient_550', label=labels[1])\ndisplay.plot('scattering_coefficient_700', label=labels[2], set_title=title)\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.14.3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}