# Source code for act.retrievals.sonde

"""
Functions for radiosonde related calculations.

"""

import numpy as np
import xarray as xr
from operator import itemgetter
from itertools import groupby
import metpy.calc as mpcalc
from metpy.units import units

from act.utils.data_utils import convert_to_potential_temp


def calculate_precipitable_water(ds, temp_name='tdry', rh_name='rh', pres_name='pres'):
    """
    Function to calculate precipitable water vapor from ARM sondewnpn b1 data.
    Will first calculate saturation vapor pressure of all data using Arden-Buck
    equations, then calculate specific humidity and integrate over all pressure
    levels to give us a precipitable water value in centimeters.

    Parameters
    ----------
    ds : xarray.Dataset
        Xarray dataset as read in by the ACT netCDF reader.
    temp_name : str
        Name of temperature field to use. Defaults to 'tdry' for sondewnpn b1
        level data.
    rh_name : str
        Name of relative humidity field to use. Defaults to 'rh' for sondewnpn
        b1 level data.
    pres_name : str
        Name of atmospheric pressure field to use. Defaults to 'pres' for
        sondewnpn b1 level data.

    Returns
    -------
    pwv : float
        Precipitable water obtained by trapezoidal integration of specific
        humidity over every pair of adjacent pressure levels. Profiles with
        fewer than two levels give 0.0.

    """
    # Flatten to 1-D float arrays so each step is one vectorized expression.
    temp = np.asarray(ds[temp_name].values, dtype=float).ravel()
    rh = np.asarray(ds[rh_name].values, dtype=float).ravel()
    pres = np.asarray(ds[pres_name].values, dtype=float).ravel()

    # Apply Arden-Buck equation to get saturation vapor pressure. Both branches
    # are evaluated for every level and np.where picks the applicable one, so
    # floating-point warnings from the branch that is not selected are silenced.
    with np.errstate(divide='ignore', invalid='ignore', over='ignore'):
        over_water = 0.61121 * np.exp((18.678 - (temp / 234.5)) * (temp / (257.14 + temp)))
        over_ice = 0.61115 * np.exp((23.036 - (temp / 333.7)) * (temp / (279.82 + temp)))
    # Over liquid water at or above freezing, over ice below freezing
    sat_vap_pres = np.where(temp >= 0, over_water, over_ice)

    # Convert rh from % to decimal and get vapor pressure
    vap_pres = (rh / 100.0) * sat_vap_pres

    # Mixing ratio, then specific humidity
    mix_rat = 0.622 * vap_pres / (pres - vap_pres)
    spec_hum = mix_rat / (1 + mix_rat)

    # Integrate specific humidity with the trapezoidal rule over *every* layer.
    # The previous loop stopped one index early and left out the top layer.
    layers = 0.5 * (spec_hum[1:] + spec_hum[:-1]) * (pres[:-1] - pres[1:])
    pwv = np.sum(layers) / 0.098
    return pwv


def calculate_stability_indicies(
    ds,
    temp_name='temperature',
    td_name='dewpoint_temperature',
    p_name='pressure',
    moving_ave_window=0,
):
    """
    Function for calculating stability indices from sounding data.

    Parameters
    ----------
    ds : ACT dataset
        The dataset to compute the stability indicies of. Must have
        temperature, dewpoint, and pressure in vertical coordinates.
    temp_name : str
        The name of the temperature field.
    td_name : str
        The name of the dewpoint field.
    p_name : str
        The name of the pressure field.
    moving_ave_window : int
        Number of points to do a moving average on sounding data to reduce
        noise. This is useful if noise in the sounding is preventing parcel
        ascent. Only windows fully inside the profile are averaged, so the
        smoothed profile is ``moving_ave_window - 1`` points shorter.

    Returns
    -------
    ds : ACT dataset
        An ACT dataset with additional stability indicies added:
        parcel_temperature, surface_based_cape, surface_based_cin,
        most_unstable_cape, most_unstable_cin, lifted_index,
        level_of_free_convection, lifted_condensation_level_temperature and
        lifted_condensation_level_pressure.

    Raises
    ------
    AttributeError
        If the temperature, dewpoint or pressure field has no ``units``
        attribute.

    """
    t = ds[temp_name]
    td = ds[td_name]
    p = ds[p_name]

    if not hasattr(t, 'units'):
        raise AttributeError('Temperature field must have units for ACT to discern!')

    if not hasattr(td, 'units'):
        raise AttributeError('Dewpoint field must have units for ACT to discern!')

    if not hasattr(p, 'units'):
        raise AttributeError('Pressure field must have units for ACT to discern!')

    # A bare 'C' is mapped explicitly to degC; any other unit string is looked
    # up by name on the MetPy unit registry.
    t_units = units.degC if t.units == 'C' else getattr(units, t.units)
    td_units = units.degC if td.units == 'C' else getattr(units, td.units)
    p_units = getattr(units, p.units)

    # Sort all values by decreasing pressure. The full reversed ordering is
    # used so that no level is lost (the earlier [-1:0:-1] slice stopped one
    # short and dropped the lowest-pressure level).
    p_sorted = np.array(p.values)
    order = np.argsort(p_sorted)[::-1]
    t_sorted = np.array(t.values)[order]
    td_sorted = np.array(td.values)[order]
    p_sorted = p_sorted[order]

    if moving_ave_window > 0:
        # mode='valid' keeps only fully overlapped windows. The NumPy default
        # ('full') zero-pads the ends, which lengthens the profile and drags
        # the first and last values toward zero.
        kernel = np.ones((moving_ave_window,)) / moving_ave_window
        t_sorted = np.convolve(t_sorted, kernel, mode='valid')
        td_sorted = np.convolve(td_sorted, kernel, mode='valid')
        p_sorted = np.convolve(p_sorted, kernel, mode='valid')

    t_sorted = t_sorted * t_units
    td_sorted = td_sorted * td_units
    p_sorted = p_sorted * p_units

    # Calculate parcel trajectory, lifted from the first (highest pressure) level
    t_profile = mpcalc.parcel_profile(p_sorted, t_sorted[0], td_sorted[0])

    ds['parcel_temperature'] = t_profile.magnitude
    ds['parcel_temperature'].attrs['units'] = t_profile.units

    # Calculate CAPE, CIN, LCL
    sbcape, sbcin = mpcalc.surface_based_cape_cin(p_sorted, t_sorted, td_sorted)

    lcl = mpcalc.lcl(p_sorted[0], t_sorted[0], td_sorted[0])
    # NOTE(review): lfc is given only the first-level values rather than the
    # full profile, and an IndexError is mapped to NaN - confirm against the
    # MetPy lfc documentation whether the whole profile should be passed.
    try:
        lfc = mpcalc.lfc(p_sorted[0], t_sorted[0], td_sorted[0])
    except IndexError:
        lfc = np.nan * p_sorted.units

    mucape, mucin = mpcalc.most_unstable_cape_cin(p_sorted, t_sorted, td_sorted)

    # Lifted index: environment minus parcel temperature at the level closest
    # to 500 hPa
    where_500 = np.argmin(np.abs(p_sorted - 500 * units.hPa))
    li = t_sorted[where_500] - t_profile[where_500]

    ds['surface_based_cape'] = sbcape.magnitude
    ds['surface_based_cape'].attrs['units'] = 'J/kg'
    ds['surface_based_cape'].attrs['long_name'] = 'Surface-based CAPE'
    ds['surface_based_cin'] = sbcin.magnitude
    ds['surface_based_cin'].attrs['units'] = 'J/kg'
    ds['surface_based_cin'].attrs['long_name'] = 'Surface-based CIN'
    ds['most_unstable_cape'] = mucape.magnitude
    ds['most_unstable_cape'].attrs['units'] = 'J/kg'
    ds['most_unstable_cape'].attrs['long_name'] = 'Most unstable CAPE'
    ds['most_unstable_cin'] = mucin.magnitude
    ds['most_unstable_cin'].attrs['units'] = 'J/kg'
    ds['most_unstable_cin'].attrs['long_name'] = 'Most unstable CIN'
    ds['lifted_index'] = li.magnitude
    ds['lifted_index'].attrs['units'] = t_profile.units
    ds['lifted_index'].attrs['long_name'] = 'Lifted index'
    ds['level_of_free_convection'] = lfc.magnitude
    ds['level_of_free_convection'].attrs['units'] = lfc.units
    ds['level_of_free_convection'].attrs['long_name'] = 'Level of free convection'
    # mpcalc.lcl returns a pair; index 0 is stored as the pressure and index 1
    # as the temperature
    ds['lifted_condensation_level_temperature'] = lcl[1].magnitude
    ds['lifted_condensation_level_temperature'].attrs['units'] = lcl[1].units
    ds['lifted_condensation_level_temperature'].attrs[
        'long_name'
    ] = 'Lifted condensation level temperature'
    ds['lifted_condensation_level_pressure'] = lcl[0].magnitude
    ds['lifted_condensation_level_pressure'].attrs['units'] = lcl[0].units
    ds['lifted_condensation_level_pressure'].attrs[
        'long_name'
    ] = 'Lifted condensation level pressure'
    return ds


def calculate_pbl_liu_liang(
    ds,
    temperature='tdry',
    pressure='pres',
    windspeed='wspd',
    height='alt',
    smooth_height=3,
    land_parameter=True,
    llj_max_alt=1500.0,
    llj_max_wspd=2.0,
):
    """
    Function for calculating the PBL height from a radiosonde profile
    using the Liu-Liang 2010 technique.  There are some slight discrepancies
    in the function from the ARM implementation: 1.) it imposes a 1500m (keyword)
    height on the definition of the LLJ and 2.) the interpolation is slightly
    different using python functions.

    Parameters
    ----------
    ds : xarray Dataset
        Dataset housing radiosonde profile for calculations
    temperature : str
        The name of the temperature field.
    pressure : str
        The name of the pressure field.
    windspeed : str
        The name of the  wind speed field.
    height : str
        The name of the height field
    smooth_height : int
        Number of points to do a moving average on sounding height data to reduce noise
    land_parameter : boolean
        Set to True if retrievals over land or false to retrievals over water
    llj_max_alt : float
        Maximum altitude the LLJ 2 m/s difference should be checked against
    llj_max_wspd : float
        Maximum wind speed threshold to use to define LLJ

    Returns
    -------
    ds : xarray Dataset
        xarray dataset with results stored in pblht_liu_liang variable. The
        regime ('CBL', 'SBL' or 'NRL') is stored in pblht_regime_liu_liang, the
        gridded profile in atm_pres_ss, potential_temperature_ss and alt_ss,
        and the two stable-regime estimates in pblht_liu_liang_stable_cond and
        pblht_liu_liang_shear_cond. A height of -9999.0 means no level
        satisfied the criteria.

    Raises
    ------
    ValueError
        If the potential temperature difference used to classify the regime
        is NaN.

    References
    ----------
    Liu, Shuyan, and Xin-Zhong Liang. "Observed diurnal cycle climatology of planetary
        boundary layer height." Journal of Climate 23, no. 21 (2010): 5790-5809.

    Sivaraman, C., S. McFarlane, E. Chapman, M. Jensen, T. Toto, S. Liu, and M. Fischer.
        "Planetary boundary layer (PBL) height value added product (VAP): Radiosonde retrievals."
        Department of Energy Office of Science Atmospheric Radiation Measurement (ARM) Program
        (United States) (2013).

    """

    # Preprocess the sonde data to ensure the same methods across all retrievals
    ds2 = preprocess_sonde_data(
        ds,
        temperature=temperature,
        pressure=pressure,
        height=height,
        smooth_height=smooth_height,
        base=5.0,
    )

    pres = ds2[pressure].values
    wspd = ds2[windspeed].values
    alt = ds2[height].values
    theta = ds2['potential_temperature'].values

    # Calculate AGL relative to the first non-NaN altitude
    if np.isnan(alt[0]):
        valid = np.where(~np.isnan(alt))[0]
        agl = alt - alt[valid[0]]
    else:
        agl = alt - alt[0]

    # Near-surface potential temperature difference used to pick the regime
    theta_diff = theta[4] - theta[1]
    # Potential temperature gradient per kilometre (altitude is divided by 1000)
    theta_gradient = np.diff(theta) / np.diff(alt / 1000.0)

    # Set up threshold values
    if land_parameter:
        stability_thresh = 1.0  # K
        inst_thresh = 0.5  # K
        overshoot_thresh = 4.0  # K/km
    else:
        stability_thresh = 0.2  # K
        inst_thresh = 0.1  # K
        overshoot_thresh = 0.5  # K/km

    # Check Regimes. A NaN difference matches none of the comparisons, so it
    # is rejected explicitly instead of leaving the regime undefined.
    if np.isnan(theta_diff):
        raise ValueError('Potential temperature difference is NaN, cannot determine regime')
    if theta_diff < -stability_thresh:
        regime = 'CBL'
    elif theta_diff > stability_thresh:
        regime = 'SBL'
    else:
        regime = 'NRL'

    pbl_stable = np.nan
    pbl_shear = np.nan

    if regime == 'CBL' or regime == 'NRL':
        # Missing value unless every step of the upward scan finds a level
        pbl = -9999.0

        # Calculate difference from first level
        theta_gradient_0 = theta - theta[0]

        # Only process data above 150m ARM
        above_150 = np.where(agl > 150)[0]
        if len(above_150) > 0:
            theta_gradient_0[0 : above_150[0]] = np.nan

            # Scan upward to find lowest level that meets condition
            unstable = np.where(theta_gradient_0 >= inst_thresh)[0]
            if len(unstable) > 0:
                theta_gradient[0 : unstable[0]] = np.nan

                # Scan upward from previous level to search for overlying inversion layer
                inversion = np.where(theta_gradient >= overshoot_thresh)[0]
                if len(inversion) > 0:
                    pbl = alt[inversion[0]]
    else:
        # Local minima of the gradient profile.
        # NOTE(review): the index runs over the interior slice [1:-1] but is
        # applied to the full array, so i=0 is compared with the last element
        # and the final interior point is never tested. Left unchanged because
        # the thresholds below were tuned against the ARM answer.
        local_minima = [
            i
            for i in range(len(theta_gradient[1:-1]))
            if theta_gradient[i] < theta_gradient[i - 1]
            and theta_gradient[i] < theta_gradient[i + 1]
        ]

        for i in local_minima:
            cond1 = (theta_gradient[i] - theta_gradient[i - 1]) < -40.0
            cond2 = (theta_gradient[i + 1] < overshoot_thresh) or (
                theta_gradient[i + 2] < overshoot_thresh
            )
            if cond1 or cond2:
                # This gets the ARM answer
                pbl_stable = (alt[i + 1] + alt[i]) / 2.0
                break

        # Check for low-level jet
        # Find the first level whose wind speed exceeds the level above it and
        # look up to find a layer 2m/s lower.
        # Stull 1988 indicates LLJ is defined as where there is a relative wind speed
        # maximum that is more than 2 m/s faster than the wind speeds above it within
        # the lowest 1500m of the atmosphere. Keywords to adjust are provided
        below_max_alt = np.where(alt <= llj_max_alt)[0]
        max_wspd_ind = next(
            (i for i in range(len(wspd) - 1) if wspd[i] > wspd[i + 1]),
            None,
        )
        # Without a relative maximum or any level below llj_max_alt there is
        # nothing to test, so the shear estimate stays NaN.
        if max_wspd_ind is not None and len(below_max_alt) > 0:
            diff = wspd[max_wspd_ind] - wspd[max_wspd_ind : below_max_alt[-1]]
            if np.any(diff > llj_max_wspd):
                # Require wind speed to decrease monotonically toward the surface
                wspd_to_surf = np.diff(np.flip(wspd[0:max_wspd_ind]))
                if np.all(wspd_to_surf <= 0.0):
                    pbl_shear = alt[max_wspd_ind]

        # Take the lower of the two estimates when at least one exists
        if not np.all(np.isnan([pbl_stable, pbl_shear])):
            pbl = np.nanmin([pbl_stable, pbl_shear])
        else:
            pbl = -9999.0

    atts = {'units': 'm', 'long_name': 'Planteary Boundary Layer Height Liu-Liang'}
    da = xr.DataArray(pbl, attrs=atts)
    ds['pblht_liu_liang'] = da

    atts = {
        'units': '',
        'long_name': 'Planteary Boundary Layer Regime Classification Liu-Liang',
    }
    da = xr.DataArray(regime, attrs=atts)
    ds['pblht_regime_liu_liang'] = da

    atts = {'units': 'mb', 'long_name': 'Gridded pressure'}
    da = xr.DataArray(pres, coords={'atm_pres_ss': pres}, dims=['atm_pres_ss'], attrs=atts)
    ds['atm_pres_ss'] = da

    atts = {'units': 'K', 'long_name': 'Gridded potential temperature'}
    da = xr.DataArray(theta, coords={'atm_pres_ss': pres}, dims=['atm_pres_ss'], attrs=atts)
    ds['potential_temperature_ss'] = da

    atts = {'units': 'm', 'long_name': 'Gridded altitude'}
    da = xr.DataArray(alt, coords={'atm_pres_ss': pres}, dims=['atm_pres_ss'], attrs=atts)
    ds['alt_ss'] = da

    atts = {'units': 'm', 'long_name': 'PBL Stable Condition 1'}
    da = xr.DataArray(pbl_stable, attrs=atts)
    ds['pblht_liu_liang_stable_cond'] = da

    atts = {'units': 'm', 'long_name': 'PBL Shear Condition 2'}
    da = xr.DataArray(pbl_shear, attrs=atts)
    ds['pblht_liu_liang_shear_cond'] = da

    return ds


def calculate_pbl_heffter(
    ds,
    temperature='tdry',
    pressure='pres',
    height='alt',
    smooth_height=3,
    base=5.0,
):
    """
    Function for calculating the PBL height from a radiosonde profile
    using the Heffter technique. There are differences from the ARM
    VAP at times due to different averaging schemes.  Larger differences
    do occur at times and are unknown as to the cause but it is being
    investigated and is potentially a code issue with the VAP.

    Parameters
    ----------
    ds : xarray Dataset
        Dataset housing radiosonde profile for calculations
    temperature : str
        The name of the temperature field.
    pressure : str
        The name of the pressure field.
    height : str
        The name of the height field
    smooth_height : int
        Number of points to do a moving average on sounding height data to reduce noise
    base : float
        Interval for pressure gridding.  In testing, 5 mb was found to produce results with
        the lowest RMS

    Returns
    -------
    ds : xarray Dataset
        xarray dataset with results stored in pblht_heffter variable. The
        gridded profile is stored in atm_pres_ss, potential_temperature_ss and
        alt_ss, and the candidate layers in bottom_inversion and top_inversion.
        A height of -9999.0 means no candidate layer was found.

    References
    ----------
    Heffter JL. 1980. “Transport Layer Depth Calculations.” Second Joint Conference on
        Applications of Air Pollution Meteorology, New Orleans, Louisiana.

    Sivaraman, C., S. McFarlane, E. Chapman, M. Jensen, T. Toto, S. Liu, and M. Fischer.
        "Planetary boundary layer (PBL) height value added product (VAP): Radiosonde retrievals."
        Department of Energy Office of Science Atmospheric Radiation Measurement (ARM) Program
        (United States) (2013).

    """

    # Preprocess the sonde data to ensure the same methods across all retrievals
    ds2 = preprocess_sonde_data(
        ds,
        temperature=temperature,
        pressure=pressure,
        height=height,
        smooth_height=smooth_height,
        base=base,
    )

    # Get data
    pres = ds2[pressure].values
    alt = ds2[height].values
    theta = ds2['potential_temperature'].values

    # Calculate the potential temperature gradient between adjacent levels
    theta_gradient = np.diff(theta) / np.diff(alt)

    # Calculate AGL relative to the first non-NaN altitude
    if np.isnan(alt[0]):
        valid = np.where(~np.isnan(alt))[0]
        agl = alt - alt[valid[0]]
    else:
        agl = alt - alt[0]

    # Find where the gradient is at least 0.005 K/m
    strong = np.where(theta_gradient >= 0.005)[0]

    # Find the consistent layers by grouping the indices together. Consecutive
    # indices share the same (position - index) value, which is the group key.
    # Does not include a single height as a layer
    ranges = []
    for _, members in groupby(enumerate(strong), lambda pair: pair[0] - pair[1]):
        levels = [index for _, index in members]
        if levels[-1] - levels[0] > 0:
            ranges.append((levels[0], levels[-1]))

    # Subset ranges to lowest 5
    ranges = ranges[0:5]

    # For each layer, calculate the difference in theta from
    # top and bottom of the layer.  The lowest layer where the
    # difference is > 2 K is set as the PBL.
    # None marks "not found yet", so a genuine inversion base at 0.0 m cannot
    # be mistaken for an unset value.
    pbl = None
    theta_diff_layer = []
    bottom_inversion = []
    top_inversion = []
    for bottom, top in ranges:
        # Ignore layers topping out more than 4000 m above the first level
        if agl[top] > 4000.0:
            continue
        theta_diff = theta[top] - theta[bottom]
        theta_diff_layer.append(theta_diff)
        bottom_inversion.append(alt[bottom])
        top_inversion.append(alt[top])
        if pbl is None and theta_diff > 2.0:
            pbl = alt[bottom]

    if not theta_diff_layer:
        # No candidate layer at all
        pbl = -9999.0
    elif pbl is None:
        # If PBL is not set, set it to the layer with the max theta diff
        pbl = bottom_inversion[np.argmax(theta_diff_layer)]

    # Add variables to the dataset
    atts = {'units': 'm', 'long_name': 'Planteary Boundary Layer Height Heffter'}
    da = xr.DataArray(pbl, attrs=atts)
    ds['pblht_heffter'] = da

    atts = {'units': 'mb', 'long_name': 'Gridded pressure'}
    da = xr.DataArray(pres, coords={'atm_pres_ss': pres}, dims=['atm_pres_ss'], attrs=atts)
    ds['atm_pres_ss'] = da

    atts = {'units': 'K', 'long_name': 'Gridded potential temperature'}
    da = xr.DataArray(theta, coords={'atm_pres_ss': pres}, dims=['atm_pres_ss'], attrs=atts)
    ds['potential_temperature_ss'] = da

    atts = {'units': 'm', 'long_name': 'Gridded altitude'}
    da = xr.DataArray(alt, coords={'atm_pres_ss': pres}, dims=['atm_pres_ss'], attrs=atts)
    ds['alt_ss'] = da

    atts = {'units': 'm', 'long_name': 'Bottom height of inversion layers'}
    da = xr.DataArray(
        bottom_inversion,
        coords={'layers': list(range(len(bottom_inversion)))},
        dims=['layers'],
        attrs=atts,
    )
    ds['bottom_inversion'] = da

    atts = {'units': 'm', 'long_name': 'Top height of inversion layers'}
    da = xr.DataArray(
        top_inversion,
        coords={'layers': list(range(len(top_inversion)))},
        dims=['layers'],
        attrs=atts,
    )
    ds['top_inversion'] = da

    return ds


def preprocess_sonde_data(
    ds,
    temperature='tdry',
    pressure='pres',
    height='alt',
    smooth_height=3,
    base=5.0,
):
    """
    Function for processing the SONDE data for the PBL calculations.
    This is to ensure consistency and also applies some QC to the
    processing.

    Note that the pressure field of the input dataset is replaced in place
    by its rolling mean.

    Parameters
    ----------
    ds : xarray Dataset
        Dataset housing radiosonde profile for calculations
    temperature : str
        The name of the temperature field.
    pressure : str
        The name of the pressure field.
    height : str
        The name of the height field
    smooth_height : int
        Number of points to do a moving average on sounding height data to reduce noise
    base : float
        Interval for pressure gridding.  In testing, 5 mb was found to produce results with
        the lowest RMS

    Returns
    -------
    ds : xarray.Dataset
        Xarray dataset containing processed sonde data, indexed by the gridded
        pressure and with a potential_temperature variable added.

    Raises
    ------
    ValueError
        If the pressures cannot be gridded even after extra smoothing, or if
        any of the profile quality checks fails.

    References
    ----------
    Sivaraman, C., S. McFarlane, E. Chapman, M. Jensen, T. Toto, S. Liu, and M. Fischer.
        "Planetary boundary layer (PBL) height value added product (VAP): Radiosonde retrievals."
        Department of Energy Office of Science Atmospheric Radiation Measurement (ARM) Program
        (United States) (2013).

    """

    # Get the initial time and temp values
    time_0 = ds['time'].values
    temp_0 = ds[temperature].values

    # Apply a rolling average to smooth the pressure out
    ds[pressure] = ds[pressure].rolling(time=smooth_height, min_periods=1, center=True).mean()

    # Swap time and pressure for doing the appropriate gridding, carrying the
    # variable attributes over from the input dataset
    ds2 = ds.swap_dims(dims_dict={'time': pressure})
    for var in ds2:
        ds2[var].attrs = ds[var].attrs

    # Set up the pressure grid: from the third pressure value rounded up to a
    # multiple of `base`, descending to 100 in steps of `base`
    starting_pres = base * np.ceil(float(ds2[pressure].values[2]) / base)
    p_grid = np.flip(np.arange(100.0, starting_pres + base, base))

    # Pull out the data that's nearest to the pressure grid.  If it errors
    # it will smooth the data more.  This tends to happen if there are multiple
    # values of pressure that are the same
    try:
        ds2 = ds2.sel({pressure: p_grid}, method='nearest')
    except Exception:
        # Deliberately broad: any selection failure triggers one retry with a
        # wider smoothing window before giving up
        ds[pressure] = (
            ds[pressure].rolling(time=smooth_height + 4, min_periods=2, center=True).mean()
        )
        ds2 = ds.swap_dims(dims_dict={'time': pressure})
        for var in ds2:
            ds2[var].attrs = ds[var].attrs
        try:
            ds2 = ds2.sel({pressure: p_grid}, method='nearest')
        except Exception as err:
            raise ValueError(
                'Sonde profile does not have unique pressures after smoothing'
            ) from err

    # Get data
    alt = ds2[height].values
    pres = ds2[pressure].values
    temp = ds2[temperature].values

    # Perform Pre-processing checks
    if len(temp) == 0:
        raise ValueError('No data in profile')

    if np.nanmax(alt) < 1000.0:
        raise ValueError('Max altitude < 1000m')

    if np.nanmax(pres) <= 200.0:
        raise ValueError('Max pressure <= 200 hPa')

    # Check temperature delta over the first 10 seconds of the original
    # (ungridded) record
    t1 = time_0[0]
    t2 = t1 + np.timedelta64(10, 's')
    first_10s = np.where((time_0 >= t1) & (time_0 <= t2))[0]
    t_delta = abs(temp_0[first_10s[-1]] - temp_0[first_10s[0]])
    if t_delta > 30.0:
        raise ValueError('Temperature changes by >30º in first 10 seconds')

    # Check min/max
    if np.nanmax(temp) > 50.0 or np.nanmin(temp) < -90:
        raise ValueError('Temperature outside acceptable range (-90, 50)')

    if np.isnan(pres[0]) or np.isnan(pres[1]):
        raise ValueError('First two pressure values bad')

    # Calculate potential temperature; 273.15 is added to the helper's result
    # and the sum is labelled in K below
    theta = (
        convert_to_potential_temp(ds=ds2, temp_var_name=temperature, press_var_name=pressure)
        + 273.15
    )

    # Set variables to return. Coordinates and dimensions both come from the
    # caller-selected temperature field rather than a hard-coded 'tdry'.
    atts = {'units': 'K', 'long_name': 'Potential temperature'}
    da = xr.DataArray(
        theta,
        coords=ds2[temperature].coords,
        dims=ds2[temperature].dims,
        attrs=atts,
    )
    ds2['potential_temperature'] = da

    return ds2