Source code for act.plotting.histogramdisplay

""" Module for Histogram Plotting. """

import matplotlib.pyplot as plt
import numpy as np
import xarray as xr

from ..utils import datetime_utils as dt_utils
from .plot import Display


[docs]class HistogramDisplay(Display): """ This class is used to make histogram plots. It is inherited from Display and therefore contains all of Display's attributes and methods. Examples -------- To create a TimeSeriesDisplay with 3 rows, simply do: .. code-block:: python ds = act.read_netcdf(the_file) disp = act.plotting.HistogramDisplay(ds, subplot_shape=(3,), figsize=(15, 5)) The HistogramDisplay constructor takes in the same keyword arguments as plt.subplots. For more information on the plt.subplots keyword arguments, see the `matplotlib documentation <https://matplotlib.org/api/_as_gen/matplotlib.pyplot.subplots.html>`_. If no subplot_shape is provided, then no figure or axis will be created until add_subplots or plots is called. """ def __init__(self, ds, subplot_shape=(1,), ds_name=None, **kwargs): super().__init__(ds, subplot_shape, ds_name, **kwargs)
[docs] def set_xrng(self, xrng, subplot_index=(0,)): """ Sets the x range of the plot. Parameters ---------- xrng : 2 number array The x limits of the plot. subplot_index : 1 or 2D tuple, list, or array The index of the subplot to set the x range of. """ if self.axes is None: raise RuntimeError('set_xrng requires the plot to be displayed.') if not hasattr(self, 'xrng') and len(self.axes.shape) == 2: self.xrng = np.zeros((self.axes.shape[0], self.axes.shape[1], 2), dtype='datetime64[D]') elif not hasattr(self, 'xrng') and len(self.axes.shape) == 1: self.xrng = np.zeros((self.axes.shape[0], 2), dtype='datetime64[D]') self.axes[subplot_index].set_xlim(xrng) self.xrng[subplot_index, :] = np.array(xrng)
[docs] def set_yrng(self, yrng, subplot_index=(0,)): """ Sets the y range of the plot. Parameters ---------- yrng : 2 number array The y limits of the plot. subplot_index : 1 or 2D tuple, list, or array The index of the subplot to set the x range of. """ if self.axes is None: raise RuntimeError('set_yrng requires the plot to be displayed.') if not hasattr(self, 'yrng') and len(self.axes.shape) == 2: self.yrng = np.zeros((self.axes.shape[0], self.axes.shape[1], 2)) elif not hasattr(self, 'yrng') and len(self.axes.shape) == 1: self.yrng = np.zeros((self.axes.shape[0], 2)) if yrng[0] == yrng[1]: yrng[1] = yrng[1] + 1 self.axes[subplot_index].set_ylim(yrng) self.yrng[subplot_index, :] = yrng
def _get_data(self, dsname, fields): if isinstance(fields, str): fields = [fields] return self._ds[dsname][fields].dropna('time')
[docs] def plot_stacked_bar_graph( self, field, dsname=None, bins=None, sortby_field=None, sortby_bins=None, subplot_index=(0,), set_title=None, density=False, **kwargs, ): """ This procedure will plot a stacked bar graph of a histogram. Parameters ---------- field : str The name of the field to take the histogram of. dsname : str or None The name of the datastream the field is contained in. Set to None to let ACT automatically determine this. bins : array-like or None The histogram bin boundaries to use. Set to None to use numpy's default boundaries. sortby_field : str or None Set this option to a field name in order to sort the histograms by a given field parameter. For example, one can sort histograms of CO2 concentration by temperature. sortby_bins : array-like or None The bins to sort the histograms by. subplot_index : tuple The subplot index to place the plot in set_title : str The title of the plot. density: bool Set to True to plot a p.d.f. instead of a frequency histogram. Other keyword arguments will be passed into :func:`matplotlib.pyplot.bar`. Returns ------- return_dict : dict A dictionary containing the plot axis handle, bin boundaries, and generated histogram. """ if dsname is None and len(self._ds.keys()) > 1: raise ValueError( 'You must choose a datastream when there are 2 ' + 'or more datasets in the TimeSeriesDisplay ' + 'object.' ) elif dsname is None: dsname = list(self._ds.keys())[0] if sortby_field is not None: ds = self._get_data(dsname, [field, sortby_field]) xdata, ydata = ds[field], ds[sortby_field] else: xdata = self._get_data(dsname, field)[field] if 'units' in xdata.attrs: xtitle = ''.join(['(', xdata.attrs['units'], ')']) else: xtitle = field if bins is not None and sortby_bins is None and sortby_field is not None: # We will defaut the y direction to have the same # of bins as x sortby_bins = np.linspace(ydata.values.min(), ydata.values.max(), len(bins)) # Get the current plotting axis, add day/night background and plot data if self.fig is None: self.fig = plt.figure() if self.axes is None: self.axes = np.array([plt.axes()]) self.fig.add_axes(self.axes[0]) if sortby_field is not None: if 'units' in ydata.attrs: ytitle = ''.join(['(', ydata.attrs['units'], ')']) else: ytitle = field if bins is None: my_hist, x_bins, y_bins = np.histogram2d( xdata.values.flatten(), ydata.values.flatten(), density=density ) else: my_hist, x_bins, y_bins = np.histogram2d( xdata.values.flatten(), ydata.values.flatten(), density=density, bins=[bins, sortby_bins], ) x_inds = (x_bins[:-1] + x_bins[1:]) / 2.0 self.axes[subplot_index].bar( x_inds, my_hist[:, 0].flatten(), label=(str(y_bins[0]) + ' to ' + str(y_bins[1])), **kwargs, ) for i in range(1, len(y_bins) - 1): self.axes[subplot_index].bar( x_inds, my_hist[:, i].flatten(), bottom=my_hist[:, i - 1], label=(str(y_bins[i]) + ' to ' + str(y_bins[i + 1])), **kwargs, ) self.axes[subplot_index].legend() else: if bins is None: bmin = np.nanmin(xdata) bmax = np.nanmax(xdata) bins = np.arange(bmin, bmax, (bmax - bmin) / 10.0) my_hist, bins = np.histogram(xdata.values.flatten(), bins=bins, density=density) x_inds = (bins[:-1] + bins[1:]) / 2.0 self.axes[subplot_index].bar(x_inds, my_hist) # Set Title if set_title is None: set_title = ' '.join( [ dsname, field, 'on', dt_utils.numpy_to_arm_date(self._ds[dsname].time.values[0]), ] ) self.axes[subplot_index].set_title(set_title) self.axes[subplot_index].set_ylabel('count') self.axes[subplot_index].set_xlabel(xtitle) return_dict = {} return_dict['plot_handle'] = self.axes[subplot_index] if 'x_bins' in locals(): return_dict['x_bins'] = x_bins return_dict['y_bins'] = y_bins else: return_dict['bins'] = bins return_dict['histogram'] = my_hist return return_dict
[docs] def plot_size_distribution( self, field, bins, time=None, dsname=None, subplot_index=(0,), set_title=None, **kwargs ): """ This procedure plots a stairstep plot of a size distribution. This is useful for plotting size distributions and waveforms. Parameters ---------- field : str The name of the field to plot the spectrum from. bins : str or array-like The name of the field that stores the bins for the spectra. time : none or datetime If None, spectra to plot will be automatically determined. Otherwise, specify this field for the time period to plot. dsname : str The name of the Dataset to plot. Set to None to have ACT automatically determine this. subplot_index : tuple The subplot index to place the plot in. set_title : str or None Use this to set the title. Additional keyword arguments will be passed into :func:`matplotlib.pyplot.step` Returns ------- ax : matplotlib axis handle The matplotlib axis handle referring to the plot. """ if dsname is None and len(self._ds.keys()) > 1: raise ValueError( 'You must choose a datastream when there are 2 ' + 'or more datasets in the TimeSeriesDisplay ' + 'object.' ) elif dsname is None: dsname = list(self._ds.keys())[0] xdata = self._get_data(dsname, field)[field] if isinstance(bins, str): bins = self._ds[dsname][bins] else: bins = xr.DataArray(bins) if 'units' in bins.attrs: xtitle = ''.join(['(', bins.attrs['units'], ')']) else: xtitle = 'Bin #' if 'units' in xdata.attrs: ytitle = ''.join(['(', xdata.attrs['units'], ')']) else: ytitle = field if len(xdata.dims) > 1 and time is None: raise ValueError( 'Input data has more than one dimension, ' + 'you must specify a time to plot!' ) elif len(xdata.dims) > 1: xdata = xdata.sel(time=time, method='nearest') if len(bins.dims) > 1 or len(bins.values) != len(xdata.values): raise ValueError( 'Bins must be a one dimensional field whose ' + 'length is equal to the field length!' ) # Get the current plotting axis, add day/night background and plot data if self.fig is None: self.fig = plt.figure() if self.axes is None: self.axes = np.array([plt.axes()]) self.fig.add_axes(self.axes[0]) # Set Title if set_title is None: set_title = ' '.join( [ dsname, field, 'on', dt_utils.numpy_to_arm_date(self._ds[dsname].time.values[0]), ] ) self.axes[subplot_index].set_title(set_title) self.axes[subplot_index].step(bins.values, xdata.values) self.axes[subplot_index].set_xlabel(xtitle) self.axes[subplot_index].set_ylabel(ytitle) return self.axes[subplot_index]
[docs] def plot_stairstep_graph( self, field, dsname=None, bins=None, sortby_field=None, sortby_bins=None, subplot_index=(0,), set_title=None, density=False, **kwargs, ): """ This procedure will plot a stairstep plot of a histogram. Parameters ---------- field : str The name of the field to take the histogram of. dsname : str or None The name of the datastream the field is contained in. Set to None to let ACT automatically determine this. bins : array-like or None The histogram bin boundaries to use. Set to None to use numpy's default boundaries. sortby_field : str or None Set this option to a field name in order to sort the histograms by a given field parameter. For example, one can sort histograms of CO2 concentration by temperature. sortby_bins : array-like or None The bins to sort the histograms by. subplot_index : tuple The subplot index to place the plot in. set_title : str The title of the plot. density : bool Set to True to plot a p.d.f. instead of a frequency histogram. Other keyword arguments will be passed into :func:`matplotlib.pyplot.step`. Returns ------- return_dict : dict A dictionary containing the plot axis handle, bin boundaries, and generated histogram. """ if dsname is None and len(self._ds.keys()) > 1: raise ValueError( 'You must choose a datastream when there are 2 ' + 'or more datasets in the TimeSeriesDisplay ' + 'object.' ) elif dsname is None: dsname = list(self._ds.keys())[0] xdata = self._get_data(dsname, field)[field] if 'units' in xdata.attrs: xtitle = ''.join(['(', xdata.attrs['units'], ')']) else: xtitle = field if sortby_field is not None: ydata = self._ds[dsname][sortby_field] if bins is not None and sortby_bins is None and sortby_field is not None: # We will defaut the y direction to have the same # of bins as x sortby_bins = np.linspace(ydata.values.min(), ydata.values.max(), len(bins)) # Get the current plotting axis, add day/night background and plot data if self.fig is None: self.fig = plt.figure() if self.axes is None: self.axes = np.array([plt.axes()]) self.fig.add_axes(self.axes[0]) if sortby_field is not None: if 'units' in ydata.attrs: ytitle = ''.join(['(', ydata.attrs['units'], ')']) else: ytitle = field if bins is None: my_hist, x_bins, y_bins = np.histogram2d( xdata.values.flatten(), ydata.values.flatten(), density=density ) else: my_hist, x_bins, y_bins = np.histogram2d( xdata.values.flatten(), ydata.values.flatten(), density=density, bins=[bins, sortby_bins], ) x_inds = (x_bins[:-1] + x_bins[1:]) / 2.0 self.axes[subplot_index].step( x_inds, my_hist[:, 0].flatten(), label=(str(y_bins[0]) + ' to ' + str(y_bins[1])), **kwargs, ) for i in range(1, len(y_bins) - 1): self.axes[subplot_index].step( x_inds, my_hist[:, i].flatten(), label=(str(y_bins[i]) + ' to ' + str(y_bins[i + 1])), **kwargs, ) self.axes[subplot_index].legend() else: my_hist, bins = np.histogram(xdata.values.flatten(), bins=bins, density=density) x_inds = (bins[:-1] + bins[1:]) / 2.0 self.axes[subplot_index].step(x_inds, my_hist, **kwargs) # Set Title if set_title is None: set_title = ' '.join( [ dsname, field, 'on', dt_utils.numpy_to_arm_date(self._ds[dsname].time.values[0]), ] ) self.axes[subplot_index].set_title(set_title) self.axes[subplot_index].set_ylabel('count') self.axes[subplot_index].set_xlabel(xtitle) return_dict = {} return_dict['plot_handle'] = self.axes[subplot_index] if 'x_bins' in locals(): return_dict['x_bins'] = x_bins return_dict['y_bins'] = y_bins else: return_dict['bins'] = bins return_dict['histogram'] = my_hist return return_dict
[docs] def plot_heatmap( self, x_field, y_field, dsname=None, x_bins=None, y_bins=None, subplot_index=(0,), set_title=None, density=False, set_shading='auto', **kwargs, ): """ This procedure will plot a heatmap of a histogram from 2 variables. Parameters ---------- x_field : str The name of the field to take the histogram of on the X axis. y_field : str The name of the field to take the histogram of on the Y axis. dsname : str or None The name of the datastream the field is contained in. Set to None to let ACT automatically determine this. x_bins : array-like or None The histogram bin boundaries to use for the variable on the X axis. Set to None to use numpy's default boundaries. y_bins : array-like or None The histogram bin boundaries to use for the variable on the Y axis. Set to None to use numpy's default boundaries. subplot_index : tuple The subplot index to place the plot in set_title : str The title of the plot. density : bool Set to True to plot a p.d.f. instead of a frequency histogram. set_shading : string Option to to set the matplotlib.pcolormesh shading parameter. Default to 'auto' Other keyword arguments will be passed into :func:`matplotlib.pyplot.pcolormesh`. Returns ------- return_dict : dict A dictionary containing the plot axis handle, bin boundaries, and generated histogram. """ if dsname is None and len(self._ds.keys()) > 1: raise ValueError( 'You must choose a datastream when there are 2 ' 'or more datasets in the TimeSeriesDisplay ' 'object.' ) elif dsname is None: dsname = list(self._ds.keys())[0] ds = self._get_data(dsname, [x_field, y_field]) xdata, ydata = ds[x_field], ds[y_field] if 'units' in xdata.attrs: xtitle = ''.join(['(', xdata.attrs['units'], ')']) else: xtitle = x_field if x_bins is not None and y_bins is None: # We will defaut the y direction to have the same # of bins as x y_bins = np.linspace(ydata.values.min(), ydata.values.max(), len(x_bins)) # Get the current plotting axis, add day/night background and plot data if self.fig is None: self.fig = plt.figure() if self.axes is None: self.axes = np.array([plt.axes()]) self.fig.add_axes(self.axes[0]) if 'units' in ydata.attrs: ytitle = ''.join(['(', ydata.attrs['units'], ')']) else: ytitle = y_field if x_bins is None: my_hist, x_bins, y_bins = np.histogram2d( xdata.values.flatten(), ydata.values.flatten(), density=density ) else: my_hist, x_bins, y_bins = np.histogram2d( xdata.values.flatten(), ydata.values.flatten(), density=density, bins=[x_bins, y_bins], ) x_inds = (x_bins[:-1] + x_bins[1:]) / 2.0 y_inds = (y_bins[:-1] + y_bins[1:]) / 2.0 xi, yi = np.meshgrid(x_inds, y_inds, indexing='ij') mesh = self.axes[subplot_index].pcolormesh(xi, yi, my_hist, shading=set_shading, **kwargs) # Set Title if set_title is None: set_title = ' '.join( [ dsname, 'on', dt_utils.numpy_to_arm_date(self._ds[dsname].time.values[0]), ] ) self.axes[subplot_index].set_title(set_title) self.axes[subplot_index].set_ylabel(ytitle) self.axes[subplot_index].set_xlabel(xtitle) self.add_colorbar(mesh, title='count', subplot_index=subplot_index) return_dict = {} return_dict['plot_handle'] = self.axes[subplot_index] return_dict['x_bins'] = x_bins return_dict['y_bins'] = y_bins return_dict['histogram'] = my_hist return return_dict