
"""
Region based dealiasing using a dynamic network reduction for region joining.

"""

import warnings

import numpy as np
import scipy.ndimage as ndimage
from scipy.optimize import fmin_l_bfgs_b

from ..config import get_metadata, get_fillvalue
from ._common_dealias import _parse_fields, _parse_gatefilter, _set_limits
from ._common_dealias import _parse_rays_wrap_around, _parse_nyquist_vel
from ._fast_edge_finder import _fast_edge_finder


# Possible future improvements to the region based dealiasing algorithm:
#
# * Find connected regions in the entire volume (3D) and dealias the entire
#   volume as opposed to the current sweep-by-sweep approach.
# * Scale the Doppler velocities by the Nyquist interval so that
#   calculations are done in units of percentage of the Nyquist interval.
#   This scaling is already done in the _EdgeTracker object but could also be
#   applied before finding regions of similar velocities.
# * Currently no attempt is made to unfold gates which are included in the
#   gatefilter.  One could add a second pass to the algorithm where gates
#   excluded by the filter are unfolded using a more elementary approach.  In
#   this manner the filter could define only "good" gates which establish the
#   folding pattern which is then applied to all gates.
# * Improve performance by implementing a priority queue in the _EdgeTracker
#   object. See comments in the class for details.


def dealias_region_based(
        radar, ref_vel_field=None, interval_splits=3, interval_limits=None,
        skip_between_rays=100, skip_along_ray=100, centered=True,
        nyquist_vel=None, check_nyquist_uniform=True, gatefilter=False,
        rays_wrap_around=None, keep_original=False, set_limits=True,
        vel_field=None, corr_vel_field=None, **kwargs):
    """
    Dealias Doppler velocities using a region based algorithm.

    Performs Doppler velocity dealiasing by finding regions of similar
    velocities and unfolding and merging pairs of regions until all regions
    are unfolded. Unfolding and merging regions is accomplished by modeling
    the problem as a dynamic network reduction.

    Parameters
    ----------
    radar : Radar
        Radar object containing Doppler velocities to dealias.
    ref_vel_field : str or None, optional
        Field in radar containing a reference velocity field used to anchor
        the unfolded velocities once the algorithm completes. Typically this
        field is created by simulating the radial velocities from wind data
        from an atmospheric sounding using
        :py:func:`pyart.util.simulated_vel_from_profile`.
    interval_splits : int, optional
        Number of segments to split the Nyquist interval into when finding
        regions of similar velocity. More splits creates a larger number of
        initial regions which takes longer to process but may result in
        better dealiasing. The default value of 3 seems to be a good
        compromise between performance and artifact-free dealiasing. This
        value is not used if the interval_limits parameter is not None.
    interval_limits : array like or None, optional
        Velocity limits used for finding regions of similar velocity. Should
        cover the entire Nyquist interval. None, the default value, will
        split the Nyquist interval into interval_splits equal sized
        intervals.
    skip_between_rays, skip_along_ray : int, optional
        Maximum number of filtered gates to skip over when joining regions;
        gaps between regions larger than this will not be connected. The two
        parameters specify the maximum number of filtered gates between and
        along a ray. Set these parameters to 0 to disable unfolding across
        filtered gates.
    centered : bool, optional
        True to apply centering to each sweep after the dealiasing algorithm
        so that the average number of unfoldings is near 0. False does not
        apply centering, which may result in individual sweeps under or over
        folded by the Nyquist interval.
    nyquist_vel : array like or float, optional
        Nyquist velocity in units identical to those stored in the radar's
        velocity field, either one value for each sweep or a single value
        which will be used for all sweeps. None will attempt to determine
        this value from the Radar object.
    check_nyquist_uniform : bool, optional
        True to check if the Nyquist velocities are uniform for all rays
        within a sweep, False will skip this check. This parameter is
        ignored when the nyquist_vel parameter is not None.
    gatefilter : GateFilter, None or False, optional
        A GateFilter instance which specifies which gates should be ignored
        when performing de-aliasing. A value of None creates this filter
        from the radar moments using any additional arguments by passing
        them to :py:func:`moment_based_gate_filter`. False, the default,
        disables filtering, including all gates in the dealiasing.
    rays_wrap_around : bool or None, optional
        True when the rays at the beginning and the end of the sweep should
        be interpreted as connected when de-aliasing (PPI scans). False if
        the edges should not be interpreted as connected (other scan types).
        None will determine the correct value from the radar scan type.
    keep_original : bool, optional
        True to retain the original Doppler velocity values at gates where
        the dealiasing procedure fails or was not applied. False does not
        perform this replacement and these gates will be masked in the
        corrected velocity field.
    set_limits : bool, optional
        True to set valid_min and valid_max elements in the returned
        dictionary. False will not set these dictionary elements.
    vel_field : str, optional
        Field in radar to use as the Doppler velocities during dealiasing.
        None will use the default field name from the Py-ART configuration
        file.
    corr_vel_field : str, optional
        Name to use for the dealiased Doppler velocity field metadata. None
        will use the default field name from the Py-ART configuration file.

    Returns
    -------
    corr_vel : dict
        Field dictionary containing dealiased Doppler velocities. Dealiased
        array is stored under the 'data' key.

    """
    # parse function parameters
    vel_field, corr_vel_field = _parse_fields(vel_field, corr_vel_field)
    gatefilter = _parse_gatefilter(gatefilter, radar, **kwargs)
    rays_wrap_around = _parse_rays_wrap_around(rays_wrap_around, radar)
    nyquist_vel = _parse_nyquist_vel(nyquist_vel, radar,
                                     check_nyquist_uniform)

    # parse ref_vel_field
    if ref_vel_field is None:
        ref_vdata = None
    else:
        ref_vdata = radar.fields[ref_vel_field]['data']

    # exclude masked and invalid velocity gates
    gatefilter.exclude_masked(vel_field)
    gatefilter.exclude_invalid(vel_field)
    gfilter = gatefilter.gate_excluded

    # perform dealiasing
    vdata = radar.fields[vel_field]['data'].view(np.ndarray)
    data = vdata.copy()     # dealiased velocities

    # loop over sweeps
    for nsweep, sweep_slice in enumerate(radar.iter_slice()):

        # extract sweep data
        sdata = vdata[sweep_slice].copy()   # is a copy needed here?
        scorr = data[sweep_slice]
        sfilter = gfilter[sweep_slice]

        # find nyquist velocity and interval segmentation limits
        nyquist_interval = nyquist_vel[nsweep] * 2.
        if interval_limits is None:
            nvel = nyquist_vel[nsweep]
            valid_sdata = sdata[~sfilter]
            s_interval_limits = _find_sweep_interval_splits(
                nvel, interval_splits, valid_sdata, nsweep)
        else:
            s_interval_limits = interval_limits

        # find regions in original data
        labels, nfeatures = _find_regions(sdata, sfilter, s_interval_limits)
        # skip sweep if all gates are masked or only a single region exists
        if nfeatures < 2:
            continue
        bincount = np.bincount(labels.ravel())
        num_masked_gates = bincount[0]
        region_sizes = bincount[1:]

        # find all edges between regions
        indices, edge_count, velos = _edge_sum_and_count(
            labels, num_masked_gates, sdata, rays_wrap_around,
            skip_between_rays, skip_along_ray)

        # no unfolding required if no edges exist between regions
        if len(edge_count) == 0:
            continue

        # find the number of folds in the regions
        region_tracker = _RegionTracker(region_sizes)
        edge_tracker = _EdgeTracker(indices, edge_count, velos,
                                    nyquist_interval, nfeatures + 1)
        while True:
            if _combine_regions(region_tracker, edge_tracker):
                break

        # center sweep if requested, determine a global sweep unfold number
        # so that the average number of gate folds is zero.
        if centered:
            gates_dealiased = region_sizes.sum()
            total_folds = np.sum(
                region_sizes * region_tracker.unwrap_number[1:])
            sweep_offset = int(round(float(total_folds) / gates_dealiased))
            if sweep_offset != 0:
                region_tracker.unwrap_number -= sweep_offset

        # dealias the data using the fold numbers
        nwrap = np.take(region_tracker.unwrap_number, labels)
        scorr += nwrap * nyquist_interval

        # anchor unfolded velocities against the reference velocity
        if ref_vdata is not None:
            sref = ref_vdata[sweep_slice]
            gfold = round((sref - scorr).mean() / nyquist_interval)

            # Anchor specific regions against the reference velocity by
            # minimizing a cost function penalizing both the difference
            # from the reference velocity and a lack of 2D continuity.
            new_interval_limits = np.linspace(scorr.min(), scorr.max(), 10)
            labels_corr, nfeatures_corr = _find_regions(
                scorr, sfilter, new_interval_limits)

            # if there is only one region, adjust the whole sweep by gfold
            # Nyquist intervals
            if nfeatures_corr < 2:
                scorr += gfold * nyquist_interval
            else:
                bounds_list = [(-6., 5.)] * nfeatures_corr
                scorr_means = np.zeros(nfeatures_corr)
                sref_means = np.zeros(nfeatures_corr)
                for reg in range(1, nfeatures_corr + 1):
                    scorr_means[reg-1] = np.ma.mean(
                        scorr[labels_corr == reg])
                    sref_means[reg-1] = np.ma.mean(
                        sref[labels_corr == reg])

                def cost_function(x):
                    return _cost_function(x, scorr_means, sref_means,
                                          nyquist_interval, nfeatures_corr)

                def gradient(x):
                    return _gradient(x, scorr_means, sref_means,
                                     nyquist_interval, nfeatures_corr)

                nyq_adjustments = fmin_l_bfgs_b(
                    cost_function, gfold * np.ones(nfeatures_corr),
                    disp=True, fprime=gradient, bounds=bounds_list,
                    maxiter=200, pgtol=nyquist_interval)

                for reg in range(1, nfeatures_corr + 1):
                    scorr[labels_corr == reg] += (
                        nyquist_interval *
                        np.round(nyq_adjustments[0][reg - 1]))

    # fill_value from the velocity dictionary if present
    fill_value = radar.fields[vel_field].get('_FillValue', get_fillvalue())

    # mask filtered gates
    if np.any(gfilter):
        data = np.ma.array(data, mask=gfilter, fill_value=fill_value)

    # restore original values where dealiasing not applied
    if keep_original:
        data[gfilter] = radar.fields[vel_field]['data'][gfilter]

    # return field dictionary containing dealiased Doppler velocities
    corr_vel = get_metadata(corr_vel_field)
    corr_vel['data'] = data
    corr_vel['_FillValue'] = fill_value

    if set_limits:
        # set valid_min and valid_max in corr_vel
        _set_limits(data, nyquist_vel, corr_vel)

    return corr_vel

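# A minimal usage sketch (illustrative only, not executed by Py-ART). The
# file name below is a placeholder; 'velocity' and 'corrected_velocity' are
# the usual Py-ART field names but any names present in the volume work.
def _example_dealias_usage():
    """ Sketch: dealias a volume and attach the corrected field. """
    import pyart     # assumes Py-ART is installed

    radar = pyart.io.read('example_radar_volume.nc')    # placeholder file
    corr_vel = dealias_region_based(
        radar, vel_field='velocity', corr_vel_field='corrected_velocity')
    radar.add_field('corrected_velocity', corr_vel, replace_existing=True)
    return radar
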
def _find_sweep_interval_splits(nyquist, interval_splits, velocities,
                                nsweep):
    """ Return the interval limits for a given sweep. """
    # The Nyquist interval is split into interval_splits equal sized areas.
    # If velocities outside the Nyquist interval are present the number and
    # limits of the interval splits must be adjusted so that these
    # velocities are included in one of the splits.
    add_start = add_end = 0
    interval = (2. * nyquist) / interval_splits

    # no change from default if all gates filtered
    if len(velocities) != 0:
        max_vel = velocities.max()
        min_vel = velocities.min()
        if max_vel > nyquist or min_vel < -nyquist:
            msg = ("Velocities outside of the Nyquist interval found in "
                   "sweep %i." % (nsweep))
            warnings.warn(msg, UserWarning)
            # additional intervals must be added to capture the velocities
            # outside the Nyquist limits
            add_start = int(np.ceil(-(min_vel + nyquist) / interval))
            add_end = int(np.ceil((max_vel - nyquist) / interval))

    start = -nyquist - add_start * interval
    end = nyquist + add_end * interval
    num = interval_splits + 1 + add_start + add_end
    return np.linspace(start, end, num, endpoint=True)


def _find_regions(vel, gfilter, limits):
    """
    Find regions of similar velocity.

    For each pair of values in the limits array (or list) find all
    connected velocity regions within these limits.

    Parameters
    ----------
    vel : 2D ndarray
        Array containing velocity data for a single sweep.
    gfilter : 2D ndarray
        Filter indicating if a particular gate should be masked. True
        indicates the gate should be masked (excluded).
    limits : array like
        Velocity limits for region finding. For each pair of limits, taken
        from elements i and i+1 of the array, all connected regions with
        velocities within these limits will be found.

    Returns
    -------
    label : ndarray
        Integer array with each region labeled by a value. The array ranges
        from 0 to nfeatures, inclusive, where a value of 0 indicates masked
        gates and non-zero values indicate a region of connected gates.
    nfeatures : int
        Number of regions found.

    """
    mask = ~gfilter
    label = np.zeros(vel.shape, dtype=np.int32)
    nfeatures = 0
    for lmin, lmax in zip(limits[:-1], limits[1:]):

        # find connected regions within the limits
        inp = (lmin <= vel) & (vel < lmax) & mask
        limit_label, limit_nfeatures = ndimage.label(inp)

        # add these regions to the global regions
        limit_label[np.nonzero(limit_label)] += nfeatures
        label += limit_label
        nfeatures += limit_nfeatures

    return label, nfeatures

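# A small sketch (illustrative only) of how _find_regions labels gates: on
# a 2 x 4 sweep split at 0 m/s, the negative gates and the positive gates
# form two separate connected regions.
def _example_find_regions():
    """ Sketch: label connected regions of similar velocity. """
    vel = np.array([[-4., -2., 3., 5.],
                    [-5., -3., 2., 6.]])
    gfilter = np.zeros((2, 4), dtype=bool)      # no gates excluded
    labels, nfeatures = _find_regions(vel, gfilter, limits=[-10., 0., 10.])
    # labels -> [[1, 1, 2, 2], [1, 1, 2, 2]], nfeatures -> 2
    return labels, nfeatures
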
""" total_nodes = labels.shape[0] * labels.shape[1] - num_masked_gates if rays_wrap_around: total_nodes += labels.shape[0] * 2 indices, velocities = _fast_edge_finder( labels.astype('int32'), data.astype('float32'), rays_wrap_around, max_gap_x, max_gap_y, total_nodes) index1, index2 = indices vel1, vel2 = velocities count = np.ones_like(vel1, dtype=np.int32) # return early if not edges were found if len(vel1) == 0: return ([], []), [], ([], []) # find the unique edges, procedure based on method in # scipy.sparse.coo_matrix.sum_duplicates # except we have three data arrays, vel1, vel2, and count order = np.lexsort((index1, index2)) index1 = index1[order] index2 = index2[order] vel1 = vel1[order] vel2 = vel2[order] count = count[order] unique_mask = ((index1[1:] != index1[:-1]) | (index2[1:] != index2[:-1])) unique_mask = np.append(True, unique_mask) index1 = index1[unique_mask] index2 = index2[unique_mask] unique_inds, = np.nonzero(unique_mask) vel1 = np.add.reduceat(vel1, unique_inds, dtype=vel1.dtype) vel2 = np.add.reduceat(vel2, unique_inds, dtype=vel2.dtype) count = np.add.reduceat(count, unique_inds, dtype=count.dtype) return (index1, index2), count, (vel1, vel2) def _combine_regions(region_tracker, edge_tracker): """ Returns True when done. """ # Edge parameters from edge with largest weight status, extra = edge_tracker.pop_edge() if status: return True node1, node2, weight, diff, edge_number = extra rdiff = int(np.round(diff)) # node sizes of nodes to be merged node1_size = region_tracker.get_node_size(node1) node2_size = region_tracker.get_node_size(node2) # determine which nodes should be merged if node1_size > node2_size: base_node, merge_node = node1, node2 else: base_node, merge_node = node2, node1 rdiff = -rdiff # unwrap merge_node if rdiff != 0: region_tracker.unwrap_node(merge_node, rdiff) edge_tracker.unwrap_node(merge_node, rdiff) # merge nodes region_tracker.merge_nodes(base_node, merge_node) edge_tracker.merge_nodes(base_node, merge_node, edge_number) return False # Minimize cost function that is sum of difference between regions and # sounding def _cost_function(nyq_vector, vels_slice_means, svels_slice_means, v_nyq_vel, nfeatures): """ Cost function for minimization in region based algorithm. """ cost = 0 i = 0 for reg in range(nfeatures): add_value = 0 # Deviance from sounding add_value = (vels_slice_means[reg] + np.round(nyq_vector[i]) * v_nyq_vel - svels_slice_means[reg])**2 if np.isfinite(add_value): cost += add_value i = i + 1 return cost def _gradient(nyq_vector, vels_slice_means, svels_slice_means, v_nyq_vel, nfeatures): """ Gradient of cost function for minimization in region based algorithm. """ gradient_vector = np.zeros(len(nyq_vector)) i = 0 for reg in range(nfeatures): add_value = (vels_slice_means[reg] + np.round(nyq_vector[i])*v_nyq_vel - svels_slice_means[reg]) if np.isfinite(add_value): gradient_vector[i] = 2*add_value*v_nyq_vel # Regional continuity vels_without_cur = np.delete(vels_slice_means, reg) diffs = np.square(vels_slice_means[reg] - vels_without_cur) if len(diffs) > 0: the_min = np.argmin(diffs) vel_wo_cur = vels_without_cur[the_min] else: vel_wo_cur = vels_slice_means[reg] if the_min < v_nyq_vel: gradient_vector[i] = 0 i = i + 1 return gradient_vector class _RegionTracker(object): """ Tracks the location of radar volume regions contained in each node as the network is reduced. """ def __init__(self, region_sizes): """ initalize. 
""" # number of gates in each node nregions = len(region_sizes) + 1 self.node_size = np.zeros(nregions, dtype='int32') self.node_size[1:] = region_sizes[:] # array of lists containing the regions in each node self.regions_in_node = np.zeros(nregions, dtype='object') for i in range(nregions): self.regions_in_node[i] = [i] # number of unwrappings to apply to dealias each region self.unwrap_number = np.zeros(nregions, dtype='int32') def merge_nodes(self, node_a, node_b): """ Merge node b into node a. """ # move all regions from node_b to node_a regions_to_merge = self.regions_in_node[node_b] self.regions_in_node[node_a].extend(regions_to_merge) self.regions_in_node[node_b] = [] # update node sizes self.node_size[node_a] += self.node_size[node_b] self.node_size[node_b] = 0 return def unwrap_node(self, node, nwrap): """ Unwrap all gates contained a node. """ if nwrap == 0: return # for each region in node add nwrap regions_to_unwrap = self.regions_in_node[node] self.unwrap_number[regions_to_unwrap] += nwrap return def get_node_size(self, node): """ Return the number of gates in a node. """ return self.node_size[node] class _EdgeTracker(object): """ A class for tracking edges in a dynamic network. """ def __init__(self, indices, edge_count, velocities, nyquist_interval, nnodes): """ initialize """ nedges = int(len(indices[0]) / 2) # node number and different in sum for each edge self.node_alpha = np.zeros(nedges, dtype=np.int32) self.node_beta = np.zeros(nedges, dtype=np.int32) self.sum_diff = np.zeros(nedges, dtype=np.float32) # number of connections between the regions self.weight = np.zeros(nedges, dtype=np.int32) # fast finding self._common_finder = np.zeros(nnodes, dtype=np.bool_) self._common_index = np.zeros(nnodes, dtype=np.int32) self._last_base_node = -1 # array of linked lists pointing to each node self.edges_in_node = np.zeros(nnodes, dtype='object') for i in range(nnodes): self.edges_in_node[i] = [] # fill out data from the provides indicies, edge counts and velocities edge = 0 idx1, idx2 = indices vel1, vel2 = velocities for i, j, count, vel, nvel in zip(idx1, idx2, edge_count, vel1, vel2): if i < j: continue self.node_alpha[edge] = i self.node_beta[edge] = j self.sum_diff[edge] = ((vel - nvel) / nyquist_interval) self.weight[edge] = count self.edges_in_node[i].append(edge) self.edges_in_node[j].append(edge) edge += 1 # list which orders edges according to their weight, highest first # TODO self.priority_queue = [] def merge_nodes(self, base_node, merge_node, foo_edge): """ Merge nodes. 
""" # remove edge between base and merge nodes self.weight[foo_edge] = -999 self.edges_in_node[merge_node].remove(foo_edge) self.edges_in_node[base_node].remove(foo_edge) self._common_finder[merge_node] = False # find all the edges in the two nodes edges_in_merge = list(self.edges_in_node[merge_node]) # loop over base_node edges if last base_node was different if self._last_base_node != base_node: self._common_finder[:] = False edges_in_base = list(self.edges_in_node[base_node]) for edge_num in edges_in_base: # reverse edge if needed so node_alpha is base_node if self.node_beta[edge_num] == base_node: self._reverse_edge_direction(edge_num) assert self.node_alpha[edge_num] == base_node # find all neighboring nodes to base_node neighbor = self.node_beta[edge_num] self._common_finder[neighbor] = True self._common_index[neighbor] = edge_num # loop over edge nodes for edge_num in edges_in_merge: # reverse edge so that node alpha is the merge_node if self.node_beta[edge_num] == merge_node: self._reverse_edge_direction(edge_num) assert self.node_alpha[edge_num] == merge_node # update all the edges to point to the base node self.node_alpha[edge_num] = base_node # if base_node also has an edge with the neighbor combine them neighbor = self.node_beta[edge_num] if self._common_finder[neighbor]: base_edge_num = self._common_index[neighbor] self._combine_edges(base_edge_num, edge_num, merge_node, neighbor) # if not fill in _common_ arrays. else: self._common_finder[neighbor] = True self._common_index[neighbor] = edge_num # move all edges from merge_node to base_node edges = self.edges_in_node[merge_node] self.edges_in_node[base_node].extend(edges) self.edges_in_node[merge_node] = [] self._last_base_node = int(base_node) return def _combine_edges(self, base_edge, merge_edge, merge_node, neighbor_node): """ Combine edges into a single edge. """ # Merging nodes MUST be set to alpha prior to calling this function # combine edge weights self.weight[base_edge] += self.weight[merge_edge] self.weight[merge_edge] = -999. # combine sums self.sum_diff[base_edge] += self.sum_diff[merge_edge] # remove merge_edge from both node lists self.edges_in_node[merge_node].remove(merge_edge) self.edges_in_node[neighbor_node].remove(merge_edge) # TODO priority queue # remove merge_edge from edge priority queue # self.priority_queue.remove(merge_edge) # relocate base_edge in priority queue # self.priority_queue.sort() def _reverse_edge_direction(self, edge): """ Reverse an edges direction, change alpha and beta. """ # swap nodes old_alpha = int(self.node_alpha[edge]) old_beta = int(self.node_beta[edge]) self.node_alpha[edge] = old_beta self.node_beta[edge] = old_alpha # swap sums self.sum_diff[edge] = -1. * self.sum_diff[edge] return def unwrap_node(self, node, nwrap): """ Unwrap a node. """ if nwrap == 0: return # add weight * nwrap to each edge in node for edge in self.edges_in_node[node]: weight = self.weight[edge] if node == self.node_alpha[edge]: self.sum_diff[edge] += weight * nwrap else: assert self.node_beta[edge] == node self.sum_diff[edge] += -weight * nwrap return def pop_edge(self): """ Pop edge with largest weight. Return node numbers and diff. """ # if len(priority_queue) == 0: # return True, None # edge_num = self.priority_queue[0] edge_num = np.argmax(self.weight) node1 = self.node_alpha[edge_num] node2 = self.node_beta[edge_num] weight = self.weight[edge_num] diff = self.sum_diff[edge_num] / (float(weight)) if weight < 0: return True, None return False, (node1, node2, weight, diff, edge_num)