Source code for act.io.noaapsl

"""
Modules for reading in NOAA PSL data.
"""

import datetime as dt
import re
from datetime import datetime, timedelta
from itertools import groupby
from os import path as ospath

import fsspec
import numpy as np
import pandas as pd
import xarray as xr
import yaml

from .text import read_csv


def read_psl_wind_profiler(filepath, transpose=True):
    """
    Returns two `xarray.Datasets` with stored data and metadata from a
    user-defined NOAA PSL wind profiler file, each containing a different mode.
    This works for both 449 MHz and 915 MHz Weber Wuertz and Weber Wuertz
    sub-hourly files.

    Parameters
    ----------
    filepath : str
        Name of file(s) to read.
    transpose : bool
        True to transpose the data.

    Return
    ------
    mode_one_ds : xarray.Dataset
        Standard Xarray dataset with the first mode data.
    mode_two_ds : xarray.Dataset
        Standard Xarray dataset with the second mode data.

    """
    # Open the file, read in the lines as a list, and return that list
    file = fsspec.open(filepath).open()
    lines = file.readlines()
    lines = [x.decode().rstrip()[:] for x in lines][1:]

    # Separate sections based on the $ separator in the file
    sections_of_file = (list(g) for _, g in groupby(lines, key='$'.__ne__))

    # Count how many lines need to be skipped when reading into pandas
    start_line = 0
    list_of_datasets = []
    for section in sections_of_file:
        if section[0] != '$':
            list_of_datasets.append(
                _parse_psl_wind_lines(filepath, section, line_offset=start_line)
            )
        start_line += len(section)

    # Return two datasets for each mode and the merge of datasets of the
    # same mode.
    mode_one_ds = xr.concat(list_of_datasets[0::2], dim='time')
    mode_two_ds = xr.concat(list_of_datasets[1::2], dim='time')
    if transpose:
        mode_one_ds = mode_one_ds.transpose('HT', 'time')
        mode_two_ds = mode_two_ds.transpose('HT', 'time')

    return mode_one_ds, mode_two_ds
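
# A minimal usage sketch added for illustration; it is not part of the
# original module and the file name below is hypothetical. It assumes a
# Weber Wuertz wind profiler text file already on disk.
def _example_read_wind_profiler():
    mode_one_ds, mode_two_ds = read_psl_wind_profiler(
        'ctd21125.15w', transpose=True  # hypothetical file name
    )
    # With transpose=True each dataset is dimensioned ('HT', 'time')
    print(mode_one_ds.dims, mode_two_ds.dims)
    return mode_one_ds, mode_two_ds
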
def read_psl_wind_profiler_temperature(filepath, transpose=True):
    """
    Returns `xarray.Dataset` with stored data and metadata from a user-defined
    NOAA PSL wind profiler temperature file.

    Parameters
    ----------
    filepath : str
        Name of file(s) to read.
    transpose : bool
        True to transpose the data.

    Return
    ------
    ds : xarray.Dataset
        Standard Xarray dataset with the data.

    """
    # Open the file, read in the lines as a list, and return that list
    file = fsspec.open(filepath).open()
    lines = file.readlines()
    lines = [x.decode().rstrip()[:] for x in lines][1:]

    # Separate sections based on the $ separator in the file
    sections_of_file = (list(g) for _, g in groupby(lines, key='$'.__ne__))

    # Count how many lines need to be skipped when reading into pandas
    start_line = 0
    list_of_datasets = []
    for section in sections_of_file:
        if section[0] != '$':
            list_of_datasets.append(
                _parse_psl_temperature_lines(filepath, section, line_offset=start_line)
            )
        start_line += len(section)

    # Merge the resultant datasets together
    if transpose:
        return xr.concat(list_of_datasets, dim='time').transpose('HT', 'time')
    else:
        return xr.concat(list_of_datasets, dim='time')
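
# A minimal usage sketch added for illustration; not part of the original
# module, and the RASS temperature file name is hypothetical. The returned
# dataset carries T, Tc, and W variables on an ('HT', 'time') grid when
# transpose=True.
def _example_read_wind_profiler_temperature():
    ds = read_psl_wind_profiler_temperature('ctd21125.15t', transpose=True)  # hypothetical file
    print(ds['T'].attrs['long_name'], ds['T'].attrs['units'])
    return ds
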
def _parse_psl_wind_lines(filepath, lines, line_offset=0):
    """
    Reads lines related to wind in a PSL file.

    Parameters
    ----------
    filepath : str
        Name of file(s) to read.
    lines : list
        List of strings containing the lines to parse.
    line_offset : int (default = 0)
        Offset to start reading the pandas data table.

    Returns
    -------
    ds : xarray.Dataset
        Xarray dataset with wind data.

    """
    # 1 - site
    site = lines[0]

    # 2 - data type
    datatype, _, version = filter_list(lines[1].split(' '))

    # 3 - station lat, lon, elevation
    latitude, longitude, elevation = filter_list(lines[2].split(' ')).astype(float)

    # 4 - year, month, day, hour, minute, second, utc
    time = parse_date_line(lines[3])

    # 5 - Consensus averaging time, number of beams, number of range gates
    consensus_average_time, number_of_beams, number_of_range_gates = filter_list(
        lines[4].split(' ')
    ).astype(int)

    # 7 - number of coherent integrations, number of spectral averages,
    # pulse width, inner pulse period.
    # Values duplicate as oblique and vertical values.
    (
        number_coherent_integrations_obl,
        number_coherent_integrations_vert,
        number_spectral_averages_obl,
        number_spectral_averages_vert,
        pulse_width_obl,
        pulse_width_vert,
        inner_pulse_period_obl,
        inner_pulse_period_vert,
    ) = filter_list(lines[6].split(' ')).astype(int)

    # 8 - full-scale doppler value, delay to first gate, number of gates,
    # spacing of gates. Values duplicate as oblique and vertical values.
    (
        full_scale_doppler_obl,
        full_scale_doppler_vert,
        beam_vertical_correction,
        delay_first_gate_obl,
        delay_first_gate_vert,
        number_of_gates_obl,
        number_of_gates_vert,
        spacing_of_gates_obl,
        spacing_of_gates_vert,
    ) = filter_list(lines[7].split(' ')).astype(float)

    # 9 - beam azimuth (degrees clockwise from north)
    (
        beam_azimuth1,
        beam_elevation1,
        beam_azimuth2,
        beam_elevation2,
        beam_azimuth3,
        beam_elevation3,
    ) = filter_list(lines[8].split(' ')).astype(float)

    beam_azimuth = np.array([beam_azimuth1, beam_azimuth2, beam_azimuth3], dtype='float32')
    beam_elevation = np.array(
        [beam_elevation1, beam_elevation2, beam_elevation3], dtype='float32'
    )

    # Read in the data table section using pandas
    df = pd.read_csv(filepath, skiprows=line_offset + 10, sep=r'\s+')

    # Only read in the number of rows for a given set of gates
    df = df.iloc[: int(number_of_range_gates)]

    # Grab a list of valid columns, except time
    columns = set(list(df.columns)) - {'time'}

    # Set the data types to be floats
    df = df[list(columns)].astype(float)

    # NaN values are encoded as 999999 - let's reflect that
    df = df.replace(999999.0, np.nan)

    # Ensure the height array is stored as a float
    df['HT'] = df.HT.astype(float)

    # Set the height as an index
    df = df.set_index('HT')

    # Rename the count and snr columns more usefully
    df = df.rename(
        columns={
            'RAD': 'RAD1',
            'RAD.1': 'RAD2',
            'RAD.2': 'RAD3',
            'CNT': 'CNT1',
            'CNT.1': 'CNT2',
            'CNT.2': 'CNT3',
            'SNR': 'SNR1',
            'SNR.1': 'SNR2',
            'SNR.2': 'SNR3',
            'QC': 'QC1',
            'QC.1': 'QC2',
            'QC.2': 'QC3',
        }
    )

    # Convert to an xarray dataset
    ds = df.to_xarray()

    # Add attributes to variables
    # Height
    ds['HT'].attrs['long_name'] = 'height_above_ground'
    ds['HT'].attrs['units'] = 'km'

    # Add time to our dataset
    ds['time'] = time

    # Add in our additional attributes
    ds.attrs['site_identifier'] = site.strip()
    ds.attrs['data_type'] = datatype
    ds.attrs['latitude'] = latitude
    ds.attrs['longitude'] = longitude
    ds.attrs['elevation'] = elevation
    ds.attrs['beam_elevation'] = beam_elevation
    ds.attrs['beam_azimuth'] = beam_azimuth
    ds.attrs['revision_number'] = version
    ds.attrs[
        'data_description'
    ] = 'https://psl.noaa.gov/data/obs/data/view_data_type_info.php?SiteID=ctd&DataOperationalID=5855&OperationalID=2371'
    ds.attrs['consensus_average_time'] = consensus_average_time
    ds.attrs['oblique-beam_vertical_correction'] = int(beam_vertical_correction)
    ds.attrs['number_of_beams'] = int(number_of_beams)
    ds.attrs['number_of_range_gates'] = int(number_of_range_gates)

    # Handle oblique and vertical attributes.
    ds.attrs['number_of_gates_oblique'] = int(number_of_gates_obl)
    ds.attrs['number_of_gates_vertical'] = int(number_of_gates_vert)
    ds.attrs['number_spectral_averages_oblique'] = int(number_spectral_averages_obl)
    ds.attrs['number_spectral_averages_vertical'] = int(number_spectral_averages_vert)
    ds.attrs['pulse_width_oblique'] = int(pulse_width_obl)
    ds.attrs['pulse_width_vertical'] = int(pulse_width_vert)
    ds.attrs['inner_pulse_period_oblique'] = int(inner_pulse_period_obl)
    ds.attrs['inner_pulse_period_vertical'] = int(inner_pulse_period_vert)
    ds.attrs['full_scale_doppler_value_oblique'] = float(full_scale_doppler_obl)
    ds.attrs['full_scale_doppler_value_vertical'] = float(full_scale_doppler_vert)
    ds.attrs['delay_to_first_gate_oblique'] = int(delay_first_gate_obl)
    ds.attrs['delay_to_first_gate_vertical'] = int(delay_first_gate_vert)
    ds.attrs['spacing_of_gates_oblique'] = int(spacing_of_gates_obl)
    ds.attrs['spacing_of_gates_vertical'] = int(spacing_of_gates_vert)

    return ds


def _parse_psl_temperature_lines(filepath, lines, line_offset=0):
    """
    Reads lines related to temperature in a PSL file.

    Parameters
    ----------
    filepath : str
        Name of file(s) to read.
    lines : list
        List of strings containing the lines to parse.
    line_offset : int (default = 0)
        Offset to start reading the pandas data table.

    Returns
    -------
    ds : xarray.Dataset
        Xarray dataset with temperature data.

    """
    # 1 - site
    site = lines[0]

    # 2 - data type
    datatype, _, version = filter_list(lines[1].split(' '))

    # 3 - station lat, lon, elevation
    latitude, longitude, elevation = filter_list(lines[2].split(' ')).astype(float)

    # 4 - year, month, day, hour, minute, second, utc
    time = parse_date_line(lines[3])

    # 5 - Consensus averaging time, number of beams, number of range gates.
    consensus_average_time, number_of_beams, number_of_range_gates = filter_list(
        lines[4].split(' ')
    ).astype(int)

    # 7 - number of coherent integrations, number of spectral averages,
    # pulse width, inner pulse period.
    (
        number_coherent_integrations,
        number_spectral_averages,
        pulse_width,
        inner_pulse_period,
    ) = filter_list(lines[6].split(' ')).astype(int)

    # 8 - full-scale doppler value, delay to first gate, number of gates,
    # spacing of gates.
    full_scale_doppler, delay_first_gate, number_of_gates, spacing_of_gates = filter_list(
        lines[7].split(' ')
    ).astype(float)

    # 9 - beam azimuth (degrees clockwise from north)
    beam_azimuth, beam_elevation = filter_list(lines[8].split(' ')).astype(float)

    # Read in the data table section using pandas
    df = pd.read_csv(filepath, skiprows=line_offset + 10, sep=r'\s+')

    # Only read in the number of rows for a given set of gates
    df = df.iloc[: int(number_of_gates)]

    # Grab a list of valid columns, except time
    columns = set(list(df.columns)) - {'time'}

    # Set the data types to be floats
    df = df[list(columns)].astype(float)

    # NaN values are encoded as 999999 - let's reflect that
    df = df.replace(999999.0, np.nan)

    # Ensure the height array is stored as a float
    df['HT'] = df.HT.astype(float)

    # Set the height as an index
    df = df.set_index('HT')

    # Rename the count and snr columns more usefully
    df = df.rename(
        columns={
            'CNT': 'CNT_T',
            'CNT.1': 'CNT_Tc',
            'CNT.2': 'CNT_W',
            'SNR': 'SNR_T',
            'SNR.1': 'SNR_Tc',
            'SNR.2': 'SNR_W',
        }
    )

    # Convert to an xarray dataset
    ds = df.to_xarray()

    # Add attributes to variables
    # Height
    ds['HT'].attrs['long_name'] = 'height_above_ground'
    ds['HT'].attrs['units'] = 'km'

    # Temperature
    ds['T'].attrs['long_name'] = 'average_uncorrected_RASS_temperature'
    ds['T'].attrs['units'] = 'degC'
    ds['Tc'].attrs['long_name'] = 'average_corrected_RASS_temperature'
    ds['Tc'].attrs['units'] = 'degC'

    # Vertical motion (w)
    ds['W'].attrs['long_name'] = 'average_vertical_wind'
    ds['W'].attrs['units'] = 'm/s'

    # Add time to our dataset
    ds['time'] = time

    # Add in our additional attributes
    ds.attrs['site_identifier'] = site.strip()
    ds.attrs['data_type'] = datatype
    ds.attrs['latitude'] = latitude
    ds.attrs['longitude'] = longitude
    ds.attrs['elevation'] = elevation
    ds.attrs['beam_elevation'] = beam_elevation
    ds.attrs['beam_azimuth'] = beam_azimuth
    ds.attrs['revision_number'] = version
    ds.attrs[
        'data_description'
    ] = 'https://psl.noaa.gov/data/obs/data/view_data_type_info.php?SiteID=ctd&DataOperationalID=5855&OperationalID=2371'
    ds.attrs['consensus_average_time'] = consensus_average_time
    ds.attrs['number_of_beams'] = int(number_of_beams)
    ds.attrs['number_of_gates'] = int(number_of_gates)
    ds.attrs['number_of_range_gates'] = int(number_of_range_gates)
    ds.attrs['number_spectral_averages'] = int(number_spectral_averages)
    ds.attrs['pulse_width'] = pulse_width
    ds.attrs['inner_pulse_period'] = inner_pulse_period
    ds.attrs['full_scale_doppler_value'] = full_scale_doppler
    ds.attrs['spacing_of_gates'] = spacing_of_gates

    return ds


def filter_list(list_of_strings):
    """Parses a list of strings, removes empty strings, and returns a numpy array."""
    return np.array(list(filter(None, list_of_strings)))


def parse_date_line(list_of_strings):
    """Parses the date line in PSL files."""
    year, month, day, hour, minute, second, utc_offset = filter_list(
        list_of_strings.split(' ')
    ).astype(int)
    year += 2000
    return datetime(year, month, day, hour, minute, second)
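
# A small sketch of how the two helpers above behave, added for illustration
# and not part of the original module. The header strings are hypothetical but
# follow the layout the parsers expect: fields separated by runs of spaces, and
# a date line of "yy mm dd hh mm ss utc_offset".
def _example_header_parsing():
    fields = filter_list(' CTD  21  125 '.split(' '))    # -> array(['CTD', '21', '125'])
    obs_time = parse_date_line(' 21 05 05 15 30 00 0')   # -> datetime(2021, 5, 5, 15, 30)
    return fields, obs_time
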
def read_psl_surface_met(filenames, conf_file=None):
    """
    Returns `xarray.Dataset` with stored data and metadata from a user-defined
    NOAA PSL SurfaceMet file.

    Parameters
    ----------
    filenames : str, list of str
        Name of file(s) to read.
    conf_file : str or Pathlib.path
        Defaults to ./conf/noaapsl_SurfaceMet.yaml
        Filename containing relative or full path to configuration YAML file
        used to describe the file format for each PSL site. If the site is not
        defined in the default file, the default file can be copied to a local
        location and the missing site added; then point to that updated
        configuration file. An issue can be opened on GitHub to request adding
        the missing site to the configuration file.

    Return
    ------
    ds : xarray.Dataset
        Standard Xarray dataset with the data.

    """
    if isinstance(filenames, str):
        site = ospath.basename(filenames)[:3]
        filenames = [filenames]
    else:
        site = ospath.basename(filenames[0])[:3]

    if conf_file is None:
        conf_file = ospath.join(ospath.dirname(__file__), 'conf', 'noaapsl_SurfaceMet.yaml')

    # Read configuration YAML file
    with open(conf_file) as fp:
        try:
            result = yaml.load(fp, Loader=yaml.FullLoader)
        except AttributeError:
            result = yaml.load(fp)

    # Extract dictionary of just the corresponding site
    try:
        result = result[site]
    except KeyError:
        raise RuntimeError(
            f"Configuration for site '{site}' currently not available. "
            'You can manually add the site configuration to a copy of '
            'noaapsl_SurfaceMet.yaml and set conf_file= name of copied file '
            'until the site is added.'
        )

    # Extract date and time from filename to use in extracting format from YAML file.
    search_result = re.match(r'[a-z]{3}(\d{2})(\d{3})\.(\d{2})m', ospath.basename(filenames[0]))
    yy, doy, hh = search_result.groups()
    if yy > '70':
        yy = f'19{yy}'
    else:
        yy = f'20{yy}'

    # Extract location information from configuration file.
    try:
        location_info = result['info']
    except KeyError:
        location_info = None

    # Loop through each date range for the site to extract the correct file format from conf file.
    file_datetime = (
        datetime.strptime(f'{yy}-01-01', '%Y-%m-%d')
        + timedelta(int(doy) - 1)
        + timedelta(hours=int(hh))
    )
    for ii in result.keys():
        if ii == 'info':
            continue
        date_range = [
            datetime.strptime(jj, '%Y-%m-%d %H:%M:%S') for jj in result[ii]['_date_range']
        ]
        if file_datetime >= date_range[0] and file_datetime <= date_range[1]:
            result = result[ii]
            del result['_date_range']
            break

    # Read data files by passing in column names from configuration file.
    ds = read_csv(filenames, column_names=list(result.keys()))

    # Calculate numpy datetime64 values from first 4 columns of the data file.
    time = np.array(ds['Year'].values - 1970, dtype='datetime64[Y]')
    day = np.array(np.array(ds['J_day'].values - 1, dtype='timedelta64[D]'))
    hourmin = ds['HoursMinutes'].values + 10000
    hour = [int(str(ii)[1:3]) for ii in hourmin]
    hour = np.array(hour, dtype='timedelta64[h]')
    minute = [int(str(ii)[3:]) for ii in hourmin]
    minute = np.array(minute, dtype='timedelta64[m]')
    time = time + day + hour + minute
    time = time.astype('datetime64[ns]')

    # Update Dataset to use "time" coordinate and assign calculated times
    ds = ds.assign_coords(index=time)
    ds = ds.rename(index='time')

    # Loop through configuration dictionary and apply attributes or
    # perform action for specific attributes.
    for var_name in result:
        for key, value in result[var_name].items():
            if key == '_delete' and value is True:
                del ds[var_name]
                continue

            if key == '_type':
                dtype = result[var_name][key]
                ds[var_name] = ds[var_name].astype(dtype)
                continue

            if key == '_missing_value':
                data_values = ds[var_name].values
                data_values[data_values == result[var_name][key]] = np.nan
                ds[var_name].values = data_values
                continue

            ds[var_name].attrs[key] = value

    # Add location information to Dataset
    if location_info is not None:
        ds.attrs['location_description'] = location_info['name']
        for var_name in ['lat', 'lon', 'alt']:
            value = location_info[var_name]['value']
            del location_info[var_name]['value']
            ds[var_name] = xr.DataArray(data=value, attrs=location_info[var_name])

    return ds
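
# A minimal usage sketch added for illustration; not part of the original
# module, and the file and configuration names are hypothetical. SurfaceMet
# files are matched to a site entry in noaapsl_SurfaceMet.yaml by the first
# three letters of the file name.
def _example_read_surface_met():
    ds = read_psl_surface_met(['ctd21125.00m'])  # hypothetical file name
    # A copied, edited configuration can be supplied for sites missing from the default file:
    # ds = read_psl_surface_met(['xyz21125.00m'], conf_file='my_noaapsl_SurfaceMet.yaml')
    return ds
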
def read_psl_parsivel(files):
    """
    Returns `xarray.Dataset` with stored data and metadata from a user-defined
    NOAA PSL parsivel file.

    Parameters
    ----------
    files : str or list
        Name of file(s) or urls to read.

    Return
    ------
    ds : xarray.Dataset
        Standard Xarray dataset with the data for the parsivel.

    """
    # Define the names for the variables
    names = [
        'time',
        'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B9', 'B10', 'B11',
        'B12', 'B13', 'B14', 'B15', 'B16', 'B17', 'B18', 'B19', 'B20', 'B21',
        'B22', 'B23', 'B24', 'B25', 'B26', 'B27', 'B28', 'B29', 'B30', 'B31', 'B32',
        'blackout',
        'good',
        'bad',
        'number_detected_particles',
        'precip_rate',
        'precip_amount',
        'precip_accumulation',
        'equivalent_radar_reflectivity',
        'number_in_error',
        'dirty',
        'very_dirty',
        'damaged',
        'laserband_amplitude',
        'laserband_amplitude_stdev',
        'sensor_temperature',
        'sensor_temperature_stdev',
        'sensor_voltage',
        'sensor_voltage_stdev',
        'heating_current',
        'heating_current_stdev',
        'number_rain_particles',
        'number_non_rain_particles',
        'number_ambiguous_particles',
        'precip_type',
    ]

    # Define the particle sizes and class width sizes based on
    # https://psl.noaa.gov/data/obs/data/view_data_type_info.php?SiteID=ctd&DataOperationalID=5890
    vol_equiv_diam = [
        0.062, 0.187, 0.312, 0.437, 0.562, 0.687, 0.812, 0.937, 1.062, 1.187,
        1.375, 1.625, 1.875, 2.125, 2.375, 2.75, 3.25, 3.75, 4.25, 4.75,
        5.5, 6.5, 7.5, 8.5, 9.5, 11.0, 13.0, 15.0, 17.0, 19.0, 21.5, 24.5,
    ]
    class_size_width = [
        0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125, 0.125,
        0.250, 0.250, 0.250, 0.250, 0.250, 0.5, 0.5, 0.5, 0.5, 0.5,
        1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0,
    ]

    if not isinstance(files, list):
        files = [files]

    # Loop through each file or url and append the dataframe into data for concatenation
    data = []
    end_time = []
    for f in files:
        df = pd.read_table(f, skiprows=[0, 1, 2], names=names, index_col=0, sep=r'\s+')

        # Reading the table twice to get the date so it can be parsed appropriately
        date = pd.read_table(f, nrows=0).to_string().split(' ')[-3]
        time = df.index
        start_time = []
        form = '%y%j%H:%M:%S:%f'
        for t in time:
            start_time.append(pd.to_datetime(date + ':' + t.split('-')[0], format=form))
            end_time.append(pd.to_datetime(date + ':' + t.split('-')[1], format=form))
        df.index = start_time
        data.append(df)

    df = pd.concat(data)

    # Create a 2D size distribution variable from all the B* variables
    dsd = []
    for n in names:
        if 'B' not in n:
            continue
        dsd.append(list(df[n]))

    # Convert the dataframe to xarray DataSet and add variables
    ds = df.to_xarray()
    ds = ds.rename({'index': 'time'})

    long_name = 'Drop Size Distribution'
    attrs = {'long_name': long_name, 'units': 'count'}
    da = xr.DataArray(
        np.transpose(dsd),
        dims=['time', 'particle_size'],
        coords=[ds['time'].values, vol_equiv_diam],
    )
    ds['number_density_drops'] = da

    attrs = {'long_name': 'Class size width', 'units': 'mm'}
    da = xr.DataArray(
        class_size_width, dims=['particle_size'], coords=[vol_equiv_diam], attrs=attrs
    )
    ds['class_size_width'] = da

    attrs = {'long_name': 'Particle class size average', 'units': 'mm'}
    da = xr.DataArray(vol_equiv_diam, dims=['particle_size'], coords=[vol_equiv_diam], attrs=attrs)
    ds['particle_size'] = da

    attrs = {'long_name': 'End time of averaging interval'}
    da = xr.DataArray(end_time, dims=['time'], coords=[ds['time'].values], attrs=attrs)
    ds['interval_end_time'] = da

    # Define the attributes and metadata and add into the DataSet
    attrs = {
        'blackout': {
            'long_name': 'Number of samples excluded during PC clock sync',
            'units': 'count',
        },
        'good': {'long_name': 'Number of samples that passed QC checks', 'units': 'count'},
        'bad': {'long_name': 'Number of samples that failed QC checks', 'units': 'count'},
        'number_detected_particles': {
            'long_name': 'Total number of detected particles',
            'units': 'count',
        },
        'precip_rate': {'long_name': 'Precipitation rate', 'units': 'mm/hr'},
        'precip_amount': {'long_name': 'Interval accumulation', 'units': 'mm'},
        'precip_accumulation': {'long_name': 'Event accumulation', 'units': 'mm'},
        'equivalent_radar_reflectivity': {'long_name': 'Radar Reflectivity', 'units': 'dB'},
        'number_in_error': {
            'long_name': 'Number of samples that were reported dirty, very dirty, or damaged',
            'units': 'count',
        },
        'dirty': {
            'long_name': 'Laser glass is dirty but measurement is still possible',
            'units': 'unitless',
        },
        'very_dirty': {
            'long_name': 'Laser glass is dirty, partially covered, no further measurements are possible',
            'units': 'unitless',
        },
        'damaged': {'long_name': 'Laser damaged', 'units': 'unitless'},
        'laserband_amplitude': {
            'long_name': 'Average signal amplitude of the laser strip',
            'units': 'unitless',
        },
        'laserband_amplitude_stdev': {
            'long_name': 'Standard deviation of the signal amplitude of the laser strip',
            'units': 'unitless',
        },
        'sensor_temperature': {'long_name': 'Average sensor temperature', 'units': 'degC'},
        'sensor_temperature_stdev': {
            'long_name': 'Standard deviation of sensor temperature',
            'units': 'degC',
        },
        'sensor_voltage': {'long_name': 'Sensor power supply voltage', 'units': 'V'},
        'sensor_voltage_stdev': {
            'long_name': 'Standard deviation of the sensor power supply voltage',
            'units': 'V',
        },
        'heating_current': {'long_name': 'Average heating system current', 'units': 'A'},
        'heating_current_stdev': {
            'long_name': 'Standard deviation of heating system current',
            'units': 'A',
        },
        'number_rain_particles': {
            'long_name': 'Number of particles detected as rain',
            'units': 'unitless',
        },
        'number_non_rain_particles': {
            'long_name': 'Number of particles detected not as rain',
            'units': 'unitless',
        },
        'number_ambiguous_particles': {
            'long_name': 'Number of particles detected as ambiguous',
            'units': 'unitless',
        },
        'precip_type': {
            'long_name': 'Precipitation type (1=rain; 2=mixed; 3=snow)',
            'units': 'unitless',
        },
        'number_density_drops': {'long_name': 'Drop Size Distribution', 'units': 'count'},
    }

    for v in ds:
        if v in attrs:
            ds[v].attrs = attrs[v]

    return ds
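
# A minimal usage sketch added for illustration; not part of the original
# module, and the URL below is hypothetical (check the PSL archive for real
# paths). Parsivel stats tables can be read directly from the web, and the
# drop size distribution lands in ds['number_density_drops'] against a
# 'particle_size' coordinate in mm.
def _example_read_parsivel():
    url = 'https://downloads.psl.noaa.gov/psd2/data/realtime/DisdrometerParsivel/Stats/ctd/2022/002/ctd2200200_stats.txt'  # hypothetical
    ds = read_psl_parsivel(url)
    print(ds['number_density_drops'].shape)
    return ds
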
def read_psl_radar_fmcw_moment(files):
    """
    Returns `xarray.Dataset` with stored data and metadata from NOAA PSL
    FMCW Radar files. See the References section for details.

    Parameters
    ----------
    files : str or list
        Name of file(s) to read. Currently does not support reading URLs,
        but files can be downloaded easily using the
        act.discovery.download_noaa_psl_data function.

    Return
    ------
    ds : xarray.Dataset
        Standard Xarray dataset with the data for the radar.

    References
    ----------
    Johnston, Paul E., James R. Jordan, Allen B. White, David A. Carter,
    David M. Costa, and Thomas E. Ayers. "The NOAA FM-CW snow-level radar."
    Journal of Atmospheric and Oceanic Technology 34, no. 2 (2017): 249-267.

    """
    ds = _parse_psl_radar_moments(files)
    return ds
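
# A minimal usage sketch added for illustration; not part of the original
# module, and the local file name is hypothetical. FMCW moment files are plain
# text and are expected to be downloaded first, e.g. with
# act.discovery.download_noaa_psl_data.
def _example_read_fmcw_moment():
    ds = read_psl_radar_fmcw_moment(['ctd2202800_raw.txt'])  # hypothetical file name
    # Uncalibrated reflectivity is derived by range-correcting the SNR
    print(ds['reflectivity_uncalibrated'].dims)
    return ds
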
def read_psl_radar_sband_moment(files):
    """
    Returns `xarray.Dataset` with stored data and metadata from NOAA PSL
    S-band Radar files.

    Parameters
    ----------
    files : str or list
        Name of file(s) to read. Currently does not support reading URLs,
        but files can be downloaded easily using the
        act.discovery.download_noaa_psl_data function.

    Return
    ------
    ds : xarray.Dataset
        Standard Xarray dataset with the data for the radar.

    """
    ds = _parse_psl_radar_moments(files)
    return ds


def _parse_psl_radar_moments(files):
    """
    Returns `xarray.Dataset` with stored data and metadata from NOAA PSL
    FMCW and S-band Radar files.

    Parameters
    ----------
    files : str or list
        Name of file(s) to read. Currently does not support reading URLs,
        but files can be downloaded easily using the
        act.discovery.download_noaa_psl_data function.

    Return
    ------
    ds : xarray.Dataset
        Standard Xarray dataset with the data for the radar.

    """
    # Set the initial dictionary to convert to xarray dataset
    data = {
        'site': {'dims': ['file'], 'data': [], 'attrs': {'long_name': 'NOAA site code'}},
        'lat': {
            'dims': ['file'],
            'data': [],
            'attrs': {'long_name': 'North Latitude', 'units': 'degree_N'},
        },
        'lon': {
            'dims': ['file'],
            'data': [],
            'attrs': {'long_name': 'East Longitude', 'units': 'degree_E'},
        },
        'alt': {
            'dims': ['file'],
            'data': [],
            'attrs': {'long_name': 'Altitude above mean sea level', 'units': 'm'},
        },
        'freq': {
            'dims': ['file'],
            'data': [],
            'attrs': {'long_name': 'Operating Frequency; Ignore for FMCW', 'units': 'Hz'},
        },
        'azimuth': {
            'dims': ['time'],
            'data': [],
            'attrs': {'long_name': 'Azimuth angle', 'units': 'deg'},
        },
        'elevation': {
            'dims': ['time'],
            'data': [],
            'attrs': {'long_name': 'Elevation angle', 'units': 'deg'},
        },
        'beam_direction_code': {
            'dims': ['time'],
            'data': [],
            'attrs': {'long_name': 'Beam direction code', 'units': ''},
        },
        'year': {'dims': ['time'], 'data': [], 'attrs': {'long_name': '2-digit year', 'units': ''}},
        'day_of_year': {
            'dims': ['time'],
            'data': [],
            'attrs': {'long_name': 'Day of the year', 'units': ''},
        },
        'hour': {
            'dims': ['time'],
            'data': [],
            'attrs': {'long_name': 'Hour of the day', 'units': ''},
        },
        'minute': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Minutes', 'units': ''}},
        'second': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Seconds', 'units': ''}},
        'interpulse_period': {
            'dims': ['time'],
            'data': [],
            'attrs': {'long_name': 'Interpulse Period', 'units': 'ms'},
        },
        'pulse_width': {
            'dims': ['time'],
            'data': [],
            'attrs': {'long_name': 'Pulse width', 'units': 'ns'},
        },
        'first_range_gate': {
            'dims': ['time'],
            'data': [],
            'attrs': {'long_name': 'Range to first range gate', 'units': 'm'},
        },
        'range_between_gates': {
            'dims': ['time'],
            'data': [],
            'attrs': {'long_name': 'Distance between range gates', 'units': 'm'},
        },
        'n_gates': {
            'dims': ['time'],
            'data': [],
            'attrs': {'long_name': 'Number of range gates', 'units': 'count'},
        },
        'n_coherent_integration': {
            'dims': ['time'],
            'data': [],
            'attrs': {'long_name': 'Number of coherent integrations', 'units': 'count'},
        },
        'n_averaged_spectra': {
            'dims': ['time'],
            'data': [],
            'attrs': {'long_name': 'Number of averaged spectra', 'units': 'count'},
        },
        'n_points_spectrum': {
            'dims': ['time'],
            'data': [],
            'attrs': {'long_name': 'Number of points in spectra', 'units': 'count'},
        },
        'n_code_bits': {
            'dims': ['time'],
            'data': [],
            'attrs': {'long_name': 'Number of code bits', 'units': 'count'},
        },
        'radial_velocity': {
            'dims': ['time', 'range'],
            'data': [],
            'attrs': {'long_name': 'Radial velocity', 'units': 'm/s'},
        },
        'snr': {
            'dims': ['time', 'range'],
            'data': [],
            'attrs': {'long_name': 'Signal-to-noise ratio - not range corrected', 'units': 'dB'},
        },
        'signal_power': {
            'dims': ['time', 'range'],
            'data': [],
            'attrs': {'long_name': 'Signal Power - not range corrected', 'units': 'dB'},
        },
        'spectral_width': {
            'dims': ['time', 'range'],
            'data': [],
            'attrs': {'long_name': 'Spectral width', 'units': 'm/s'},
        },
        'noise_amplitude': {
            'dims': ['time', 'range'],
            'data': [],
            'attrs': {'long_name': 'Noise amplitude', 'units': 'dB'},
        },
        'qc_variable': {
            'dims': ['time', 'range'],
            'data': [],
            'attrs': {'long_name': 'QC Value - not used', 'units': ''},
        },
        'time': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Datetime', 'units': ''}},
        'range': {'dims': ['range'], 'data': [], 'attrs': {'long_name': 'Range', 'units': 'm'}},
        'reflectivity_uncalibrated': {
            'dims': ['time', 'range'],
            'data': [],
            'attrs': {'long_name': 'Uncalibrated reflectivity', 'units': 'dB'},
        },
    }

    # Separate out the names as they will be accessed in different parts of the code
    h1_names = ['site', 'lat', 'lon', 'alt', 'freq']
    h2_names = [
        'azimuth',
        'elevation',
        'beam_direction_code',
        'year',
        'day_of_year',
        'hour',
        'minute',
        'second',
    ]
    h3_names = [
        'interpulse_period',
        'pulse_width',
        'first_range_gate',
        'range_between_gates',
        'n_gates',
        'n_coherent_integration',
        'n_averaged_spectra',
        'n_points_spectrum',
        'n_code_bits',
    ]
    names = {
        'radial_velocity': 0,
        'snr': 1,
        'signal_power': 2,
        'spectral_width': 3,
        'noise_amplitude': 4,
        'qc_variable': 5,
    }

    # If file is a string, convert to list for handling.
    if not isinstance(files, list):
        files = [files]

    # Run through each file and read the data in
    for f in files:
        # Read in the first line of the file which has site, lat, lon, etc...
        df = str(pd.read_table(f, nrows=0).columns[0]).split(' ')
        ctr = 0
        for d in df:
            if len(d) > 0:
                if h1_names[ctr] == 'lat' or h1_names[ctr] == 'lon':
                    data[h1_names[ctr]]['data'].append(float(d) / 100.0)
                else:
                    data[h1_names[ctr]]['data'].append(d)
                ctr += 1

        # Set counts and errors
        error = False
        error_ct = 0
        ct = 0
        # Loop through while there are no errors, i.e. until eof
        while error is False:
            try:
                # Read in the initial headers to get information used to parse data
                if ct == 0:
                    df = str(pd.read_table(f, nrows=0, skiprows=[0]).columns[0]).split(' ')
                    ctr = 0
                    for d in df:
                        if len(d) > 0:
                            data[h2_names[ctr]]['data'].append(d)
                            ctr += 1
                    # Read in third row of header information
                    df = str(pd.read_table(f, nrows=0, skiprows=[0, 1]).columns[0]).split(' ')
                    ctr = 0
                    for d in df:
                        if len(d) > 0:
                            data[h3_names[ctr]]['data'].append(d)
                            ctr += 1
                    # Read in the data based on number of gates
                    df = pd.read_csv(
                        f,
                        skiprows=[0, 1, 2],
                        nrows=int(data['n_gates']['data'][-1]) - 1,
                        sep=r'\s+',
                        names=list(names.keys()),
                    )
                    index2 = 0
                else:
                    # Set indices for parsing data, reading 2 headers and then the columns of data
                    index1 = ct * int(data['n_gates']['data'][-1])
                    index2 = index1 + int(data['n_gates']['data'][-1]) + 2 * ct + 4
                    df = str(
                        pd.read_table(f, nrows=0, skiprows=list(range(index2 - 1))).columns[0]
                    ).split(' ')
                    ctr = 0
                    for d in df:
                        if len(d) > 0:
                            data[h2_names[ctr]]['data'].append(d)
                            ctr += 1
                    df = str(
                        pd.read_table(f, nrows=0, skiprows=list(range(index2))).columns[0]
                    ).split(' ')
                    ctr = 0
                    for d in df:
                        if len(d) > 0:
                            data[h3_names[ctr]]['data'].append(d)
                            ctr += 1
                    df = pd.read_csv(
                        f,
                        skiprows=list(range(index2 + 1)),
                        nrows=int(data['n_gates']['data'][-1]) - 1,
                        sep=r'\s+',
                        names=list(names.keys()),
                    )

                # Add data from the columns to the dictionary
                for n in names:
                    data[n]['data'].append(df[n].to_list())

                # Calculate the range based on number of gates, range to first gate,
                # and range between gates
                if len(data['range']['data']) == 0:
                    ranges = float(data['first_range_gate']['data'][-1]) + np.array(
                        range(int(data['n_gates']['data'][-1]) - 1)
                    ) * float(data['range_between_gates']['data'][-1])
                    data['range']['data'] = ranges

                # Calculate a time
                time = dt.datetime(
                    int('20' + data['year']['data'][-1]),
                    1,
                    1,
                    int(data['hour']['data'][-1]),
                    int(data['minute']['data'][-1]),
                    int(data['second']['data'][-1]),
                ) + dt.timedelta(days=int(data['day_of_year']['data'][-1]) - 1)
                data['time']['data'].append(time)

                # Range correct the snr which converts it essentially to an
                # uncalibrated reflectivity
                snr_rc = data['snr']['data'][-1] - 20.0 * np.log10(1.0 / (ranges / 1000.0) ** 2)
                data['reflectivity_uncalibrated']['data'].append(snr_rc)
            except Exception as e:
                # Handle errors: if end of file then continue on; if something else,
                # try the next block of data, but if it errors another time in this
                # file, move on
                if isinstance(e, pd.errors.EmptyDataError) or error_ct > 1:
                    error = True
                else:
                    print(e)
                error_ct += 1
            ct += 1

    # Convert dictionary to Dataset
    ds = xr.Dataset().from_dict(data)

    return ds
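
# A minimal usage sketch added for illustration; not part of the original
# module, and the local file name is hypothetical. Both the FMCW and S-band
# readers funnel through _parse_psl_radar_moments, so the returned datasets
# share the same variables and attributes.
def _example_read_sband_moment():
    ds = read_psl_radar_sband_moment(['ctd2202815_raw.txt'])  # hypothetical file name
    print(ds['snr'].dims, ds['range'].attrs['units'])
    return ds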