"""
Modules for reading in NOAA PSL data.
"""
import datetime as dt
import re
from datetime import datetime, timedelta
from itertools import groupby
from os import path as ospath
import fsspec
import numpy as np
import pandas as pd
import xarray as xr
import yaml
from .text import read_csv
def read_psl_wind_profiler(filepath, transpose=True):
"""
Returns two `xarray.Dataset` objects with stored data and metadata from a
user-defined NOAA PSL wind profiler file, one dataset per mode. This works
for both 449 MHz and 915 MHz Weber Wuertz and Weber Wuertz sub-hourly files.
Parameters
----------
filepath : str
Name of file(s) to read.
transpose : bool
If True, transpose the data so the dimensions are ('HT', 'time').
Returns
-------
mode_one_ds : xarray.Dataset
Standard Xarray dataset with the first mode data.
mode_two_ds : xarray.Dataset
Standard Xarray dataset with the second mode data.
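Examples
--------
A minimal usage sketch; ``wind_file`` is a placeholder path to a local
449 MHz or 915 MHz PSL wind profiler file.
>>> mode_one_ds, mode_two_ds = read_psl_wind_profiler(wind_file, transpose=True)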
"""
# Open the file and read the lines into a list
file = fsspec.open(filepath).open()
lines = file.readlines()
lines = [x.decode().rstrip()[:] for x in lines][1:]
# Separate sections based on the $ separator in the file
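# groupby with key='$'.__ne__ yields alternating runs of data lines
# (key True) and '$' separator lines (key False)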
sections_of_file = (list(g) for _, g in groupby(lines, key='$'.__ne__))
# Count how many lines need to be skipped when reading into pandas
start_line = 0
list_of_datasets = []
for section in sections_of_file:
if section[0] != '$':
list_of_datasets.append(
_parse_psl_wind_lines(filepath, section, line_offset=start_line)
)
start_line += len(section)
# Concatenate the sections of each mode along time and return one dataset
# per mode.
mode_one_ds = xr.concat(list_of_datasets[0::2], dim='time')
mode_two_ds = xr.concat(list_of_datasets[1::2], dim='time')
if transpose:
mode_one_ds = mode_one_ds.transpose('HT', 'time')
mode_two_ds = mode_two_ds.transpose('HT', 'time')
return mode_one_ds, mode_two_ds
def read_psl_wind_profiler_temperature(filepath, transpose=True):
"""
Returns `xarray.Dataset` with stored data and metadata from a user-defined
NOAA PSL wind profiler temperature file.
Parameters
----------
filepath : str
Name of file(s) to read.
transpose : bool
If True, transpose the data so the dimensions are ('HT', 'time').
Returns
-------
ds : xarray.Dataset
Standard Xarray dataset with the data.
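Examples
--------
A minimal usage sketch; ``temperature_file`` is a placeholder path to a
local PSL wind profiler temperature (RASS) file.
>>> ds = read_psl_wind_profiler_temperature(temperature_file, transpose=True)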
"""
# Open the file and read the lines into a list
file = fsspec.open(filepath).open()
lines = file.readlines()
lines = [x.decode().rstrip()[:] for x in lines][1:]
# Separate sections based on the $ separator in the file
sections_of_file = (list(g) for _, g in groupby(lines, key='$'.__ne__))
# Count how many lines need to be skipped when reading into pandas
start_line = 0
list_of_datasets = []
for section in sections_of_file:
if section[0] != '$':
list_of_datasets.append(
_parse_psl_temperature_lines(filepath, section, line_offset=start_line)
)
start_line += len(section)
# Concatenate the resulting datasets along the time dimension
if transpose:
return xr.concat(list_of_datasets, dim='time').transpose('HT', 'time')
else:
return xr.concat(list_of_datasets, dim='time')
def _parse_psl_wind_lines(filepath, lines, line_offset=0):
"""
Reads lines related to wind in a PSL file.
Parameters
----------
filepath : str
Name of file(s) to read.
lines : list
List of strings containing the lines to parse.
line_offset : int (default = 0)
Offset to start reading the pandas data table.
Returns
-------
ds : xarray.Dataset
Xarray dataset with wind data.
"""
# 1 - site
site = lines[0]
# 2 - datatype and revision number
datatype, _, version = filter_list(lines[1].split(' '))
# 3 - station lat, lon, elevation
latitude, longitude, elevation = filter_list(lines[2].split(' ')).astype(float)
# 4 - year, month, day, hour, minute, second, utc
time = parse_date_line(lines[3])
# 5 - Consensus averaging time, number of beams, number of range gates
consensus_average_time, number_of_beams, number_of_range_gates = filter_list(
lines[4].split(' ')
).astype(int)
# 7 - number of coherent integrations, number of spectral averages,
# pulse width, inner pulse period.
# Values are duplicated as oblique and vertical values.
(
number_coherent_integrations_obl,
number_coherent_integrations_vert,
number_spectral_averages_obl,
number_spectral_averages_vert,
pulse_width_obl,
pulse_width_vert,
inner_pulse_period_obl,
inner_pulse_period_vert,
) = filter_list(lines[6].split(' ')).astype(int)
# 8 - full-scale Doppler value, beam vertical correction, delay to first
# gate, number of gates, spacing of gates. Values are duplicated as
# oblique and vertical values.
(
full_scale_doppler_obl,
full_scale_doppler_vert,
beam_vertical_correction,
delay_first_gate_obl,
delay_first_gate_vert,
number_of_gates_obl,
number_of_gates_vert,
spacing_of_gates_obl,
spacing_of_gates_vert,
) = filter_list(lines[7].split(' ')).astype(float)
# 9 - beam azimuth (degrees clockwise from north) and elevation for each beam
(
beam_azimuth1,
beam_elevation1,
beam_azimuth2,
beam_elevation2,
beam_azimuth3,
beam_elevation3,
) = filter_list(lines[8].split(' ')).astype(float)
beam_azimuth = np.array([beam_azimuth1, beam_azimuth2, beam_azimuth3], dtype='float32')
beam_elevation = np.array([beam_elevation1, beam_elevation2, beam_elevation3], dtype='float32')
# Read in the data table section using pandas
df = pd.read_csv(filepath, skiprows=line_offset + 10, sep=r'\s+')
# Only read in the number of rows for a given set of gates
df = df.iloc[: int(number_of_range_gates)]
# Grab a list of valid columns, except time
columns = set(list(df.columns)) - {'time'}
# Set the data types to be floats
df = df[list(columns)].astype(float)
# Missing values are encoded as 999999; replace them with NaN
df = df.replace(999999.0, np.nan)
# Ensure the height array is stored as a float
df['HT'] = df.HT.astype(float)
# Set the height as an index
df = df.set_index('HT')
# Rename the radial velocity, count, SNR, and QC columns more usefully
df = df.rename(
columns={
'RAD': 'RAD1',
'RAD.1': 'RAD2',
'RAD.2': 'RAD3',
'CNT': 'CNT1',
'CNT.1': 'CNT2',
'CNT.2': 'CNT3',
'SNR': 'SNR1',
'SNR.1': 'SNR2',
'SNR.2': 'SNR3',
'QC': 'QC1',
'QC.1': 'QC2',
'QC.2': 'QC3',
}
)
# Convert to an xarray dataset
ds = df.to_xarray()
# Add attributes to variables
# Height
ds['HT'].attrs['long_name'] = 'height_above_ground'
ds['HT'].attrs['units'] = 'km'
# Add time to our dataset
ds['time'] = time
# Add in our additional attributes
ds.attrs['site_identifier'] = site.strip()
ds.attrs['data_type'] = datatype
ds.attrs['latitude'] = latitude
ds.attrs['longitude'] = longitude
ds.attrs['elevation'] = elevation
ds.attrs['beam_elevation'] = beam_elevation
ds.attrs['beam_azimuth'] = beam_azimuth
ds.attrs['revision_number'] = version
ds.attrs[
'data_description'
] = 'https://psl.noaa.gov/data/obs/data/view_data_type_info.php?SiteID=ctd&DataOperationalID=5855&OperationalID=2371'
ds.attrs['consensus_average_time'] = consensus_average_time
ds.attrs['oblique-beam_vertical_correction'] = int(beam_vertical_correction)
ds.attrs['number_of_beams'] = int(number_of_beams)
ds.attrs['number_of_range_gates'] = int(number_of_range_gates)
# Handle oblique and vertical attributes.
ds.attrs['number_of_gates_oblique'] = int(number_of_gates_obl)
ds.attrs['number_of_gates_vertical'] = int(number_of_gates_vert)
ds.attrs['number_spectral_averages_oblique'] = int(number_spectral_averages_obl)
ds.attrs['number_spectral_averages_vertical'] = int(number_spectral_averages_vert)
ds.attrs['pulse_width_oblique'] = int(pulse_width_obl)
ds.attrs['pulse_width_vertical'] = int(pulse_width_vert)
ds.attrs['inner_pulse_period_oblique'] = int(inner_pulse_period_obl)
ds.attrs['inner_pulse_period_vertical'] = int(inner_pulse_period_vert)
ds.attrs['full_scale_doppler_value_oblique'] = float(full_scale_doppler_obl)
ds.attrs['full_scale_doppler_value_vertical'] = float(full_scale_doppler_vert)
ds.attrs['delay_to_first_gate_oblique'] = int(delay_first_gate_obl)
ds.attrs['delay_to_first_gate_vertical'] = int(delay_first_gate_vert)
ds.attrs['spacing_of_gates_oblique'] = int(spacing_of_gates_obl)
ds.attrs['spacing_of_gates_vertical'] = int(spacing_of_gates_vert)
return ds
def _parse_psl_temperature_lines(filepath, lines, line_offset=0):
"""
Reads lines related to temperature in a PSL file.
Parameters
----------
filepath : str
Name of file(s) to read.
lines : list
List of strings containing the lines to parse.
line_offset : int (default = 0)
Offset to start reading the pandas data table.
Returns
-------
ds : xarray.Dataset
Xarray dataset with temperature data.
"""
# 1 - site
site = lines[0]
# 2 - datatype and revision number
datatype, _, version = filter_list(lines[1].split(' '))
# 3 - station lat, lon, elevation
latitude, longitude, elevation = filter_list(lines[2].split(' ')).astype(float)
# 4 - year, month, day, hour, minute, second, utc
time = parse_date_line(lines[3])
# 5 - Consensus averaging time, number of beams, number of range gates.
consensus_average_time, number_of_beams, number_of_range_gates = filter_list(
lines[4].split(' ')
).astype(int)
# 7 - number of coherent integrations, number of spectral averages,
# pulse width, inner pulse period.
(
number_coherent_integrations,
number_spectral_averages,
pulse_width,
inner_pulse_period,
) = filter_list(lines[6].split(' ')).astype(int)
# 8 - full-scale Doppler value, delay to first gate, number of gates,
# spacing of gates.
full_scale_doppler, delay_first_gate, number_of_gates, spacing_of_gates = filter_list(
lines[7].split(' ')
).astype(float)
# 9 - beam azimuth (degrees clockwise from north) and beam elevation
beam_azimuth, beam_elevation = filter_list(lines[8].split(' ')).astype(float)
# Read in the data table section using pandas
df = pd.read_csv(filepath, skiprows=line_offset + 10, sep=r'\s+')
# Only read in the number of rows for a given set of gates
df = df.iloc[: int(number_of_gates)]
# Grab a list of valid columns, except time
columns = set(list(df.columns)) - {'time'}
# Set the data types to be floats
df = df[list(columns)].astype(float)
# Missing values are encoded as 999999; replace them with NaN
df = df.replace(999999.0, np.nan)
# Ensure the height array is stored as a float
df['HT'] = df.HT.astype(float)
# Set the height as an index
df = df.set_index('HT')
# Rename the count and snr columns more usefully
df = df.rename(
columns={
'CNT': 'CNT_T',
'CNT.1': 'CNT_Tc',
'CNT.2': 'CNT_W',
'SNR': 'SNR_T',
'SNR.1': 'SNR_Tc',
'SNR.2': 'SNR_W',
}
)
# Convert to an xarray dataset
ds = df.to_xarray()
# Add attributes to variables
# Height
ds['HT'].attrs['long_name'] = 'height_above_ground'
ds['HT'].attrs['units'] = 'km'
# Temperature
ds['T'].attrs['long_name'] = 'average_uncorrected_RASS_temperature'
ds['T'].attrs['units'] = 'degC'
ds['Tc'].attrs['long_name'] = 'average_corrected_RASS_temperature'
ds['Tc'].attrs['units'] = 'degC'
# Vertical motion (w)
ds['W'].attrs['long_name'] = 'average_vertical_wind'
ds['W'].attrs['units'] = 'm/s'
# Add time to our dataset
ds['time'] = time
# Add in our additional attributes
ds.attrs['site_identifier'] = site.strip()
ds.attrs['data_type'] = datatype
ds.attrs['latitude'] = latitude
ds.attrs['longitude'] = longitude
ds.attrs['elevation'] = elevation
ds.attrs['beam_elevation'] = beam_elevation
ds.attrs['beam_azimuth'] = beam_azimuth
ds.attrs['revision_number'] = version
ds.attrs[
'data_description'
] = 'https://psl.noaa.gov/data/obs/data/view_data_type_info.php?SiteID=ctd&DataOperationalID=5855&OperationalID=2371'
ds.attrs['consensus_average_time'] = consensus_average_time
ds.attrs['number_of_beams'] = int(number_of_beams)
ds.attrs['number_of_gates'] = int(number_of_gates)
ds.attrs['number_of_range_gates'] = int(number_of_range_gates)
ds.attrs['number_spectral_averages'] = int(number_spectral_averages)
ds.attrs['pulse_width'] = pulse_width
ds.attrs['inner_pulse_period'] = inner_pulse_period
ds.attrs['full_scale_doppler_value'] = full_scale_doppler
ds.attrs['spacing_of_gates'] = spacing_of_gates
return ds
def filter_list(list_of_strings):
"""
Parses a list of strings, removes empty strings, and returns a numpy array.
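Examples
--------
A minimal doctest with an illustrative input list:
>>> filter_list(['', '10', '', '20'])
array(['10', '20'], dtype='<U2')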
"""
return np.array(list(filter(None, list_of_strings)))
def parse_date_line(list_of_strings):
"""
Parses the date line in PSL files
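Examples
--------
A minimal doctest; the string below mimics the expected date line layout
(year month day hour minute second utc_offset):
>>> parse_date_line('22  3 14 12 30  0  0')
datetime.datetime(2022, 3, 14, 12, 30)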
"""
year, month, day, hour, minute, second, utc_offset = filter_list(
list_of_strings.split(' ')
).astype(int)
year += 2000
return datetime(year, month, day, hour, minute, second)
def read_psl_surface_met(filenames, conf_file=None):
"""
Returns `xarray.Dataset` with stored data and metadata from a user-defined
NOAA PSL SurfaceMet file.
Parameters
----------
filenames : str, list of str
Name of file(s) to read.
conf_file : str or pathlib.Path
Relative or full path to the configuration YAML file describing the
file format for each PSL site. Defaults to ./conf/noaapsl_SurfaceMet.yaml.
If the site is not defined in the default file, copy the default file
to a local location, add the missing site, and point conf_file to the
updated configuration file. An issue can also be opened on GitHub to
request that the missing site be added to the configuration file.
Returns
-------
ds : xarray.Dataset
Standard Xarray dataset with the data
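Examples
--------
A minimal usage sketch; the filename below is hypothetical but follows the
site + 2-digit year + day-of-year + ``.HHm`` pattern the reader expects,
and the site must be present in the configuration YAML file.
>>> ds = read_psl_surface_met('ctd21125.15m')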
"""
# Accept a single file path or a list of file paths
if isinstance(filenames, str):
filenames = [filenames]
site = ospath.basename(filenames[0])[:3]
if conf_file is None:
conf_file = ospath.join(ospath.dirname(__file__), 'conf', 'noaapsl_SurfaceMet.yaml')
# Read configuration YAML file
with open(conf_file) as fp:
try:
result = yaml.load(fp, Loader=yaml.FullLoader)
except AttributeError:
result = yaml.load(fp)
# Extract dictionary of just corresponding site
try:
result = result[site]
except KeyError:
raise RuntimeError(
f"Configuration for site '{site}' currently not available. "
'You can manually add the site configuration to a copy of '
'noaapsl_SurfaceMet.yaml and set conf_file= name of copied file '
'until the site is added.'
)
# Extract date and time from filename to use in extracting format from YAML file.
search_result = re.match(r'[a-z]{3}(\d{2})(\d{3})\.(\d{2})m', ospath.basename(filenames[0]))
yy, doy, hh = search_result.groups()
if yy > '70':
yy = f'19{yy}'
else:
yy = f'20{yy}'
# Extract location information from configuration file.
try:
location_info = result['info']
except KeyError:
location_info = None
# Loop through each date range for the site to extract the correct file format from conf file.
file_datetime = (
datetime.strptime(f'{yy}-01-01', '%Y-%m-%d')
+ timedelta(int(doy) - 1)
+ timedelta(hours=int(hh))
)
for ii in result.keys():
if ii == 'info':
continue
date_range = [
datetime.strptime(jj, '%Y-%m-%d %H:%M:%S') for jj in result[ii]['_date_range']
]
if file_datetime >= date_range[0] and file_datetime <= date_range[1]:
result = result[ii]
del result['_date_range']
break
# Read data files by passing in column names from configuration file.
ds = read_csv(filenames, column_names=list(result.keys()))
# Calculate numpy datetime64 values from the year, Julian day, and
# hour-minute columns of the data file.
time = np.array(ds['Year'].values - 1970, dtype='datetime64[Y]')
day = np.array(np.array(ds['J_day'].values - 1, dtype='timedelta64[D]'))
hourmin = ds['HoursMinutes'].values + 10000
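# Adding 10000 zero-pads HoursMinutes to five digits so the hour and
# minute can be sliced from the string by position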
hour = [int(str(ii)[1:3]) for ii in hourmin]
hour = np.array(hour, dtype='timedelta64[h]')
minute = [int(str(ii)[3:]) for ii in hourmin]
minute = np.array(minute, dtype='timedelta64[m]')
time = time + day + hour + minute
time = time.astype('datetime64[ns]')
# Update Dataset to use "time" coordinate and assign the calculated times
ds = ds.assign_coords(index=time)
ds = ds.rename(index='time')
# Loop through the configuration dictionary and apply attributes or
# perform the requested action for special keys.
for var_name in result:
for key, value in result[var_name].items():
if key == '_delete' and value is True:
del ds[var_name]
continue
if key == '_type':
dtype = result[var_name][key]
ds[var_name] = ds[var_name].astype(dtype)
continue
if key == '_missing_value':
data_values = ds[var_name].values
data_values[data_values == result[var_name][key]] = np.nan
ds[var_name].values = data_values
continue
ds[var_name].attrs[key] = value
# Add location information to Dataset
if location_info is not None:
ds.attrs['location_description'] = location_info['name']
for var_name in ['lat', 'lon', 'alt']:
value = location_info[var_name]['value']
del location_info[var_name]['value']
ds[var_name] = xr.DataArray(data=value, attrs=location_info[var_name])
return ds
def read_psl_parsivel(files):
"""
Returns `xarray.Dataset` with stored data and metadata from a user-defined
NOAA PSL Parsivel disdrometer file.
Parameters
----------
files : str or list
Name of file(s) or urls to read.
Returns
-------
ds : xarray.Dataset
Standard Xarray dataset with the data for the parsivel
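Examples
--------
A minimal usage sketch; ``parsivel_file`` is a placeholder for a local PSL
Parsivel file path or URL.
>>> ds = read_psl_parsivel(parsivel_file)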
"""
# Define the names for the variables
names = [
'time',
'B1',
'B2',
'B3',
'B4',
'B5',
'B6',
'B7',
'B8',
'B9',
'B10',
'B11',
'B12',
'B13',
'B14',
'B15',
'B16',
'B17',
'B18',
'B19',
'B20',
'B21',
'B22',
'B23',
'B24',
'B25',
'B26',
'B27',
'B28',
'B29',
'B30',
'B31',
'B32',
'blackout',
'good',
'bad',
'number_detected_particles',
'precip_rate',
'precip_amount',
'precip_accumulation',
'equivalent_radar_reflectivity',
'number_in_error',
'dirty',
'very_dirty',
'damaged',
'laserband_amplitude',
'laserband_amplitude_stdev',
'sensor_temperature',
'sensor_temperature_stdev',
'sensor_voltage',
'sensor_voltage_stdev',
'heating_current',
'heating_current_stdev',
'number_rain_particles',
'number_non_rain_particles',
'number_ambiguous_particles',
'precip_type',
]
# Define the particle sizes and class width sizes based on
# https://psl.noaa.gov/data/obs/data/view_data_type_info.php?SiteID=ctd&DataOperationalID=5890
vol_equiv_diam = [
0.062,
0.187,
0.312,
0.437,
0.562,
0.687,
0.812,
0.937,
1.062,
1.187,
1.375,
1.625,
1.875,
2.125,
2.375,
2.75,
3.25,
3.75,
4.25,
4.75,
5.5,
6.5,
7.5,
8.5,
9.5,
11.0,
13.0,
15.0,
17.0,
19.0,
21.5,
24.5,
]
class_size_width = [
0.125,
0.125,
0.125,
0.125,
0.125,
0.125,
0.125,
0.125,
0.125,
0.125,
0.250,
0.250,
0.250,
0.250,
0.250,
0.5,
0.5,
0.5,
0.5,
0.5,
1.0,
1.0,
1.0,
1.0,
1.0,
2.0,
2.0,
2.0,
2.0,
2.0,
3.0,
3.0,
]
if not isinstance(files, list):
files = [files]
# Loop through each file or URL and append each DataFrame to data for concatenation
data = []
end_time = []
for f in files:
df = pd.read_table(f, skiprows=[0, 1, 2], names=names, index_col=0, sep=r'\s+')
# Reading the table twice to get the date so it can be parsed appropriately
date = pd.read_table(f, nrows=0).to_string().split(' ')[-3]
time = df.index
start_time = []
form = '%y%j%H:%M:%S:%f'
for t in time:
start_time.append(pd.to_datetime(date + ':' + t.split('-')[0], format=form))
end_time.append(pd.to_datetime(date + ':' + t.split('-')[1], format=form))
df.index = start_time
data.append(df)
df = pd.concat(data)
# Create a 2D size distribution variable from all the B* variables
dsd = []
for n in names:
if 'B' not in n:
continue
dsd.append(list(df[n]))
# Convert the dataframe to xarray DataSet and add variables
ds = df.to_xarray()
ds = ds.rename({'index': 'time'})
long_name = 'Drop Size Distribution'
attrs = {'long_name': long_name, 'units': 'count'}
da = xr.DataArray(
np.transpose(dsd),
dims=['time', 'particle_size'],
coords=[ds['time'].values, vol_equiv_diam],
)
ds['number_density_drops'] = da
attrs = {'long_name': 'Class size width', 'units': 'mm'}
da = xr.DataArray(
class_size_width, dims=['particle_size'], coords=[vol_equiv_diam], attrs=attrs
)
ds['class_size_width'] = da
attrs = {'long_name': 'Particle class size average', 'units': 'mm'}
da = xr.DataArray(vol_equiv_diam, dims=['particle_size'], coords=[vol_equiv_diam], attrs=attrs)
ds['particle_size'] = da
attrs = {'long_name': 'End time of averaging interval'}
da = xr.DataArray(end_time, dims=['time'], coords=[ds['time'].values], attrs=attrs)
ds['interval_end_time'] = da
# Define the attributes and metadata and add them to the Dataset
attrs = {
'blackout': {
'long_name': 'Number of samples excluded during PC clock sync',
'units': 'count',
},
'good': {'long_name': 'Number of samples that passed QC checks', 'units': 'count'},
'bad': {'long_name': 'Number of samples that failed QC checks', 'units': 'count'},
'number_detected_particles': {
'long_name': 'Total number of detected particles',
'units': 'count',
},
'precip_rate': {'long_name': 'Precipitation rate', 'units': 'mm/hr'},
'precip_amount': {'long_name': 'Interval accumulation', 'units': 'mm'},
'precip_accumulation': {'long_name': 'Event accumulation', 'units': 'mm'},
'equivalent_radar_reflectivity': {'long_name': 'Radar Reflectivity', 'units': 'dB'},
'number_in_error': {
'long_name': 'Number of samples that were reported dirty, very dirty, or damaged',
'units': 'count',
},
'dirty': {
'long_name': 'Laser glass is dirty but measurement is still possible',
'units': 'unitless',
},
'very_dirty': {
'long_name': 'Laser glass is dirty and partially covered; no further measurements are possible',
'units': 'unitless',
},
'damaged': {'long_name': 'Laser damaged', 'units': 'unitless'},
'laserband_amplitude': {
'long_name': 'Average signal amplitude of the laser strip',
'units': 'unitless',
},
'laserband_amplitude_stdev': {
'long_name': 'Standard deviation of the signal amplitude of the laser strip',
'units': 'unitless',
},
'sensor_temperature': {'long_name': 'Average sensor temperature', 'units': 'degC'},
'sensor_temperature_stdev': {
'long_name': 'Standard deviation of sensor temperature',
'units': 'degC',
},
'sensor_voltage': {'long_name': 'Sensor power supply voltage', 'units': 'V'},
'sensor_voltage_stdev': {
'long_name': 'Standard deviation of the sensor power supply voltage',
'units': 'V',
},
'heating_current': {'long_name': 'Average heating system current', 'units': 'A'},
'heating_current_stdev': {
'long_name': 'Standard deviation of heating system current',
'units': 'A',
},
'number_rain_particles': {
'long_name': 'Number of particles detected as rain',
'units': 'unitless',
},
'number_non_rain_particles': {
'long_name': 'Number of particles detected not as rain',
'units': 'unitless',
},
'number_ambiguous_particles': {
'long_name': 'Number of particles detected as ambiguous',
'units': 'unitless',
},
'precip_type': {
'long_name': 'Precipitation type (1=rain; 2=mixed; 3=snow)',
'units': 'unitless',
},
'number_density_drops': {'long_name': 'Drop Size Distribution', 'units': 'count'},
}
for v in ds:
if v in attrs:
ds[v].attrs = attrs[v]
return ds
def read_psl_radar_fmcw_moment(files):
"""
Returns `xarray.Dataset` with stored data and metadata from
NOAA PSL FMCW Radar files. See References section for details.
Parameters
----------
files : str or list
Name of file(s) to read. Currently does not support reading URLs but files can
be downloaded easily using the act.discovery.download_noaa_psl_data function.
Returns
-------
ds : xarray.Dataset
Standard Xarray dataset with the FMCW radar data.
References
----------
Johnston, Paul E., James R. Jordan, Allen B. White, David A. Carter, David M. Costa, and Thomas E. Ayers.
"The NOAA FM-CW snow-level radar." Journal of Atmospheric and Oceanic Technology 34, no. 2 (2017): 249-267.
"""
ds = _parse_psl_radar_moments(files)
return ds
def read_psl_radar_sband_moment(files):
"""
Returns `xarray.Dataset` with stored data and metadata from
NOAA PSL S-band Radar files.
Parameters
----------
files : str or list
Name of file(s) to read. Currently does not support reading URLs but files can
be downloaded easily using the act.discovery.download_noaa_psl_data function.
Returns
-------
ds : xarray.Dataset
Standard Xarray dataset with the S-band radar data.
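Examples
--------
A minimal usage sketch; ``sband_files`` is a placeholder for one or more
local S-band moment files.
>>> ds = read_psl_radar_sband_moment(sband_files)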
"""
ds = _parse_psl_radar_moments(files)
return ds
def _parse_psl_radar_moments(files):
"""
Returns `xarray.Dataset` with stored data and metadata from
NOAA PSL FMCW and S-Band Radar files.
Parameters
----------
files : str or list
Name of file(s) to read. Currently does not support reading URLs but files can
be downloaded easily using the act.discovery.download_noaa_psl_data function.
Returns
-------
ds : xarray.Dataset
Standard Xarray dataset with the radar moment data.
"""
# Set the initial dictionary to convert to xarray dataset
data = {
'site': {'dims': ['file'], 'data': [], 'attrs': {'long_name': 'NOAA site code'}},
'lat': {
'dims': ['file'],
'data': [],
'attrs': {'long_name': 'North Latitude', 'units': 'degree_N'},
},
'lon': {
'dims': ['file'],
'data': [],
'attrs': {'long_name': 'East Longitude', 'units': 'degree_E'},
},
'alt': {
'dims': ['file'],
'data': [],
'attrs': {'long_name': 'Altitude above mean sea level', 'units': 'm'},
},
'freq': {
'dims': ['file'],
'data': [],
'attrs': {'long_name': 'Operating Frequency; Ignore for FMCW', 'units': 'Hz'},
},
'azimuth': {
'dims': ['time'],
'data': [],
'attrs': {'long_name': 'Azimuth angle', 'units': 'deg'},
},
'elevation': {
'dims': ['time'],
'data': [],
'attrs': {'long_name': 'Elevation angle', 'units': 'deg'},
},
'beam_direction_code': {
'dims': ['time'],
'data': [],
'attrs': {'long_name': 'Beam direction code', 'units': ''},
},
'year': {'dims': ['time'], 'data': [], 'attrs': {'long_name': '2-digit year', 'units': ''}},
'day_of_year': {
'dims': ['time'],
'data': [],
'attrs': {'long_name': 'Day of the year', 'units': ''},
},
'hour': {
'dims': ['time'],
'data': [],
'attrs': {'long_name': 'Hour of the day', 'units': ''},
},
'minute': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Minutes', 'units': ''}},
'second': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Seconds', 'units': ''}},
'interpulse_period': {
'dims': ['time'],
'data': [],
'attrs': {'long_name': 'Interpulse Period', 'units': 'ms'},
},
'pulse_width': {
'dims': ['time'],
'data': [],
'attrs': {'long_name': 'Pulse width', 'units': 'ns'},
},
'first_range_gate': {
'dims': ['time'],
'data': [],
'attrs': {'long_name': 'Range to first range gate', 'units': 'm'},
},
'range_between_gates': {
'dims': ['time'],
'data': [],
'attrs': {'long_name': 'Distance between range gates', 'units': 'm'},
},
'n_gates': {
'dims': ['time'],
'data': [],
'attrs': {'long_name': 'Number of range gates', 'units': 'count'},
},
'n_coherent_integration': {
'dims': ['time'],
'data': [],
'attrs': {'long_name': 'Number of coherent integrations', 'units': 'count'},
},
'n_averaged_spectra': {
'dims': ['time'],
'data': [],
'attrs': {'long_name': 'Number of averaged spectra', 'units': 'count'},
},
'n_points_spectrum': {
'dims': ['time'],
'data': [],
'attrs': {'long_name': 'Number of points in spectra', 'units': 'count'},
},
'n_code_bits': {
'dims': ['time'],
'data': [],
'attrs': {'long_name': 'Number of code bits', 'units': 'count'},
},
'radial_velocity': {
'dims': ['time', 'range'],
'data': [],
'attrs': {'long_name': 'Radial velocity', 'units': 'm/s'},
},
'snr': {
'dims': ['time', 'range'],
'data': [],
'attrs': {'long_name': 'Signal-to-noise ratio - not range corrected', 'units': 'dB'},
},
'signal_power': {
'dims': ['time', 'range'],
'data': [],
'attrs': {'long_name': 'Signal Power - not range corrected', 'units': 'dB'},
},
'spectral_width': {
'dims': ['time', 'range'],
'data': [],
'attrs': {'long_name': 'Spectral width', 'units': 'm/s'},
},
'noise_amplitude': {
'dims': ['time', 'range'],
'data': [],
'attrs': {'long_name': 'noise_amplitude', 'units': 'dB'},
},
'qc_variable': {
'dims': ['time', 'range'],
'data': [],
'attrs': {'long_name': 'QC Value - not used', 'units': ''},
},
'time': {'dims': ['time'], 'data': [], 'attrs': {'long_name': 'Datetime', 'units': ''}},
'range': {'dims': ['range'], 'data': [], 'attrs': {'long_name': 'Range', 'units': 'm'}},
'reflectivity_uncalibrated': {
'dims': ['time', 'range'],
'data': [],
'attrs': {'long_name': 'Uncalibrated reflectivity (range-corrected SNR)', 'units': 'dB'},
},
}
# Separate out the names as they will be accessed in different parts of the code
h1_names = ['site', 'lat', 'lon', 'alt', 'freq']
h2_names = [
'azimuth',
'elevation',
'beam_direction_code',
'year',
'day_of_year',
'hour',
'minute',
'second',
]
h3_names = [
'interpulse_period',
'pulse_width',
'first_range_gate',
'range_between_gates',
'n_gates',
'n_coherent_integration',
'n_averaged_spectra',
'n_points_spectrum',
'n_code_bits',
]
names = {
'radial_velocity': 0,
'snr': 1,
'signal_power': 2,
'spectral_width': 3,
'noise_amplitude': 4,
'qc_variable': 5,
}
# If files is a string, convert it to a list for handling.
if not isinstance(files, list):
files = [files]
# Run through each file and read the data in
for f in files:
# Read in the first line of the file which has site, lat, lon, etc...
df = str(pd.read_table(f, nrows=0).columns[0]).split(' ')
ctr = 0
for d in df:
if len(d) > 0:
if h1_names[ctr] in ('lat', 'lon'):
data[h1_names[ctr]]['data'].append(float(d) / 100.0)
else:
data[h1_names[ctr]]['data'].append(d)
ctr += 1
# Set counts and errors
error = False
error_ct = 0
ct = 0
# Loop until an error occurs, i.e., end of file
while error is False:
try:
# Read in the initial headers to get information used to parse data
if ct == 0:
df = str(pd.read_table(f, nrows=0, skiprows=[0]).columns[0]).split(' ')
ctr = 0
for d in df:
if len(d) > 0:
data[h2_names[ctr]]['data'].append(d)
ctr += 1
# Read in third row of header information
df = str(pd.read_table(f, nrows=0, skiprows=[0, 1]).columns[0]).split(' ')
ctr = 0
for d in df:
if len(d) > 0:
data[h3_names[ctr]]['data'].append(d)
ctr += 1
# Read in the data based on number of gates
df = pd.read_csv(
f,
skiprows=[0, 1, 2],
nrows=int(data['n_gates']['data'][-1]) - 1,
sep=r'\s+',
names=list(names.keys()),
)
index2 = 0
else:
# Set indices for parsing data, reading 2 headers and then the columns of data
index1 = ct * int(data['n_gates']['data'][-1])
index2 = index1 + int(data['n_gates']['data'][-1]) + 2 * ct + 4
df = str(
pd.read_table(f, nrows=0, skiprows=list(range(index2 - 1))).columns[0]
).split(' ')
ctr = 0
for d in df:
if len(d) > 0:
data[h2_names[ctr]]['data'].append(d)
ctr += 1
df = str(
pd.read_table(f, nrows=0, skiprows=list(range(index2))).columns[0]
).split(' ')
ctr = 0
for d in df:
if len(d) > 0:
data[h3_names[ctr]]['data'].append(d)
ctr += 1
df = pd.read_csv(
f,
skiprows=list(range(index2 + 1)),
nrows=int(data['n_gates']['data'][-1]) - 1,
sep=r'\s+',
names=list(names.keys()),
)
# Add data from the columns to the dictionary
for n in names:
data[n]['data'].append(df[n].to_list())
# Calculate the range based on number of gates, range to first gate and range between gates
if len(data['range']['data']) == 0:
ranges = float(data['first_range_gate']['data'][-1]) + np.array(
range(int(data['n_gates']['data'][-1]) - 1)
) * float(data['range_between_gates']['data'][-1])
data['range']['data'] = ranges
# Calculate a time
time = dt.datetime(
int('20' + data['year']['data'][-1]),
1,
1,
int(data['hour']['data'][-1]),
int(data['minute']['data'][-1]),
int(data['second']['data'][-1]),
) + dt.timedelta(days=int(data['day_of_year']['data'][-1]) - 1)
data['time']['data'].append(time)
# Range correct the snr which converts it essentially to an uncalibrated reflectivity
snr_rc = data['snr']['data'][-1] - 20.0 * np.log10(1.0 / (ranges / 1000.0) ** 2)
data['reflectivity_uncalibrated']['data'].append(snr_rc)
except Exception as e:
# Handle errors: stop at end of file; otherwise try the next block of
# data, but if it errors more than once in this file, move on
if isinstance(e, pd.errors.EmptyDataError) or error_ct > 1:
error = True
else:
print(e)
error_ct += 1
ct += 1
# Convert dictionary to Dataset
ds = xr.Dataset.from_dict(data)
return ds