"""
Modules for reading in NOAA GML data
"""
import re
from datetime import datetime
from pathlib import Path
import numpy as np
import pandas as pd
import xarray as xr
from .text import read_csv
def read_gml(filename, datatype=None, remove_time_vars=True, convert_missing=True, **kwargs):
    """
    Read a NOAA GML data file by dispatching to the matching reader.

    When ``datatype`` is given it selects the reader directly. Otherwise the
    reader is guessed from the base name of the file. The guess mostly works,
    but specifying ``datatype`` gives the most reliable result.

    Parameters
    ----------
    filename : str or pathlib.Path
        Data file full path name. A list or tuple of names is accepted, in
        which case only the first entry is used to guess the data type.
    datatype : str
        Data file type that bypasses the guessing from filename format
        and goes directly to the reading routine. Options include
        [MET, RADIATION, OZONE, CO2, HALO] (case-insensitive).
    remove_time_vars : bool
        Some variables are converted into coordinate variables in the Xarray
        Dataset and are not needed after conversion. This will remove those
        variables. Only forwarded to the radiation reader.
    convert_missing : bool
        Convert missing value indicator in CSV to NaN in Xarray Dataset.
        Forwarded to the met, radiation and CO2 readers.
    **kwargs : keywords
        Keywords to pass through to instrument specific reading routine.

    Returns
    -------
    ds : xarray.Dataset or None
        Dataset returned by the selected reader. None is returned when
        ``datatype`` is not given and the file name matches no known pattern.

    Raises
    ------
    ValueError
        If ``datatype`` is given but is not one of the supported options.

    """
    if datatype is not None:
        # Normalise once so every comparison below is case-insensitive.
        requested = datatype.upper()
        if requested == 'MET':
            return read_gml_met(filename, convert_missing=convert_missing, **kwargs)
        if requested == 'RADIATION':
            return read_gml_radiation(
                filename,
                remove_time_vars=remove_time_vars,
                convert_missing=convert_missing,
                **kwargs,
            )
        if requested == 'OZONE':
            return read_gml_ozone(filename, **kwargs)
        if requested == 'CO2':
            return read_gml_co2(filename, convert_missing=convert_missing, **kwargs)
        if requested == 'HALO':
            return read_gml_halo(filename, **kwargs)
        raise ValueError('datatype is unknown')

    # Guess from the base name of the (first) file.
    test_filename = filename
    if isinstance(test_filename, (list, tuple)):
        test_filename = filename[0]
    test_filename = str(Path(test_filename).name)

    if test_filename.startswith('met_') and test_filename.endswith('.txt'):
        return read_gml_met(filename, convert_missing=convert_missing, **kwargs)

    if test_filename.startswith('co2_') and test_filename.endswith('.txt'):
        return read_gml_co2(filename, convert_missing=convert_missing, **kwargs)

    # The dot before the extension is escaped so it only matches a literal
    # '.'; re.match still anchors at the start of the name only.
    if re.match(r'([a-z]{3})([\d]{5})\.dat', test_filename) is not None:
        return read_gml_radiation(
            filename,
            remove_time_vars=remove_time_vars,
            convert_missing=convert_missing,
            **kwargs,
        )

    ozone_patterns = [
        r'[a-z]{3}_[\d]{4}_[\d]{2}_hour\.dat',
        r'[a-z]{3}_[\d]{2}_[\d]{4}_hour\.dat',
        r'[a-z]{3}_[\d]{4}_all_minute\.dat',
        r'[a-z]{3}_[\d]{2}_[\d]{4}_5minute\.dat',
        r'[a-z]{3}_[\d]{2}_[\d]{4}_min\.dat',
        r'[a-z]{3}_o3_6m_hour_[\d]{2}_[\d]{4}\.dat',
        r'[a-z]{3}_ozone_houry__[\d]{4}',
    ]
    if any(re.match(pattern, test_filename) for pattern in ozone_patterns):
        return read_gml_ozone(filename, **kwargs)

    halo_patterns = [
        r'[a-z]{3}_CCl4_Day\.dat',
        r'[a-z]{3}_CCl4_All\.dat',
        r'[a-z]{3}_CCl4_MM\.dat',
        r'[a-z]{3}_MC_MM\.dat',
    ]
    if any(re.match(pattern, test_filename) for pattern in halo_patterns):
        return read_gml_halo(filename, **kwargs)

    # No pattern matched: nothing to read.
    return None
def read_gml_halo(filename, **kwargs):
    """
    Function to read Halocarbon data from NOAA GML.

    Parameters
    ----------
    filename : str or pathlib.Path
        Data file full path name. For a list or tuple, the first entry is
        used to count the leading '#' header lines.
    **kwargs : keywords
        Keywords to pass through to ACT read_csv() routine.

    Returns
    -------
    ds : xarray.Dataset
        Dataset with a 'time' coordinate built from the date columns, and
        with the known variables typed, renamed and given attributes.
        None is returned when filename is None.

    """
    if filename is None:
        return None

    def _ppt(long_name, new_name):
        # Metadata for a concentration-like column reported in ppt.
        return {
            'long_name': long_name,
            'units': 'ppt',
            '_FillValue': np.nan,
            '__type': np.float32,
            '__rename': new_name,
        }

    def _count():
        # Metadata for a sample-count column.
        return {
            'long_name': 'Number of samples',
            'units': 'count',
            '__type': np.int16,
            '__rename': 'number_of_samples',
        }

    ccl4_sd = 'Carbon Tetrachloride (CCl4) standard deviation'
    mc = 'Methyl Chloroform (CH3CCl3)'
    mc_sd = 'Methyl Chloroform (CH3CCl3) standard deviation'

    # Keys starting with '__' are processing instructions, everything else
    # is copied to the variable attributes.
    variables = {
        'CCl4catsBRWm': _ppt('Carbon Tetrachloride (CCl4) daily median', 'CCl4'),
        'CCl4catsBRWmsd': _ppt(ccl4_sd, 'CCl4_std_dev'),
        'CCl4catsBRWsd': _ppt(ccl4_sd, 'CCl4_std_dev'),
        'CCl4catsBRWn': _count(),
        'CCl4catsBRWunc': _ppt('Carbon Tetrachloride (CCl4) uncertainty', 'CCl4_uncertainty'),
        'MCcatsBRWm': _ppt(mc, 'methyl_chloroform'),
        'MCcatsBRWunc': _ppt(
            'Methyl Chloroform (CH3CCl3) uncertainty', 'methyl_chloroform_uncertainty'
        ),
        'MCcatsBRWsd': _ppt(mc_sd, 'methyl_chloroform_std_dev'),
        'MCcatsBRWmsd': _ppt(mc_sd, 'methyl_chloroform_std_dev'),
        'MCcatsBRWn': _count(),
        'MCritsBRWm': _ppt(mc, 'methyl_chloroform'),
        'MCritsBRWsd': _ppt(mc_sd, 'methyl_chloroform_std_dev'),
        'MCritsBRWn': _count(),
    }

    first_file = filename[0] if isinstance(filename, (list, tuple)) else filename

    # Count the leading comment lines; the first non-'#' line ends the count.
    header = 0
    with open(first_file) as fc:
        for raw_line in fc:
            if not raw_line.strip().startswith('#'):
                break
            header += 1

    ds = read_csv(
        filename, sep=r'\s+', header=header, na_values=['Nan', 'NaN', 'nan', 'NAN'], **kwargs
    )
    var_names = list(ds.data_vars)

    # Locate the date/time columns by name suffix. The suffixes are tested in
    # this order and the last matching column wins for each suffix.
    suffixes = ('yr', 'mon', 'day', 'hour', 'min')
    time_columns = dict.fromkeys(suffixes)
    for column in var_names:
        for suffix in suffixes:
            if column.endswith(suffix):
                time_columns[suffix] = column
                break

    # Use the finest resolution available; monthly data gets day 1.
    if time_columns['min'] is not None:
        used = suffixes
    elif time_columns['hour'] is not None:
        used = suffixes[:4]
    elif time_columns['day'] is not None:
        used = suffixes[:3]
    else:
        used = suffixes[:2]

    timestamp = np.full(ds[var_names[0]].size, np.nan, dtype='datetime64[ns]')
    for row in range(len(timestamp)):
        parts = [ds[time_columns[suffix]].values[row] for suffix in used]
        if len(parts) == 2:
            parts.append(1)
        timestamp[row] = np.datetime64(datetime(*parts), 'ns')

    # Drop the date/time columns; names that were not found are skipped.
    for column in time_columns.values():
        try:
            del ds[column]
        except KeyError:
            pass

    ds = ds.rename({'index': 'time'})
    ds = ds.assign_coords(time=timestamp)
    ds['time'].attrs['long_name'] = 'Time'

    for var_name, spec in variables.items():
        if var_name not in var_names:
            continue
        for key, setting in spec.items():
            if key == '__type':
                ds[var_name].values = ds[var_name].values.astype(setting)
            elif key == '__rename':
                ds = ds.rename({var_name: setting})
            else:
                ds[var_name].attrs[key] = setting

    return ds
def read_gml_co2(filename=None, convert_missing=True, **kwargs):
    """
    Function to read carbon dioxide data from NOAA GML.

    Parameters
    ----------
    filename : str or pathlib.Path
        Data file full path name. For a list or tuple, the first entry is
        used to read the header length.
    convert_missing : boolean
        Option to convert missing values to NaN. If turned off will
        set variable attribute to missing value expected. This works well
        to preserve the data type best for writing to a netCDF file.
    **kwargs : keywords
        Keywords to pass through to ACT read_csv() routine.

    Returns
    -------
    ds : xarray.Dataset
        Dataset with a 'time' coordinate, the 'co2' variable and its quality
        control variable, plus typed and attributed ancillary variables.
        None is returned when filename is None.

    """
    ds = None
    if filename is None:
        return ds

    # A value of None marks a column that is dropped after the time
    # coordinate is built. Keys starting with '__' are processing
    # instructions, everything else becomes a variable attribute.
    variables = {
        'site_code': None,
        'year': None,
        'month': None,
        'day': None,
        'hour': None,
        'minute': None,
        'second': None,
        'time_decimal': None,
        'value': {
            'long_name': 'Carbon dioxide in dry air',
            'units': 'ppm',
            '_FillValue': -999.99,
            'comment': (
                'Mole fraction reported in units of micromol mol-1 '
                '(10-6 mol per mol of dry air); abbreviated as ppm (parts per million).'
            ),
            '__type': np.float32,
            '__rename': 'co2',
        },
        'value_std_dev': {
            'long_name': 'Standard deviation of carbon dioxide in dry air',
            'units': 'ppm',
            '_FillValue': -99.99,
            'comment': (
                'This is the standard deviation of the reported mean value '
                'when nvalue is greater than 1. See provider_comment if available.'
            ),
            '__type': np.float32,
            '__rename': 'co2_std_dev',
        },
        'nvalue': {
            'long_name': 'Number of measurements contributing to reported value',
            'units': '1',
            '_FillValue': -9,
            '__type': np.int16,
            '__rename': 'number_of_measurements',
        },
        'latitude': {
            'long_name': 'Latitude at which air sample was collected',
            'units': 'degrees_north',
            '_FillValue': -999.999,
            'standard_name': 'latitude',
            '__type': np.float32,
        },
        'longitude': {
            'long_name': 'Longitude at which air sample was collected',
            'units': 'degrees_east',
            '_FillValue': -999.999,
            'standard_name': 'longitude',
            '__type': np.float32,
        },
        'altitude': {
            'long_name': 'Sample altitude',
            'units': 'm',
            '_FillValue': -999.999,
            'standard_name': 'altitude',
            'comment': (
                'Altitude for this dataset is the sum of surface elevation '
                '(masl) and sample intake height (magl)'
            ),
            '__type': np.float32,
        },
        'intake_height': {
            'long_name': 'Sample intake height above ground level',
            'units': 'm',
            '_FillValue': -999.999,
            '__type': np.float32,
        },
    }

    test_filename = filename
    if isinstance(test_filename, (list, tuple)):
        test_filename = test_filename[0]

    # The last token on the first line is taken as the header line count;
    # one line is left unskipped for read_csv to use as column names.
    with open(test_filename) as fc:
        skiprows = int(fc.readline().strip().split()[-1]) - 1

    ds = read_csv(filename, sep=r'\s+', skiprows=skiprows, **kwargs)

    # Fetch each date column once instead of once per row.
    date_parts = [
        ds[name].values for name in ('year', 'month', 'day', 'hour', 'minute', 'second')
    ]
    timestamp = np.array(
        [datetime(*row) for row in zip(*date_parts)], dtype='datetime64[ns]'
    )

    ds = ds.rename({'index': 'time'})
    ds = ds.assign_coords(time=timestamp)
    ds['time'].attrs['long_name'] = 'Time'

    for var_name, value in variables.items():
        if value is None:
            del ds[var_name]
            continue

        # Apply type and attributes under the original name, and rename last
        # so the result does not depend on the key order of the dict above.
        new_name = None
        for att_name, att_value in value.items():
            if att_name == '__type':
                ds[var_name].values = ds[var_name].values.astype(att_value)
            elif att_name == '__rename':
                new_name = att_value
            else:
                ds[var_name].attrs[att_name] = att_value
        if new_name is not None:
            ds = ds.rename({var_name: new_name})
            var_name = new_name

        if convert_missing and '_FillValue' in ds[var_name].attrs:
            missing_value = ds[var_name].attrs['_FillValue']
            values = ds[var_name].values.astype(float)
            values[np.isclose(missing_value, values)] = np.nan
            ds[var_name].values = values
            ds[var_name].attrs['_FillValue'] = np.nan

    # First character of qcflag other than '.' marks a bad value, second
    # character other than '.' marks a suspect value.
    flags = ds['qcflag'].values
    bad_index = [ii for ii, flag in enumerate(flags) if list(flag)[0] != '.']
    suspect_index = [ii for ii, flag in enumerate(flags) if list(flag)[1] != '.']

    var_name = 'co2'
    qc_var_name = ds.qcfilter.create_qc_variable(var_name)
    ds.qcfilter.add_test(
        var_name,
        index=bad_index,
        test_assessment='Bad',
        test_meaning='Obvious problems during collection or analysis',
    )
    ds.qcfilter.add_test(
        var_name,
        index=suspect_index,
        test_assessment='Indeterminate',
        test_meaning=(
            'Likely valid but does not meet selection criteria determined by '
            'the goals of a particular investigation'
        ),
    )
    ds[qc_var_name].attrs[
        'comment'
    ] = 'This quality control flag is provided by the contributing PIs'
    del ds['qcflag']

    return ds
def read_gml_ozone(filename=None, **kwargs):
    """
    Function to read ozone data from NOAA GML.

    Parameters
    ----------
    filename : str or pathlib.Path
        Data file full path name. For a list or tuple, the first entry is
        used to locate the column header line.
    **kwargs : keywords
        Keywords to pass through to ACT read_csv() routine.

    Returns
    -------
    ds : xarray.Dataset
        Dataset with a 'time' coordinate and an 'ozone' variable in ppb.
        The station code is stored in the 'station' global attribute.
        None is returned when filename is None.

    Raises
    ------
    ValueError
        If the file contains no six-column header line starting with 'STN'.

    """
    ds = None
    if filename is None:
        return ds

    test_filename = filename
    if isinstance(test_filename, (list, tuple)):
        test_filename = test_filename[0]

    # Find the column header line. Iterating the file stops at EOF, so a file
    # without the header cannot spin forever.
    skiprows = None
    with open(test_filename) as fc:
        for line_number, raw_line in enumerate(fc):
            fields = raw_line.split()
            if len(fields) == 6 and fields[0] == 'STN':
                skiprows = line_number
                break

    if skiprows is None:
        raise ValueError(
            f"No 'STN' column header line with six columns found in {test_filename}"
        )

    ds = read_csv(filename, sep=r'\s+', skiprows=skiprows, **kwargs)
    ds.attrs['station'] = str(ds['STN'].values[0]).lower()

    # Fetch each date column once instead of once per row.
    date_parts = [ds[name].values for name in ('YEAR', 'MON', 'DAY', 'HR')]
    timestamp = np.array(
        [datetime(*row) for row in zip(*date_parts)], dtype='datetime64[ns]'
    )

    ds = ds.rename({'index': 'time'})
    ds = ds.assign_coords(time=timestamp)
    ds['time'].attrs['long_name'] = 'Time'

    for var_name in ['STN', 'YEAR', 'MON', 'DAY', 'HR']:
        del ds[var_name]

    var_name = 'ozone'
    ds = ds.rename({'O3(PPB)': var_name})
    ds[var_name].attrs['long_name'] = 'Ozone'
    ds[var_name].attrs['units'] = 'ppb'
    ds[var_name].attrs['_FillValue'] = np.nan
    ds[var_name].values = ds[var_name].values.astype(np.float32)

    return ds
def read_gml_radiation(filename=None, convert_missing=True, remove_time_vars=True, **kwargs):
    """
    Function to read radiation data from NOAA GML.

    Parameters
    ----------
    filename : str or pathlib.Path
        Data file full path name. For a list or tuple, the first entry is
        used for the station check and for reading the two header lines.
    convert_missing : boolean
        Option to convert missing values to NaN. If turned off will
        set variable attribute to missing value expected. This works well
        to preserve the data type best for writing to a netCDF file.
    remove_time_vars : boolean
        Some column names in the CSV file are used for creating the time
        coordinate variable in the returned Xarray Dataset. Once used the
        variables are not needed and will be removed from Dataset.
    **kwargs : keywords
        Keywords to pass through to ACT read_csv() routine.

    Returns
    -------
    ds : xarray.Dataset
        Dataset with a 'time' coordinate, the data variables and their
        'qc_' companions, plus 'lat', 'lon' and 'alt' read from the header.
        None is returned when filename is None, and whatever read_csv()
        returned is passed back when that is None.

    """
    ds = None
    if filename is None:
        return ds

    # Resolve the representative file first, so a list/tuple of names works
    # for the station-prefix check below as well as for the header read.
    test_filename = filename
    if isinstance(test_filename, (list, tuple)):
        test_filename = test_filename[0]

    time_columns = ['year', 'jday', 'month', 'day', 'hour', 'minute', 'decimal_time']

    # (column name, units, long_name); all share the same fill value and type.
    measured = [
        ('solar_zenith_angle', 'degree', 'Solar zenith angle'),
        ('downwelling_global_solar', 'W/m^2', 'Downwelling global solar'),
        ('upwelling_global_solar', 'W/m^2', 'Upwelling global solar'),
        ('direct_normal_solar', 'W/m^2', 'Direct-normal solar'),
        ('downwelling_diffuse_solar', 'W/m^2', 'Downwelling diffuse solar'),
        ('downwelling_thermal_infrared', 'W/m^2', 'Downwelling thermal infrared'),
        ('downwelling_infrared_case_temp', 'degK', 'Downwelling infrared case temp'),
        ('downwelling_infrared_dome_temp', 'degK', 'downwelling infrared dome temp'),
        ('upwelling_thermal_infrared', 'W/m^2', 'Upwelling thermal infrared'),
        ('upwelling_infrared_case_temp', 'degK', 'Upwelling infrared case temp'),
        ('upwelling_infrared_dome_temp', 'degK', 'Upwelling infrared dome temp'),
        ('global_UVB', 'mW/m^2', 'global ultraviolet-B'),
        ('par', 'W/m^2', 'Photosynthetically active radiation'),
        (
            'net_solar',
            'W/m^2',
            'Net solar (downwelling_global_solar - upwelling_global_solar)',
        ),
        (
            'net_infrared',
            'W/m^2',
            'Net infrared (downwelling_thermal_infrared - upwelling_thermal_infrared)',
        ),
        ('net_radiation', 'W/m^2', 'Net radiation (net_solar + net_infrared)'),
        ('air_temperature_10m', 'degC', '10-meter air temperature'),
        ('relative_humidity', '%', 'Relative humidity'),
        ('wind_speed', 'm/s', 'Wind speed'),
        ('wind_direction', 'degree', 'Wind direction (clockwise from north)'),
        ('station_pressure', 'millibar', 'Station atmospheric pressure'),
    ]

    # Files whose name starts with 'cbc' or 'ckp' carry two extra SPN1 columns.
    if Path(test_filename).name.startswith(('cbc', 'ckp')):
        measured.append(('SPN1_total', 'W/m^2', 'SPN1 total average'))
        measured.append(('SPN1_diffuse', 'W/m^2', 'SPN1 diffuse average'))

    # A value of None marks a time column that carries no attributes.
    column_names = dict.fromkeys(time_columns)
    for name, units, long_name in measured:
        column_names[name] = {
            'units': units,
            'long_name': long_name,
            '_FillValue': -9999.9,
            '__type': np.float32,
        }

    # Every measured column except the solar zenith angle is followed by a
    # quality control column in the file.
    no_qc = set(time_columns) | {'solar_zenith_angle'}
    names = []
    for name in column_names:
        names.append(name)
        if name not in no_qc:
            names.append('qc_' + name)

    ds = read_csv(filename, sep=r'\s+', header=None, skiprows=2, column_names=names, **kwargs)
    if ds is None:
        return ds

    # The two header lines hold the station name (a single token) and the
    # location (latitude, longitude, altitude, altitude unit).
    lat = None
    lon = None
    alt = None
    alt_unit = None
    station = None
    with open(test_filename) as fc:
        for _ in range(2):
            line = fc.readline().strip().split()
            if len(line) == 1:
                station = line[0]
            else:
                lat = np.array(line[0], dtype=np.float32)
                lon = np.array(line[1], dtype=np.float32)
                alt = np.array(line[2], dtype=np.float32)
                alt_unit = str(line[3])

    ds['lat'] = xr.DataArray(
        lat,
        attrs={
            'long_name': 'Latitude',
            'units': 'degree_north',
            'standard_name': 'latitude',
        },
    )
    ds['lon'] = xr.DataArray(
        lon,
        attrs={
            'long_name': 'Longitude',
            'units': 'degree_east',
            'standard_name': 'longitude',
        },
    )
    ds['alt'] = xr.DataArray(
        alt,
        attrs={
            'long_name': 'Altitude',
            'units': alt_unit,
            'standard_name': 'altitude',
        },
    )
    ds.attrs['location'] = station

    # Fetch each date column once instead of once per row.
    date_parts = [ds[name].values for name in ('year', 'month', 'day', 'hour', 'minute')]
    timestamp = np.array(
        [datetime(*row) for row in zip(*date_parts)], dtype='datetime64[ns]'
    )

    ds = ds.rename({'index': 'time'})
    ds = ds.assign_coords(time=timestamp)
    ds['time'].attrs['long_name'] = 'Time'

    for var_name, value in column_names.items():
        if value is None:
            # Time columns are kept here; they are dropped at the end when
            # remove_time_vars is set.
            continue

        for att_name, att_value in value.items():
            if att_name == '__type':
                ds[var_name].values = ds[var_name].values.astype(att_value)
            else:
                ds[var_name].attrs[att_name] = att_value

        if convert_missing and '_FillValue' in ds[var_name].attrs:
            missing_value = ds[var_name].attrs['_FillValue']
            values = ds[var_name].values.astype(float)
            values[np.isclose(values, missing_value)] = np.nan
            ds[var_name].values = values
            ds[var_name].attrs['_FillValue'] = np.nan

    # Describe each quality control variable and link it to its data variable.
    for var_name in ds.data_vars:
        if not var_name.startswith('qc_'):
            continue
        data_var_name = var_name.replace('qc_', '', 1)
        attrs = {
            'long_name': f"Quality control variable for: {ds[data_var_name].attrs['long_name']}",
            'units': '1',
            'standard_name': 'quality_flag',
            'flag_values': [0, 1, 2],
            'flag_meanings': [
                'Not failing any tests',
                'Knowingly bad value',
                'Should be used with scrutiny',
            ],
            'flag_assessments': ['Good', 'Bad', 'Indeterminate'],
        }
        ds[var_name].attrs = attrs
        ds[data_var_name].attrs['ancillary_variables'] = var_name

    if remove_time_vars:
        ds = ds.drop_vars(time_columns)

    return ds
def read_gml_met(filename=None, convert_missing=True, **kwargs):
    """
    Function to read meteorological data from NOAA GML.

    Parameters
    ----------
    filename : str or pathlib.Path
        Data file full path name. For a list or tuple, the first entry is
        used to decide whether the file has a minute column.
    convert_missing : boolean
        Option to convert missing values to NaN. If turned off will
        set variable attribute to missing value expected. This works well
        to preserve the data type best for writing to a netCDF file.
    **kwargs : keywords
        Keywords to pass through to ACT read_csv() routine.

    Returns
    -------
    ds : xarray.Dataset
        Dataset with a 'time' coordinate and typed, attributed data
        variables. None is returned when filename is None, and whatever
        read_csv() returned is passed back when that is None.

    """
    if filename is None:
        return None

    # Columns mapped to None are only used to build the time coordinate (or
    # dropped outright) and are removed from the result.
    column_names = dict.fromkeys(['station', 'year', 'month', 'day', 'hour', 'minute'])
    column_names.update(
        {
            'wind_direction': {
                'units': 'degree',
                'long_name': 'Average wind direction from which the wind is blowing',
                '_FillValue': -999,
                '__type': np.int16,
            },
            'wind_speed': {
                'units': 'm/s',
                'long_name': 'Average wind speed',
                '_FillValue': -999.9,
                '__type': np.float32,
            },
            'wind_steadiness_factor': {
                'units': '1',
                'long_name': (
                    '100 times the ratio of the vector wind speed to the '
                    'average wind speed for the hour'
                ),
                '_FillValue': -9,
                '__type': np.int16,
            },
            'barometric_pressure': {
                'units': 'hPa',
                'long_name': 'Station barometric pressure',
                '_FillValue': -999.90,
                '__type': np.float32,
            },
            'temperature_2m': {
                'units': 'degC',
                'long_name': 'Temperature at 2 meters above ground level',
                '_FillValue': -999.9,
                '__type': np.float32,
            },
            'temperature_10m': {
                'units': 'degC',
                'long_name': 'Temperature at 10 meters above ground level',
                '_FillValue': -999.9,
                '__type': np.float32,
            },
            'temperature_tower_top': {
                'units': 'degC',
                'long_name': 'Temperature at top of instrument tower',
                '_FillValue': -999.9,
                '__type': np.float32,
            },
            # The two misspelled names below are the variable names callers
            # receive, so they are kept as they are.
            'realitive_humidity': {
                'units': 'percent',
                'long_name': 'Relative humidity',
                '_FillValue': -99,
                '__type': np.int16,
            },
            'preciptation_intensity': {
                'units': 'mm/hour',
                'long_name': 'Amount of precipitation per hour',
                '_FillValue': -99,
                '__type': np.int16,
                'comment': (
                    'The precipitation amount is measured with an unheated '
                    'tipping bucket rain gauge.'
                ),
            },
        }
    )

    first_file = filename[0] if isinstance(filename, (list, tuple)) else filename

    # Files with '_hour_' in the name have no minute column.
    minutes = '_hour_' not in Path(first_file).name
    if not minutes:
        del column_names['minute']

    ds = read_csv(filename, sep=r'\s+', header=None, column_names=column_names.keys(), **kwargs)
    if ds is None:
        return ds

    time_fields = ['year', 'month', 'day', 'hour']
    if minutes:
        time_fields.append('minute')

    timestamp = np.full(ds['year'].size, np.nan, dtype='datetime64[ns]')
    for row in range(len(timestamp)):
        parts = [ds[field].values[row] for field in time_fields]
        timestamp[row] = np.datetime64(datetime(*parts), 'ns')

    ds = ds.rename({'index': 'time'})
    ds = ds.assign_coords(time=timestamp)
    ds['time'].attrs['long_name'] = 'Time'

    for var_name, spec in column_names.items():
        if spec is None:
            del ds[var_name]
            continue

        # '__type' is a cast instruction; every other key is an attribute.
        for key, setting in spec.items():
            if key != '__type':
                ds[var_name].attrs[key] = setting
                continue
            ds[var_name].values = ds[var_name].values.astype(setting)

        if not convert_missing:
            continue
        try:
            fill = ds[var_name].attrs['_FillValue']
            data = ds[var_name].values.astype(float)
            data[np.isclose(data, fill)] = np.nan
            ds[var_name].values = data
            ds[var_name].attrs['_FillValue'] = np.nan
        except KeyError:
            pass

    return ds
def read_surfrad(filename, **kwargs):
    """
    Function to read in NOAA SurfRad data

    Parameters
    ----------
    filename : list
        Data files full path name or url to file. A single str or
        pathlib.Path is also accepted and treated as a one-element list.
    **kwargs : keywords
        Accepted for interface compatibility; currently not forwarded to
        pandas.read_csv().

    Returns
    -------
    ds : xarray.Dataset
        Dataset with a 'time' coordinate built from the date columns, with
        attributes on the data and 'qc_' variables and a 'datastream' global
        attribute derived from the first three characters of the first
        file's base name.

    Raises
    ------
    ValueError
        If no file names are provided.

    """
    # A bare str/Path would otherwise be iterated character by character.
    if isinstance(filename, (str, Path)):
        filename = [filename]
    files = list(filename)
    if not files:
        raise ValueError('No SURFRAD file names were provided')

    # Variables that are each followed by a quality control column in the file.
    qc_vars = [
        'downwelling_global',
        'upwelling_global',
        'direct_normal',
        'downwelling_diffuse',
        'downwelling_ir',
        'downwelling_ir_casetemp',
        'downwelling_ir_dometemp',
        'upwelling_ir',
        'upwelling_ir_casetemp',
        'upwelling_ir_dometemp',
        'global_uvb',
        'par',
        'net_radiation',
        'net_ir',
        'total_net',
        'temperature',
        'relative_humidity',
        'wind_speed',
        'wind_direction',
        'pressure',
    ]
    names = [
        'year',
        'jday',
        'month',
        'day',
        'hour',
        'minute',
        'dec_time',
        'solar_zenith_angle',
    ]
    for v in qc_vars:
        names.extend([v, 'qc_' + v])

    # Read every file, then concatenate once rather than growing a frame
    # inside the loop.
    frames = [
        pd.read_csv(f, names=names, skiprows=2, delimiter=r'\s+', header=None) for f in files
    ]
    df = frames[0] if len(frames) == 1 else pd.concat(frames)

    # Create time variable and add as the coordinate
    ds = df.to_xarray()
    date_parts = [ds[name].values for name in ('year', 'month', 'day', 'hour', 'minute')]
    time = [datetime(*row) for row in zip(*date_parts)]
    ds = ds.assign_coords(index=time)
    ds = ds.rename(index='time')

    # Add attributes
    attrs = {
        'year': {'long_name': 'Year', 'units': 'unitless'},
        'jday': {'long_name': 'Julian day', 'units': 'unitless'},
        'month': {'long_name': 'Month', 'units': 'unitless'},
        'day': {'long_name': 'Day of the month', 'units': 'unitless'},
        'hour': {'long_name': 'Hour', 'units': 'unitless'},
        'minute': {'long_name': 'Minutes', 'units': 'unitless'},
        'dec_time': {'long_name': 'Decimal time', 'units': 'unitless'},
        'solar_zenith_angle': {'long_name': 'Solar zenith angle', 'units': 'deg'},
        'downwelling_global': {
            'long_name': 'Downwelling global solar',
            'units': 'W m^-2',
            'standard_name': 'surface_downwelling_shortwave_flux_in_air',
        },
        'upwelling_global': {
            'long_name': 'Upwelling global solar',
            'units': 'W m^-2',
            'standard_name': 'surface_upwelling_shortwave_flux_in_air',
        },
        'direct_normal': {
            'long_name': 'Direct normal solar',
            'units': 'W m^-2',
            'standard_name': 'surface_direct_downwelling_shortwave_flux_in_air',
        },
        'downwelling_diffuse': {
            'long_name': 'Downwelling diffuse solar',
            'units': 'W m^-2',
            'standard_name': 'diffuse_downwelling_shortwave_flux_in_air',
        },
        'downwelling_ir': {
            'long_name': 'Downwelling thermal infrared',
            'units': 'W m^-2',
            'standard_name': 'net_downward_longwave_flux_in_air',
        },
        'downwelling_ir_casetemp': {
            'long_name': 'Downwelling thermal infrared case temperature',
            'units': 'K',
        },
        'downwelling_ir_dometemp': {
            'long_name': 'Downwelling thermal infrared dome temperature',
            'units': 'K',
        },
        'upwelling_ir': {
            'long_name': 'Upwelling thermal infrared',
            'units': 'W m^-2',
            'standard_name': 'net_upward_longwave_flux_in_air',
        },
        'upwelling_ir_casetemp': {
            'long_name': 'Upwelling thermal infrared case temperature',
            'units': 'K',
        },
        'upwelling_ir_dometemp': {
            'long_name': 'Upwelling thermal infrared dome temperature',
            'units': 'K',
        },
        'global_uvb': {'long_name': 'Global UVB', 'units': 'milliWatts m^-2'},
        'par': {
            'long_name': 'Photosynthetically active radiation',
            'units': 'W m^-2',
            'standard_name': 'surface_downwelling_photosynthetic_radiative_flux_in_air',
        },
        'net_radiation': {
            'long_name': 'Net solar (downwelling_global-upwelling_global)',
            'units': 'W m^-2',
            'standard_name': 'surface_net_downward_shortwave_flux',
        },
        'net_ir': {
            'long_name': 'Net infrared (downwelling_ir-upwelling_ir)',
            'units': 'W m^-2',
            'standard_name': 'surface_net_downward_longwave_flux',
        },
        'total_net': {
            'long_name': 'Total Net radiation (net_radiation + net_ir)',
            'units': 'W m^-2',
        },
        'temperature': {
            'long_name': '10-meter air temperature',
            'units': 'degC',
            'standard_name': 'air_temperature',
        },
        'relative_humidity': {
            'long_name': 'Relative humidity',
            'units': '%',
            'standard_name': 'relative_humidity',
        },
        'wind_speed': {'long_name': 'Wind speed', 'units': 'ms^-1', 'standard_name': 'wind_speed'},
        'wind_direction': {
            'long_name': 'Wind direction, clockwise from North',
            'units': 'deg',
            'standard_name': 'wind_from_direction',
        },
        'pressure': {
            'long_name': 'Station pressure',
            'units': 'mb',
            'standard_name': 'air_pressure',
        },
    }

    for v in ds:
        if v in attrs:
            ds[v].attrs = attrs[v]

    # Add attributes to all QC variables
    qc_description = ''.join(
        [
            'A QC flag of zero indicates that the corresponding data point is good,',
            ' having passed all QC checks.  A value greater than 0 indicates that',
            ' the data failed one level of QC.  For example, a QC value of 1 means',
            ' that the recorded value is beyond a physically possible range, or it has',
            ' been affected adversely in some manner to produce a knowingly bad value.',
            ' A value of 2 indicates that the data value failed the second level QC check,',
            ' indicating that the data value may be physically possible but should be used',
            ' with scrutiny, and so on.',
        ]
    )
    for v in qc_vars:
        ds['qc_' + v].attrs = {
            'long_name': 'Quality check results on variable: ' + v,
            'units': '1',
            'description': qc_description,
        }

    # str() lets pathlib.Path entries work as well as plain strings and URLs.
    ds.attrs['datastream'] = 'SURFRAD Site: ' + str(files[0]).split('/')[-1][0:3]

    return ds