# Source code for act.io.neon
"""
Modules for reading in NEON data.
"""
import pandas as pd
import xarray as xr
from .text import read_csv
def read_neon_csv(files, variable_files=None, position_files=None):
    """
    Reads in the NEON formatted csv files from local paths or urls
    and returns an Xarray dataset.

    Parameters
    ----------
    files : str or list
        Files to read in.
    variable_files : str or list
        Name of variable files to read with metadata. Optional but the Dataset will not
        have any metadata.
    position_files : str or list
        Name of file to read with sensor positions. Optional, but the Dataset will not
        have any location information.

    Returns
    -------
    ds : xarray.Dataset
        Standard Xarray dataset.
    """
    if isinstance(files, str):
        files = [files]
    # Raise error if empty list is passed in
    if len(files) == 0:
        raise ValueError('File list is empty')

    # Read in the optional metadata files.  Only the first file of each list
    # is used; the metadata content is expected to be the same for all files.
    df = None
    if variable_files is not None:
        if isinstance(variable_files, str):
            variable_files = [variable_files]
        df = pd.read_csv(variable_files[0])
    loc_df = None
    if position_files is not None:
        if isinstance(position_files, str):
            position_files = [position_files]
        loc_df = pd.read_csv(position_files[0], dtype=str)

    # Run through each file and read into a dataset
    multi_ds = []
    for f in files:
        ds = read_csv(f)

        # Create standard, timezone-naive time variable from startDateTime
        time = [pd.to_datetime(t).replace(tzinfo=None) for t in ds['startDateTime'].values]
        ds['time'] = xr.DataArray(data=time, dims=['index'])
        ds['time'].attrs['units'] = ''
        ds = ds.swap_dims({'index': 'time'})
        ds = ds.drop_vars('index')

        # NEON filenames encode metadata in dot-separated fields; pull out the
        # site code, horizontal/vertical location and table resolution once.
        tokens = f.split('/')[-1].split('.')
        site_code = tokens[2]
        hor_loc = tokens[6]
        ver_loc = tokens[7]
        resolution = tokens[9]
        ds.attrs['_sites'] = site_code
        ds.attrs['averaging_interval'] = resolution.split('_')[-1]
        ds.attrs['HOR.VER'] = hor_loc + '.' + ver_loc

        # Add in metadata from the variables file for variables that have an
        # entry; variables without a match (e.g. derived ones) are skipped
        # instead of raising an IndexError.
        if df is not None:
            for v in ds:
                dummy = df.loc[(df['table'] == resolution) & (df['fieldName'] == v)]
                if len(dummy.index) == 0:
                    continue
                ds[v].attrs['units'] = str(dummy['units'].values[0])
                ds[v].attrs['long_name'] = str(dummy['description'].values[0])
                ds[v].attrs['format'] = str(dummy['pubFormat'].values[0])

        # Add in sensor position data for the matching HOR.VER row.
        if loc_df is not None:
            dloc = loc_df.loc[loc_df['HOR.VER'] == hor_loc + '.' + ver_loc]
            idx = dloc.index.values
            if len(idx) > 0:
                # values[idx][0] takes the first matching row and is safe for
                # both single- and multi-row position files (float() on a
                # size-1 ndarray is deprecated in modern NumPy).
                ds['lat'] = xr.DataArray(data=float(loc_df['referenceLatitude'].values[idx][0]))
                ds['lon'] = xr.DataArray(data=float(loc_df['referenceLongitude'].values[idx][0]))
                ds['alt'] = xr.DataArray(data=float(loc_df['referenceElevation'].values[idx][0]))
                variables = [
                    'xOffset',
                    'yOffset',
                    'zOffset',
                    'eastOffset',
                    'northOffset',
                    'pitch',
                    'roll',
                    'azimuth',
                    'xAzimuth',
                    'yAzimuth',
                ]
                for v in variables:
                    ds[v] = xr.DataArray(data=float(loc_df[v].values[idx][0]))
        multi_ds.append(ds)

    ds = xr.merge(multi_ds)
    return ds