"""
Script for downloading data from the IMPROVE network
"""
import pandas as pd
import numpy as np
import xarray as xr
[docs]def get_improve_data(site_id=None, parameter_id=None, start_date=None, end_date=None):
"""
Retrieve IMPROVE data for the given site and variable ids and store it in an
xarray dataset. Documentation on the IMPROVE data can be found at
https://vista.cira.colostate.edu/Improve/data-user-guide/
Also adds in metadata from the site summary page to the global attributes
https://views.cira.colostate.edu/adms/Pub/SiteSummary.aspx?dsidse=10001&siidse=244
Parameters
----------
site_id : str
Site id number which can be retrieved from the IMPROVE page for each site such as
https://views.cira.colostate.edu/adms/Pub/SiteSummary.aspx?dsidse=10001&siidse=244
parameter_id : list
List of parameter id values to retrieve from the API.
start_date : str
Start date formatted as M/D/YEAR such as 1/31/2022
end_date : str
End date formatted as M/D/YEAR such as 1/31/2022
Returns
-------
ds : xarray.Dataset
Returns an Xarray dataset object
Example
-------
act.discovery.get_improve_data(site_id='244')
"""
# Build URL
base_url = 'https://views.cira.colostate.edu/fed/svc/DataSvc.aspx?action=getqueryresults&cmdfileid=ServiceSqlCommandFile&cmdid=BasicDataQuery1_Codes'
if site_id is None:
raise ValueError('Please provide a site_id')
else:
base_url += '&dsidse=10001&siidse=' + str(site_id)
if parameter_id is None:
base_url += '&paidse=101,136,907,900,102,104,105,115,116,117,114,3778,142,143,144,145,3016,146,3699,141,3779,3217,108,109,112,113,301,304,303,3716,3717,3718,3719,3720,3721,3722,3730,3731,3732,3733,3734,3735,3736,3694,121,3723,3724,3725,3726,3727,3728,3729,3737,3738,3739,3740,3741,3742,3743,118,148,128,130,132,941,127,903,910,3744,3745,3746,3747,3748,3749,3750,3751,3752,3753,3754,3755,3756,3757,131,138,139,133,3704,3705,3706,3707,3708,3709,3710,3711,3712,3713,3714,3715,147,124,150,3695,3014,153,154,134,911,158,156,151,202,159,160,162,163'
else:
base_url += '&paidse=' + ','.join(parameter_id)
if start_date is None:
raise ValueError('Please provide a start date')
else:
base_url += '&sd=' + start_date
if end_date is None:
raise ValueError('Please provide an end date')
else:
base_url += '&ed=' + end_date
# Read data and get variables
df = pd.read_html(base_url)[0]
variables = np.unique(df.Param)
# Print out proper acknowledgement
print("Please use the following acknowledgment when using IMPROVE data:\n")
print(
"IMPROVE is a collaborative association of state, tribal, and federal agencies, and international partners. US Environmental Protection Agency is the primary funding source, with contracting and research support from the National Park Service. The Air Quality Group at the University of California, Davis is the central analytical laboratory, with ion analysis provided by Research Triangle Institute, and carbon analysis provided by Desert Research Institute."
)
# Creat mapping of variable names to metadata
mapping = {
'ALf': {'name': 'aluminum_fine', 'long_name': 'Aluminum (Fine)', 'epa_code': '88104'},
'ASf': {'name': 'arsenic_fine', 'long_name': 'Arsenic (Fine)', 'epa_code': '88103'},
'BRf': {'name': 'bromine_fine', 'long_name': 'Bromine (Fine)', 'epa_code': '88109'},
'CAf': {'name': 'calcium_fine', 'long_name': 'Calcium (Fine)', 'epa_code': '88111'},
'CLf': {'name': 'chlorine_fine', 'long_name': 'Chlorine (Fine)', 'epa_code': '88115'},
'CRf': {'name': 'chromium_fine', 'long_name': 'Chromium (Fine)', 'epa_code': '88112'},
'CUf': {'name': 'copper_fine', 'long_name': 'Copper (Fine)', 'epa_code': '88114'},
'FEf': {'name': 'iron_fine', 'long_name': 'Iron (Fine)', 'epa_code': '88126'},
'PBf': {'name': 'lead_fine', 'long_name': 'Lead (Fine)', 'epa_code': '88128'},
'MGf': {'name': 'magnesium_fine', 'long_name': 'Magnesium (Fine)', 'epa_code': '88140'},
'MNf': {'name': 'manganese_fine', 'long_name': 'Manganese (Fine)', 'epa_code': '88132'},
'NIf': {'name': 'nickel_fine', 'long_name': 'Nickel (Fine)', 'epa_code': '88136'},
'Pf': {'name': 'phosphorus_fine', 'long_name': 'Phosphorus (Fine)', 'epa_code': '88152'},
'Kf': {'name': 'potassium_fine', 'long_name': 'Potassium (Fine)', 'epa_code': '88180'},
'RBf': {'name': 'rubidium_fine', 'long_name': 'Rubidium (Fine)', 'epa_code': '88176'},
'SEf': {'name': 'selenium_fine', 'long_name': 'Selenium (Fine)', 'epa_code': '88154'},
'SIf': {'name': 'silicon_fine', 'long_name': 'Silicon (Fine)', 'epa_code': '88165'},
'NAf': {'name': 'sodium_fine', 'long_name': 'Sodium (Fine)', 'epa_code': '88184'},
'SRf': {'name': 'strontium_fine', 'long_name': 'Strontium (Fine)', 'epa_code': '88168'},
'Sf': {'name': 'sulfur_fine', 'long_name': 'Sulfur (Fine)', 'epa_code': '88169'},
'TIf': {'name': 'titanium_fine', 'long_name': 'Titanium (Fine)', 'epa_code': '88161'},
'Vf': {'name': 'vanadium_fine', 'long_name': 'Vanadium (Fine)', 'epa_code': '88164'},
'ZNf': {'name': 'zinc_fine', 'long_name': 'Zinc (Fine)', 'epa_code': '88167'},
'ZRf': {'name': 'zirconium_fine', 'long_name': 'Zirconium (Fine)', 'epa_code': '88185'},
'CHLf': {'name': 'chloride_fine', 'long_name': 'Chloride (Fine)', 'epa_code': '88203'},
'NO3f': {'name': 'nitrate_fine', 'long_name': 'Nitrate (Fine)', 'epa_code': '88306'},
'N2f': {'name': 'nitrite_fine', 'long_name': 'Nitrite (Fine)', 'epa_code': '88338'},
'SO4f': {'name': 'sulfate_fine', 'long_name': 'Sulfate (Fine)', 'epa_code': '88403'},
'OC1f': {
'name': 'carbon_organic_fraction_1_fine',
'long_name': 'Carbon, Organic Fraction 1 (Fine)',
'comments': 'TOR, pure helium (>99.999%) atmosphere, temperature (T) = 140 °C',
'epa_code': '88324',
},
'OC2f': {
'name': 'carbon_organic_fraction_2_fine',
'long_name': 'Carbon, Organic Fraction 2 (Fine)',
'comments': 'TOR, pure helium (>99.999%) atmosphere, temperature (T) = 280 °C',
'epa_code': '88325',
},
'OC3f': {
'name': 'carbon_organic_fraction_3_fine',
'long_name': 'Carbon, Organic Fraction 3 (Fine)',
'comments': 'TOR, pure helium (>99.999%) atmosphere, temperature (T) = 480 °C',
'epa_code': '88326',
},
'OC4f': {
'name': 'carbon_organic_fraction_4_fine',
'long_name': 'Carbon, Organic Fraction 4 (Fine)',
'comments': 'TOR, pure helium (>99.999%) atmosphere, temperature (T) = 580 °C',
'epa_code': '88327',
},
'OPf': {
'name': 'carbon_organic_reflectance_fine',
'long_name': 'Carbon, Organic Pyrolized (Fine) by Reflectance',
'comments': 'TOR, carbon that is measured after the introduction of helium/oxygen atmosphere at °550 C but beforereflectance returns to initial value',
'epa_code': '88328',
},
'OPTf': {
'name': 'carbon_organic_transmittance_fine',
'long_name': 'Carbon, Organic Pyrolized (Fine) by Transmittance',
'comments': 'TOR, carbon that is measured after the introduction of helium/oxygen atmosphere at °550 C but beforetransmittance returns to initial value',
'epa_code': '88336',
},
'OCf': {
'name': 'carbon_organic_total_fine',
'long_name': 'Carbon, Organic Total (Fine)',
'comments': 'Organic carbon from TOR carbon fractions (OC1f+OC2f+OC3f+OC4f+OPf)',
'epa_code': '88320',
},
'EC1f': {
'name': 'carbon_elemental_fraction_1_fine',
'long_name': 'Carbon, Elemental Fraction 1 (Fine)',
'comments': 'TOR, 98% helium, 2% oxygen atmosphere, temperature (T) = 580° C.',
'epa_code': '88329',
},
'EC2f': {
'name': 'carbon_elemental_fraction_2_fine',
'long_name': 'Carbon, Elemental Fraction 2 (Fine)',
'comments': 'TOR, 98% helium, 2% oxygen atmosphere, temperature (T) = 740° C.',
'epa_code': '88380',
},
'EC3f': {
'name': 'carbon_elemental_fraction_3_fine',
'long_name': 'Carbon, Elemental Fraction 3 (Fine)',
'comments': 'TOR, 98% helium, 2% oxygen atmosphere, temperature (T) = 840° C.',
'epa_code': '88331',
},
'ECf': {
'name': 'carbon_elemental_total_fine',
'long_name': 'Carbon, Elemental Total (Fine)',
'comments': 'Elemental carbon from TOR carbon fractions (E1+E2+E3-OP)',
'epa_code': '88321',
},
'fAbs': {
'name': 'filter_absorption_coeff',
'long_name': 'Filter Absorption Coefficient',
'comments': 'A calibrated absorption coefficient measured from a Teflon filter using a hybrid integrating plate and sphere (HIPS) method',
'epa_code': '63102',
},
'FlowRate': {
'name': 'flow_rate',
'long_name': 'Flow Rate',
'comments': 'The rate of air flow through an air sampling instrument',
'epa_code': '63102',
},
'MF': {
'name': 'mass_pm2_5',
'long_name': 'Mass, PM2.5 (Fine)',
'comments': 'Gravimetric mass measurement for particles with aerodynamic diameters less than 2.5 um',
'epa_code': '88101',
},
'MT': {
'name': 'mass_pm10',
'long_name': 'Mass, PM10 (Total)',
'comments': 'Gravimetric mass measurement for particles with aerodynamic diameters less than 10 um',
'epa_code': '85101',
},
'SampDur': {
'name': 'sample_duration',
'long_name': 'Sampling Duration',
'comments': 'The duration of a given sampling period in minutes',
},
'ammNO3f': {
'name': 'ammonium_nitrate_fine',
'long_name': 'Ammonium Nitrate (Fine)',
'comments': '1.29 x NO3f',
},
'ammSO4f': {
'name': 'ammonium_sulfate_fine',
'long_name': 'Ammonium Sulfate (Fine)',
'comments': '1.375 x SO4f',
},
'OMCf': {
'name': 'carbon_organic_mass_fine',
'long_name': 'Carbon, Organic Mass (fine)(1.8*OC)',
'comments': '1.8 X OCf',
},
'TCf': {
'name': 'carbon_total_fine',
'long_name': 'Carbon, Total (fine)',
'comments': 'From TOR carbon fractions (OCf+ECf)',
},
'CM_calculated': {
'name': 'CM_calculated',
'long_name': 'Mass, PM10-PM2.5 (Coarse)',
'comments': 'MT-MF',
},
'SeaSaltf': {
'name': 'sea_salt_fine',
'long_name': 'Sea Salt (Fine)',
'comments': '1.8XCHLf',
},
'SOILf': {
'name': 'soil_fine',
'long_name': 'Soil (Fine)',
'comments': '2.2 × ALf + 2.49 × SIf + 1.63 × CAf + 2.42 × FEf + 1.94 × TIf',
},
'RCFM': {
'name': 'mass_pm2_5_reconstructed',
'long_name': 'Mass, PM2.5 Reconstructed (Fine)',
'comments': 'Sum of ammSO4f, ammNO3f, OMCf, ECf, soilf, and seasaltf.',
},
'RCTM': {
'name': 'mass_pmi10_reconstructed',
'long_name': 'Mass, PM10 Reconstructed (Total)',
'comments': 'Sum of ammSO4f, ammNO3f, OMCf, ECf, soilf, seasaltf, and CM_calculated.',
},
}
laser_vars = {
'RefF': {
'units': 'ratio',
'comments': 'Final laser reflectance at ',
'name': 'final_laser_reflectance_',
},
'TransF': {
'units': 'ratio',
'comments': 'Final laser transmittance at ',
'name': 'final_laser_transmittance_',
},
'RefI': {
'units': 'ratio',
'comments': 'Initial laser reflectance at ',
'name': 'initial_laser_reflectance_',
},
'TransI': {
'units': 'ratio',
'comments': 'Initial laser transmittance at ',
'name': 'initial_laser_transmittance_',
},
'RefM': {
'units': 'ratio',
'comments': 'Minimum laser reflectance at ',
'name': 'min_laser_reflectance_',
},
'TransM': {
'units': 'ratio',
'comments': ' Minimum laser transmittance at ',
'name': 'min_laser_transmittance_',
},
'OP_TR': {
'units': 'ug m-3',
'comments': 'Organic Pyrolyzed Carbon by Reflectance at ',
'name': 'organic_pyrolyzed_carbon_reflectance_',
},
'OP_TT': {
'units': 'ug m-3',
'comments': 'Organic Pyrolyzed Carbon by Transmittance at ',
'name': 'organic_pyrolyzed_carbon_transmittance_',
},
}
laser_wl = ['405', '445', '532', '635', '780', '808', '980']
for v in laser_vars:
for wl in laser_wl:
name = laser_vars[v]['name'] + wl
if 'OP' not in v:
mapping['_'.join([v, wl])] = {
'units': laser_vars[v]['units'],
'name': name,
'long_name': ' '.join([laser_vars[v]['comments'], wl]),
}
else:
var_name = wl.join(v.split('_'))
mapping[var_name] = {
'units': laser_vars[v]['units'],
'name': name,
'long_name': ' '.join([laser_vars[v]['comments'], wl]),
}
# Run through each variable in the dataframe and add it to a dataset
# along with the appropriate metadata
ct = 0
site = np.unique(df.Site)[0]
attrs = {'url': base_url, 'datastream': site + ' IMPROVE'}
for v in variables:
# Find data for just the variable in question
poc_attrs = {'units': '1', 'long_name': 'Parameter Occurrence Code for ' + v}
df2 = df[df.Param == v]
# Get metadata
unit = np.unique(df2.UnitAbbr)
if len(unit) > 1:
raise ValueError('Multiple types of units detected, using first one')
sites = np.unique(df2.Site)
if len(sites) > 1:
raise ValueError('Multiple sites detected, please use only one')
# Get time, POC, and data
time = pd.to_datetime(df2.FactDate)
poc = df2.POC
data = df2.FactValue
# Set up attributes
var_attrs = {'units': unit[0], 'long_name': mapping[v]['long_name'], '_FillValue': -999.0}
if 'comments' in mapping[v]:
var_attrs['comments'] = mapping[v]['comments']
if 'epa_code' in mapping[v]:
var_attrs['epa_code'] = mapping[v]['epa_code']
# If the first variable, create the dataset and then add variables to it
if ct == 0:
ds = xr.Dataset(
data_vars={mapping[v]['name']: (['time'], data, var_attrs)},
coords={'time': time},
attrs=attrs,
)
ds['poc_' + mapping[v]['name']] = xr.DataArray(
data=poc, dims=['time'], coords={'time': time}, attrs=poc_attrs
)
ct += 1
else:
ds[mapping[v]['name']] = xr.DataArray(
data=data, dims=['time'], coords={'time': time}, attrs=var_attrs
)
ds['poc_' + mapping[v]['name']] = xr.DataArray(
data=poc, dims=['time'], coords={'time': time}, attrs=poc_attrs
)
# Add in metadata from site summary page
url = 'https://views.cira.colostate.edu/adms/Pub/SiteSummary.aspx?dsidse=10001&siidse=' + str(
site_id
)
df = pd.read_html(url)
for i in df[0].index:
# Add lat/lon as variables
if df[0][0][i] == 'Latitude':
attrs = {
'long_name': 'North latitude',
'units': 'degree_N',
'valid_min': -90.0,
'valid_max': 90.0,
'standard_name': 'latitude',
}
ds['lat'] = xr.DataArray(
data=float(df[0][1][i]),
dims=['time'],
coords={'time': ds['time'].values},
attrs=attrs,
)
elif df[0][0][i] == 'Longitude':
attrs = {
'long_name': 'East longitude',
'units': 'degree_E',
'valid_min': -180.0,
'valid_max': 180.0,
'standard_name': 'longitude',
}
ds['lon'] = xr.DataArray(
data=float(df[0][1][i]),
dims=['time'],
coords={'time': ds['time'].values},
attrs=attrs,
)
else:
ds.attrs[df[0][0][i]] = df[0][1][i]
# Add in problem information from the site summary page
problem = ''
for i in df[-1].index:
problem += '_'.join(
[df[-1]['EventDate'][i], df[-1]['EventType'][i], df[-1]['Notes'][i], '\n']
)
ds.attrs['site_problems'] = problem
return ds