# Source code for act.discovery.neon
"""
Functions for downloading data from the NSF NEON program
using their API.
NEON sites can be found through the NEON website:
https://www.neonscience.org/field-sites/explore-field-sites
"""
import requests
import os
import shutil
import pandas as pd
def get_neon_site_products(site_code, print_to_screen=False):
    """
    Returns the data products available for a NEON site.

    NEON sites can be found through the NEON website
    https://www.neonscience.org/field-sites/explore-field-sites

    Parameters
    ----------
    site_code : str
        NEON site identifier. Required variable
    print_to_screen : boolean
        If set to True, will print each product code and title to screen

    Returns
    -------
    products : dict
        Dictionary mapping each data product code to its product title

    """
    # Every request begins with the server's URL
    server = 'http://data.neonscience.org/api/v0/'

    # Make request, using the sites/ endpoint. The timeout keeps an
    # unresponsive server from blocking the caller indefinitely.
    site_request = requests.get(server + 'sites/' + site_code, timeout=60)

    # Convert to Python JSON object
    site_json = site_request.json()

    # Collect the product code and title of every available data product
    products = {}
    for product in site_json['data']['dataProducts']:
        code = product['dataProductCode']
        title = product['dataProductTitle']
        if print_to_screen:
            print(code, title)
        products[code] = title

    return products
def get_neon_product_avail(site_code, product_code, print_to_screen=False):
    """
    Returns the months for which a data product is available at a NEON site.

    NEON sites can be found through the NEON website
    https://www.neonscience.org/field-sites/explore-field-sites

    Parameters
    ----------
    site_code : str
        NEON site identifier. Required variable
    product_code : str
        NEON product code. Required variable
    print_to_screen : boolean
        If set to True, will print the available months to screen

    Returns
    -------
    dates : list
        Returns list of available months of data. The list is empty if the
        site does not list the requested product code.

    """
    # Every request begins with the server's URL
    server = 'http://data.neonscience.org/api/v0/'

    # Make request, using the sites/ endpoint. The timeout keeps an
    # unresponsive server from blocking the caller indefinitely.
    site_request = requests.get(server + 'sites/' + site_code, timeout=60)

    # Convert to Python JSON object
    site_json = site_request.json()

    # Start from an empty result so that a product code the site does not
    # offer yields an empty list instead of an UnboundLocalError.
    dates = []
    for product in site_json['data']['dataProducts']:
        if product['dataProductCode'] != product_code:
            continue
        dates = product['availableMonths']
        if print_to_screen:
            print(dates)

    return dates
def download_neon_data(site_code, product_code, start_date, end_date=None, output_dir=None):
    """
    Downloads the files of a NEON data product for a site and range of months.
    Please be sure to view the readme files that are downloaded as well as
    there may be a number of different products.

    If you want more information on the NEON file formats, please see:
    https://www.neonscience.org/data-samples/data-management/data-formats-conventions

    NEON sites can be found through the NEON website
    https://www.neonscience.org/field-sites/explore-field-sites

    Please be sure to acknowledge and cite the NEON program and data products appropriately:
    https://www.neonscience.org/data-samples/data-policies-citation

    Parameters
    ----------
    site_code : str
        NEON site identifier. Required variable
    product_code : str
        NEON product code. Required variable
    start_date : str
        Start date of the range to download in YYYY-MM format
    end_date : str
        End date of the range to download in YYYY-MM format.
        If None, will just download data for start_date
    output_dir : str
        Local directory to store the data. If None, will default to
        [current working directory]/[site_code]_[product_code]

    Returns
    -------
    files : list
        Returns a list of files that were downloaded

    Raises
    ------
    requests.HTTPError
        If the server answers a file download with an HTTP error status.

    """
    # Every request begins with the server's URL
    server = 'http://data.neonscience.org/api/v0/'

    # Seconds to wait on the server before giving up, so that a stalled
    # connection cannot hang the download forever.
    request_timeout = 60

    # Get dates to pass in
    if end_date is not None:
        date_range = pd.date_range(start_date, end_date, freq='MS').strftime('%Y-%m').tolist()
    else:
        date_range = [start_date]

    # The destination does not depend on the month, so resolve it once
    if output_dir is None:
        output_dir = os.path.join(os.getcwd(), site_code + '_' + product_code)

    # For each month, download data for specified site/product
    files = []
    for date in date_range:
        # Make Request
        data_request = requests.get(
            server + 'data/' + product_code + '/' + site_code + '/' + date,
            timeout=request_timeout,
        )
        data_json = data_request.json()

        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(output_dir, exist_ok=True)

        for file_info in data_json['data']['files']:
            print('[DOWNLOADING] ', file_info['name'])
            output_filename = os.path.join(output_dir, file_info['name'])
            with requests.get(file_info['url'], stream=True, timeout=request_timeout) as r:
                # Fail before touching the disk rather than saving an HTTP
                # error page under the data file's name.
                r.raise_for_status()
                # The raw stream is not content-decoded by default; without
                # this a gzip/deflate-encoded response would be written to
                # disk still compressed.
                r.raw.decode_content = True
                with open(output_filename, 'wb') as f:
                    shutil.copyfileobj(r.raw, f)
            files.append(output_filename)

    return files