Source code for act.discovery.neon

"""
Functions for downloading data from the NSF NEON program
using its API.

NEON sites can be found through the NEON website
https://www.neonscience.org/field-sites/explore-field-sites

"""

import requests
import os
import shutil
import pandas as pd


def get_neon_site_products(site_code, print_to_screen=False):
    """
    Looks up the data products that are listed for a NEON site

    NEON sites can be found through the NEON website
    https://www.neonscience.org/field-sites/explore-field-sites

    Parameters
    ----------
    site_code : str
        NEON site identifier. Required variable
    print_to_screen : boolean
        If set to True, the code and title of each product are printed
        while the listing is read

    Returns
    -------
    products : dict
        Dictionary mapping each data product code to its title

    """
    # Base URL shared by every API request
    api_root = 'http://data.neonscience.org/api/v0/'

    # Query the sites/ endpoint for this site and decode the JSON body
    response = requests.get(''.join((api_root, 'sites/', site_code)))
    payload = response.json()

    # Collect code -> title for every product the site reports
    products = {}
    for entry in payload['data']['dataProducts']:
        code = entry['dataProductCode']
        title = entry['dataProductTitle']
        if print_to_screen:
            print(code, title)
        products[code] = title

    return products

def get_neon_product_avail(site_code, product_code, print_to_screen=False):
    """
    Returns the months for which a data product is available at a NEON site

    NEON sites can be found through the NEON website
    https://www.neonscience.org/field-sites/explore-field-sites

    Parameters
    ----------
    site_code : str
        NEON site identifier. Required variable
    product_code : str
        NEON product code. Required variable
    print_to_screen : boolean
        If set to True will print the available months to screen

    Returns
    -------
    dates : list
        Returns list of available months of data as reported by the API.
        An empty list is returned if the site does not list the
        requested product code.

    """
    # Every request begins with the server's URL
    server = 'http://data.neonscience.org/api/v0/'

    # Make request, using the sites/ endpoint
    site_request = requests.get(server + 'sites/' + site_code)

    # Convert to Python JSON object
    site_json = site_request.json()

    # Start from an empty result so that a product code the site does not
    # list yields [] rather than an UnboundLocalError at the return below.
    dates = []

    # Pick out the availability of the requested product
    for product in site_json['data']['dataProducts']:
        if product['dataProductCode'] != product_code:
            continue
        dates = product['availableMonths']
        if print_to_screen:
            print(dates)

    return dates

def download_neon_data(site_code, product_code, start_date, end_date=None, output_dir=None):
    """
    Downloads the files of a NEON data product for a site over a range of months.

    Please be sure to view the readme files that are downloaded as well as there
    may be a number of different products.

    If you want more information on the NEON file formats, please see:
    https://www.neonscience.org/data-samples/data-management/data-formats-conventions

    NEON sites can be found through the NEON website
    https://www.neonscience.org/field-sites/explore-field-sites

    Please be sure to acknowledge and cite the NEON program and data products appropriately:
    https://www.neonscience.org/data-samples/data-policies-citation

    Parameters
    ----------
    site_code : str
        NEON site identifier. Required variable
    product_code : str
        NEON product code. Required variable
    start_date : str
        Start date of the range to download in YYYY-MM format
    end_date : str
        End date of the range to download in YYYY-MM format.
        If None, will just download data for start_date
    output_dir : str
        Local directory to store the data. If None, will default to
        [current working directory]/[site_code]_[product_code]

    Returns
    -------
    files : list
        Returns a list of the local paths of the files that were downloaded

    """
    # Every request begins with the server's URL
    server = 'http://data.neonscience.org/api/v0/'

    # Get dates to pass in: one 'YYYY-MM' entry per month start in the range
    if end_date is not None:
        date_range = pd.date_range(start_date, end_date, freq='MS').strftime('%Y-%m').tolist()
    else:
        date_range = [start_date]

    # Resolve the default location once; it does not change between months
    if output_dir is None:
        output_dir = os.path.join(os.getcwd(), site_code + '_' + product_code)

    # Size of the pieces streamed from the response to disk
    chunk_size = 64 * 1024

    # For each month, download data for specified site/product
    files = []
    for date in date_range:
        # Ask the data/ endpoint which files exist for this product/site/month
        data_request = requests.get(server + 'data/' + product_code + '/' + site_code + '/' + date)
        data_json = data_request.json()

        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(output_dir, exist_ok=True)

        for file_info in data_json['data']['files']:
            print('[DOWNLOADING] ', file_info['name'])
            output_filename = os.path.join(output_dir, file_info['name'])
            # NOTE(review): the HTTP status of the file request is not checked,
            # so an error response body would be written out as the file.
            with requests.get(file_info['url'], stream=True) as r:
                with open(output_filename, 'wb') as f:
                    # iter_content applies any gzip/deflate content decoding;
                    # copying r.raw directly would write the undecoded
                    # transport bytes to disk.
                    for chunk in r.iter_content(chunk_size=chunk_size):
                        f.write(chunk)
            files.append(output_filename)

    return files