"""
Function for downloading data from NOAA PSL Profiler Network.

Source code for act.discovery.noaapsl
"""
import os
import warnings
from datetime import datetime

import pandas as pd

try:
    from urllib.request import urlopen
except ImportError:
    from urllib import urlopen
# Ordered (keyword, URL sub-path) pairs. The first keyword that occurs as a
# substring of the requested instrument name selects the dataset folder.
_PSL_INSTRUMENT_PATHS = (
    ('Parsivel', 'DisdrometerParsivel/Stats/'),
    # Every surface meteorology/radiation keyword maps to the same folder.
    ('Pressure', 'CsiDatalogger/SurfaceMet/'),
    ('Datalogger', 'CsiDatalogger/SurfaceMet/'),
    ('Net Radiation', 'CsiDatalogger/SurfaceMet/'),
    ('Temp/RH', 'CsiDatalogger/SurfaceMet/'),
    ('Solar Radiation', 'CsiDatalogger/SurfaceMet/'),
    ('Tipping Bucket', 'CsiDatalogger/SurfaceMet/'),
    ('TBRG', 'CsiDatalogger/SurfaceMet/'),
    ('Wind Speed', 'CsiDatalogger/SurfaceMet/'),
    ('Wind Direction', 'CsiDatalogger/SurfaceMet/'),
    ('Wind Speed and Direction', 'CsiDatalogger/SurfaceMet/'),
    ('GpsTrimble', 'GpsTrimble/WaterVapor/'),
    ('Radar S-band Moment', 'Radar3000/PopMoments/'),
    ('Radar S-band Bright Band', 'Radar3000/BrightBand/'),
    ('449RWP Bright Band', 'Radar449/BrightBand/'),
    ('449RWP Wind', 'Radar449/WwWind/'),
    ('449RWP Sub-Hour Wind', 'Radar449/WwWindSubHourly/'),
    ('449RWP Sub-Hour Temp', 'Radar449/WwTempSubHourly/'),
    ('915RWP Wind', 'Radar915/WwWind/'),
    ('915RWP Temp', 'Radar915/WwTemp/'),
    ('915RWP Sub-Hour Wind', 'Radar915/WwWindSubHourly/'),
    ('915RWP Sub-Hour Temp', 'Radar915/WwTempSubHourly/'),
    ('Radar FMCW Moment', 'RadarFMCW/PopMoments/'),
    ('Radar FMCW Bright Band', 'RadarFMCW/BrightBand/'),
)


def _psl_instrument_path(instrument):
    """Return the dataset sub-path for *instrument* or raise ValueError."""
    for keyword, path in _PSL_INSTRUMENT_PATHS:
        if keyword in instrument:
            return path
    raise ValueError('Instrument not supported')


def _psl_doy_sequence(start_year, s_doy, e_doy):
    """Return the day-of-year values to visit, wrapping once past year end."""
    if e_doy < s_doy:
        # The last day of the start year is 366 in leap years, 365 otherwise.
        last_doy = datetime(start_year, 12, 31).timetuple().tm_yday
        return list(range(s_doy, last_doy + 1)) + list(range(1, e_doy + 1))
    return list(range(s_doy, e_doy + 1))


def download_noaa_psl_data(
    site=None, instrument=None, startdate=None, enddate=None, hour=None, output=None
):
    """
    Function to download data from the NOAA PSL Profiler Network Data Library

    Parameters
    ----------
    site : str
        3 letter NOAA site identifier. Required variable
    instrument : str
        Name of the dataset to download. Options currently include (name prior to -). Required variable
            'Parsivel' - Parsivel disdrometer data
            'Pressure', 'Datalogger', 'Net Radiation', 'Temp/RH', 'Solar Radiation' - Surface meteorology/radiation data
            'Tipping Bucket', 'TBRG', 'Wind Speed', 'Wind Direction' - Surface meteorology/radiation data
            'Wind Speed and Direction' - Surface meteorology/radiation data
            'GpsTrimble' - GPS Trimble water vapor data
            'Radar S-band Moment' - 3 GHz Precipitation Profiler moment data
            'Radar S-band Bright Band' - 3 GHz Precipitation Profiler bright band data
            '449RWP Bright Band' - 449 MHz Wind Profiler bright band data
            '449RWP Wind' - 449 MHz Wind Profiler wind data
            '449RWP Sub-Hour Wind' - 449 MHz Wind Profiler sub-hourly wind data
            '449RWP Sub-Hour Temp' - 449 MHz Wind Profiler sub-hourly temperature data
            '915RWP Wind' - 915 MHz Wind Profiler wind data
            '915RWP Temp' - 915 MHz Wind Profiler temperature data
            '915RWP Sub-Hour Wind' - 915 MHz Wind Profiler sub-hourly wind data
            '915RWP Sub-Hour Temp' - 915 MHz Wind Profiler sub-hourly temperature data
            'Radar FMCW Moment' - FMCW Radar moments data
            'Radar FMCW Bright Band' - FMCW Radar bright band data
    startdate : str
        The start date of the data to acquire. Format is YYYYMMDD. Required variable
    enddate : str
        The end date of the data to acquire. Format is YYYYMMDD. Defaults to
        *startdate*. Only its day of year is used: an end day of year smaller
        than the start day of year is treated as falling in the following year.
    hour : str
        Two digit hour of file to download if wanting a specific time
    output : str
        The output directory for the data. Set to None to make a folder in the
        current working directory with the same name as *datastream*
        (``site + '_' + instrument`` with spaces replaced by underscores) to
        place the files in.

    Returns
    -------
    files : list
        Returns list of files retrieved

    Raises
    ------
    ValueError
        If site, instrument, or startdate is not set, if a date does not match
        YYYYMMDD, or if the instrument is not supported.

    """
    if (site is None) or (instrument is None) or (startdate is None):
        raise ValueError('site, instrument, and startdate need to be set')

    datastream = site + '_' + instrument.replace(' ', '_')

    # Convert dates to day of year (doy) for NOAA folder structure
    start = datetime.strptime(startdate, '%Y%m%d')
    s_doy = start.timetuple().tm_yday
    year = start.year
    if enddate is None:
        enddate = startdate
    e_doy = datetime.strptime(enddate, '%Y%m%d').timetuple().tm_yday

    # Set base URL
    # NOTE(review): the base URL was empty in this copy of the file; restored
    # to the NOAA PSL realtime archive root -- confirm before release.
    url = 'https://downloads.psl.noaa.gov/psd2/data/realtime/'

    # Add to the url depending on which instrument is requested. This raises
    # before anything is created on disk for unsupported instruments.
    url += _psl_instrument_path(instrument)

    # Construct output directory
    if output:
        # Output files to directory specified
        output_dir = os.path.join(output)
    else:
        # If no folder given, add datastream folder
        # to current working dir to prevent file mix-up
        output_dir = os.path.join(os.getcwd(), datastream)

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Set filename variable to return
    filenames = []

    # Loop through each doy in range
    prev_doy = 0
    for doy in _psl_doy_sequence(year, s_doy, e_doy):
        # if the previous day is greater than current, assume a new year
        # i.e. 365 -> 001
        if prev_doy > doy:
            year += 1
        doy_str = str(doy).zfill(3)

        # Add site, year, and 3-digit day to url
        new_url = url + site + '/' + str(year) + '/' + doy_str + '/'

        # Use pandas to get a list of filenames to download
        # Exclude the first and last records which are "parent directory" and "nan"
        listing = pd.read_html(new_url, skiprows=[1])[0]['Name']
        remote_files = list(listing[1:-1])

        # Write each file out to a file with same name as online
        for fname in remote_files:
            if hour is not None:
                # Keep only files whose name carries this doy followed by the
                # requested hour, with or without a '.' separator.
                wanted = (doy_str + str(hour), doy_str + '.' + str(hour))
                if not any(tag in fname for tag in wanted):
                    continue

            output_file = os.path.join(output_dir, fname)
            print('Downloading ' + fname)
            try:
                # Fetch first so a failed request does not leave an empty file.
                response = urlopen(new_url + fname)
                try:
                    payload = response.read()
                finally:
                    response.close()
                with open(output_file, 'wb') as open_bytes_file:
                    open_bytes_file.write(payload)
            except Exception as err:
                # Best effort: report the failure and move on to the next file.
                warnings.warn('Failed to download ' + fname + ': ' + str(err))
                continue
            filenames.append(output_file)

        prev_doy = doy

    return filenames