"""Modules for reading in NOAA GML data"""importrefromdatetimeimportdatetimefrompathlibimportPathimportnumpyasnpimportpandasaspdimportxarrayasxrfromact.io.textimportread_csv
def read_gml(filename, datatype=None, remove_time_vars=True, convert_missing=True, **kwargs):
    """
    Function to call or guess what reading NOAA GML data routine to use. It
    tries to guess the correct reading function to call based on filename.
    It mostly works, but you may want to specify for best results.

    Parameters
    ----------
    filename : str or pathlib.Path
        Data file full path name. In theory it should work with a list of
        filenames but it is not working as well with that as expected.
    datatype : str
        Data file type that bypasses the guessing from filename format
        and goes directly to the reading routine. The comparison is case
        insensitive. Options include
        [MET, RADIATION, OZONE, CO2, HALO, AEROSOL]
    remove_time_vars : bool
        Some variables are converted into coordinate variables in Xarray
        DataSet and not needed after conversion. This will remove those
        variables. Only used by the radiation reader.
    convert_missing : bool
        Convert missing value indicator in CSV to NaN in Xarray DataSet.
        Only used by the met, radiation and CO2 readers.
    **kwargs : keywords
        Keywords to pass through to instrument specific reading routine.

    Returns
    -------
    ds : xarray.Dataset or None
        Standard ACT Xarray dataset with the data cleaned up to have units,
        long_name, correct type and some other stuff. None is returned when
        ``datatype`` is not given and the file name matches none of the
        known naming patterns.

    Raises
    ------
    ValueError
        If ``datatype`` is given but is not one of the supported options.

    """
    if datatype is not None:
        kind = datatype.upper()
        if kind == 'MET':
            return read_gml_met(filename, convert_missing=convert_missing, **kwargs)
        if kind == 'RADIATION':
            return read_gml_radiation(
                filename,
                remove_time_vars=remove_time_vars,
                convert_missing=convert_missing,
                **kwargs,
            )
        if kind == 'OZONE':
            return read_gml_ozone(filename, **kwargs)
        if kind == 'CO2':
            return read_gml_co2(filename, convert_missing=convert_missing, **kwargs)
        if kind == 'HALO':
            return read_gml_halo(filename, **kwargs)
        if kind == 'AEROSOL':
            return read_gml_aerosol(filename, **kwargs)
        raise ValueError('datatype is unknown')

    # Guess from the base name of the (first) file only.
    test_filename = filename
    if isinstance(test_filename, (list, tuple)):
        test_filename = filename[0]
    test_filename = str(Path(test_filename).name)

    if test_filename.startswith('met_') and test_filename.endswith('.txt'):
        return read_gml_met(filename, convert_missing=convert_missing, **kwargs)

    if test_filename.startswith('co2_') and test_filename.endswith('.txt'):
        return read_gml_co2(filename, convert_missing=convert_missing, **kwargs)

    if test_filename.endswith('.nas'):
        return read_gml_aerosol(filename, **kwargs)

    # The dot before the extension is escaped in every pattern below so it
    # only matches a literal '.', not an arbitrary character.
    if re.match(r'([a-z]{3})([\d]{5})\.dat', test_filename) is not None:
        return read_gml_radiation(
            filename,
            remove_time_vars=remove_time_vars,
            convert_missing=convert_missing,
            **kwargs,
        )

    ozone_patterns = [
        r'[a-z]{3}_[\d]{4}_[\d]{2}_hour\.dat',
        r'[a-z]{3}_[\d]{2}_[\d]{4}_hour\.dat',
        r'[a-z]{3}_[\d]{4}_all_minute\.dat',
        r'[a-z]{3}_[\d]{2}_[\d]{4}_5minute\.dat',
        r'[a-z]{3}_[\d]{2}_[\d]{4}_min\.dat',
        r'[a-z]{3}_o3_6m_hour_[\d]{2}_[\d]{4}\.dat',
        r'[a-z]{3}_ozone_houry__[\d]{4}',
    ]
    if any(re.match(pattern, test_filename) for pattern in ozone_patterns):
        return read_gml_ozone(filename, **kwargs)

    halo_patterns = [
        r'[a-z]{3}_CCl4_Day\.dat',
        r'[a-z]{3}_CCl4_All\.dat',
        r'[a-z]{3}_CCl4_MM\.dat',
        r'[a-z]{3}_MC_MM\.dat',
    ]
    if any(re.match(pattern, test_filename) for pattern in halo_patterns):
        return read_gml_halo(filename, **kwargs)

    # No known naming convention matched.
    return None
def read_gml_halo(filename, **kwargs):
    """
    Function to read Halocarbon data from NOAA GML.

    Parameters
    ----------
    filename : str or pathlib.Path
        Data file full path name. When a list or tuple is given the first
        entry is used to count the leading '#' comment lines.
    **kwargs : keywords
        Keywords to pass through to ACT read_csv() routine.

    Returns
    -------
    ds : xarray.Dataset
        Standard ACT Xarray dataset with the data cleaned up to have units,
        long_name, correct type and some other stuff. None is returned when
        filename is None.

    """
    if filename is None:
        return None

    def _ppt(long_name, new_name):
        # Spec for a float32 concentration-like column reported in ppt.
        return {
            'long_name': long_name,
            'units': 'ppt',
            '_FillValue': np.nan,
            '__type': np.float32,
            '__rename': new_name,
        }

    def _samples():
        # Spec for an int16 sample-count column.
        return {
            'long_name': 'Number of samples',
            'units': 'count',
            '__type': np.int16,
            '__rename': 'number_of_samples',
        }

    ccl4_std = 'Carbon Tetrachloride (CCl4) standard deviation'
    mc = 'Methyl Chloroform (CH3CCl3)'
    mc_std = 'Methyl Chloroform (CH3CCl3) standard deviation'

    # Keys are the column names found in the file; '__type' and '__rename'
    # are processing directives, everything else becomes an attribute.
    variables = {
        'CCl4catsBRWm': _ppt('Carbon Tetrachloride (CCl4) daily median', 'CCl4'),
        'CCl4catsBRWmsd': _ppt(ccl4_std, 'CCl4_std_dev'),
        'CCl4catsBRWsd': _ppt(ccl4_std, 'CCl4_std_dev'),
        'CCl4catsBRWn': _samples(),
        'CCl4catsBRWunc': _ppt('Carbon Tetrachloride (CCl4) uncertainty', 'CCl4_uncertainty'),
        'MCcatsBRWm': _ppt(mc, 'methyl_chloroform'),
        'MCcatsBRWunc': _ppt(
            'Methyl Chloroform (CH3CCl3) uncertainty', 'methyl_chloroform_uncertainty'
        ),
        'MCcatsBRWsd': _ppt(mc_std, 'methyl_chloroform_std_dev'),
        'MCcatsBRWmsd': _ppt(mc_std, 'methyl_chloroform_std_dev'),
        'MCcatsBRWn': _samples(),
        'MCritsBRWm': _ppt(mc, 'methyl_chloroform'),
        'MCritsBRWsd': _ppt(mc_std, 'methyl_chloroform_std_dev'),
        'MCritsBRWn': _samples(),
    }

    first_file = filename[0] if isinstance(filename, (list, tuple)) else filename

    # Count the leading '#' lines; the next line holds the column names.
    header = 0
    with open(first_file) as fc:
        for raw_line in fc:
            if not raw_line.strip().startswith('#'):
                break
            header += 1

    ds = read_csv(
        filename, sep=r'\s+', header=header, na_values=['Nan', 'NaN', 'nan', 'NAN'], **kwargs
    )
    var_names = list(ds.data_vars)

    # Locate the time component columns by their name suffix. The first
    # matching suffix wins per column; the last matching column wins per
    # suffix.
    suffixes = ('yr', 'mon', 'day', 'hour', 'min')
    time_cols = dict.fromkeys(suffixes)
    for name in var_names:
        for suffix in suffixes:
            if name.endswith(suffix):
                time_cols[suffix] = name
                break

    # Use the finest resolution available, always starting at year/month.
    if time_cols['min'] is not None:
        used = suffixes
    elif time_cols['hour'] is not None:
        used = suffixes[:4]
    elif time_cols['day'] is not None:
        used = suffixes[:3]
    else:
        used = suffixes[:2]

    timestamp = np.full(ds[var_names[0]].size, np.nan, dtype='datetime64[ns]')
    for row in range(len(timestamp)):
        parts = [ds[time_cols[suffix]].values[row] for suffix in used]
        if len(parts) == 2:
            # Monthly data: stamp on the first day of the month.
            parts.append(1)
        timestamp[row] = np.datetime64(datetime(*parts), 'ns')

    # Drop the time component columns that were found.
    for column in time_cols.values():
        try:
            del ds[column]
        except KeyError:
            pass

    ds = ds.rename({'index': 'time'})
    ds = ds.assign_coords(time=timestamp)
    ds['time'].attrs['long_name'] = 'Time'

    for var_name, spec in variables.items():
        if var_name not in var_names:
            continue
        for att_name, att_value in spec.items():
            if att_name == '__type':
                converted = ds[var_name].values.astype(att_value)
                ds[var_name].values = converted
            elif att_name == '__rename':
                ds = ds.rename({var_name: att_value})
            else:
                ds[var_name].attrs[att_name] = att_value

    return ds
def read_gml_co2(filename=None, convert_missing=True, **kwargs):
    """
    Function to read carbon dioxide data from NOAA GML.

    Parameters
    ----------
    filename : str or pathlib.Path
        Data file full path name. When a list or tuple is given the first
        entry is used to determine the number of header rows.
    convert_missing : boolean
        Option to convert missing values to NaN. If turned off will
        set variable attribute to missing value expected. This works well
        to preserve the data type best for writing to a netCDF file.
    **kwargs : keywords
        Keywords to pass through to ACT read_csv() routine.

    Returns
    -------
    ds : xarray.Dataset
        Standard ACT Xarray dataset with the data cleaned up to have units,
        long_name, correct type and some other stuff. None is returned when
        filename is None.

    """
    if filename is None:
        return None

    # None marks a column that is dropped after the time coordinate is
    # built. '__type' and '__rename' are processing directives; every other
    # key becomes a variable attribute.
    variables = {
        'site_code': None,
        'year': None,
        'month': None,
        'day': None,
        'hour': None,
        'minute': None,
        'second': None,
        'time_decimal': None,
        'value': {
            'long_name': 'Carbon dioxide in dry air',
            'units': 'ppm',
            '_FillValue': -999.99,
            'comment': (
                'Mole fraction reported in units of micromol mol-1 '
                '(10-6 mol per mol of dry air); abbreviated as ppm (parts per million).'
            ),
            '__type': np.float32,
            '__rename': 'co2',
        },
        'value_std_dev': {
            'long_name': 'Carbon dioxide in dry air standard deviation',
            'units': 'ppm',
            '_FillValue': -99.99,
            'comment': (
                'This is the standard deviation of the reported mean value '
                'when nvalue is greater than 1. See provider_comment if available.'
            ),
            '__type': np.float32,
            '__rename': 'co2_std_dev',
        },
        'nvalue': {
            'long_name': 'Number of measurements contributing to reported value',
            'units': '1',
            '_FillValue': -9,
            '__type': np.int16,
            '__rename': 'number_of_measurements',
        },
        'latitude': {
            'long_name': 'Latitude at which air sample was collected',
            'units': 'degrees_north',
            '_FillValue': -999.999,
            'standard_name': 'latitude',
            '__type': np.float32,
        },
        'longitude': {
            'long_name': 'Longitude at which air sample was collected',
            'units': 'degrees_east',
            '_FillValue': -999.999,
            'standard_name': 'longitude',
            '__type': np.float32,
        },
        'altitude': {
            'long_name': 'Sample altitude',
            'units': 'm',
            '_FillValue': -999.999,
            'standard_name': 'altitude',
            'comment': (
                'Altitude for this dataset is the sum of surface elevation '
                '(masl) and sample intake height (magl)'
            ),
            '__type': np.float32,
        },
        'intake_height': {
            'long_name': 'Sample intake height above ground level',
            'units': 'm',
            '_FillValue': -999.999,
            '__type': np.float32,
        },
    }

    test_filename = filename
    if isinstance(test_filename, (list, tuple)):
        test_filename = test_filename[0]

    # The last token on the first line is read as the header line count;
    # one row less is skipped so the column-name row is kept.
    with open(test_filename) as fc:
        skiprows = int(fc.readline().strip().split()[-1]) - 1

    ds = read_csv(filename, sep=r'\s+', skiprows=skiprows, **kwargs)

    time_fields = ('year', 'month', 'day', 'hour', 'minute', 'second')
    columns = [ds[name].values for name in time_fields]
    timestamp = np.array(
        [datetime(*row) for row in zip(*columns)], dtype='datetime64[ns]'
    )

    ds = ds.rename({'index': 'time'})
    ds = ds.assign_coords(time=timestamp)
    ds['time'].attrs['long_name'] = 'Time'

    for var_name, spec in variables.items():
        if spec is None:
            del ds[var_name]
            continue

        for att_name, att_value in spec.items():
            if att_name == '__type':
                ds[var_name].values = ds[var_name].values.astype(att_value)
            elif att_name != '__rename':
                ds[var_name].attrs[att_name] = att_value

        # Rename only after all attributes are set, so the result does not
        # depend on the key order inside the spec.
        out_name = spec.get('__rename', var_name)
        if out_name != var_name:
            ds = ds.rename({var_name: out_name})

        if convert_missing and '_FillValue' in ds[out_name].attrs:
            missing_value = ds[out_name].attrs['_FillValue']
            values = ds[out_name].values.astype(float)
            values[np.isclose(missing_value, values)] = np.nan
            ds[out_name].values = values
            ds[out_name].attrs['_FillValue'] = np.nan

    # Translate the provider flag string: a first character other than '.'
    # marks the sample Bad, a second character other than '.' marks it
    # Indeterminate.
    flags = ds['qcflag'].values
    bad_index = [ii for ii, flag in enumerate(flags) if flag[0] != '.']
    suspect_index = [ii for ii, flag in enumerate(flags) if flag[1] != '.']

    var_name = 'co2'
    qc_var_name = ds.qcfilter.create_qc_variable(var_name)
    ds.qcfilter.add_test(
        var_name,
        index=bad_index,
        test_assessment='Bad',
        test_meaning='Obvious problems during collection or analysis',
    )
    ds.qcfilter.add_test(
        var_name,
        index=suspect_index,
        test_assessment='Indeterminate',
        test_meaning=(
            'Likely valid but does not meet selection criteria determined by '
            'the goals of a particular investigation'
        ),
    )
    ds[qc_var_name].attrs[
        'comment'
    ] = 'This quality control flag is provided by the contributing PIs'
    del ds['qcflag']

    return ds
def read_gml_ozone(filename=None, **kwargs):
    """
    Function to read ozone data from NOAA GML.

    Parameters
    ----------
    filename : str or pathlib.Path
        Data file full path name. When a list or tuple is given the first
        entry is used to locate the column header row.
    **kwargs : keywords
        Keywords to pass through to ACT read_csv() routine.

    Returns
    -------
    ds : xarray.Dataset
        Standard ACT Xarray dataset with the data cleaned up to have units,
        long_name, correct type and some other stuff. None is returned when
        filename is None.

    Raises
    ------
    ValueError
        If no column header row (six fields, the first being 'STN') is
        found in the file.

    """
    if filename is None:
        return None

    test_filename = filename
    if isinstance(test_filename, (list, tuple)):
        test_filename = test_filename[0]

    # Find the column header row. Iterating the file handle stops at end of
    # file, so a file without the header cannot hang the reader.
    skiprows = None
    with open(test_filename) as fc:
        for line_number, line in enumerate(fc):
            fields = line.split()
            if len(fields) == 6 and fields[0] == 'STN':
                skiprows = line_number
                break

    if skiprows is None:
        raise ValueError(
            f"Unable to find the 'STN' column header row in {test_filename}"
        )

    ds = read_csv(filename, sep=r'\s+', skiprows=skiprows, **kwargs)
    ds.attrs['station'] = str(ds['STN'].values[0]).lower()

    columns = [ds[name].values for name in ('YEAR', 'MON', 'DAY', 'HR')]
    timestamp = np.array(
        [datetime(*row) for row in zip(*columns)], dtype='datetime64[ns]'
    )

    ds = ds.rename({'index': 'time'})
    ds = ds.assign_coords(time=timestamp)
    ds['time'].attrs['long_name'] = 'Time'

    # The station and time component columns are now redundant.
    for var_name in ['STN', 'YEAR', 'MON', 'DAY', 'HR']:
        del ds[var_name]

    var_name = 'ozone'
    ds = ds.rename({'O3(PPB)': var_name})
    ds[var_name].attrs['long_name'] = 'Ozone'
    ds[var_name].attrs['units'] = 'ppb'
    ds[var_name].attrs['_FillValue'] = np.nan
    ds[var_name].values = ds[var_name].values.astype(np.float32)

    return ds
def read_gml_radiation(filename=None, convert_missing=True, remove_time_vars=True, **kwargs):
    """
    Function to read radiation data from NOAA GML.

    Parameters
    ----------
    filename : str or pathlib.Path
        Data file full path name. When a list or tuple is given the first
        entry is used to detect the campaign from the file name and to read
        the two header lines (station name and location).
    convert_missing : boolean
        Option to convert missing values to NaN. If turned off will
        set variable attribute to missing value expected. This works well
        to preserve the data type best for writing to a netCDF file.
    remove_time_vars : boolean
        Some column names in the CSV file are used for creating the time
        coordinate variable in the returned Xarray DataSet. Once used the
        variables are not needed and will be removed from DataSet.
    **kwargs : keywords
        Keywords to pass through to ACT read_csv() routine.

    Returns
    -------
    ds : xarray.Dataset
        Standard ACT Xarray dataset with the data cleaned up to have units,
        long_name, correct type and some other stuff. None is returned when
        filename is None or when read_csv() returns None.

    """
    if filename is None:
        return None

    def _measurement(units, long_name):
        # Every measured column is float32 with the same missing indicator.
        return {
            'units': units,
            'long_name': long_name,
            '_FillValue': -9999.9,
            '__type': np.float32,
        }

    # None marks a time component column that carries no attributes.
    column_names = {
        'year': None,
        'jday': None,
        'month': None,
        'day': None,
        'hour': None,
        'minute': None,
        'decimal_time': None,
        'solar_zenith_angle': _measurement('degree', 'Solar zenith angle'),
        'downwelling_global_solar': _measurement('W/m^2', 'Downwelling global solar'),
        'upwelling_global_solar': _measurement('W/m^2', 'Upwelling global solar'),
        'direct_normal_solar': _measurement('W/m^2', 'Direct-normal solar'),
        'downwelling_diffuse_solar': _measurement('W/m^2', 'Downwelling diffuse solar'),
        'downwelling_thermal_infrared': _measurement('W/m^2', 'Downwelling thermal infrared'),
        'downwelling_infrared_case_temp': _measurement('degK', 'Downwelling infrared case temp'),
        'downwelling_infrared_dome_temp': _measurement('degK', 'downwelling infrared dome temp'),
        'upwelling_thermal_infrared': _measurement('W/m^2', 'Upwelling thermal infrared'),
        'upwelling_infrared_case_temp': _measurement('degK', 'Upwelling infrared case temp'),
        'upwelling_infrared_dome_temp': _measurement('degK', 'Upwelling infrared dome temp'),
        'global_UVB': _measurement('mW/m^2', 'global ultraviolet-B'),
        'par': _measurement('W/m^2', 'Photosynthetically active radiation'),
        'net_solar': _measurement(
            'W/m^2', 'Net solar (downwelling_global_solar - upwelling_global_solar)'
        ),
        'net_infrared': _measurement(
            'W/m^2',
            'Net infrared (downwelling_thermal_infrared - upwelling_thermal_infrared)',
        ),
        'net_radiation': _measurement('W/m^2', 'Net radiation (net_solar + net_infrared)'),
        'air_temperature_10m': _measurement('degC', '10-meter air temperature'),
        'relative_humidity': _measurement('%', 'Relative humidity'),
        'wind_speed': _measurement('m/s', 'Wind speed'),
        'wind_direction': _measurement('degree', 'Wind direction (clockwise from north)'),
        'station_pressure': _measurement('millibar', 'Station atmospheric pressure'),
    }

    # Resolve the first file up front so a list or tuple of files works for
    # the file name check below as well as for the header read.
    first_file = filename[0] if isinstance(filename, (list, tuple)) else filename

    # Add additional column names for NOAA SPLASH campaign
    if Path(first_file).name.startswith(('cbc', 'ckp')):
        column_names['SPN1_total'] = _measurement('W/m^2', 'SPN1 total average')
        column_names['SPN1_diffuse'] = _measurement('W/m^2', 'SPN1 diffuse average')

    # Every column except the time components and the solar zenith angle is
    # immediately followed in the file by its quality control column.
    skip_vars = {
        'year',
        'jday',
        'month',
        'day',
        'hour',
        'minute',
        'decimal_time',
        'solar_zenith_angle',
    }
    names = []
    for name in column_names:
        names.append(name)
        if name not in skip_vars:
            names.append('qc_' + name)

    ds = read_csv(
        filename, sep=r'\s+', header=None, skiprows=2, column_names=names, **kwargs
    )
    if ds is None:
        return None

    # The two header lines hold the station name (a single token) and the
    # location as latitude, longitude, altitude and altitude unit.
    lat = None
    lon = None
    alt = None
    alt_unit = None
    station = None
    with open(first_file) as fc:
        for _ in range(2):
            line = fc.readline().strip().split()
            if len(line) == 1:
                station = line[0]
            else:
                lat = np.array(line[0], dtype=np.float32)
                lon = np.array(line[1], dtype=np.float32)
                alt = np.array(line[2], dtype=np.float32)
                alt_unit = str(line[3])

    ds['lat'] = xr.DataArray(
        lat,
        attrs={
            'long_name': 'Latitude',
            'units': 'degree_north',
            'standard_name': 'latitude',
        },
    )
    ds['lon'] = xr.DataArray(
        lon,
        attrs={
            'long_name': 'Longitude',
            'units': 'degree_east',
            'standard_name': 'longitude',
        },
    )
    ds['alt'] = xr.DataArray(
        alt,
        attrs={
            'long_name': 'Altitude',
            'units': alt_unit,
            'standard_name': 'altitude',
        },
    )
    ds.attrs['location'] = station

    columns = [ds[name].values for name in ('year', 'month', 'day', 'hour', 'minute')]
    timestamp = np.array(
        [datetime(*row) for row in zip(*columns)], dtype='datetime64[ns]'
    )

    ds = ds.rename({'index': 'time'})
    ds = ds.assign_coords(time=timestamp)
    ds['time'].attrs['long_name'] = 'Time'

    for var_name, spec in column_names.items():
        if spec is None:
            # Time component columns are kept here; remove_time_vars
            # decides below whether they are dropped.
            continue

        for att_name, att_value in spec.items():
            if att_name == '__type':
                ds[var_name].values = ds[var_name].values.astype(att_value)
            else:
                ds[var_name].attrs[att_name] = att_value

        if convert_missing and '_FillValue' in ds[var_name].attrs:
            missing_value = ds[var_name].attrs['_FillValue']
            values = ds[var_name].values.astype(float)
            values[np.isclose(values, missing_value)] = np.nan
            ds[var_name].values = values
            ds[var_name].attrs['_FillValue'] = np.nan

    # Describe each quality control column and link it to its data variable.
    for var_name in ds.data_vars:
        if not var_name.startswith('qc_'):
            continue
        data_var_name = var_name.replace('qc_', '', 1)
        ds[var_name].attrs = {
            'long_name': f"Quality control variable for: {ds[data_var_name].attrs['long_name']}",
            'units': '1',
            'standard_name': 'quality_flag',
            'flag_values': [0, 1, 2],
            'flag_meanings': [
                'Not failing any tests',
                'Knowingly bad value',
                'Should be used with scrutiny',
            ],
            'flag_assessments': ['Good', 'Bad', 'Indeterminate'],
        }
        ds[data_var_name].attrs['ancillary_variables'] = var_name

    if remove_time_vars:
        remove_var_names = [
            'year',
            'jday',
            'month',
            'day',
            'hour',
            'minute',
            'decimal_time',
        ]
        ds = ds.drop_vars(remove_var_names)

    return ds
def read_gml_met(filename=None, convert_missing=True, **kwargs):
    """
    Function to read meteorological data from NOAA GML.

    Parameters
    ----------
    filename : str or pathlib.Path
        Data file full path name. When a list or tuple is given the first
        entry decides whether the data are hourly ('_hour_' in the name).
    convert_missing : boolean
        Option to convert missing values to NaN. If turned off will
        set variable attribute to missing value expected. This works well
        to preserve the data type best for writing to a netCDF file.
    **kwargs : keywords
        Keywords to pass through to ACT read_csv() routine.

    Returns
    -------
    ds : xarray.Dataset
        Standard ACT Xarray dataset with the data cleaned up to have units,
        long_name, correct type and some other stuff. None is returned when
        filename is None or when read_csv() returns None.

    """
    if filename is None:
        return None

    # Column order of the file. None marks a column that is dropped once the
    # time coordinate exists. The keys become the output variable names, so
    # their spelling is kept exactly as is.
    column_names = {
        'station': None,
        'year': None,
        'month': None,
        'day': None,
        'hour': None,
        'minute': None,
        'wind_direction': {
            'units': 'degree',
            'long_name': 'Average wind direction from which the wind is blowing',
            '_FillValue': -999,
            '__type': np.int16,
        },
        'wind_speed': {
            'units': 'm/s',
            'long_name': 'Average wind speed',
            '_FillValue': -999.9,
            '__type': np.float32,
        },
        'wind_steadiness_factor': {
            'units': '1',
            'long_name': (
                '100 times the ratio of the vector wind speed to the '
                'average wind speed for the hour'
            ),
            '_FillValue': -9,
            '__type': np.int16,
        },
        'barometric_pressure': {
            'units': 'hPa',
            'long_name': 'Station barometric pressure',
            '_FillValue': -999.90,
            '__type': np.float32,
        },
        'temperature_2m': {
            'units': 'degC',
            'long_name': 'Temperature at 2 meters above ground level',
            '_FillValue': -999.9,
            '__type': np.float32,
        },
        'temperature_10m': {
            'units': 'degC',
            'long_name': 'Temperature at 10 meters above ground level',
            '_FillValue': -999.9,
            '__type': np.float32,
        },
        'temperature_tower_top': {
            'units': 'degC',
            'long_name': 'Temperature at top of instrument tower',
            '_FillValue': -999.9,
            '__type': np.float32,
        },
        'realitive_humidity': {
            'units': 'percent',
            'long_name': 'Relative humidity',
            '_FillValue': -99,
            '__type': np.int16,
        },
        'preciptation_intensity': {
            'units': 'mm/hour',
            'long_name': 'Amount of precipitation per hour',
            '_FillValue': -99,
            '__type': np.int16,
            'comment': (
                'The precipitation amount is measured with an unheated '
                'tipping bucket rain gauge.'
            ),
        },
    }

    first_file = filename[0] if isinstance(filename, (list, tuple)) else filename

    # Hourly files have no minute column.
    minutes = '_hour_' not in Path(first_file).name
    if not minutes:
        del column_names['minute']

    ds = read_csv(
        filename, sep=r'\s+', header=None, column_names=column_names.keys(), **kwargs
    )
    if ds is None:
        return ds

    time_fields = ['year', 'month', 'day', 'hour']
    if minutes:
        time_fields.append('minute')

    timestamp = np.full(ds['year'].size, np.nan, dtype='datetime64[ns]')
    for row in range(len(timestamp)):
        parts = [ds[field].values[row] for field in time_fields]
        timestamp[row] = np.datetime64(datetime(*parts), 'ns')

    ds = ds.rename({'index': 'time'})
    ds = ds.assign_coords(time=timestamp)
    ds['time'].attrs['long_name'] = 'Time'

    for var_name, spec in column_names.items():
        if spec is None:
            del ds[var_name]
            continue

        for att_name, att_value in spec.items():
            if att_name == '__type':
                converted = ds[var_name].values.astype(att_value)
                ds[var_name].values = converted
            else:
                ds[var_name].attrs[att_name] = att_value

        if not convert_missing:
            continue

        # Replace the file's missing value indicator with NaN.
        try:
            fill = ds[var_name].attrs['_FillValue']
            data = ds[var_name].values.astype(float)
            is_missing = np.isclose(data, fill)
            data[is_missing] = np.nan
            ds[var_name].values = data
            ds[var_name].attrs['_FillValue'] = np.nan
        except KeyError:
            pass

    return ds
def read_surfrad(filename, **kwargs):
    """
    Function to read in NOAA SurfRad data

    Parameters
    ----------
    filename : list, str or pathlib.Path
        Data files full path name or url to file. A single str or
        pathlib.Path is treated as a one element list.
    **kwargs : keywords
        Keywords to pass through to instrument specific reading routine.
        They are accepted for interface compatibility but not used by this
        reader.

    Returns
    -------
    ds : xarray.Dataset
        Standard ACT Xarray dataset with the data cleaned up to have units,
        long_name, correct type and some other stuff. None is returned when
        filename is None or an empty list.

    """
    if filename is None:
        return None

    # A bare string would otherwise be iterated character by character.
    if isinstance(filename, (str, Path)):
        filename = [filename]
    filename = list(filename)
    if not filename:
        return None

    # Measured variables in file order; each is followed by its QC column.
    qc_vars = [
        'downwelling_global',
        'upwelling_global',
        'direct_normal',
        'downwelling_diffuse',
        'downwelling_ir',
        'downwelling_ir_casetemp',
        'downwelling_ir_dometemp',
        'upwelling_ir',
        'upwelling_ir_casetemp',
        'upwelling_ir_dometemp',
        'global_uvb',
        'par',
        'net_radiation',
        'net_ir',
        'total_net',
        'temperature',
        'relative_humidity',
        'wind_speed',
        'wind_direction',
        'pressure',
    ]
    names = [
        'year',
        'jday',
        'month',
        'day',
        'hour',
        'minute',
        'dec_time',
        'solar_zenith_angle',
    ]
    for var in qc_vars:
        names.extend([var, 'qc_' + var])

    # Read every file, then concatenate once instead of growing the
    # DataFrame file by file.
    frames = [
        pd.read_csv(f, names=names, skiprows=2, delimiter=r'\s+', header=None)
        for f in filename
    ]
    df = pd.concat(frames)

    # Create time variable and add as the coordinate
    ds = df.to_xarray()
    time_columns = [ds[name].values for name in ('year', 'month', 'day', 'hour', 'minute')]
    time = [datetime(*row) for row in zip(*time_columns)]
    ds = ds.assign_coords(index=time)
    ds = ds.rename(index='time')

    # Add attributes
    attrs = {
        'year': {'long_name': 'Year', 'units': 'unitless'},
        'jday': {'long_name': 'Julian day', 'units': 'unitless'},
        'month': {'long_name': 'Month', 'units': 'unitless'},
        'day': {'long_name': 'Day of the month', 'units': 'unitless'},
        'hour': {'long_name': 'Hour', 'units': 'unitless'},
        'minute': {'long_name': 'Minutes', 'units': 'unitless'},
        'dec_time': {'long_name': 'Decimal time', 'units': 'unitless'},
        'solar_zenith_angle': {'long_name': 'Solar zenith angle', 'units': 'deg'},
        'downwelling_global': {
            'long_name': 'Downwelling global solar',
            'units': 'W m^-2',
            'standard_name': 'surface_downwelling_shortwave_flux_in_air',
        },
        'upwelling_global': {
            'long_name': 'Upwelling global solar',
            'units': 'W m^-2',
            'standard_name': 'surface_upwelling_shortwave_flux_in_air',
        },
        'direct_normal': {
            'long_name': 'Direct normal solar',
            'units': 'W m^-2',
            'standard_name': 'surface_direct_downwelling_shortwave_flux_in_air',
        },
        'downwelling_diffuse': {
            'long_name': 'Downwelling diffuse solar',
            'units': 'W m^-2',
            'standard_name': 'diffuse_downwelling_shortwave_flux_in_air',
        },
        'downwelling_ir': {
            'long_name': 'Downwelling thermal infrared',
            'units': 'W m^-2',
            'standard_name': 'net_downward_longwave_flux_in_air',
        },
        'downwelling_ir_casetemp': {
            'long_name': 'Downwelling thermal infrared case temperature',
            'units': 'K',
        },
        'downwelling_ir_dometemp': {
            'long_name': 'Downwelling thermal infrared dome temperature',
            'units': 'K',
        },
        'upwelling_ir': {
            'long_name': 'Upwelling thermal infrared',
            'units': 'W m^-2',
            'standard_name': 'net_upward_longwave_flux_in_air',
        },
        'upwelling_ir_casetemp': {
            'long_name': 'Upwelling thermal infrared case temperature',
            'units': 'K',
        },
        'upwelling_ir_dometemp': {
            'long_name': 'Upwelling thermal infrared dome temperature',
            'units': 'K',
        },
        'global_uvb': {'long_name': 'Global UVB', 'units': 'milliWatts m^-2'},
        'par': {
            'long_name': 'Photosynthetically active radiation',
            'units': 'W m^-2',
            'standard_name': 'surface_downwelling_photosynthetic_radiative_flux_in_air',
        },
        'net_radiation': {
            'long_name': 'Net solar (downwelling_global-upwelling_global)',
            'units': 'W m^-2',
            'standard_name': 'surface_net_downward_shortwave_flux',
        },
        'net_ir': {
            'long_name': 'Net infrared (downwelling_ir-upwelling_ir)',
            'units': 'W m^-2',
            'standard_name': 'surface_net_downward_longwave_flux',
        },
        'total_net': {
            'long_name': 'Total Net radiation (net_radiation + net_ir)',
            'units': 'W m^-2',
        },
        'temperature': {
            'long_name': '10-meter air temperature',
            'units': 'degC',
            'standard_name': 'air_temperature',
        },
        'relative_humidity': {
            'long_name': 'Relative humidity',
            'units': '%',
            'standard_name': 'relative_humidity',
        },
        'wind_speed': {
            'long_name': 'Wind speed',
            'units': 'ms^-1',
            'standard_name': 'wind_speed',
        },
        'wind_direction': {
            'long_name': 'Wind direction, clockwise from North',
            'units': 'deg',
            'standard_name': 'wind_from_direction',
        },
        'pressure': {
            'long_name': 'Station pressure',
            'units': 'mb',
            'standard_name': 'air_pressure',
        },
    }

    for v in ds:
        if v in attrs:
            ds[v].attrs = attrs[v]

    # Add attributes to all QC variables
    qc_description = ''.join(
        [
            'A QC flag of zero indicates that the corresponding data point is good,',
            ' having passed all QC checks. A value greater than 0 indicates that',
            ' the data failed one level of QC. For example, a QC value of 1 means',
            ' that the recorded value is beyond a physically possible range, or it has',
            ' been affected adversely in some manner to produce a knowingly bad value.',
            ' A value of 2 indicates that the data value failed the second level QC check,',
            ' indicating that the data value may be physically possible but should be used',
            ' with scrutiny, and so on.',
        ]
    )
    for v in qc_vars:
        ds['qc_' + v].attrs = {
            'long_name': 'Quality check results on variable: ' + v,
            'units': '1',
            'description': qc_description,
        }

    # The first three characters of the first file's base name are used as
    # the site identifier. str() lets pathlib.Path entries work too.
    ds.attrs['datastream'] = 'SURFRAD Site: ' + str(filename[0]).split('/')[-1][0:3]

    return ds
def read_gml_aerosol(filename, **kwargs):
    """
    Function to read aerosol data from NOAA GML.

    Parameters
    ----------
    filename : str, pathlib.Path or list
        Data file full path name, or a list or tuple of them. The header of
        the first file supplies the metadata, the column names and the
        number of rows to skip for every file.
    **kwargs : keywords
        Keywords to pass through to ACT read_csv() routine. They are
        accepted for interface compatibility but not used by this reader.

    Returns
    -------
    ds : xarray.Dataset
        Standard ACT Xarray dataset with the data cleaned up to have units,
        long_name, correct type and some other stuff. None is returned when
        filename is None.

    Raises
    ------
    ValueError
        If the header of the first file has no 'start_time' column row or
        no 'Startdate:' entry.

    """
    if filename is None:
        return None

    if not isinstance(filename, (list, tuple)):
        filename = [filename]

    skiprows = None
    names = None
    startdate = None
    lat = None
    lon = None
    alt = None
    height = None
    station = None
    station_code = None
    station_gaw_id = None
    missing_value = None
    matrix = None
    instrument_type = None
    inlet_type = None

    # Scan the header of the first file. Iterating the file handle stops at
    # end of file, so a file without the column row cannot hang the reader.
    with open(filename[0]) as fc:
        for line_number, raw_line in enumerate(fc):
            line = raw_line.strip()
            if line.startswith('Startdate:'):
                startdate = line.split()[-1].strip()
            if line.startswith('Measurement latitude:'):
                lat = float(line.split()[-1].strip())
            if line.startswith('Measurement longitude:'):
                lon = float(line.split()[-1].strip())
            if line.startswith('Measurement altitude:'):
                alt = float(line.split()[-1].strip().replace('m', ''))
            if line.startswith('Measurement height:'):
                height = float(line.split()[-1].strip().replace('m', ''))
            if line.startswith('Station GAW-Name:'):
                station = line.split(":")[-1].strip()
            if line.startswith('Station code:'):
                station_code = line.split(":")[-1].strip()
            if line.startswith('Station GAW-ID:'):
                station_gaw_id = line.split(":")[-1].strip()
            if line.startswith('Matrix:'):
                matrix = line.split(":")[-1].strip()
            if line.startswith('Instrument type:'):
                instrument_type = line.split(":")[-1].strip()
            if line.startswith('Inlet type:'):
                inlet_type = line.split(":")[-1].strip()
            if line.startswith('9999.999999'):
                # Row of per-column missing value indicators.
                missing_value = line.split()
            if line.startswith('start_time'):
                names = line.split()
                # Data start on the line after the column name row.
                skiprows = line_number + 1
                break

    if names is None:
        raise ValueError(f"Unable to find the 'start_time' column row in {filename[0]}")
    if startdate is None:
        raise ValueError(f"Unable to find the 'Startdate:' entry in {filename[0]}")

    # Not every header has a missing value row; pandas accepts None.
    if missing_value is not None:
        missing_value = sorted(set(missing_value))

    # Read every file, then concatenate once instead of growing the
    # DataFrame file by file.
    frames = [
        pd.read_csv(
            f, names=names, skiprows=skiprows, delimiter=r'\s+', na_values=missing_value
        )
        for f in filename
    ]
    df = pd.concat(frames)

    # Create time variable and add as the coordinate. There is a start and
    # end time for each time step. Pick a value in the middle.
    ds = df.to_xarray()
    startdate = np.datetime64(datetime.strptime(startdate, '%Y%m%d%H%M%S'))
    startdate = startdate.astype('datetime64[s]')
    # start_time and end_time are scaled by 86400 to get seconds, which are
    # then added to Startdate.
    start_time = ds['start_time'].values * 24.0 * 60.0 * 60.0
    end_time = ds['end_time'].values * 24.0 * 60.0 * 60.0
    time = (end_time + start_time) / 2.0
    # Round the midpoint to the nearest 10 seconds.
    time = np.round(time / 10.0) * 10.0
    time = startdate + time.astype(int)
    ds = ds.assign_coords(index=time)
    ds = ds.rename(index='time')
    del ds['start_time']
    del ds['end_time']

    ds['lat'] = xr.DataArray(
        lat,
        attrs={
            'long_name': 'Latitude',
            'units': 'degree_north',
            'standard_name': 'latitude',
        },
    )
    ds['lon'] = xr.DataArray(
        lon,
        attrs={
            'long_name': 'Longitude',
            'units': 'degree_east',
            'standard_name': 'longitude',
        },
    )
    ds['alt'] = xr.DataArray(
        alt,
        attrs={
            'long_name': 'Altitude',
            'units': 'm',
            'standard_name': 'altitude',
        },
    )
    ds.attrs['height'] = height
    ds.attrs['Station_GAW-Name'] = station
    ds.attrs['Station_code'] = station_code
    ds.attrs['Station_GAW-ID'] = station_gaw_id
    ds.attrs['Matrix'] = matrix
    ds.attrs['Instrument type'] = instrument_type
    ds.attrs['Inlet type'] = inlet_type

    # Add attributes
    attrs = {
        'p_int': {'long_name': 'Atmospheric pressure', 'units': 'hPa'},
        'T_int': {'long_name': 'Atmospheric temperature', 'units': 'K'},
        'RH_int': {'long_name': 'Atmospheric relative humidity', 'units': '%'},
        'numflag': {'long_name': 'Numflag', 'units': '1'},
    }
    # Scattering and backscattering coefficients share one naming scheme:
    # <sc|bsc><wavelength>[pc16|pc84].
    for suffix, percentile in (
        ('', ''),
        ('pc16', ' percentile:15.87'),
        ('pc84', ' percentile:84.13'),
    ):
        for prefix, kind in (('sc', 'scattering'), ('bsc', 'backscattering')):
            for wavelength in (450, 550, 700):
                attrs[f'{prefix}{wavelength}{suffix}'] = {
                    'long_name': (
                        f'Aerosol light {kind} coefficient at {wavelength} nm{percentile}'
                    ),
                    'units': '1/Mm',
                }

    # Apply attributes to variables
    for v in ds:
        if v in attrs:
            ds[v].attrs = attrs[v]

    return ds