{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Working with TAR and gzip files\n",
    "\n",
    "This is an example of how to use the TAR and gzip utilities\n",
    "for creating or extracting data files. The functions for creation\n",
    "and extraction can be called independently to manage the data\n",
    "files directly, or a TAR or gzip file can be passed to the\n",
    "netCDF reader, in which case the extraction happens automatically\n",
    "into a temporary area."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Import the test data registry used to fetch the example files\n",
    "from arm_test_data import DATASETS\n",
    "\n",
    "# Import ACT functions\n",
    "from act.io.arm import read_arm_netcdf\n",
    "from act.plotting import TimeSeriesDisplay\n",
    "from act.utils.io_utils import cleanup_files, pack_gzip, pack_tar, unpack_tar"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create a TAR file and read it directly\n",
    "\n",
    "Create a TAR file from multiple netCDF data files and pass the newly created\n",
    "TAR file into `read_arm_netcdf()` to be unpacked and read."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# MET data files to pack into a TAR bundle\n",
    "met_filenames = [\n",
    "    'sgpmetE13.b1.20190101.000000.cdf',\n",
    "    'sgpmetE13.b1.20190102.000000.cdf',\n",
    "    'sgpmetE13.b1.20190103.000000.cdf',\n",
    "    'sgpmetE13.b1.20190104.000000.cdf',\n",
    "    'sgpmetE13.b1.20190105.000000.cdf',\n",
    "    'sgpmetE13.b1.20190106.000000.cdf',\n",
    "    'sgpmetE13.b1.20190107.000000.cdf',\n",
    "]\n",
    "met_files = [Path(DATASETS.fetch(name)) for name in met_filenames]\n",
    "\n",
    "# We can pass the list of netCDF data files to the pack_tar() function.\n",
    "# Notice that the new_dir directory does not exist. The directory will\n",
    "# be created.\n",
    "new_dir = 'temporary_directory'\n",
    "tar_filename = pack_tar(met_files, write_directory=new_dir)\n",
    "\n",
    "print('Created TAR file: ', tar_filename)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Read the data within the TAR file\n",
    "ds = read_arm_netcdf(tar_filename)\n",
    "\n",
    "# Create a plotting display object. It is deliberately not named `display`,\n",
    "# which would shadow the display() function IPython provides in notebooks.\n",
    "temp_display = TimeSeriesDisplay(ds, figsize=(15, 10), subplot_shape=(1,))\n",
    "\n",
    "# Plot the 'temp_mean' variable in the first plot\n",
    "variable = 'temp_mean'\n",
    "temp_display.plot(variable, subplot_index=(0,), day_night_background=True)\n",
    "\n",
    "plt.show()\n",
    "del ds"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compress the TAR file with gzip and read it\n",
    "\n",
    "Create a gzip file from the TAR file containing multiple netCDF data files and\n",
    "pass the newly created gzip file into `read_arm_netcdf()` to be unpacked and read."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Pass the TAR filename into pack_gzip(). Have the function remove the TAR\n",
    "# file after creating the gzip file.\n",
    "gzip_filename = pack_gzip(tar_filename, write_directory=new_dir, remove=True)\n",
    "\n",
    "print('New gunzip file: ', gzip_filename)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Read the data within the gzipped TAR file\n",
    "ds = read_arm_netcdf(gzip_filename)\n",
    "\n",
    "# Create a plotting display object\n",
    "rh_display = TimeSeriesDisplay(ds, figsize=(15, 10), subplot_shape=(1,))\n",
    "\n",
    "# Plot the 'rh_mean' variable in the first plot\n",
    "variable = 'rh_mean'\n",
    "rh_display.plot(variable, subplot_index=(0,), day_night_background=True)\n",
    "\n",
    "plt.show()\n",
    "\n",
    "# Drop the dataset reference, as was done after the first plot\n",
    "del ds\n",
    "\n",
    "# Remove the gzip file now that it has been read\n",
    "Path(gzip_filename).unlink()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Unpack once, read many times\n",
    "\n",
    "When a TAR file is read often, it is more efficient to untar it once and point\n",
    "the reader to the extracted files, then clean up the directory when all of the\n",
    "reads are done."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "tar_file = pack_tar(met_files, write_directory=new_dir)\n",
    "\n",
    "# This will unpack the TAR file to a new directory created with a random name to\n",
    "# ensure multiple simultaneous uses do not collide. The full path to all\n",
    "# extracted filenames will be returned.\n",
    "filenames = unpack_tar(\n",
    "    tar_file, write_directory=new_dir, randomize=True, return_files=True, remove=True\n",
    ")\n",
    "\n",
    "# Print the extracted filenames\n",
    "print('Extracted filenames: ', filenames)\n",
    "\n",
    "# Print a list of filenames and directories in the new directory\n",
    "print('LS of temporary directory:', list(Path(new_dir).glob('*')))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# After the extracted files are read for the last time we can clean up the directory.\n",
    "cleanup_files(files=filenames)\n",
    "\n",
    "# Print a list of filenames and directories in the new directory\n",
    "print('LS of temporary directory:', list(Path(new_dir).glob('*')))\n",
    "\n",
    "# Remove the temporary directory we created. rmdir() only succeeds if the\n",
    "# directory is empty at this point.\n",
    "Path(new_dir).rmdir()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.14.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}