Source code for ncas_amof_netcdf_template.remove_empty_variables

"""
Functions for removal of empty product-specific variables.
As a variable cannot be removed from a netCDF file, a new file
has to be created, with the option of removing the old one.
"""

import os
from netCDF4 import Dataset
import requests
import numpy as np
from typing import Union, Optional
from . import values


def get_product_variables_metadata(
    product: str, skip_check: bool = False, tag: str = "latest"
) -> tuple[list[str], dict[str, dict[str, Union[str, float]]]]:
    """
    Look up the product-specific variables, and their metadata, for a product.

    `product` should be one of the entries in
    https://github.com/ncasuk/AMF_CVs/blob/main/AMF_CVs/AMF_product.json

    Args:
        product (str): Product describing the data from the instrument for
            the netCDF file.
        skip_check (bool): If True, do not verify that `product` is listed in
            the product json file. Default False.
        tag (str): Tagged release version of AMF_CVs to use. "latest" is
            resolved through `values.get_latest_CVs_version()`.

    Returns:
        list: Names of all product-specific variables.
        dict: Mapping of each variable name to its attributes.

    Raises:
        ValueError: If the check is enabled and `product` is not in the
            product list of the requested release.
    """
    release = values.get_latest_CVs_version() if tag == "latest" else tag

    if not skip_check:
        # Validate the product against the published list for this release.
        known_products = get_json_from_github(
            f"https://raw.githubusercontent.com/ncasuk/AMF_CVs/{release}/AMF_CVs/AMF_product.json"
        )["product"]
        if product not in known_products:
            raise ValueError(
                f"product {product} is not in "
                f"https://github.com/ncasuk/AMF_CVs/blob/{release}/AMF_CVs/AMF_product.json"
            )

    # The variable definitions live under a single product-named top-level key.
    variable_json = get_json_from_github(
        f"https://raw.githubusercontent.com/ncasuk/AMF_CVs/{release}/AMF_CVs/AMF_product_{product}_variable.json"
    )
    metadata = variable_json[f"product_{product}_variable"]
    return list(metadata), metadata
def get_json_from_github(
    url: str,
    timeout: Optional[float] = 30,
) -> dict[str, dict[str, dict[str, Union[str, float]]]]:
    """
    Fetch and decode a json file from
    https://github.com/ncasuk/AMF_CVs/tree/main/AMF_CVs

    URL should be in the form
    https://raw.githubusercontent.com/ncasuk/AMF_CVs/main/AMF_CVs/___.json,
    otherwise a JSONDecodeError will be raised by the r.json() call.

    Args:
        url (str): URL of json file.
        timeout (float): Optional. Seconds to wait for the server before
            giving up. None disables the timeout. Default 30.

    Returns:
        dict: JSON data from URL.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would hang any caller of this module.
    r = requests.get(url, timeout=timeout)
    return r.json()
def _find_empty_product_variables(nc_file: Dataset, product_vars: list[str]) -> list[str]:
    """Return names of product-specific variables in `nc_file` that hold no data."""
    candidates = set(product_vars)
    empty = []
    for name, variable in nc_file.variables.items():
        if name not in candidates:
            continue
        if (
            "valid_min" in variable.ncattrs()
            and variable.valid_min == "<derived from file>"
        ):
            # NOTE(review): presumably a template placeholder that was never
            # replaced with a real value - confirm against the file creator.
            empty.append(name)
        elif np.all(np.ma.getmask(variable[:])):
            # getmask() yields `nomask` for plain (unmasked) arrays instead of
            # raising AttributeError as a direct `.mask` access would.
            empty.append(name)
    return empty


def _copy_dataset_without(src: Dataset, dst: Dataset, excluded: list[str]) -> None:
    """Copy global attributes, dimensions and all non-excluded variables to `dst`."""
    # copy global attributes all at once via dictionary
    dst.setncatts(src.__dict__)

    # copy dimensions
    for name, dimension in src.dimensions.items():
        dst.createDimension(name, len(dimension))

    # copy all file data except for the excluded
    for name, variable in src.variables.items():
        if name in excluded:
            continue
        attrs = variable.__dict__
        # _FillValue has to be given when the variable is created, so it is
        # taken out of the attributes that are set afterwards.
        fill_value = attrs.pop("_FillValue", None)
        chunking = variable.chunking()
        new_variable = dst.createVariable(
            name,
            variable.datatype,
            variable.dimensions,
            fill_value=fill_value,
            chunksizes=None if chunking == "contiguous" else chunking,
        )
        # copy variable attributes all at once via dictionary
        new_variable.setncatts(attrs)
        new_variable[:] = variable[:]


def main(
    infile: str,
    outfile: Optional[str] = None,
    overwrite: bool = True,
    verbose: int = 0,
    tag: str = "latest",
    skip_check: bool = False,
) -> None:
    """
    If a product-specific variable is empty, we want to remove it. However,
    removing a variable from a netCDF file is not possible, so we have to
    create a new one, and just not copy over the empty variable.

    The product is taken from the fourth underscore-separated field of the
    infile filename.

    Args:
        infile (str): File path and name of current netCDF file.
        outfile (str): Name of temporary netCDF file to create (or not so
            temporary, see overwrite). If None, then a file with `tmp_`
            prepended to the infile filename will be created in the same
            directory as infile. Default None.
        overwrite (any): Optional. If truthy, outfile overwrites infile. If
            falsy, both outfile and infile remain. Default True.
        verbose (any): Optional. If truthy, prints variables that are being
            removed from infile. Default 0.
        tag (str): Optional. Tag release version of AMF_CVs being used. Passed
            to get_product_variables_metadata function. Default "latest".
        skip_check (bool): Optional. Skip checking for product in AMF_CVs
            product json file. Passed to get_product_variables_metadata
            function. Default False.

    Raises:
        IndexError: If the infile filename has fewer than four
            underscore-separated fields.
        ValueError: Raised by get_product_variables_metadata if the product
            is not a known product and skip_check is False.
    """
    # Context managers make sure both files are closed even if the metadata
    # lookup or the copy fails part way through.
    with Dataset(infile, "r") as in_ncfile:
        infile_name = os.path.basename(infile)
        product = infile_name.split("_")[3]
        if outfile is None:
            infile_dir = os.path.dirname(infile) or "."
            outfile = os.path.join(infile_dir, f"tmp_{infile_name}")

        product_vars, _ = get_product_variables_metadata(
            product, tag=tag, skip_check=skip_check
        )
        toexclude = _find_empty_product_variables(in_ncfile, product_vars)
        if verbose:
            print(f"empty variables being removed: {toexclude}")

        with Dataset(outfile, "w", format="NETCDF4_CLASSIC") as dst:
            _copy_dataset_without(in_ncfile, dst, toexclude)

    if overwrite:
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename fails on Windows when the target already exists.
        os.replace(outfile, infile)