"""
Functions for removal of empty product-specific variables.
As a variable cannot be removed from a netCDF file, a new file
has to be created, with the option of removing the old one.
"""
import os
from netCDF4 import Dataset
import requests
import numpy as np
from typing import Union, Optional
from . import values
def get_json_from_github(
    url: str,
    timeout: float = 30.0,
) -> dict[str, dict[str, dict[str, Union[str, float]]]]:
    """
    Return the desired JSON file from
    https://github.com/ncasuk/AMF_CVs/tree/main/AMF_CVs

    The URL should be in the form
    https://raw.githubusercontent.com/ncasuk/AMF_CVs/main/AMF_CVs/___.json,
    otherwise a JSONDecodeError will be raised by the r.json() call.

    Args:
        url (str): URL of json file
        timeout (float): Optional. Seconds to wait for the server before
                         giving up. Default 30.

    Returns:
        dict: JSON data from URL

    Raises:
        requests.exceptions.Timeout: if the server does not respond within
                                     ``timeout`` seconds.
    """
    # Without a timeout, requests waits forever on a stalled connection.
    r = requests.get(url, timeout=timeout)
    # The HTTP status is deliberately not checked here, so a wrong URL still
    # surfaces as the JSONDecodeError described above.
    return r.json()
def main(
    infile: str,
    outfile: Optional[str] = None,
    overwrite: bool = True,
    verbose: int = 0,
    tag: str = "latest",
    skip_check: bool = False,
) -> None:
    """
    If a product-specific variable is empty, we want to remove it.
    However, removing a variable from a netCDF file is not possible,
    so we have to create a new file, and just not copy over the
    empty variable.

    A product-specific variable is treated as empty when either its
    ``valid_min`` attribute still holds the placeholder text
    ``"<derived from file>"``, or every element of its data is masked.

    Args:
        infile (str): File path and name of current netCDF file. The product
                      name is read from the fourth underscore-separated field
                      of the file name.
        outfile (str): Name of temporary netCDF file to create (or not so temporary,
                       see overwrite). If None, then a file with `tmp_` prepended
                       to the infile filename will be created in the same
                       directory as infile. Default None.
        overwrite (any): Optional. If truthy, outfile overwrites infile. If falsy,
                         both outfile and infile remain. Default True.
        verbose (any): Optional. If truthy, prints variables that are
                       being removed from infile. Default 0.
        tag (str): Optional. Tag release version of AMF_CVs being used. Passed to
                   get_product_variables_metadata function. Default "latest".
        skip_check (bool): Optional. Skip checking for product in AMF_CVs product json
                           file. Passed to get_product_variables_metadata function.
                           Default False.

    Raises:
        IndexError: if the infile name has fewer than four
                    underscore-separated fields.
    """
    # Context managers guarantee both files are closed even if the copy fails.
    with Dataset(infile, "r") as in_ncfile:
        infile_name = os.path.basename(infile)
        product = infile_name.split("_")[3]

        if outfile is None:
            # Keep the temporary file next to the input file; fall back to the
            # current directory when infile has no directory component.
            infile_dir = os.path.dirname(infile) or "."
            outfile = os.path.join(infile_dir, f"tmp_{infile_name}")

        product_vars, _ = get_product_variables_metadata(
            product, tag=tag, skip_check=skip_check
        )

        # Collect the product-specific variables that hold no real data.
        toexclude = []
        for name, variable in in_ncfile.variables.items():
            if name not in product_vars:
                continue
            if (
                "valid_min" in variable.ncattrs()
                and variable.valid_min == "<derived from file>"
            ):
                # valid_min was never filled in from actual data
                toexclude.append(name)
            elif np.all(variable[:].mask):
                # every value is masked
                toexclude.append(name)

        if verbose:
            print(f"empty variables being removed: {toexclude}")

        with Dataset(outfile, "w", format="NETCDF4_CLASSIC") as dst:
            # copy global attributes all at once via dictionary
            dst.setncatts(in_ncfile.__dict__)

            # copy dimensions
            # NOTE(review): dimensions are recreated with their current length,
            # so an unlimited dimension in infile would become fixed-size here.
            # Confirm this is intended.
            for name, dimension in in_ncfile.dimensions.items():
                dst.createDimension(name, len(dimension))

            # copy all file data except for the excluded
            for name, variable in in_ncfile.variables.items():
                if name in toexclude:
                    continue

                var_attrs = variable.__dict__
                # _FillValue has to be given when the variable is created,
                # so it is taken out of the attributes copied afterwards.
                fill_value = var_attrs.pop("_FillValue", None)

                chunking = variable.chunking()
                chunksizes = None if chunking == "contiguous" else chunking

                new_variable = dst.createVariable(
                    name,
                    variable.datatype,
                    variable.dimensions,
                    fill_value=fill_value,
                    chunksizes=chunksizes,
                )
                # copy variable attributes all at once via dictionary
                new_variable.setncatts(var_attrs)
                new_variable[:] = variable[:]

    if overwrite:
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename raises on Windows when infile already exists.
        os.replace(outfile, infile)