# Source code for ncas_amof_netcdf_template.tsv2dict

"""
Take tsv files and return data as dictionaries
useful for creating netCDF files.

"""

import pandas as pd
import re
import requests
import os
import warnings
from typing import Union, Optional

from . import values


def tsv2dict_vars(tsv_file: str) -> dict[str, dict[str, Union[str, float]]]:
    """
    For a given tsv file from
    https://github.com/ncasuk/AMF_CVs/tree/main/product-definitions/tsv
    for data variables, return dictionary of variables and their attributes.

    The table is read row by row. A row with a non-empty "Variable" cell
    starts a new variable; every row with a non-empty "Attribute" cell adds
    that attribute to the most recently started variable. When the "Value"
    cell is empty and the table has a non-empty "example value" cell for
    that row, the attribute is set to ``"EXAMPLE: <example value>"``.

    Attribute rows that appear before any variable has been named are
    ignored, and a table without any variables yields an empty dictionary.

    Args:
        tsv_file (str): URL or path to location of tsv file

    Returns:
        dictionary of variables and attributes
    """
    # Missing cells become "" so every emptiness test below is a plain
    # string comparison.
    df_vars = pd.read_csv(tsv_file, sep="\t").fillna("")
    # Not every table carries an "example value" column; check once.
    has_examples = "example value" in df_vars.columns

    all_vars_dict = {}
    current_var_dict = None  # attributes of the variable being filled in
    for _, current_line in df_vars.iterrows():
        if current_line["Variable"] != "":
            # Register the variable as soon as it is named so no trailing
            # "flush" is needed (that flush created a bogus "" entry for
            # tables without variables).
            current_var_dict = {}
            all_vars_dict[current_line["Variable"]] = current_var_dict

        attribute = current_line["Attribute"]
        if attribute == "" or current_var_dict is None:
            continue

        value = current_line["Value"]
        if value == "" and has_examples and current_line["example value"] != "":
            value = f"EXAMPLE: {current_line['example value']}"
        current_var_dict[attribute] = value

    return all_vars_dict
def tsv2dict_dims(tsv_file: str) -> dict[str, dict[str, str]]:
    """
    Read a dimensions tsv file from
    https://github.com/ncasuk/AMF_CVs/tree/main/product-definitions/tsv
    and return its rows keyed by dimension name.

    Each row becomes one entry: the "Name" cell is the key and the remaining
    cells of that row, keyed by column header, are the value. Missing cells
    are returned as empty strings.

    Args:
        tsv_file (str): URL to location of tsv file

    Returns:
        dictionary of dimensions and info
    """
    table = pd.read_csv(tsv_file, sep="\t").fillna("")

    dims_by_name = {}
    for _, row in table.iterrows():
        details = row.to_dict()
        # "Name" is the key, so it is removed from the per-dimension info.
        name = details.pop("Name")
        dims_by_name[name] = details
    return dims_by_name
def tsv2dict_attrs(tsv_file: str) -> dict[str, dict[str, str]]:
    """
    Read a global-attributes tsv file from
    https://github.com/ncasuk/AMF_CVs/tree/main/product-definitions/tsv
    and return its rows keyed by attribute name.

    Each row becomes one entry: the "Name" cell is the key and the remaining
    cells of that row, keyed by column header, are the value. Missing cells
    are returned as empty strings.

    Args:
        tsv_file (str): URL to location of tsv file

    Returns:
        dictionary of global attributes and associated values and info
    """
    table = pd.read_csv(tsv_file, sep="\t").fillna("")

    attrs_by_name = {}
    for _, row in table.iterrows():
        attr_info = row.to_dict()
        # "Name" is the key, so it is removed from the per-attribute info.
        attr_name = attr_info.pop("Name")
        attrs_by_name[attr_name] = attr_info
    return attrs_by_name
def tsv2dict_instruments(tsv_file: str) -> dict[str, dict[str, str]]:
    """
    Read an ncas- or community-instruments tsv file from
    https://github.com/ncasuk/AMF_CVs/tree/main/product-definitions/tsv
    and return its rows keyed by instrument name.

    For every row the "New Instrument Name" cell is used as the key and is
    also stored in the entry under "instrument_name". The "Data Product(s)"
    cell is split on commas, spaces and "|" into a list of product names,
    with empty fragments discarded.

    Args:
        tsv_file (str): URL to location of tsv file

    Returns:
        dictionary of instruments and associated information
    """
    catalogue = pd.read_csv(tsv_file, sep="\t").fillna("")

    instruments = {}
    for _, row in catalogue.iterrows():
        record = row.to_dict()
        name = record.pop("New Instrument Name")
        record["instrument_name"] = name
        # Products may be separated by any mix of ",", " " and "|";
        # consecutive separators produce empty strings that are dropped.
        fragments = re.split(r",| |\|", record["Data Product(s)"])
        record["Data Product(s)"] = [piece for piece in fragments if piece]
        instruments[name] = record
    return instruments
def create_variables_tsv_url(
    product: str, use_local_files: Optional[str] = None, tag: str = "latest"
) -> str:
    """
    Build the location of the product-specific variables tsv file.

    Args:
        product (str): data product
        use_local_files (str or None): path to local directory where tsv
                      files are stored. If None (or empty), the GitHub
                      location is used. Default None.
        tag (str): tagged release or branch of the definitions, or 'latest'
                      to look up the most recent release. Ignored when
                      use_local_files is given. Default "latest".

    Return:
        URL, or local path when use_local_files is given
    """
    if not use_local_files:
        # 'latest' is resolved to a concrete version before building the URL.
        release = values.get_latest_CVs_version() if tag == "latest" else tag
        base = (
            f"https://raw.githubusercontent.com/ncasuk/AMF_CVs/{release}"
            "/product-definitions/tsv"
        )
    else:
        base = use_local_files
    return f"{base}/{product}/variables-specific.tsv"
def create_dimensions_tsv_url(
    product: str, use_local_files: Optional[str] = None, tag: str = "latest"
) -> str:
    """
    Build the location of the product-specific dimensions tsv file.

    Args:
        product (str): data product
        use_local_files (str or None): path to local directory where tsv
                      files are stored. If None (or empty), the GitHub
                      location is used. Default None.
        tag (str): tagged release or branch of the definitions, or 'latest'
                      to look up the most recent release. Ignored when
                      use_local_files is given. Default "latest".

    Return:
        URL, or local path when use_local_files is given
    """
    if not use_local_files:
        # 'latest' is resolved to a concrete version before building the URL.
        release = values.get_latest_CVs_version() if tag == "latest" else tag
        base = (
            f"https://raw.githubusercontent.com/ncasuk/AMF_CVs/{release}"
            "/product-definitions/tsv"
        )
    else:
        base = use_local_files
    return f"{base}/{product}/dimensions-specific.tsv"
def create_attributes_tsv_url(
    product: str, use_local_files: Optional[str] = None, tag: str = "latest"
) -> str:
    """
    Build the location of the product-specific global attributes tsv file.

    Args:
        product (str): data product
        use_local_files (str or None): path to local directory where tsv
                      files are stored. If None (or empty), the GitHub
                      location is used. Default None.
        tag (str): tagged release or branch of the definitions, or 'latest'
                      to look up the most recent release. Ignored when
                      use_local_files is given. Default "latest".

    Return:
        URL, or local path when use_local_files is given
    """
    if not use_local_files:
        # 'latest' is resolved to a concrete version before building the URL.
        release = values.get_latest_CVs_version() if tag == "latest" else tag
        base = (
            f"https://raw.githubusercontent.com/ncasuk/AMF_CVs/{release}"
            "/product-definitions/tsv"
        )
    else:
        base = use_local_files
    return f"{base}/{product}/global-attributes-specific.tsv"
def instrument_dict(
    desired_instrument: str,
    loc: str = "land",
    use_local_files: Optional[str] = None,
    tag: str = "latest",
) -> dict[str, Union[str, list[str], dict[str, dict[str, str]]]]:
    """
    Collect all variables, dimensions and attributes required for all data
    products associated with an instrument and deployment mode.

    The returned dictionary has an "info" entry (the instrument's row from
    the instruments table), a "common" entry, and one entry per data product
    listed for the instrument. "common" and every product entry each hold
    "attributes", "dimensions" and "variables" dictionaries; a product's
    dictionary is left empty when the corresponding product-specific tsv
    file does not exist.

    Args:
        desired_instrument (str): name of instrument
        loc (str): deployment mode, one of 'land', 'sea', 'air', or
                      'trajectory'. Default 'land'.
        use_local_files (str or None): path to local directory where tsv
                      files are stored. If None (or empty), read from online.
                      Default None.
        tag (str): tagged release of definitions, or 'latest' to get most
                      recent release. Ignored if use_local_files is given.
                      Default "latest".

    Returns:
        dictionary of all attributes, dimensions and variables associated
        with the named instrument.

    Raises:
        KeyError: if the instrument is in neither the NCAS nor the community
                      instruments table.
    """

    def _tsv_available(location: str) -> bool:
        # Product-specific files are optional: check the local file system,
        # or probe the URL and treat anything but HTTP 200 as "not there".
        if use_local_files:
            return os.path.isfile(location)
        # A timeout keeps an unresponsive server from blocking forever.
        return requests.get(location, timeout=30).status_code == 200

    common_dimensions_url = values.get_common_dimensions_url(
        use_local_files=use_local_files, tag=tag, loc=loc
    )
    common_variables_url = values.get_common_variables_url(
        use_local_files=use_local_files, tag=tag, loc=loc
    )

    # NCAS instruments take precedence; the community table is only read
    # when the instrument is not found there.
    main_instruments = tsv2dict_instruments(
        values.get_instruments_url(use_local_files=use_local_files, tag=tag)
    )
    if desired_instrument in main_instruments:
        info = main_instruments[desired_instrument]
    else:
        community_instruments = tsv2dict_instruments(
            values.get_community_instruments_url(
                use_local_files=use_local_files, tag=tag
            )
        )
        if desired_instrument not in community_instruments:
            raise KeyError(
                f"Instrument {desired_instrument!r} not found in NCAS or "
                "community instrument definitions"
            )
        info = community_instruments[desired_instrument]

    all_info = {"info": info}

    # Add common stuff
    all_info["common"] = {
        "attributes": tsv2dict_attrs(
            values.get_common_attributes_url(use_local_files=use_local_files, tag=tag)
        ),
        "dimensions": tsv2dict_dims(common_dimensions_url),
        "variables": tsv2dict_vars(common_variables_url),
    }

    # (section name, location builder, tsv parser) for product-specific files
    sections = (
        ("attributes", create_attributes_tsv_url, tsv2dict_attrs),
        ("dimensions", create_dimensions_tsv_url, tsv2dict_dims),
        ("variables", create_variables_tsv_url, tsv2dict_vars),
    )

    # Add stuff for each product of instrument if specifics exist
    for product in info["Data Product(s)"]:
        product_entry = {"attributes": {}, "dimensions": {}, "variables": {}}
        all_info[product] = product_entry
        for section, build_location, parse_tsv in sections:
            location = build_location(
                product, use_local_files=use_local_files, tag=tag
            )
            if _tsv_available(location):
                product_entry[section] = parse_tsv(location)

    return all_info
def product_dict(
    desired_product: str,
    platform: str = "",
    deployment_loc: str = "land",
    use_local_files: Optional[str] = None,
    tag: str = "latest",
) -> dict[str, dict[str, Union[str, dict[str, dict[str, Union[str, float]]]]]]:
    """
    Collect all variables, dimensions and attributes required for a data
    product and deployment mode.

    The returned dictionary has a "common" entry and an entry named after
    the product, each holding "attributes", "dimensions" and "variables"
    dictionaries (a product dictionary is left empty when the corresponding
    product-specific tsv file does not exist), plus an "info" entry with the
    platform and "CHANGE: ..." placeholder texts for instrument details.

    Args:
        desired_product (str): name of data product
        platform (str): location or observatory of instrument
        deployment_loc (str): deployment mode, one of 'land', 'sea', 'air',
                      or 'trajectory'. Default 'land'.
        use_local_files (str or None): path to local directory where tsv
                      files are stored. If None (or empty), read from online.
                      Default None.
        tag (str): tagged release of definitions, or 'latest' to get most
                      recent release. Ignored if use_local_files is given.
                      Default "latest".

    Returns:
        dictionary of all attributes, dimensions and variables associated
        with the named data product.
    """

    def _tsv_available(location: str) -> bool:
        # Product-specific files are optional: check the local file system,
        # or probe the URL and treat anything but HTTP 200 as "not there".
        if use_local_files:
            return os.path.isfile(location)
        # A timeout keeps an unresponsive server from blocking forever.
        return requests.get(location, timeout=30).status_code == 200

    common_dimensions_url = values.get_common_dimensions_url(
        use_local_files=use_local_files, tag=tag, loc=deployment_loc
    )
    common_variables_url = values.get_common_variables_url(
        use_local_files=use_local_files, tag=tag, loc=deployment_loc
    )

    # Add common stuff
    all_info = {
        "common": {
            "attributes": tsv2dict_attrs(
                values.get_common_attributes_url(
                    use_local_files=use_local_files, tag=tag
                )
            ),
            "dimensions": tsv2dict_dims(common_dimensions_url),
            "variables": tsv2dict_vars(common_variables_url),
        }
    }

    # (section name, location builder, tsv parser) for product-specific files
    sections = (
        ("attributes", create_attributes_tsv_url, tsv2dict_attrs),
        ("dimensions", create_dimensions_tsv_url, tsv2dict_dims),
        ("variables", create_variables_tsv_url, tsv2dict_vars),
    )

    # Add product-specific stuff if specifics exist
    product_entry = {"attributes": {}, "dimensions": {}, "variables": {}}
    all_info[desired_product] = product_entry
    for section, build_location, parse_tsv in sections:
        location = build_location(
            desired_product, use_local_files=use_local_files, tag=tag
        )
        if _tsv_available(location):
            product_entry[section] = parse_tsv(location)

    # Add basic info bits; the "CHANGE: ..." texts are placeholders.
    all_info["info"] = {
        "Mobile/Fixed (loc)": platform,
        "Manufacturer": (
            "CHANGE: Manufacturer of instrument and key sub components."
            " String: min 2 characters."
        ),
        "Model No.": (
            "CHANGE: Model number of instrument and key sub components."
            " String: min 3 characters"
        ),
        "Serial Number": (
            "CHANGE: Serial number of instrument and key sub components."
            " String: min 3 characters."
        ),
        "Descriptor": "CHANGE: Descripton of instrument.",
    }

    return all_info
def list_all_products(use_local_files: Optional[str] = None, tag: str = "latest"):
    """
    Return list of all available data products.

    The names are taken from the "Data Product" column of the data products
    tsv file whose location is supplied by ``values.get_all_data_products_url``.

    Args:
        use_local_files (str or None): path to local directory where tsv
                      files are stored. If "None", read from online.
                      Default None.
        tag (str): tagged release of definitions, or 'latest' to get most
                      recent release. Ignored if use_local_files is not None.
                      Default "latest".

    Returns:
        list of data product names
    """
    products_table = pd.read_csv(
        values.get_all_data_products_url(use_local_files=use_local_files, tag=tag),
        sep="\t",
    )
    return products_table["Data Product"].tolist()
if __name__ == "__main__":
    import pprint
    import sys

    # The instrument name is the single required command-line argument;
    # exit with a usage message instead of an IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} INSTRUMENT_NAME")

    desired_instrument = sys.argv[1]
    instrument = instrument_dict(desired_instrument)

    # This bit just makes the print look pretty, a standard print would
    # also work.
    pprint.pprint(instrument, indent=2, width=200)