# Source code for pycldf.cli_util

"""
Functionality to use in commandline tools which need to access CLDF datasets.
"""
import argparse

from clldutils.clilib import PathType, ParserError
from csvw.utils import is_url
import requests

from pycldf import Dataset, Database
from pycldf.ext import discovery

# Public names of this module, i.e. what `from pycldf.cli_util import *` exports.
__all__ = [
    'add_dataset', 'get_dataset',
    'UrlOrPathType', 'FlagOrPathType', 'strtobool',
    'add_database', 'get_database',
    'add_catalog_spec',
]


#
# Copied from distutils.util - because we don't want to deal with deprecation warnings.
#
def strtobool(val: str) -> int:  # pragma: no cover
    """Convert a string representation of truth to true (1) or false (0).

    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
    are 'n', 'no', 'f', 'false', 'off', and '0'. Matching is case-insensitive.

    :param val: The string to interpret.
    :return: `1` for a true value, `0` for a false value.
    :raises ValueError: If `val` is none of the recognized values; the message contains the \
    lower-cased value.
    """
    # The lower-cased value is deliberately what ends up in the error message, too.
    val = val.lower()
    if val in ('y', 'yes', 't', 'true', 'on', '1'):
        return 1
    if val in ('n', 'no', 'f', 'false', 'off', '0'):
        return 0
    raise ValueError("invalid truth value %r" % (val,))


class FlagOrPathType(PathType):
    """
    Argument type which accepts either a truth value or a path.

    Strings recognized by `strtobool` (e.g. 'yes', 'off', '1') are converted to `bool`; any other
    string is handed on to `PathType` for regular path handling.
    """
    def __call__(self, string):
        try:
            return bool(strtobool(string))
        except ValueError:
            # Not a recognized truth value, so treat the argument as a path.
            return super().__call__(string)


class UrlOrPathType(PathType):
    """
    Argument type which accepts either a URL or a local path.

    The argument is always returned as the unchanged string:

    - For URLs (as detected by `csvw.utils.is_url`), an HTTP HEAD request checks that the URL \
    resolves, but only if `self._must_exist` is set.
    - For anything else, the part before the first `#` is passed to `PathType` for validation.

    :raises argparse.ArgumentTypeError: If the URL cannot be reached or responds with an \
    unexpected HTTP status.
    """
    # Seconds to wait for the HEAD request. `requests` applies no timeout by default, so a
    # non-responding server would otherwise block the command line tool indefinitely.
    _url_timeout = 10

    def __call__(self, string):
        if not is_url(string):
            # Only the part before a fragment identifier is validated as path, but the full
            # string (including the fragment) is passed on.
            super().__call__(string.partition('#')[0])
            return string

        if self._must_exist:
            try:
                sc = requests.head(string, timeout=self._url_timeout).status_code
            except requests.RequestException as e:
                # Report network problems as a regular argument error rather than a traceback.
                raise argparse.ArgumentTypeError(
                    'URL {} could not be reached [{}]!'.format(string, e)) from e
            # We accept not only HTTP 200 as valid but also common redirection codes because
            # these are used e.g. for DOIs.
            if sc not in {200, 301, 302}:
                raise argparse.ArgumentTypeError(
                    'URL {} does not exist [HTTP {}]!'.format(string, sc))
        return string


def add_dataset(parser: argparse.ArgumentParser):
    """
    Adds a positional argument named `dataset` to the parser to specify a CLDF dataset.

    Additionally, an option `--download-dir` is registered, which accepts a directory
    and defaults to `None`.

    :param parser: The parser to which the arguments are added.
    """
    parser.add_argument(
        'dataset',
        metavar='DATASET',
        help="Dataset locator (i.e. URL or path to a CLDF metadata file or to the data file). "
             "Resolving dataset locators like DOI URLs might require installation of third-party "
             "packages, registering such functionality using the `pycldf_dataset_resolver` "
             "entry point.",
        type=UrlOrPathType(),
    )
    parser.add_argument(
        '--download-dir',
        type=PathType(type='dir'),
        help='An existing directory to use for downloading a dataset (if necessary).',
        default=None,
    )


def get_dataset(args: argparse.Namespace) -> Dataset:
    """
    Uses the dataset specification in `args` to return a corresponding `Dataset` instance.

    :param args: Parsed arguments providing the attributes `dataset` and `download_dir`.
    :return: Whatever `pycldf.ext.discovery.get_dataset` returns for the locator.
    :raises ParserError: If dataset discovery fails with a `TypeError` mentioning `PathLike`.
    """
    try:
        return discovery.get_dataset(args.dataset, download_dir=args.download_dir)
    except TypeError as e:  # pragma: no cover
        # NOTE(review): presumably this is the error raised when `download_dir` is `None` but a
        # directory is needed - confirm against `discovery.get_dataset`.
        if 'PathLike' in str(e):
            raise ParserError(
                'The dataset locator may require downloading, so you should specify --download-dir'
            ) from e
        raise


def add_database(parser, must_exist=True):
    """
    Adds the arguments needed to specify a CLDF dataset and an associated SQLite database.

    On top of the arguments registered by `add_dataset`, a positional argument `db` and a
    flag `--infer-primary-keys` are added.

    :param parser: The parser to which the arguments are added.
    :param must_exist: Passed to the `PathType` used for the `db` argument.
    """
    add_dataset(parser)
    db_path_type = PathType(type='file', must_exist=must_exist)
    parser.add_argument(
        'db', metavar='SQLITE_DB_PATH', help='Path to the SQLite db file', type=db_path_type)
    parser.add_argument('--infer-primary-keys', default=False, action='store_true')


def get_database(args):
    """
    Uses the specification in `args` to return a corresponding `Database` instance.

    :param args: Parsed arguments providing `db` and `infer_primary_keys`, in addition to the \
    attributes read by `get_dataset`.
    """
    dataset = get_dataset(args)
    return Database(dataset, fname=args.db, infer_primary_keys=args.infer_primary_keys)


def add_catalog_spec(parser, name):
    """
    Adds the options `--<name>` and `--<name>-version` to the parser.

    :param parser: The parser to which the options are added.
    :param name: Name of the catalog; used as option name, upper-cased as metavar and \
    capitalized in the help texts.
    """
    label = name.capitalize()
    parser.add_argument(
        '--' + name,
        type=PathType(type='dir'),
        metavar=name.upper(),
        help='Path to repository clone of {0} data'.format(label),
    )
    parser.add_argument(
        '--{0}-version'.format(name),
        default=None,
        help='Version of {0} data to checkout'.format(label),
    )