# Source code for pycldf.cli_util

"""
Functionality to use in commandline tools which need to access CLDF datasets.
"""
import argparse
import urllib.request

from clldutils.clilib import PathType, ParserError
from csvw.utils import is_url

from pycldf import Dataset, Database
from pycldf.ext import discovery

__all__ = [
    'add_dataset', 'get_dataset',
    'UrlOrPathType', 'FlagOrPathType', 'strtobool',
    'add_database', 'get_database',
    'add_catalog_spec',
]


#
# Copied from distutils.util - because we don't want to deal with deprecation warnings.
#
def strtobool(val: str) -> int:  # pragma: no cover
    """
    Convert a string representation of truth to true (1) or false (0).

    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values are 'n', 'no', 'f',
    'false', 'off', and '0'. The comparison is case-insensitive.

    :param val: The string to interpret.
    :return: `1` for a true value, `0` for a false value.
    :raises ValueError: If `val` is anything else.
    """
    # Normalise once so that e.g. "Yes" and "TRUE" are accepted as well.
    val = val.lower()
    if val in ('y', 'yes', 't', 'true', 'on', '1'):
        return 1
    if val in ('n', 'no', 'f', 'false', 'off', '0'):
        return 0
    raise ValueError(f"invalid truth value {val}")


class FlagOrPathType(PathType):  # pylint: disable=too-few-public-methods
    """
    Argument type allowing input of a path or a boolean.

    The boolean can be used to determine whether to download a file from a known location.

    Strings recognised by `strtobool` (e.g. `yes`, `0`, `on`) are always returned as `bool`;
    only strings rejected by `strtobool` are handed on to `PathType` for path handling.
    """
    def __call__(self, string):
        # The boolean interpretation takes precedence; `ValueError` signals "not a flag".
        try:
            return bool(strtobool(string))
        except ValueError:
            return super().__call__(string)


def http_head_status(url: str) -> int:  # pragma: no cover
    """
    Do a HEAD request for `url` to determine its status.

    Redirects are not followed: the status of the first response is reported.

    :param url: The URL to probe.
    :return: The status of the response to the HEAD request.
    """
    class NoRedirection(urllib.request.HTTPErrorProcessor):
        """Don't follow redirects."""
        def http_response(self, request, response):
            # Hand every response back untouched instead of passing non-2xx responses on
            # to the opener's error handling.
            return response

        https_response = http_response

    opener = urllib.request.build_opener(NoRedirection)
    # Close the response explicitly rather than leaving the open connection to the
    # garbage collector.
    with opener.open(urllib.request.Request(url, method="HEAD")) as response:
        return response.status


class UrlOrPathType(PathType):  # pylint: disable=too-few-public-methods
    """
    Type suitable for argparse arguments, allowing input of URL or local file path.

    In both cases the input string itself is returned unchanged.
    """
    def __call__(self, string: str) -> str:
        """
        Validate `string` as URL or local path and return it unchanged.

        :raises argparse.ArgumentTypeError: If `string` is a URL which must exist, but the \
        HEAD request for it yields a status other than 200, 301 or 302.
        """
        if is_url(string):
            if self._must_exist:
                sc = http_head_status(string)
                # We accept not only HTTP 200 as valid but also common redirection codes because
                # these are used e.g. for DOIs.
                if sc not in {200, 301, 302}:
                    raise argparse.ArgumentTypeError(
                        f'URL {string} does not exist [HTTP {sc}]!')
            return string
        # Only the part before a `#` is validated as path by the parent class; the result of
        # that call is discarded and the full input string is returned.
        super().__call__(string.partition('#')[0])
        return string


def add_dataset(parser: argparse.ArgumentParser) -> None:
    """
    Adds a positional argument named `dataset` to the parser to specify a CLDF dataset.

    Additionally, an option `--download-dir` is added, defaulting to `None`.

    :param parser: The parser to which the arguments are added.
    """
    parser.add_argument(
        'dataset',
        metavar='DATASET',
        help="Dataset locator (i.e. URL or path to a CLDF metadata file or to the data file). "
             "Resolving dataset locators like DOI URLs might require installation of third-party "
             "packages, registering such functionality using the `pycldf_dataset_resolver` "
             "entry point.",
        type=UrlOrPathType(),
    )
    parser.add_argument(
        '--download-dir',
        type=PathType(type='dir'),
        help='An existing directory to use for downloading a dataset (if necessary).',
        default=None,
    )


def get_dataset(args: argparse.Namespace) -> Dataset:
    """
    Uses the dataset specification in `args` to return a corresponding `Dataset` instance.

    :param args: Parsed CLI arguments, providing the attributes `dataset` and `download_dir`.
    :raises ParserError: If dataset discovery fails with a `TypeError` mentioning `PathLike`.
    """
    try:
        return discovery.get_dataset(args.dataset, download_dir=args.download_dir)
    except TypeError as e:  # pragma: no cover
        # A `TypeError` mentioning `PathLike` is reported as missing `--download-dir`;
        # any other `TypeError` is re-raised unchanged.
        if 'PathLike' in str(e):
            raise ParserError(
                'The dataset locator may require downloading, so you should specify --download-dir'
            ) from e
        raise


def add_database(parser: argparse.ArgumentParser, must_exist: bool = True) -> None:
    """
    Add CLI arguments to specify a CLDF SQLite database.

    Retrieve in the `run` function of a command using `get_database` (see below).

    :param parser: The parser to which the arguments are added.
    :param must_exist: Passed to `PathType` as `must_exist` for the `db` argument.
    """
    # A database is always specified together with the dataset it belongs to.
    add_dataset(parser)
    parser.add_argument(
        'db',
        metavar='SQLITE_DB_PATH',
        help='Path to the SQLite db file',
        type=PathType(type='file', must_exist=must_exist),
    )
    parser.add_argument('--infer-primary-keys', action='store_true', default=False)


def get_database(args: argparse.Namespace) -> Database:
    """
    Retrieve a `Database` instance based on CLI input in `args` (see `add_database`).

    :param args: Parsed CLI arguments, providing the attributes `db` and \
    `infer_primary_keys` in addition to the ones read by `get_dataset`.
    """
    return Database(get_dataset(args), fname=args.db, infer_primary_keys=args.infer_primary_keys)


def add_catalog_spec(parser: argparse.ArgumentParser, name: str) -> None:
    """
    Add CLI arguments suitable to specify a catalog.

    Two options are added: `--<name>` for the path of a repository clone and
    `--<name>-version` for the version to check out.

    :param parser: The parser to which the arguments are added.
    :param name: Name of the catalog, used to derive option names, metavar and help texts.
    """
    parser.add_argument(
        '--' + name,
        metavar=name.upper(),
        type=PathType(type='dir'),
        help=f'Path to repository clone of {name.capitalize()} data')
    parser.add_argument(
        f'--{name}-version',
        help=f'Version of {name.capitalize()} data to checkout',
        default=None)