Source code for pycldf.ext.markdown

"""
This module provides tools to build a CLDF Markdown renderer.

For an example, see :class:`FilenameToComponent`.
"""
import re
import typing
import pathlib
import warnings
import collections.abc

import yaml
import jmespath
import attr
import frontmatter
import clldutils
from clldutils.markup import MarkdownLink

from .discovery import get_dataset
from pycldf.util import pkg_path, url_without_fragment
from pycldf.dataset import MD_SUFFIX
from pycldf.sources import Source
from pycldf import Dataset
from pycldf import orm

__all__ = ['CLDFMarkdownLink', 'CLDFMarkdownText', 'FilenameToComponent']

#: The YAML frontmatter key to specify dataset mappings:
DATASETS_MAPPING = 'cldf-datasets'
#: Name of the special "Source" component; `CLDFMarkdownText` caches the sources of each mapped
#: dataset (keyed by source id) under this name.
SOURCE_COMPONENT = 'Source'
#: Name of the special "Metadata" component; `CLDFMarkdownText` caches the dataset's table group
#: description (as `dict`) under this name.
METADATA_COMPONENT = 'Metadata'


class DatasetMapping(collections.abc.Mapping):
    """
    A read-only mapping of prefixes to datasets.

    A prefix is either `None` or a string fully matching :attr:`key_pattern`. Values which are
    not `Dataset` instances already are treated as dataset locators and resolved via
    `get_dataset` when the mapping is created.
    """
    key_pattern = re.compile('[a-zA-Z0-9_]+')

    @staticmethod
    def to_dict(o):
        """
        Normalize a dataset mapping specification to a prefix -> dataset (locator) mapping.

        :param o: A `DatasetMapping`, a falsy value (e.g. `None`), a single `str` or `Dataset`, \
        or a `dict`.
        :return: The `dict` backing `o` for a `DatasetMapping`, an empty `dict` for falsy input, \
        `{None: o}` for a single `str` or `Dataset`, and `o` itself otherwise. Note that the \
        result may thus be an object owned by the caller - copy before modifying it.
        """
        if isinstance(o, DatasetMapping):
            return o.m
        if not o:
            return {}
        if isinstance(o, (str, Dataset)):
            return {None: o}
        return o

    def __init__(self,
                 m1,
                 m2=None,
                 doc_path: typing.Optional[pathlib.Path] = None,
                 download_dir: typing.Optional[pathlib.Path] = None):
        """
        :param m1: Mapping of prefixes to datasets (locators).
        :param m2: Mapping of prefixes to datasets (locators) to update `m1`.
        :param doc_path: Path of a CLDF markdown document, relative to which dataset locators are \
        to be resolved.
        :param download_dir: Path to an existing directory to which to download datasets \
        (if necessary).
        :raises ValueError: If a prefix is neither `None` nor a string matching `key_pattern`.
        """
        # Merge into a fresh dict: `to_dict` may hand back the caller's own dict (or the dict
        # backing another `DatasetMapping`). That object must neither be extended with `m2` nor
        # have its locators replaced with `Dataset` objects below.
        self.m = dict(self.to_dict(m1))
        self.m.update(self.to_dict(m2))

        for prefix in self.m:
            if prefix is None:
                continue
            # Non-string keys (e.g. integers read from YAML) are invalid prefixes, too; checking
            # the type first keeps `fullmatch` from failing with an unrelated `TypeError`.
            if not (isinstance(prefix, str) and DatasetMapping.key_pattern.fullmatch(prefix)):
                raise ValueError('Invalid dataset prefix')

        # Only existing keys are re-assigned, so iterating over a snapshot is purely defensive.
        for prefix, locator in list(self.m.items()):
            if not isinstance(locator, Dataset):
                self.m[prefix] = get_dataset(locator, download_dir, doc_path)

    def __getitem__(self, prefix: typing.Optional[str]) -> 'Dataset':
        """
        Get a `Dataset` mapped to a prefix.

        :raises KeyError: If no dataset is mapped to `prefix`.
        """
        return self.m[prefix]

    def __iter__(self):
        """
        Iterate over the mapped prefixes.
        """
        return iter(self.m)

    def __len__(self):
        """
        The number of mapped prefixes.
        """
        return len(self.m)





class CLDFMarkdownText:
    """
    A CLDF Markdown document.

    Basic CLDF Markdown rendering can be implemented by overwriting the `render_link` method.
    Then, calling the `render` method will return a markdown string with CLDF Markdown links
    replaced.

    A trivial renderer, replacing each CLDF Markdown link with the link label, would look as
    follows:

    .. code-block:: python

        from pycldf.ext.markdown import CLDFMarkdownText

        class Renderer(CLDFMarkdownText):
            def render_link(self, link):
                return str(link.label)

        assert Renderer('[Example 1](ExampleTable#cldf:ex1)').render() == 'Example 1'

    :ivar text: `str` containing the markdown text (with YAML frontmatter removed).
    :ivar metadata: `dict` of document metadata read from YAML frontmatter.
    :ivar dataset_mapping: :class:`DatasetMapping` instance, linking prefixes used in CLDF \
    Markdown links to :class:`pycldf.Dataset` instances.

    The names of the special "Source" and "Metadata" components are given by the module
    constants `SOURCE_COMPONENT` and `METADATA_COMPONENT`.
    """
    def __init__(self,
                 text: typing.Union[pathlib.Path, str],
                 dataset_mapping: typing.Optional[typing.Union[str, Dataset, dict]] = None,
                 download_dir: typing.Optional[pathlib.Path] = None):
        """
        :param text: CLDF Markdown text either to be read from a path or specified as `str`.
        :param dataset_mapping: Mapping of dataset prefixes to `Dataset` instances. May override \
        the mapping provided in YAML frontmatter as part of the text.
        :param download_dir: Optional path to a directory to download data for remote datasets.
        """
        if isinstance(text, str):
            p = frontmatter.loads(text)
            doc_dir = None
        else:
            p = frontmatter.load(str(text))
            # Dataset locators are resolved relative to the document's directory.
            doc_dir = text.parent if isinstance(text, pathlib.Path) else None
        self.metadata = p.metadata

        declared = p.get(DATASETS_MAPPING)
        if isinstance(declared, dict):
            # `DatasetMapping` may update the mapping it is given in place (merging in
            # `dataset_mapping` and swapping locators for `Dataset` objects). Hand over a copy,
            # so that `self.metadata` - and thus `self.frontmatter` - keeps the document's
            # original declaration.
            declared = dict(declared)
        self.dataset_mapping = DatasetMapping(declared, dataset_mapping, doc_dir, download_dir)
        self.text = p.content

        # Per-prefix cache of referenced data: component or table name -> {object id: object}.
        # The two special components are filled eagerly, everything else on first access in
        # `get_object`.
        self._datadict = collections.defaultdict(dict)
        for prefix, ds in self.dataset_mapping.items():
            self._datadict[prefix][SOURCE_COMPONENT] = {src.id: src for src in ds.sources}
            self._datadict[prefix][METADATA_COMPONENT] = ds.tablegroup.asdict(omit_defaults=True)

    @property
    def frontmatter(self) -> str:
        """
        The markdown document's metadata formatted as YAML frontmatter.
        """
        return '---\n{}---'.format(yaml.dump(self.metadata))

    def get_object(self, ml: CLDFMarkdownLink) -> typing.Union[list, orm.Object, Source, dict]:
        """
        Resolve the reference in a CLDF Markdown link to the matching object from a mapped dataset.

        The returned object is

        - an :class:`pycldf.orm.Object` instance for items in ORM mapped components,
        - a row `dict` for items in custom tables,
        - a :class:`pycldf.sources.Source` instance for source references,
        - a `list` of the above for the special `__all__` identifier,
        - a `jmespath.search` result for referenced items in the Metadata component.

        This method can be used within :meth:`render_link` implementations.

        :raises KeyError: If the link's prefix is not mapped to a dataset or no object with the \
        referenced id is found.
        """
        cldf = self.dataset_mapping[ml.prefix]
        comp = ml.component(cldf)
        key = comp or ml.table_or_fname
        cache = self._datadict[ml.prefix]

        if key == METADATA_COMPONENT:
            if ml.all:
                return cache[METADATA_COMPONENT]
            return jmespath.search(ml.objid, cache[METADATA_COMPONENT])

        if key not in cache:
            # A new type of data is referenced.
            objs = cldf.objects(comp) if comp else cldf.iter_rows(key, 'id')
            cache[key] = {r.id if isinstance(r, orm.Object) else r['id']: r for r in objs}

        if ml.all:
            return list(cache[key].values())
        return cache[key][ml.objid]

    def render_link(self, link: CLDFMarkdownLink) -> typing.Union[str, CLDFMarkdownLink]:
        """
        Render a single CLDF Markdown link. To be overwritten by subclasses.

        :param link: A link for which `is_cldf_link` is true.
        :return: The replacement for the link in the rendered text.
        :raises NotImplementedError: Always, unless overwritten.
        """
        raise NotImplementedError(
            '{} must implement render_link'.format(type(self).__name__))

    def _render_link(self, link):
        """
        Callback for link replacement: Only CLDF links are passed on to `render_link`; all other
        links are returned unchanged.
        """
        if link.is_cldf_link:
            return self.render_link(link)
        return link

    def render(self,
               simple_link_detection: bool = True,
               markdown_kw: typing.Optional[dict] = None) -> str:
        """
        A markdown string with CLDF Markdown links replaced.

        :param simple_link_detection: Passed as `simple` into `CLDFMarkdownLink.replace` \
        (ignored with clldutils<3.14).
        :param markdown_kw: Passed as `markdown_kw` into `CLDFMarkdownLink.replace` \
        (ignored with clldutils<3.14).
        """
        if tuple(map(int, clldutils.__version__.split('.')[:2])) < (3, 14):  # pragma: no cover
            if not simple_link_detection or markdown_kw:
                warnings.warn(
                    'Extended markdown link detection is only supported with clldutils>=3.14',
                    category=UserWarning)
            kw = {}
        else:
            kw = dict(simple=simple_link_detection, markdown_kw=markdown_kw)
        return CLDFMarkdownLink.replace(self.text, self._render_link, **kw)
[docs]class FilenameToComponent(CLDFMarkdownText): """ Renderer to replace filenames in CLDF Markdown links with CLDF component names. """