# Source code for pycldf.ext.markdown

"""
This module provides tools to build a CLDF Markdown renderer.

For an example, see :class:`FilenameToComponent`.
"""
import re
from typing import Optional, Union, Any
import pathlib
import warnings
import collections
from collections.abc import Mapping
import dataclasses

import yaml
import jmespath
import frontmatter
import clldutils
from clldutils.markup import MarkdownLink

from pycldf.util import pkg_path, MD_SUFFIX
from pycldf.urlutil import url_without_fragment
from pycldf.sources import Source
from pycldf import Dataset
from pycldf import orm
from .discovery import get_dataset

# Public names of this module.
# NOTE(review): `CLDFMarkdownLink` is not defined in the visible part of this file - confirm
# that its definition exists elsewhere in the module.
__all__ = ['CLDFMarkdownLink', 'CLDFMarkdownText', 'FilenameToComponent']

#: The YAML frontmatter key to specify dataset mappings:
DATASETS_MAPPING = 'cldf-datasets'
#: Key under which `CLDFMarkdownText` caches the sources of a dataset (a dict keyed by source id).
SOURCE_COMPONENT = 'Source'
#: Key under which `CLDFMarkdownText` caches the dataset metadata; links whose component or
#: table name equals this value are resolved against that metadata via `jmespath`.
METADATA_COMPONENT = 'Metadata'


class DatasetMapping(Mapping):
    """
    A read-only mapping of prefixes to datasets.

    Keys are either `None` or `str` prefixes fully matching :attr:`key_pattern`; after
    initialization all values are `Dataset` instances.
    """
    key_pattern = re.compile('[a-zA-Z0-9_]+')

    @staticmethod
    def to_dict(o):  # pylint: disable=C0116
        """
        Normalize the accepted argument shapes to a prefix -> dataset (locator) mapping.

        :param o: A `DatasetMapping`, a falsy value, a single `str` or `Dataset`, or a mapping.
        :return: The underlying `dict` of a `DatasetMapping`, `{}` for falsy input, \
        `{None: o}` for a single `str` or `Dataset`, and `o` itself otherwise. Note that the \
        returned object may be shared with the caller and must not be modified in place.
        """
        if isinstance(o, DatasetMapping):
            return o.m
        if not o:
            return {}
        if isinstance(o, (str, Dataset)):
            # A single locator or dataset is registered under the key `None`.
            return {None: o}
        return o

    def __init__(self,
                 m1,
                 m2=None,
                 doc_path: Optional[pathlib.Path] = None,
                 download_dir: Optional[pathlib.Path] = None):
        """
        :param m1: Mapping of prefixes to datasets (locators).
        :param m2: Mapping of prefixes to datasets (locators) to update `m1`.
        :param doc_path: Path of a CLDF markdown document, relative to which dataset locators are \
        to be resolved.
        :param download_dir: Path to an existing directory to which to download datasets \
        (if necessary).
        :raises ValueError: If a prefix is neither `None` nor a full match of `key_pattern`.
        """
        # Work on a copy: `to_dict` may hand back the caller's own dict (e.g. parsed YAML
        # frontmatter) or the dict of another `DatasetMapping`. Updating it in place and
        # replacing locators with `Dataset` objects would silently alter the caller's data.
        self.m = dict(self.to_dict(m1))
        self.m.update(self.to_dict(m2))
        if not all(k is None or DatasetMapping.key_pattern.fullmatch(k) for k in self.m):
            raise ValueError('Invalid dataset prefix')
        # Resolve every value which is not yet a `Dataset` (i.e. a locator) to a `Dataset`.
        for prefix, locator in list(self.m.items()):
            if not isinstance(locator, Dataset):
                self.m[prefix] = get_dataset(locator, download_dir, doc_path)

    def __getitem__(self, prefix: Union[str, None]) -> Dataset:
        """
        Get a `Dataset` mapped to a prefix.

        :raises KeyError: If no dataset is mapped to `prefix`.
        """
        return self.m[prefix]

    def __iter__(self):
        """Iterate over the prefixes."""
        return iter(self.m)

    def __len__(self):
        """Number of mapped prefixes."""
        return len(self.m)





class CLDFMarkdownText:
    """
    A CLDF Markdown document.

    Basic CLDF Markdown rendering can be implemented by overwriting the `render_link` method.
    Then, calling the `render` method will return a markdown string with CLDF Markdown links
    replaced.

    A trivial renderer, replacing each CLDF Markdown link with the link label, would look as
    follows:

    .. code-block:: python

        from pycldf.ext.markdown import CLDFMarkdownText

        class Renderer(CLDFMarkdownText):
            def render_link(self, link):
                return str(link.label)

        assert Renderer('[Example 1](ExampleTable#cldf:ex1)').render() == 'Example 1'

    :ivar text: `str` containing the markdown text (with YAML frontmatter removed).
    :ivar metadata: `dict` of document metadata read from YAML frontmatter.
    :ivar dataset_mapping: :class:`DatasetMapping` instance, linking prefixes used in CLDF
        Markdown links to :class:`pycldf.Dataset` instances.

    The names of the special "Source" and "Metadata" components are given by the module-level
    constants `SOURCE_COMPONENT` and `METADATA_COMPONENT`.
    """
    def __init__(self,
                 text: Union[pathlib.Path, str],
                 dataset_mapping: Optional[Union[str, Dataset, dict]] = None,
                 download_dir: Optional[pathlib.Path] = None):
        """
        :param text: CLDF Markdown text either to be read from a path or specified as `str`.
        :param dataset_mapping: Mapping of dataset prefixes to `Dataset` instances. May override
            the mapping provided in YAML frontmatter as part of the text.
        :param download_dir: Optional path to a directory to download data for remote datasets.
        """
        # A `str` is parsed as document content; anything else is treated as a file path.
        p = frontmatter.loads(text) if isinstance(text, str) else frontmatter.load(str(text))
        self.metadata: dict[str, Any] = p.metadata
        self.dataset_mapping: Mapping[Union[str, None], Dataset] = DatasetMapping(
            p.get(DATASETS_MAPPING),
            dataset_mapping,
            # Dataset locators are resolved relative to the document's directory, if known.
            text.parent if isinstance(text, pathlib.Path) else None,
            download_dir,
        )
        self.text: str = p.content
        # Cache of referenced data: prefix -> component/table name -> {object id: object}.
        # Sources and metadata are loaded eagerly, other components lazily in `get_object`.
        self._datadict = collections.defaultdict(dict)
        for prefix, ds in self.dataset_mapping.items():
            self._datadict[prefix][SOURCE_COMPONENT] = {src.id: src for src in ds.sources}
            self._datadict[prefix][METADATA_COMPONENT] = ds.tablegroup.asdict(omit_defaults=True)

    @property
    def frontmatter(self) -> str:
        """
        The markdown document's metadata formatted as YAML frontmatter.
        """
        return f'---\n{yaml.dump(self.metadata)}---'

    def get_object(self, ml: CLDFMarkdownLink) -> Union[list, orm.Object, Source, dict]:
        """
        Resolve the reference in a CLDF Markdown link to the matching object from a mapped dataset.

        The returned object is

        - an :class:`pycldf.orm.Object` instance for items in ORM mapped components,
        - a row `dict` for items in custom tables,
        - a :class:`pycldf.sources.Source` instance for source references,
        - a `list` of the above for the special `__all__` identifier.
        - a `jmespath.search` result for referenced items in the Metadata component,

        This method can be used within :meth:`render_link` implementations.

        :raises KeyError: If the link's prefix is not mapped to a dataset or no object with the \
        referenced identifier exists.
        """
        cldf = self.dataset_mapping[ml.prefix]
        comp = ml.component(cldf)
        # Fall back to the table or file name when the link does not resolve to a component.
        key = comp or ml.table_or_fname
        if key == METADATA_COMPONENT:
            if ml.all:
                return self._datadict[ml.prefix][METADATA_COMPONENT]
            # For metadata, the object identifier is interpreted as JMESPath expression.
            return jmespath.search(ml.objid, self._datadict[ml.prefix][METADATA_COMPONENT])
        if key not in self._datadict[ml.prefix]:
            # A new type of data is referenced.
            objs = cldf.objects(comp) if comp else cldf.iter_rows(key, 'id')
            self._datadict[ml.prefix][key] = {
                r.id if isinstance(r, orm.Object) else r['id']: r for r in objs}
        return list(self._datadict[ml.prefix][key].values()) if ml.all \
            else self._datadict[ml.prefix][key][ml.objid]

    def _render_link(self, link: CLDFMarkdownLink) -> Union[str, CLDFMarkdownLink]:
        """Dispatches to custom rendering in case of CLDF links."""
        if link.is_cldf_link:
            return self.render_link(link)
        # Non-CLDF links are passed through unchanged.
        return link

    def render(
            self,
            simple_link_detection: bool = True,
            markdown_kw: Optional[dict[str, Any]] = None,
    ) -> str:
        """
        A markdown string with CLDF Markdown links replaced.

        :param simple_link_detection: Passed as `simple` to `CLDFMarkdownLink.replace` \
        (with clldutils>=3.14 only).
        :param markdown_kw: Passed as `markdown_kw` to `CLDFMarkdownLink.replace` \
        (with clldutils>=3.14 only).
        """
        if tuple(map(int, clldutils.__version__.split('.')[:2])) < (3, 14):  # pragma: no cover
            # Older clldutils do not accept the extra keyword arguments; warn if the caller
            # asked for non-default behaviour which cannot be honoured.
            if not simple_link_detection or markdown_kw:
                warnings.warn(
                    'Extended markdown link detection is only supported with clldutils>=3.14',
                    category=UserWarning)
            kw = {}
        else:
            kw = {'simple': simple_link_detection, 'markdown_kw': markdown_kw}
        return CLDFMarkdownLink.replace(self.text, self._render_link, **kw)
[docs]class FilenameToComponent(CLDFMarkdownText): """ Renderer to replace filenames in CLDF Markdown links with CLDF component names. """