from collections.abc import Iterable
from functools import lru_cache
from io import StringIO
from typing import Any, Optional
from docutils import nodes
from docutils.frontend import OptionParser
from docutils.parsers.rst import Parser
from docutils.transforms import Transform
from docutils.transforms.references import (
AnonymousHyperlinks,
Footnotes,
PropagateTargets,
)
from docutils.utils import new_document, roman
import yaml
try:
from importlib.resources import files
except ImportError:
from importlib_resources import files
from . import data as package_data
from .inliner import InlinerMyst
from .namespace import ApplicationNamespace, compile_namespace
from .nodes import FrontMatterNode
from .states import get_state_classes
class LosslessRSTParser(Parser):
    """RST parser variant from which the original source text can be recovered.

    The main difference from the stock parser is that roles and directives
    are not run.
    """

    def __init__(self):
        # NOTE(review): the base ``Parser.__init__`` is not invoked; the
        # attributes the parser needs are assigned directly below.
        self.initial_state = "Body"
        state_classes = get_state_classes()
        self.state_classes = state_classes
        # Reset the per-class cache of nested state machines so nothing is
        # carried over from a previous parse.
        for cls in state_classes:
            cls.nested_sm_cache = []
        self.inliner = InlinerMyst()
class IndirectHyperlinks(Transform):
    """Resolve the indirect hyperlink targets recorded on the document."""

    def apply(self):
        for indirect in self.document.indirect_targets:
            if indirect.resolved:
                continue
            # TODO implement this resolve?
            self.resolve_indirect_target(indirect)
            # The references themselves are intentionally left unresolved,
            # since resolving them replaces the "refname":
            # self.resolve_indirect_references(indirect)
class StripFootnoteLabel(Transform):
    """Drop the leading label node of footnotes and citations; it is not needed."""

    def apply(self):
        note_types = (nodes.footnote, nodes.citation)

        def is_note(candidate):
            return isinstance(candidate, note_types)

        for note in self.document.traverse(is_note):
            if not note.children:
                continue
            # Only a label in first position is removed.
            if isinstance(note.children[0], nodes.label):
                note.pop(0)
# Converters from a 1-based item number to its rendering in a given
# enumeration style (presumably keyed by the docutils ``enumtype`` value --
# see the disabled lookup in ``ResolveListItems``).
ENUM_CONVERTERS = dict(
    arabic=lambda i: i,
    lowerroman=lambda i: roman.toRoman(i).lower(),
    upperroman=lambda i: roman.toRoman(i).upper(),
    # 1 -> "a", 2 -> "b", ...
    loweralpha=lambda i: chr(ord("a") - 1 + i),
    # 1 -> "A", 2 -> "B", ...
    upperalpha=lambda i: chr(ord("a") - 1 + i).upper(),
)
class ResolveListItems(Transform):
    """Push list-level attributes down onto the items of bullet/enumerated lists.

    Each ``list_item`` directly inside a bullet list gets ``style="bullet"``
    and a ``prefix`` made of the list's bullet character plus a space.
    Each ``list_item`` directly inside an enumerated list gets
    ``style="enumerated"`` and a numeric ``prefix`` such as ``"3. "``,
    counting up from the list's ``start`` attribute (or 1 when absent).

    Background on loose/tight lists::

        A list is loose if any of its list items are separated by blank lines,
        or if any of its list items directly contain two block-level elements
        with a blank line between them. Otherwise a list is tight.

    NOTE(review): no loose/tight decision is made in the code below.
    """

    def apply(self):
        doc = self.document

        for bullets in doc.traverse(nodes.bullet_list):
            marker = bullets["bullet"] + " "
            for item in bullets.children:
                if not isinstance(item, nodes.list_item):
                    continue
                item["style"] = "bullet"
                item["prefix"] = marker

        for enumerated in doc.traverse(nodes.enumerated_list):
            counter = enumerated["start"] if "start" in enumerated else 1
            # TODO markdown-it only supports numbers
            # prefix = node["prefix"]
            # suffix = node["suffix"]
            # convert = ENUM_CONVERTERS[node["enumtype"]]
            for item in enumerated.children:
                if not isinstance(item, nodes.list_item):
                    continue
                item["style"] = "enumerated"
                item["prefix"] = f"{counter}. "
                # Only actual list items advance the numbering.
                counter += 1
class FrontMatter(Transform):
    """Replace a leading field list with a `FrontMatterNode`.

    Similar to ``docutils.transforms.frontmatter.DocInfo``.
    """

    def apply(self):
        document = self.document
        # The transform is opt-in via the ``front_matter`` setting.
        if not document.settings.front_matter:
            return

        skipped = nodes.PreBibliographic
        position = document.first_child_not_matching_class(skipped)
        if position is None:
            return
        node = document[position]

        # If the first such child is a section, look one level inside it.
        if isinstance(node, nodes.section):
            position = node.first_child_not_matching_class(skipped)
            if position is None:
                return
            node = node[position]

        if not isinstance(node, nodes.field_list):
            return
        # The field list's children are moved into the new node.
        node.replace_self(FrontMatterNode("", *node.children))
@lru_cache
def _load_directive_data() -> dict[str, Any]:
    """Parse the ``directives.yml`` resource shipped in the package data.

    The result is memoised by ``lru_cache``, so repeated calls return the
    same parsed object.
    """
    resource = files(package_data).joinpath("directives.yml")
    raw_yaml = resource.read_text("utf8")
    return yaml.safe_load(raw_yaml)
def to_docutils_ast(
    text: str,
    uri: str = "source",
    report_level: int = 2,
    halt_level: int = 4,
    warning_stream: Optional[StringIO] = None,
    language_code: str = "en",
    use_sphinx: bool = True,
    extensions: Iterable[str] = (),
    default_domain: str = "py",
    conversions: Optional[dict] = None,
    front_matter: bool = True,
    namespace: Optional[ApplicationNamespace] = None,
) -> tuple[nodes.document, StringIO]:
    """Convert a string of text to a docutils AST.

    :param text: The text to convert.
    :param uri: The URI of the document.
    :param report_level: The report level for docutils.
    :param halt_level: The halt level for docutils.
    :param warning_stream: A stream to write warnings to
        (a new ``StringIO`` is created when omitted).
    :param language_code: The language code for docutils.
    :param use_sphinx: Whether to use Sphinx roles and directives.
    :param extensions: A list of Sphinx extensions to use.
    :param default_domain: The default Sphinx domain.
    :param conversions: A dictionary of conversion functions,
        merged over the packaged directive data.
    :param front_matter: Whether to treat initial field list as front matter.
    :param namespace: A pre-computed docutils namespace to use;
        when omitted, one is compiled from ``language_code``, ``use_sphinx``,
        ``extensions`` and ``default_domain``.
    :return: The parsed document and the stream that warnings were written to.
    """
    settings = OptionParser(components=(LosslessRSTParser,)).get_default_values()
    warning_stream = StringIO() if warning_stream is None else warning_stream
    settings.warning_stream = warning_stream
    settings.report_level = report_level  # 2=warning
    # The level at or above which `SystemMessage` exceptions
    # will be raised, halting execution.
    settings.halt_level = halt_level  # 4=severe
    settings.language_code = language_code

    document = new_document(uri, settings=settings)

    # compile lookup for directives/roles
    if namespace is None:
        namespace = compile_namespace(
            language_code=language_code,
            use_sphinx=use_sphinx,
            extensions=extensions,
            default_domain=default_domain,
        )
    document.settings.namespace = namespace

    # Get conversion lookup for directives. Always build a fresh dict so the
    # document never holds the object memoised by `_load_directive_data`;
    # otherwise a later mutation would leak into every subsequent call.
    document.settings.directive_data = {
        **_load_directive_data(),
        **(conversions or {}),
    }

    # whether to treat initial field list as front matter
    document.settings.front_matter = front_matter

    parser = LosslessRSTParser()
    parser.parse(text, document)

    # Transforms are applied in the listed order; the numbers in the comments
    # are the priorities of the corresponding docutils transforms.
    for transform_cls in (
        PropagateTargets,  # Propagate empty internal targets to the next element. (260)
        FrontMatter,  # convert initial field list (DocInfo=340)
        AnonymousHyperlinks,  # Link anonymous references to targets. (440)
        # IndirectHyperlinks,  # "refuri" migrated back to all indirect targets (460)
        Footnotes,  # Assign numbers to autonumbered footnotes (620)
        # bespoke transforms
        StripFootnoteLabel,
        ResolveListItems,
    ):
        transform_cls(document).apply()

    return document, warning_stream