aboutsummaryrefslogtreecommitdiffhomepage
path: root/libs/markdown
diff options
context:
space:
mode:
authormorpheus65535 <[email protected]>2024-03-03 12:15:23 -0500
committerGitHub <[email protected]>2024-03-03 12:15:23 -0500
commit03afeb347075381bcb7fd6036295c9fa4a90d2dc (patch)
tree7c5d72c973d2c8e4ade57391a1c9ad5e94903a46 /libs/markdown
parent9ae684240b5bdd40a870d8122f0e380f8d03a187 (diff)
downloadbazarr-03afeb347075381bcb7fd6036295c9fa4a90d2dc.tar.gz
bazarr-03afeb347075381bcb7fd6036295c9fa4a90d2dc.zip
Updated multiple Python modules (now in libs and custom_libs directories) and React libraries
Diffstat (limited to 'libs/markdown')
-rw-r--r--libs/markdown/__init__.py48
-rw-r--r--libs/markdown/__main__.py34
-rw-r--r--libs/markdown/__meta__.py48
-rw-r--r--libs/markdown/blockparser.py115
-rw-r--r--libs/markdown/blockprocessors.py273
-rw-r--r--libs/markdown/core.py356
-rw-r--r--libs/markdown/extensions/__init__.py131
-rw-r--r--libs/markdown/extensions/abbr.py50
-rw-r--r--libs/markdown/extensions/admonition.py61
-rw-r--r--libs/markdown/extensions/attr_list.py65
-rw-r--r--libs/markdown/extensions/codehilite.py207
-rw-r--r--libs/markdown/extensions/def_list.py36
-rw-r--r--libs/markdown/extensions/extra.py32
-rw-r--r--libs/markdown/extensions/fenced_code.py62
-rw-r--r--libs/markdown/extensions/footnotes.py151
-rw-r--r--libs/markdown/extensions/legacy_attrs.py48
-rw-r--r--libs/markdown/extensions/legacy_em.py19
-rw-r--r--libs/markdown/extensions/md_in_html.py90
-rw-r--r--libs/markdown/extensions/meta.py33
-rw-r--r--libs/markdown/extensions/nl2br.py28
-rw-r--r--libs/markdown/extensions/sane_lists.py37
-rw-r--r--libs/markdown/extensions/smarty.py164
-rw-r--r--libs/markdown/extensions/tables.py52
-rw-r--r--libs/markdown/extensions/toc.py212
-rw-r--r--libs/markdown/extensions/wikilinks.py44
-rw-r--r--libs/markdown/htmlparser.py150
-rw-r--r--libs/markdown/inlinepatterns.py459
-rw-r--r--libs/markdown/postprocessors.py76
-rw-r--r--libs/markdown/preprocessors.py65
-rw-r--r--libs/markdown/serializers.py51
-rw-r--r--libs/markdown/test_tools.py84
-rw-r--r--libs/markdown/treeprocessors.py222
-rw-r--r--libs/markdown/util.py206
33 files changed, 2181 insertions, 1528 deletions
diff --git a/libs/markdown/__init__.py b/libs/markdown/__init__.py
index d88b1e974..9674d6e06 100644
--- a/libs/markdown/__init__.py
+++ b/libs/markdown/__init__.py
@@ -1,23 +1,43 @@
-"""
-Python Markdown
+# Python Markdown
+
+# A Python implementation of John Gruber's Markdown.
-A Python implementation of John Gruber's Markdown.
+# - Documentation: https://python-markdown.github.io/
+# - GitHub: https://github.com/Python-Markdown/markdown/
+# - PyPI: https://pypi.org/project/Markdown/
-Documentation: https://python-markdown.github.io/
-GitHub: https://github.com/Python-Markdown/markdown/
-PyPI: https://pypi.org/project/Markdown/
+# Started by Manfred Stienstra (http://www.dwerg.net/).
+# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+# Currently maintained by Waylan Limberg (https://github.com/waylan),
+# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
-Started by Manfred Stienstra (http://www.dwerg.net/).
-Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
-Currently maintained by Waylan Limberg (https://github.com/waylan),
-Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+# - Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
+# - Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# - Copyright 2004 Manfred Stienstra (the original version)
-Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
-Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
-Copyright 2004 Manfred Stienstra (the original version)
+# License: BSD (see LICENSE.md for details).
-License: BSD (see LICENSE.md for details).
"""
+Python-Markdown provides two public functions ([`markdown.markdown`][] and [`markdown.markdownFromFile`][])
+both of which wrap the public class [`markdown.Markdown`][]. All submodules support these public functions
+and class and/or provide extension support.
+
+Modules:
+ core: Core functionality.
+ preprocessors: Pre-processors.
+ blockparser: Core Markdown block parser.
+ blockprocessors: Block processors.
+ treeprocessors: Tree processors.
+ inlinepatterns: Inline patterns.
+ postprocessors: Post-processors.
+ serializers: Serializers.
+ util: Utility functions.
+ htmlparser: HTML parser.
+ test_tools: Testing utilities.
+ extensions: Markdown extensions.
+"""
+
+from __future__ import annotations
from .core import Markdown, markdown, markdownFromFile
from .__meta__ import __version__, __version_info__ # noqa
diff --git a/libs/markdown/__main__.py b/libs/markdown/__main__.py
index 018400824..c323aaac4 100644
--- a/libs/markdown/__main__.py
+++ b/libs/markdown/__main__.py
@@ -1,23 +1,23 @@
-"""
-Python Markdown
+# Python Markdown
-A Python implementation of John Gruber's Markdown.
+# A Python implementation of John Gruber's Markdown.
-Documentation: https://python-markdown.github.io/
-GitHub: https://github.com/Python-Markdown/markdown/
-PyPI: https://pypi.org/project/Markdown/
+# Documentation: https://python-markdown.github.io/
+# GitHub: https://github.com/Python-Markdown/markdown/
+# PyPI: https://pypi.org/project/Markdown/
-Started by Manfred Stienstra (http://www.dwerg.net/).
-Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
-Currently maintained by Waylan Limberg (https://github.com/waylan),
-Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+# Started by Manfred Stienstra (http://www.dwerg.net/).
+# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+# Currently maintained by Waylan Limberg (https://github.com/waylan),
+# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
-Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
-Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
-Copyright 2004 Manfred Stienstra (the original version)
+# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
-License: BSD (see LICENSE.md for details).
-"""
+# License: BSD (see LICENSE.md for details).
+
+from __future__ import annotations
import sys
import optparse
@@ -146,6 +146,6 @@ def run(): # pragma: no cover
if __name__ == '__main__': # pragma: no cover
- # Support running module as a commandline command.
- # `python -m markdown [options] [args]`.
+ # Support running module as a command line command.
+ # python -m markdown [options] [args]
run()
diff --git a/libs/markdown/__meta__.py b/libs/markdown/__meta__.py
index ccabee528..a35a2de8f 100644
--- a/libs/markdown/__meta__.py
+++ b/libs/markdown/__meta__.py
@@ -1,36 +1,38 @@
-"""
-Python Markdown
+# Python Markdown
-A Python implementation of John Gruber's Markdown.
+# A Python implementation of John Gruber's Markdown.
-Documentation: https://python-markdown.github.io/
-GitHub: https://github.com/Python-Markdown/markdown/
-PyPI: https://pypi.org/project/Markdown/
+# Documentation: https://python-markdown.github.io/
+# GitHub: https://github.com/Python-Markdown/markdown/
+# PyPI: https://pypi.org/project/Markdown/
-Started by Manfred Stienstra (http://www.dwerg.net/).
-Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
-Currently maintained by Waylan Limberg (https://github.com/waylan),
-Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+# Started by Manfred Stienstra (http://www.dwerg.net/).
+# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+# Currently maintained by Waylan Limberg (https://github.com/waylan),
+# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
-Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
-Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
-Copyright 2004 Manfred Stienstra (the original version)
+# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
-License: BSD (see LICENSE.md for details).
-"""
+# License: BSD (see LICENSE.md for details).
# __version_info__ format:
-# (major, minor, patch, dev/alpha/beta/rc/final, #)
-# (1, 1, 2, 'dev', 0) => "1.1.2.dev0"
-# (1, 1, 2, 'alpha', 1) => "1.1.2a1"
-# (1, 2, 0, 'beta', 2) => "1.2b2"
-# (1, 2, 0, 'rc', 4) => "1.2rc4"
-# (1, 2, 0, 'final', 0) => "1.2"
-__version_info__ = (3, 4, 1, 'final', 0)
+# (major, minor, patch, dev/alpha/beta/rc/final, #)
+# (1, 1, 2, 'dev', 0) => "1.1.2.dev0"
+# (1, 1, 2, 'alpha', 1) => "1.1.2a1"
+# (1, 2, 0, 'beta', 2) => "1.2b2"
+# (1, 2, 0, 'rc', 4) => "1.2rc4"
+# (1, 2, 0, 'final', 0) => "1.2"
+
+from __future__ import annotations
+
+
+__version_info__ = (3, 5, 2, 'final', 0)
def _get_version(version_info):
- " Returns a PEP 440-compliant version number from version_info. "
+ " Returns a PEP 440-compliant version number from `version_info`. "
assert len(version_info) == 5
assert version_info[3] in ('dev', 'alpha', 'beta', 'rc', 'final')
diff --git a/libs/markdown/blockparser.py b/libs/markdown/blockparser.py
index b0ca4b1b5..549c9ecd5 100644
--- a/libs/markdown/blockparser.py
+++ b/libs/markdown/blockparser.py
@@ -1,32 +1,47 @@
-"""
-Python Markdown
+# Python Markdown
+
+# A Python implementation of John Gruber's Markdown.
-A Python implementation of John Gruber's Markdown.
+# Documentation: https://python-markdown.github.io/
+# GitHub: https://github.com/Python-Markdown/markdown/
+# PyPI: https://pypi.org/project/Markdown/
-Documentation: https://python-markdown.github.io/
-GitHub: https://github.com/Python-Markdown/markdown/
-PyPI: https://pypi.org/project/Markdown/
+# Started by Manfred Stienstra (http://www.dwerg.net/).
+# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+# Currently maintained by Waylan Limberg (https://github.com/waylan),
+# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
-Started by Manfred Stienstra (http://www.dwerg.net/).
-Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
-Currently maintained by Waylan Limberg (https://github.com/waylan),
-Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
-Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
-Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
-Copyright 2004 Manfred Stienstra (the original version)
+# License: BSD (see LICENSE.md for details).
+
+"""
+The block parser handles basic parsing of Markdown blocks. It doesn't concern
+itself with inline elements such as `**bold**` or `*italics*`, but rather just
+catches blocks, lists, quotes, etc.
-License: BSD (see LICENSE.md for details).
+The `BlockParser` is made up of a bunch of `BlockProcessors`, each handling a
+different type of block. Extensions may add/replace/remove `BlockProcessors`
+as they need to alter how Markdown blocks are parsed.
"""
+from __future__ import annotations
+
import xml.etree.ElementTree as etree
+from typing import TYPE_CHECKING, Iterable, Any
from . import util
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
+ from .blockprocessors import BlockProcessor
+
class State(list):
""" Track the current and nested state of the parser.
- This utility class is used to track the state of the BlockParser and
+ This utility class is used to track the state of the `BlockParser` and
 support multiple levels of nesting. It's just a simple API wrapped around
a list. Each time a state is set, that state is appended to the end of the
list. Each time a state is reset, that state is removed from the end of
@@ -41,15 +56,15 @@ class State(list):
"""
- def set(self, state):
+ def set(self, state: Any):
""" Set a new state. """
self.append(state)
- def reset(self):
+ def reset(self) -> None:
""" Step back one step in nested state. """
self.pop()
- def isstate(self, state):
+ def isstate(self, state: Any) -> bool:
""" Test that top (current) level is of given state. """
if len(self):
return self[-1] == state
@@ -58,58 +73,84 @@ class State(list):
class BlockParser:
- """ Parse Markdown blocks into an ElementTree object.
+ """ Parse Markdown blocks into an `ElementTree` object.
+
+ A wrapper class that stitches the various `BlockProcessors` together,
+ looping through them and creating an `ElementTree` object.
- A wrapper class that stitches the various BlockProcessors together,
- looping through them and creating an ElementTree object.
"""
- def __init__(self, md):
- self.blockprocessors = util.Registry()
+ def __init__(self, md: Markdown):
+ """ Initialize the block parser.
+
+ Arguments:
+ md: A Markdown instance.
+
+ Attributes:
+ BlockParser.md (Markdown): A Markdown instance.
+ BlockParser.state (State): Tracks the nesting level of current location in document being parsed.
+ BlockParser.blockprocessors (util.Registry): A collection of
+ [`blockprocessors`][markdown.blockprocessors].
+
+ """
+ self.blockprocessors: util.Registry[BlockProcessor] = util.Registry()
self.state = State()
self.md = md
- def parseDocument(self, lines):
- """ Parse a markdown document into an ElementTree.
+ def parseDocument(self, lines: Iterable[str]) -> etree.ElementTree:
+ """ Parse a Markdown document into an `ElementTree`.
- Given a list of lines, an ElementTree object (not just a parent
- Element) is created and the root element is passed to the parser
- as the parent. The ElementTree object is returned.
+ Given a list of lines, an `ElementTree` object (not just a parent
+ `Element`) is created and the root element is passed to the parser
+ as the parent. The `ElementTree` object is returned.
This should only be called on an entire document, not pieces.
+ Arguments:
+ lines: A list of lines (strings).
+
+ Returns:
+ An element tree.
"""
- # Create a ElementTree from the lines
+ # Create an `ElementTree` from the lines
self.root = etree.Element(self.md.doc_tag)
self.parseChunk(self.root, '\n'.join(lines))
return etree.ElementTree(self.root)
- def parseChunk(self, parent, text):
- """ Parse a chunk of markdown text and attach to given etree node.
+ def parseChunk(self, parent: etree.Element, text: str) -> None:
+ """ Parse a chunk of Markdown text and attach to given `etree` node.
- While the ``text`` argument is generally assumed to contain multiple
+ While the `text` argument is generally assumed to contain multiple
blocks which will be split on blank lines, it could contain only one
block. Generally, this method would be called by extensions when
block parsing is required.
- The ``parent`` etree Element passed in is altered in place.
+ The `parent` `etree` Element passed in is altered in place.
Nothing is returned.
+ Arguments:
+ parent: The parent element.
+ text: The text to parse.
+
"""
self.parseBlocks(parent, text.split('\n\n'))
- def parseBlocks(self, parent, blocks):
- """ Process blocks of markdown text and attach to given etree node.
+ def parseBlocks(self, parent: etree.Element, blocks: list[str]) -> None:
+ """ Process blocks of Markdown text and attach to given `etree` node.
- Given a list of ``blocks``, each blockprocessor is stepped through
+ Given a list of `blocks`, each `blockprocessor` is stepped through
until there are no blocks left. While an extension could potentially
call this method directly, it's generally expected to be used
internally.
This is a public method as an extension may need to add/alter
- additional BlockProcessors which call this method to recursively
+ additional `BlockProcessors` which call this method to recursively
parse a nested block.
+ Arguments:
+ parent: The parent element.
+ blocks: The blocks of text to parse.
+
"""
while blocks:
for processor in self.blockprocessors:
diff --git a/libs/markdown/blockprocessors.py b/libs/markdown/blockprocessors.py
index 3d0ff86eb..d2020b9b6 100644
--- a/libs/markdown/blockprocessors.py
+++ b/libs/markdown/blockprocessors.py
@@ -1,45 +1,45 @@
-"""
-Python Markdown
-
-A Python implementation of John Gruber's Markdown.
+# Python Markdown
-Documentation: https://python-markdown.github.io/
-GitHub: https://github.com/Python-Markdown/markdown/
-PyPI: https://pypi.org/project/Markdown/
+# A Python implementation of John Gruber's Markdown.
-Started by Manfred Stienstra (http://www.dwerg.net/).
-Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
-Currently maintained by Waylan Limberg (https://github.com/waylan),
-Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+# Documentation: https://python-markdown.github.io/
+# GitHub: https://github.com/Python-Markdown/markdown/
+# PyPI: https://pypi.org/project/Markdown/
-Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
-Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
-Copyright 2004 Manfred Stienstra (the original version)
+# Started by Manfred Stienstra (http://www.dwerg.net/).
+# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+# Currently maintained by Waylan Limberg (https://github.com/waylan),
+# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
-License: BSD (see LICENSE.md for details).
+# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
-CORE MARKDOWN BLOCKPARSER
-===========================================================================
+# License: BSD (see LICENSE.md for details).
-This parser handles basic parsing of Markdown blocks. It doesn't concern
-itself with inline elements such as **bold** or *italics*, but rather just
-catches blocks, lists, quotes, etc.
-
-The BlockParser is made up of a bunch of BlockProcessors, each handling a
-different type of block. Extensions may add/replace/remove BlockProcessors
-as they need to alter how markdown blocks are parsed.
+"""
+A block processor parses blocks of text and adds new elements to the ElementTree. Blocks of text,
+separated from other text by blank lines, may have a different syntax and produce a differently
+structured tree than other Markdown. Block processors excel at handling code formatting, equation
+layouts, tables, etc.
"""
+from __future__ import annotations
+
import logging
import re
import xml.etree.ElementTree as etree
+from typing import TYPE_CHECKING, Any
from . import util
from .blockparser import BlockParser
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
+
logger = logging.getLogger('MARKDOWN')
-def build_block_parser(md, **kwargs):
+def build_block_parser(md: Markdown, **kwargs: Any) -> BlockParser:
""" Build the default block parser used by Markdown. """
parser = BlockParser(md)
parser.blockprocessors.register(EmptyBlockProcessor(parser), 'empty', 100)
@@ -60,25 +60,29 @@ class BlockProcessor:
""" Base class for block processors.
Each subclass will provide the methods below to work with the source and
- tree. Each processor will need to define it's own ``test`` and ``run``
- methods. The ``test`` method should return True or False, to indicate
+ tree. Each processor will need to define its own `test` and `run`
+ methods. The `test` method should return True or False, to indicate
whether the current block should be processed by this processor. If the
- test passes, the parser will call the processors ``run`` method.
+ test passes, the parser will call the processor's `run` method.
+
+ Attributes:
+ BlockProcessor.parser (BlockParser): The `BlockParser` instance this is attached to.
+ BlockProcessor.tab_length (int): The tab length set on the `Markdown` instance.
"""
- def __init__(self, parser):
+ def __init__(self, parser: BlockParser):
self.parser = parser
self.tab_length = parser.md.tab_length
- def lastChild(self, parent):
- """ Return the last child of an etree element. """
+ def lastChild(self, parent: etree.Element) -> etree.Element | None:
+ """ Return the last child of an `etree` element. """
if len(parent):
return parent[-1]
else:
return None
- def detab(self, text, length=None):
+ def detab(self, text: str, length: int | None = None) -> tuple[str, str]:
""" Remove a tab from the front of each line of the given text. """
if length is None:
length = self.tab_length
@@ -93,7 +97,7 @@ class BlockProcessor:
break
return '\n'.join(newtext), '\n'.join(lines[len(newtext):])
- def looseDetab(self, text, level=1):
+ def looseDetab(self, text: str, level: int = 1) -> str:
""" Remove a tab from front of lines but allowing dedented lines. """
lines = text.split('\n')
for i in range(len(lines)):
@@ -101,47 +105,47 @@ class BlockProcessor:
lines[i] = lines[i][self.tab_length*level:]
return '\n'.join(lines)
- def test(self, parent, block):
+ def test(self, parent: etree.Element, block: str) -> bool:
""" Test for block type. Must be overridden by subclasses.
- As the parser loops through processors, it will call the ``test``
+ As the parser loops through processors, it will call the `test`
method on each to determine if the given block of text is of that
- type. This method must return a boolean ``True`` or ``False``. The
+ type. This method must return a boolean `True` or `False`. The
actual method of testing is left to the needs of that particular
- block type. It could be as simple as ``block.startswith(some_string)``
+ block type. It could be as simple as `block.startswith(some_string)`
or a complex regular expression. As the block type may be different
depending on the parent of the block (i.e. inside a list), the parent
- etree element is also provided and may be used as part of the test.
+ `etree` element is also provided and may be used as part of the test.
- Keywords:
-
- * ``parent``: A etree element which will be the parent of the block.
- * ``block``: A block of text from the source which has been split at
- blank lines.
+ Keyword arguments:
+ parent: An `etree` element which will be the parent of the block.
+ block: A block of text from the source which has been split at blank lines.
"""
pass # pragma: no cover
- def run(self, parent, blocks):
+ def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
""" Run processor. Must be overridden by subclasses.
When the parser determines the appropriate type of a block, the parser
- will call the corresponding processor's ``run`` method. This method
+ will call the corresponding processor's `run` method. This method
should parse the individual lines of the block and append them to
- the etree.
+ the `etree`.
- Note that both the ``parent`` and ``etree`` keywords are pointers
+ Note that both the `parent` and `blocks` keywords are pointers
to instances of the objects which should be edited in place. Each
processor must make changes to the existing objects as there is no
mechanism to return new/different objects to replace them.
- This means that this method should be adding SubElements or adding text
- to the parent, and should remove (``pop``) or add (``insert``) items to
+ This means that this method should be adding `SubElements` or adding text
+ to the parent, and should remove (`pop`) or add (`insert`) items to
the list of blocks.
- Keywords:
+ If `False` is returned, this will have the same effect as returning `False`
+ from the `test` method.
- * ``parent``: A etree element which is the parent of the current block.
- * ``blocks``: A list of all remaining blocks of the document.
+ Keyword arguments:
+ parent: An `etree` element which is the parent of the current block.
+ blocks: A list of all remaining blocks of the document.
"""
pass # pragma: no cover
@@ -149,7 +153,8 @@ class BlockProcessor:
class ListIndentProcessor(BlockProcessor):
""" Process children of list items.
- Example:
+ Example
+
* a list item
process this part
@@ -158,46 +163,48 @@ class ListIndentProcessor(BlockProcessor):
"""
ITEM_TYPES = ['li']
+ """ List of tags used for list items. """
LIST_TYPES = ['ul', 'ol']
+ """ Types of lists this processor can operate on. """
def __init__(self, *args):
super().__init__(*args)
self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length)
- def test(self, parent, block):
+ def test(self, parent: etree.Element, block: str) -> bool:
return block.startswith(' '*self.tab_length) and \
not self.parser.state.isstate('detabbed') and \
(parent.tag in self.ITEM_TYPES or
(len(parent) and parent[-1] is not None and
(parent[-1].tag in self.LIST_TYPES)))
- def run(self, parent, blocks):
+ def run(self, parent: etree.Element, blocks: list[str]) -> None:
block = blocks.pop(0)
level, sibling = self.get_level(parent, block)
block = self.looseDetab(block, level)
self.parser.state.set('detabbed')
if parent.tag in self.ITEM_TYPES:
- # It's possible that this parent has a 'ul' or 'ol' child list
+ # It's possible that this parent has a `ul` or `ol` child list
# with a member. If that is the case, then that should be the
# parent. This is intended to catch the edge case of an indented
# list whose first member was parsed previous to this point
- # see OListProcessor
+ # see `OListProcessor`
if len(parent) and parent[-1].tag in self.LIST_TYPES:
self.parser.parseBlocks(parent[-1], [block])
else:
- # The parent is already a li. Just parse the child block.
+ # The parent is already a `li`. Just parse the child block.
self.parser.parseBlocks(parent, [block])
elif sibling.tag in self.ITEM_TYPES:
- # The sibling is a li. Use it as parent.
+ # The sibling is a `li`. Use it as parent.
self.parser.parseBlocks(sibling, [block])
elif len(sibling) and sibling[-1].tag in self.ITEM_TYPES:
- # The parent is a list (``ol`` or ``ul``) which has children.
- # Assume the last child li is the parent of this block.
+ # The parent is a list (`ol` or `ul`) which has children.
+ # Assume the last child `li` is the parent of this block.
if sibling[-1].text:
- # If the parent li has text, that text needs to be moved to a p
- # The p must be 'inserted' at beginning of list in the event
- # that other children already exist i.e.; a nested sublist.
+ # If the parent `li` has text, that text needs to be moved to a `p`
+ # The `p` must be 'inserted' at beginning of list in the event
+ # that other children already exist, i.e., a nested sub-list.
p = etree.Element('p')
p.text = sibling[-1].text
sibling[-1].text = ''
@@ -207,13 +214,13 @@ class ListIndentProcessor(BlockProcessor):
self.create_item(sibling, block)
self.parser.state.reset()
- def create_item(self, parent, block):
- """ Create a new li and parse the block with it as the parent. """
+ def create_item(self, parent: etree.Element, block: str) -> None:
+ """ Create a new `li` and parse the block with it as the parent. """
li = etree.SubElement(parent, 'li')
self.parser.parseBlocks(li, [block])
- def get_level(self, parent, block):
- """ Get level of indent based on list level. """
+ def get_level(self, parent: etree.Element, block: str) -> tuple[int, etree.Element]:
+ """ Get level of indentation based on list level. """
# Get indent level
m = self.INDENT_RE.match(block)
if m:
@@ -221,10 +228,10 @@ class ListIndentProcessor(BlockProcessor):
else:
indent_level = 0
if self.parser.state.isstate('list'):
- # We're in a tightlist - so we already are at correct parent.
+ # We're in a tight-list - so we already are at correct parent.
level = 1
else:
- # We're in a looselist - so we need to find parent.
+ # We're in a loose-list - so we need to find parent.
level = 0
# Step through children of tree to find matching indent level.
while indent_level > level:
@@ -235,7 +242,7 @@ class ListIndentProcessor(BlockProcessor):
level += 1
parent = child
else:
- # No more child levels. If we're short of indent_level,
+ # No more child levels. If we're short of `indent_level`,
# we have a code block. So we stop here.
break
return level, parent
@@ -244,10 +251,10 @@ class ListIndentProcessor(BlockProcessor):
class CodeBlockProcessor(BlockProcessor):
""" Process code blocks. """
- def test(self, parent, block):
+ def test(self, parent: etree.Element, block: str) -> bool:
return block.startswith(' '*self.tab_length)
- def run(self, parent, blocks):
+ def run(self, parent: etree.Element, blocks: list[str]) -> None:
sibling = self.lastChild(parent)
block = blocks.pop(0)
theRest = ''
@@ -255,14 +262,14 @@ class CodeBlockProcessor(BlockProcessor):
len(sibling) and sibling[0].tag == "code"):
# The previous block was a code block. As blank lines do not start
# new code blocks, append this block to the previous, adding back
- # linebreaks removed from the split into a list.
+ # line breaks removed from the split into a list.
code = sibling[0]
block, theRest = self.detab(block)
code.text = util.AtomicString(
'{}\n{}\n'.format(code.text, util.code_escape(block.rstrip()))
)
else:
- # This is a new codeblock. Create the elements and insert text.
+ # This is a new code block. Create the elements and insert text.
pre = etree.SubElement(parent, 'pre')
code = etree.SubElement(pre, 'code')
block, theRest = self.detab(block)
@@ -275,20 +282,21 @@ class CodeBlockProcessor(BlockProcessor):
class BlockQuoteProcessor(BlockProcessor):
+ """ Process blockquotes. """
RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)')
- def test(self, parent, block):
+ def test(self, parent: etree.Element, block: str) -> bool:
return bool(self.RE.search(block)) and not util.nearing_recursion_limit()
- def run(self, parent, blocks):
+ def run(self, parent: etree.Element, blocks: list[str]) -> None:
block = blocks.pop(0)
m = self.RE.search(block)
if m:
before = block[:m.start()] # Lines before blockquote
# Pass lines before blockquote in recursively for parsing first.
self.parser.parseBlocks(parent, [before])
- # Remove ``> `` from beginning of each line.
+ # Remove `> ` from beginning of each line.
block = '\n'.join(
[self.clean(line) for line in block[m.start():].split('\n')]
)
@@ -300,13 +308,13 @@ class BlockQuoteProcessor(BlockProcessor):
# This is a new blockquote. Create a new parent element.
quote = etree.SubElement(parent, 'blockquote')
# Recursively parse block with blockquote as parent.
- # change parser state so blockquotes embedded in lists use p tags
+ # change parser state so blockquotes embedded in lists use `p` tags
self.parser.state.set('blockquote')
self.parser.parseChunk(quote, block)
self.parser.state.reset()
- def clean(self, line):
- """ Remove ``>`` from beginning of a line. """
+ def clean(self, line: str) -> str:
+ """ Remove `>` from beginning of a line. """
m = self.RE.match(line)
if line.strip() == ">":
return ""
@@ -319,20 +327,24 @@ class BlockQuoteProcessor(BlockProcessor):
class OListProcessor(BlockProcessor):
""" Process ordered list blocks. """
- TAG = 'ol'
- # The integer (python string) with which the lists starts (default=1)
- # Eg: If list is initialized as)
- # 3. Item
- # The ol tag will get starts="3" attribute
- STARTSWITH = '1'
- # Lazy ol - ignore startswith
- LAZY_OL = True
- # List of allowed sibling tags.
- SIBLING_TAGS = ['ol', 'ul']
-
- def __init__(self, parser):
+ TAG: str = 'ol'
+ """ The tag used for the wrapping element. """
+ STARTSWITH: str = '1'
+ """
+ The integer (as a string) with which the list starts. For example, if a list is initialized as
+ `3. Item`, then the `ol` tag will be assigned an HTML attribute of `start="3"`. Default: `"1"`.
+ """
+ LAZY_OL: bool = True
+ """ Ignore `STARTSWITH` if `True`. """
+ SIBLING_TAGS: list[str] = ['ol', 'ul']
+ """
+ Markdown does not require the type of a new list item match the previous list item type.
+ This is the list of types which can be mixed.
+ """
+
+ def __init__(self, parser: BlockParser):
super().__init__(parser)
- # Detect an item (``1. item``). ``group(1)`` contains contents of item.
+ # Detect an item (`1. item`). `group(1)` contains contents of item.
self.RE = re.compile(r'^[ ]{0,%d}\d+\.[ ]+(.*)' % (self.tab_length - 1))
# Detect items on secondary lines. they can be of either list type.
self.CHILD_RE = re.compile(r'^[ ]{0,%d}((\d+\.)|[*+-])[ ]+(.*)' %
@@ -341,28 +353,28 @@ class OListProcessor(BlockProcessor):
self.INDENT_RE = re.compile(r'^[ ]{%d,%d}((\d+\.)|[*+-])[ ]+.*' %
(self.tab_length, self.tab_length * 2 - 1))
- def test(self, parent, block):
+ def test(self, parent: etree.Element, block: str) -> bool:
return bool(self.RE.match(block))
- def run(self, parent, blocks):
- # Check fr multiple items in one block.
+ def run(self, parent: etree.Element, blocks: list[str]) -> None:
+ # Check for multiple items in one block.
items = self.get_items(blocks.pop(0))
sibling = self.lastChild(parent)
if sibling is not None and sibling.tag in self.SIBLING_TAGS:
# Previous block was a list item, so set that as parent
lst = sibling
- # make sure previous item is in a p- if the item has text,
- # then it isn't in a p
+ # make sure previous item is in a `p` - if the item has text,
+ # then it isn't in a `p`
if lst[-1].text:
# since it's possible there are other children for this
- # sibling, we can't just SubElement the p, we need to
+ # sibling, we can't just `SubElement` the `p`, we need to
# insert it as the first item.
p = etree.Element('p')
p.text = lst[-1].text
lst[-1].text = ''
lst[-1].insert(0, p)
- # if the last item has a tail, then the tail needs to be put in a p
+ # if the last item has a tail, then the tail needs to be put in a `p`
# likely only when a header is not followed by a blank line
lch = self.lastChild(lst[-1])
if lch is not None and lch.tail:
@@ -370,7 +382,7 @@ class OListProcessor(BlockProcessor):
p.text = lch.tail.lstrip()
lch.tail = ''
- # parse first block differently as it gets wrapped in a p.
+ # parse first block differently as it gets wrapped in a `p`.
li = etree.SubElement(lst, 'li')
self.parser.state.set('looselist')
firstitem = items.pop(0)
@@ -379,9 +391,9 @@ class OListProcessor(BlockProcessor):
elif parent.tag in ['ol', 'ul']:
# this catches the edge case of a multi-item indented list whose
# first item is in a blank parent-list item:
- # * * subitem1
- # * subitem2
- # see also ListIndentProcessor
+ # * * subitem1
+ # * subitem2
+ # see also `ListIndentProcessor`
lst = parent
else:
# This is a new list so create parent with appropriate tag.
@@ -398,12 +410,12 @@ class OListProcessor(BlockProcessor):
# Item is indented. Parse with last item as parent
self.parser.parseBlocks(lst[-1], [item])
else:
- # New item. Create li and parse with it as parent
+ # New item. Create `li` and parse with it as parent
li = etree.SubElement(lst, 'li')
self.parser.parseBlocks(li, [item])
self.parser.state.reset()
- def get_items(self, block):
+ def get_items(self, block: str) -> list[str]:
""" Break a block into list items. """
items = []
for line in block.split('\n'):
@@ -433,11 +445,12 @@ class OListProcessor(BlockProcessor):
class UListProcessor(OListProcessor):
""" Process unordered list blocks. """
- TAG = 'ul'
+ TAG: str = 'ul'
+ """ The tag used for the wrapping element. """
- def __init__(self, parser):
+ def __init__(self, parser: BlockParser):
super().__init__(parser)
- # Detect an item (``1. item``). ``group(1)`` contains contents of item.
+ # Detect an item (`1. item`). `group(1)` contains contents of item.
self.RE = re.compile(r'^[ ]{0,%d}[*+-][ ]+(.*)' % (self.tab_length - 1))
@@ -447,10 +460,10 @@ class HashHeaderProcessor(BlockProcessor):
# Detect a header at start of any line in block
RE = re.compile(r'(?:^|\n)(?P<level>#{1,6})(?P<header>(?:\\.|[^\\])*?)#*(?:\n|$)')
- def test(self, parent, block):
+ def test(self, parent: etree.Element, block: str) -> bool:
return bool(self.RE.search(block))
- def run(self, parent, blocks):
+ def run(self, parent: etree.Element, blocks: list[str]) -> None:
block = blocks.pop(0)
m = self.RE.search(block)
if m:
@@ -478,12 +491,12 @@ class SetextHeaderProcessor(BlockProcessor):
# Detect Setext-style header. Must be first 2 lines of block.
RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE)
- def test(self, parent, block):
+ def test(self, parent: etree.Element, block: str) -> bool:
return bool(self.RE.match(block))
- def run(self, parent, blocks):
+ def run(self, parent: etree.Element, blocks: list[str]) -> None:
lines = blocks.pop(0).split('\n')
- # Determine level. ``=`` is 1 and ``-`` is 2.
+ # Determine level. `=` is 1 and `-` is 2.
if lines[1].startswith('='):
level = 1
else:
@@ -498,13 +511,13 @@ class SetextHeaderProcessor(BlockProcessor):
class HRProcessor(BlockProcessor):
""" Process Horizontal Rules. """
- # Python's re module doesn't officially support atomic grouping. However you can fake it.
+ # Python's `re` module doesn't officially support atomic grouping. However you can fake it.
# See https://stackoverflow.com/a/13577411/866026
RE = r'^[ ]{0,3}(?=(?P<atomicgroup>(-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,}))(?P=atomicgroup)[ ]*$'
# Detect hr on any line of a block.
SEARCH_RE = re.compile(RE, re.MULTILINE)
- def test(self, parent, block):
+ def test(self, parent: etree.Element, block: str) -> bool:
m = self.SEARCH_RE.search(block)
if m:
# Save match object on class instance so we can use it later.
@@ -512,30 +525,30 @@ class HRProcessor(BlockProcessor):
return True
return False
- def run(self, parent, blocks):
+ def run(self, parent: etree.Element, blocks: list[str]) -> None:
block = blocks.pop(0)
match = self.match
- # Check for lines in block before hr.
+ # Check for lines in block before `hr`.
prelines = block[:match.start()].rstrip('\n')
if prelines:
- # Recursively parse lines before hr so they get parsed first.
+ # Recursively parse lines before `hr` so they get parsed first.
self.parser.parseBlocks(parent, [prelines])
# create hr
etree.SubElement(parent, 'hr')
- # check for lines in block after hr.
+ # check for lines in block after `hr`.
postlines = block[match.end():].lstrip('\n')
if postlines:
- # Add lines after hr to master blocks for later parsing.
+ # Add lines after `hr` to master blocks for later parsing.
blocks.insert(0, postlines)
class EmptyBlockProcessor(BlockProcessor):
""" Process blocks that are empty or start with an empty line. """
- def test(self, parent, block):
+ def test(self, parent: etree.Element, block: str) -> bool:
return not block or block.startswith('\n')
- def run(self, parent, blocks):
+ def run(self, parent: etree.Element, blocks: list[str]) -> None:
block = blocks.pop(0)
filler = '\n\n'
if block:
@@ -550,7 +563,7 @@ class EmptyBlockProcessor(BlockProcessor):
sibling = self.lastChild(parent)
if (sibling is not None and sibling.tag == 'pre' and
len(sibling) and sibling[0].tag == 'code'):
- # Last block is a codeblock. Append to preserve whitespace.
+ # Last block is a code block. Append to preserve whitespace.
sibling[0].text = util.AtomicString(
'{}{}'.format(sibling[0].text, filler)
)
@@ -562,10 +575,10 @@ class ReferenceProcessor(BlockProcessor):
r'^[ ]{0,3}\[([^\[\]]*)\]:[ ]*\n?[ ]*([^\s]+)[ ]*(?:\n[ ]*)?((["\'])(.*)\4[ ]*|\((.*)\)[ ]*)?$', re.MULTILINE
)
- def test(self, parent, block):
+ def test(self, parent: etree.Element, block: str) -> bool:
return True
- def run(self, parent, blocks):
+ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
block = blocks.pop(0)
m = self.RE.search(block)
if m:
@@ -588,10 +601,10 @@ class ReferenceProcessor(BlockProcessor):
class ParagraphProcessor(BlockProcessor):
""" Process Paragraph blocks. """
- def test(self, parent, block):
+ def test(self, parent: etree.Element, block: str) -> bool:
return True
- def run(self, parent, blocks):
+ def run(self, parent: etree.Element, blocks: list[str]) -> None:
block = blocks.pop(0)
if block.strip():
# Not a blank block. Add to parent, otherwise throw it away.
@@ -606,7 +619,7 @@ class ParagraphProcessor(BlockProcessor):
# Line 2 of list item - not part of header.
sibling = self.lastChild(parent)
if sibling is not None:
- # Insetrt after sibling.
+ # Insert after sibling.
if sibling.tail:
sibling.tail = '{}\n{}'.format(sibling.tail, block)
else:
diff --git a/libs/markdown/core.py b/libs/markdown/core.py
index f6a171c11..6c7a21be9 100644
--- a/libs/markdown/core.py
+++ b/libs/markdown/core.py
@@ -1,28 +1,29 @@
-"""
-Python Markdown
+# Python Markdown
-A Python implementation of John Gruber's Markdown.
+# A Python implementation of John Gruber's Markdown.
-Documentation: https://python-markdown.github.io/
-GitHub: https://github.com/Python-Markdown/markdown/
-PyPI: https://pypi.org/project/Markdown/
+# Documentation: https://python-markdown.github.io/
+# GitHub: https://github.com/Python-Markdown/markdown/
+# PyPI: https://pypi.org/project/Markdown/
-Started by Manfred Stienstra (http://www.dwerg.net/).
-Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
-Currently maintained by Waylan Limberg (https://github.com/waylan),
-Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+# Started by Manfred Stienstra (http://www.dwerg.net/).
+# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+# Currently maintained by Waylan Limberg (https://github.com/waylan),
+# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
-Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
-Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
-Copyright 2004 Manfred Stienstra (the original version)
+# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
-License: BSD (see LICENSE.md for details).
-"""
+# License: BSD (see LICENSE.md for details).
+
+from __future__ import annotations
import codecs
import sys
import logging
import importlib
+from typing import TYPE_CHECKING, Any, BinaryIO, Callable, ClassVar, Mapping, Sequence
from . import util
from .preprocessors import build_preprocessors
from .blockprocessors import build_block_parser
@@ -31,6 +32,10 @@ from .inlinepatterns import build_inlinepatterns
from .postprocessors import build_postprocessors
from .extensions import Extension
from .serializers import to_html_string, to_xhtml_string
+from .util import BLOCK_LEVEL_ELEMENTS
+
+if TYPE_CHECKING: # pragma: no cover
+ from xml.etree.ElementTree import Element
__all__ = ['Markdown', 'markdown', 'markdownFromFile']
@@ -39,67 +44,111 @@ logger = logging.getLogger('MARKDOWN')
class Markdown:
- """Convert Markdown to HTML."""
+ """
+ A parser which converts Markdown to HTML.
+
+ Attributes:
+ Markdown.tab_length (int): The number of spaces which correspond to a single tab. Default: `4`.
+ Markdown.ESCAPED_CHARS (list[str]): List of characters which get the backslash escape treatment.
+ Markdown.block_level_elements (list[str]): List of HTML tags which get treated as block-level elements.
+ See [`markdown.util.BLOCK_LEVEL_ELEMENTS`][] for the full list of elements.
+ Markdown.registeredExtensions (list[Extension]): List of extensions which have called
+ [`registerExtension`][markdown.Markdown.registerExtension] during setup.
+ Markdown.doc_tag (str): Element used to wrap document. Default: `div`.
+ Markdown.stripTopLevelTags (bool): Indicates whether the `doc_tag` should be removed. Default: `True`.
+ Markdown.references (dict[str, tuple[str, str]]): A mapping of link references found in a parsed document
+ where the key is the reference name and the value is a tuple of the URL and title.
+ Markdown.htmlStash (util.HtmlStash): The instance of the `HtmlStash` used by an instance of this class.
+ Markdown.output_formats (dict[str, Callable[xml.etree.ElementTree.Element]]): A mapping of known output
+ formats by name and their respective serializers. Each serializer must be a callable which accepts an
+ [`Element`][xml.etree.ElementTree.Element] and returns a `str`.
+ Markdown.output_format (str): The output format set by
+ [`set_output_format`][markdown.Markdown.set_output_format].
+ Markdown.serializer (Callable[xml.etree.ElementTree.Element]): The serializer set by
+ [`set_output_format`][markdown.Markdown.set_output_format].
+ Markdown.preprocessors (util.Registry): A collection of [`preprocessors`][markdown.preprocessors].
+ Markdown.parser (blockparser.BlockParser): A collection of [`blockprocessors`][markdown.blockprocessors].
+ Markdown.inlinePatterns (util.Registry): A collection of [`inlinepatterns`][markdown.inlinepatterns].
+ Markdown.treeprocessors (util.Registry): A collection of [`treeprocessors`][markdown.treeprocessors].
+ Markdown.postprocessors (util.Registry): A collection of [`postprocessors`][markdown.postprocessors].
+
+ """
doc_tag = "div" # Element used to wrap document - later removed
- output_formats = {
+ output_formats: ClassVar[dict[str, Callable[[Element], str]]] = {
'html': to_html_string,
'xhtml': to_xhtml_string,
}
+ """
+ A mapping of known output formats by name and their respective serializers. Each serializer must be a
+ callable which accepts an [`Element`][xml.etree.ElementTree.Element] and returns a `str`.
+ """
def __init__(self, **kwargs):
"""
Creates a new Markdown instance.
- Keyword arguments:
+ Keyword Arguments:
+ extensions (list[Extension | str]): A list of extensions.
- * extensions: A list of extensions.
- If an item is an instance of a subclass of `markdown.extension.Extension`, the instance will be used
- as-is. If an item is of type string, first an entry point will be loaded. If that fails, the string is
- assumed to use Python dot notation (`path.to.module:ClassName`) to load a markdown.Extension subclass. If
- no class is specified, then a `makeExtension` function is called within the specified module.
- * extension_configs: Configuration settings for extensions.
- * output_format: Format of output. Supported formats are:
- * "xhtml": Outputs XHTML style tags. Default.
- * "html": Outputs HTML style tags.
- * tab_length: Length of tabs in the source. Default: 4
+ If an item is an instance of a subclass of [`markdown.extensions.Extension`][],
+ the instance will be used as-is. If an item is of type `str`, it is passed
+ to [`build_extension`][markdown.Markdown.build_extension] with its corresponding
+ `extension_configs` and the returned instance of [`markdown.extensions.Extension`][]
+ is used.
+ extension_configs (dict[str, dict[str, Any]]): Configuration settings for extensions.
+ output_format (str): Format of output. Supported formats are:
+
+ * `xhtml`: Outputs XHTML style tags. Default.
+ * `html`: Outputs HTML style tags.
+ tab_length (int): Length of tabs in the source. Default: `4`
"""
- self.tab_length = kwargs.get('tab_length', 4)
-
- self.ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
- '(', ')', '>', '#', '+', '-', '.', '!']
-
- self.block_level_elements = [
- # Elements which are invalid to wrap in a `<p>` tag.
- # See https://w3c.github.io/html/grouping-content.html#the-p-element
- 'address', 'article', 'aside', 'blockquote', 'details', 'div', 'dl',
- 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3',
- 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'main', 'menu', 'nav', 'ol',
- 'p', 'pre', 'section', 'table', 'ul',
- # Other elements which Markdown should not be mucking up the contents of.
- 'canvas', 'colgroup', 'dd', 'body', 'dt', 'group', 'iframe', 'li', 'legend',
- 'math', 'map', 'noscript', 'output', 'object', 'option', 'progress', 'script',
- 'style', 'summary', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'video'
+ self.tab_length: int = kwargs.get('tab_length', 4)
+
+ self.ESCAPED_CHARS: list[str] = [
+ '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!'
]
+ """ List of characters which get the backslash escape treatment. """
- self.registeredExtensions = []
- self.docType = ""
- self.stripTopLevelTags = True
+ self.block_level_elements: list[str] = BLOCK_LEVEL_ELEMENTS.copy()
+
+ self.registeredExtensions: list[Extension] = []
+ self.docType = "" # TODO: Maybe delete this. It does not appear to be used anymore.
+ self.stripTopLevelTags: bool = True
self.build_parser()
- self.references = {}
- self.htmlStash = util.HtmlStash()
+ self.references: dict[str, tuple[str, str]] = {}
+ self.htmlStash: util.HtmlStash = util.HtmlStash()
self.registerExtensions(extensions=kwargs.get('extensions', []),
configs=kwargs.get('extension_configs', {}))
self.set_output_format(kwargs.get('output_format', 'xhtml'))
self.reset()
- def build_parser(self):
- """ Build the parser from the various parts. """
+ def build_parser(self) -> Markdown:
+ """
+ Build the parser from the various parts.
+
+ Assigns a value to each of the following attributes on the class instance:
+
+ * **`Markdown.preprocessors`** ([`Registry`][markdown.util.Registry]) -- A collection of
+ [`preprocessors`][markdown.preprocessors].
+ * **`Markdown.parser`** ([`BlockParser`][markdown.blockparser.BlockParser]) -- A collection of
+ [`blockprocessors`][markdown.blockprocessors].
+ * **`Markdown.inlinePatterns`** ([`Registry`][markdown.util.Registry]) -- A collection of
+ [`inlinepatterns`][markdown.inlinepatterns].
+ * **`Markdown.treeprocessors`** ([`Registry`][markdown.util.Registry]) -- A collection of
+ [`treeprocessors`][markdown.treeprocessors].
+ * **`Markdown.postprocessors`** ([`Registry`][markdown.util.Registry]) -- A collection of
+ [`postprocessors`][markdown.postprocessors].
+
+ This method could be redefined in a subclass to build a custom parser which is made up of a different
+ combination of processors and patterns.
+
+ """
self.preprocessors = build_preprocessors(self)
self.parser = build_block_parser(self)
self.inlinePatterns = build_inlinepatterns(self)
@@ -107,15 +156,22 @@ class Markdown:
self.postprocessors = build_postprocessors(self)
return self
- def registerExtensions(self, extensions, configs):
+ def registerExtensions(
+ self,
+ extensions: Sequence[Extension | str],
+ configs: Mapping[str, dict[str, Any]]
+ ) -> Markdown:
"""
- Register extensions with this instance of Markdown.
+ Load a list of extensions into an instance of the `Markdown` class.
- Keyword arguments:
+ Arguments:
+ extensions (list[Extension | str]): A list of extensions.
- * extensions: A list of extensions, which can either
- be strings or objects.
- * configs: A dictionary mapping extension names to config options.
+ If an item is an instance of a subclass of [`markdown.extensions.Extension`][],
+ the instance will be used as-is. If an item is of type `str`, it is passed
+ to [`build_extension`][markdown.Markdown.build_extension] with its corresponding `configs` and the
+ returned instance of [`markdown.extensions.Extension`][] is used.
+ configs (dict[str, dict[str, Any]]): Configuration settings for extensions.
"""
for ext in extensions:
@@ -136,17 +192,24 @@ class Markdown:
)
return self
- def build_extension(self, ext_name, configs):
+ def build_extension(self, ext_name: str, configs: Mapping[str, Any]) -> Extension:
"""
- Build extension from a string name, then return an instance.
+ Build extension from a string name, then return an instance using the given `configs`.
+
+ Arguments:
+ ext_name: Name of extension as a string.
+ configs: Configuration settings for extension.
+
+ Returns:
+ An instance of the extension with the given configuration settings.
First attempt to load an entry point. The string name must be registered as an entry point in the
- `markdown.extensions` group which points to a subclass of the `markdown.extensions.Extension` class.
+ `markdown.extensions` group which points to a subclass of the [`markdown.extensions.Extension`][] class.
If multiple distributions have registered the same name, the first one found is returned.
If no entry point is found, assume dot notation (`path.to.module:ClassName`). Load the specified class and
return an instance. If no class is specified, import the module and call a `makeExtension` function and return
- the Extension instance returned by that function.
+ the [`markdown.extensions.Extension`][] instance returned by that function.
"""
configs = dict(configs)
@@ -172,7 +235,7 @@ class Markdown:
# Load given class name from module.
return getattr(module, class_name)(**configs)
else:
- # Expect makeExtension() function to return a class.
+ # Expect `makeExtension()` function to return a class.
try:
return module.makeExtension(**configs)
except AttributeError as e:
@@ -182,14 +245,27 @@ class Markdown:
e.args = (message,) + e.args[1:]
raise
- def registerExtension(self, extension):
- """ This gets called by the extension """
+ def registerExtension(self, extension: Extension) -> Markdown:
+ """
+ Register an extension as having a resettable state.
+
+ Arguments:
+ extension: An instance of the extension to register.
+
+ This should get called once by an extension during setup. A "registered" extension's
+ `reset` method is called by [`Markdown.reset()`][markdown.Markdown.reset]. Not all extensions have or need a
+ resettable state, and so it should not be assumed that all extensions are "registered."
+
+ """
self.registeredExtensions.append(extension)
return self
- def reset(self):
+ def reset(self) -> Markdown:
"""
- Resets all state variables so that we can start with a new text.
+ Resets all state variables to prepare the parser instance for new input.
+
+ Called once upon creation of a class instance. Should be called manually between calls
+ to [`Markdown.convert`][markdown.Markdown.convert].
"""
self.htmlStash.reset()
self.references.clear()
@@ -200,9 +276,15 @@ class Markdown:
return self
- def set_output_format(self, format):
- """ Set the output format for the class instance. """
- self.output_format = format.lower().rstrip('145') # ignore num
+ def set_output_format(self, format: str) -> Markdown:
+ """
+ Set the output format for the class instance.
+
+ Arguments:
+ format: Must be a known value in `Markdown.output_formats`.
+
+ """
+ self.output_format = format.lower().rstrip('145') # ignore number
try:
self.serializer = self.output_formats[self.output_format]
except KeyError as e:
@@ -215,44 +297,55 @@ class Markdown:
raise
return self
- def is_block_level(self, tag):
- """Check if the tag is a block level HTML tag."""
+ # Note: the `tag` argument is type annotated `Any` as ElementTree uses many various objects as tags.
+ # As there is no standardization in ElementTree, the type of a given tag is unpredictable.
+ def is_block_level(self, tag: Any) -> bool:
+ """
+ Check if the given `tag` is a block level HTML tag.
+
+ Returns `True` for any string listed in `Markdown.block_level_elements`. A `tag` which is
+ not a string always returns `False`.
+
+ """
if isinstance(tag, str):
return tag.lower().rstrip('/') in self.block_level_elements
# Some ElementTree tags are not strings, so return False.
return False
- def convert(self, source):
+ def convert(self, source: str) -> str:
"""
- Convert markdown to serialized XHTML or HTML.
+ Convert a Markdown string to a string in the specified output format.
- Keyword arguments:
+ Arguments:
+ source: Markdown formatted text as Unicode or ASCII string.
- * source: Source text as a Unicode string.
+ Returns:
+ A string in the specified output format.
- Markdown processing takes place in five steps:
+ Markdown parsing takes place in five steps:
- 1. A bunch of "preprocessors" munge the input text.
- 2. BlockParser() parses the high-level structural elements of the
- pre-processed text into an ElementTree.
- 3. A bunch of "treeprocessors" are run against the ElementTree. One
- such treeprocessor runs InlinePatterns against the ElementTree,
- detecting inline markup.
- 4. Some post-processors are run against the text after the ElementTree
- has been serialized into text.
- 5. The output is written to a string.
+ 1. A bunch of [`preprocessors`][markdown.preprocessors] munge the input text.
+ 2. A [`BlockParser`][markdown.blockparser.BlockParser] parses the high-level structural elements of the
+ pre-processed text into an [`ElementTree`][xml.etree.ElementTree.ElementTree] object.
+ 3. A bunch of [`treeprocessors`][markdown.treeprocessors] are run against the
+ [`ElementTree`][xml.etree.ElementTree.ElementTree] object. One such `treeprocessor`
+ ([`markdown.treeprocessors.InlineProcessor`][]) runs [`inlinepatterns`][markdown.inlinepatterns]
+ against the [`ElementTree`][xml.etree.ElementTree.ElementTree] object, parsing inline markup.
+ 4. Some [`postprocessors`][markdown.postprocessors] are run against the text after the
+ [`ElementTree`][xml.etree.ElementTree.ElementTree] object has been serialized into text.
+ 5. The output is returned as a string.
"""
- # Fixup the source text
+ # Fix up the source text
if not source.strip():
- return '' # a blank unicode string
+ return '' # a blank Unicode string
try:
source = str(source)
except UnicodeDecodeError as e: # pragma: no cover
- # Customise error message while maintaining original trackback
- e.reason += '. -- Note: Markdown only accepts unicode input!'
+ # Customize error message while maintaining original traceback
+ e.reason += '. -- Note: Markdown only accepts Unicode input!'
raise
# Split into lines and run the line preprocessors.
@@ -292,24 +385,30 @@ class Markdown:
return output.strip()
- def convertFile(self, input=None, output=None, encoding=None):
- """Converts a markdown file and returns the HTML as a unicode string.
+ def convertFile(
+ self,
+ input: str | BinaryIO | None = None,
+ output: str | BinaryIO | None = None,
+ encoding: str | None = None,
+ ) -> Markdown:
+ """
+ Converts a Markdown file and writes the resulting HTML to a file or stream.
- Decodes the file using the provided encoding (defaults to utf-8),
- passes the file content to markdown, and outputs the html to either
+ Decodes the file using the provided encoding (defaults to `utf-8`),
+ passes the file content to markdown, and outputs the HTML to either
the provided stream or the file with provided name, using the same
- encoding as the source file. The 'xmlcharrefreplace' error handler is
- used when encoding the output.
+ encoding as the source file. The
+ [`xmlcharrefreplace`](https://docs.python.org/3/library/codecs.html#error-handlers)
+ error handler is used when encoding the output.
- **Note:** This is the only place that decoding and encoding of unicode
- takes place in Python-Markdown. (All other code is unicode-in /
- unicode-out.)
+ **Note:** This is the only place that decoding and encoding of Unicode
+ takes place in Python-Markdown. (All other code is Unicode-in /
+ Unicode-out.)
- Keyword arguments:
-
- * input: File object or path. Reads from stdin if `None`.
- * output: File object or path. Writes to stdout if `None`.
- * encoding: Encoding of input and output files. Defaults to utf-8.
+ Arguments:
+ input: File object or path. Reads from `stdin` if `None`.
+ output: File object or path. Writes to `stdout` if `None`.
+ encoding: Encoding of input and output files. Defaults to `utf-8`.
"""
@@ -325,8 +424,6 @@ class Markdown:
input_file.close()
else:
text = sys.stdin.read()
- if not isinstance(text, str): # pragma: no cover
- text = text.decode(encoding)
text = text.lstrip('\ufeff') # remove the byte-order mark
@@ -349,12 +446,7 @@ class Markdown:
else:
# Encode manually and write bytes to stdout.
html = html.encode(encoding, "xmlcharrefreplace")
- try:
- # Write bytes directly to buffer (Python 3).
- sys.stdout.buffer.write(html)
- except AttributeError: # pragma: no cover
- # Probably Python 2, which works with bytes by default.
- sys.stdout.write(html)
+ sys.stdout.buffer.write(html)
return self
@@ -363,42 +455,46 @@ class Markdown:
EXPORTED FUNCTIONS
=============================================================================
-Those are the two functions we really mean to export: markdown() and
-markdownFromFile().
+Those are the two functions we really mean to export: `markdown()` and
+`markdownFromFile()`.
"""
-def markdown(text, **kwargs):
- """Convert a markdown string to HTML and return HTML as a unicode string.
+def markdown(text: str, **kwargs: Any) -> str:
+ """
+ Convert a markdown string to HTML and return HTML as a Unicode string.
- This is a shortcut function for `Markdown` class to cover the most
- basic use case. It initializes an instance of Markdown, loads the
+ This is a shortcut function for [`Markdown`][markdown.Markdown] class to cover the most
+ basic use case. It initializes an instance of [`Markdown`][markdown.Markdown], loads the
necessary extensions and runs the parser on the given text.
- Keyword arguments:
+ Arguments:
+ text: Markdown formatted text as Unicode or ASCII string.
- * text: Markdown formatted text as Unicode or ASCII string.
- * Any arguments accepted by the Markdown class.
+ Keyword arguments:
+ **kwargs: Any arguments accepted by the Markdown class.
- Returns: An HTML document as a string.
+ Returns:
+ A string in the specified output format.
"""
md = Markdown(**kwargs)
return md.convert(text)
-def markdownFromFile(**kwargs):
- """Read markdown code from a file and write it to a file or a stream.
+def markdownFromFile(**kwargs: Any):
+ """
+ Read Markdown text from a file and write output to a file or a stream.
- This is a shortcut function which initializes an instance of Markdown,
- and calls the convertFile method rather than convert.
+ This is a shortcut function which initializes an instance of [`Markdown`][markdown.Markdown],
+ and calls the [`convertFile`][markdown.Markdown.convertFile] method rather than
+ [`convert`][markdown.Markdown.convert].
Keyword arguments:
-
- * input: a file name or readable object.
- * output: a file name or writable object.
- * encoding: Encoding of input and output.
- * Any arguments accepted by the Markdown class.
+ input (str | BinaryIO): A file name or readable object.
+ output (str | BinaryIO): A file name or writable object.
+ encoding (str): Encoding of input and output.
+ **kwargs: Any arguments accepted by the `Markdown` class.
"""
md = Markdown(**kwargs)
diff --git a/libs/markdown/extensions/__init__.py b/libs/markdown/extensions/__init__.py
index 2d8d72a1e..a5ec07b27 100644
--- a/libs/markdown/extensions/__init__.py
+++ b/libs/markdown/extensions/__init__.py
@@ -1,83 +1,142 @@
-"""
-Python Markdown
+# Python Markdown
+
+# A Python implementation of John Gruber's Markdown.
-A Python implementation of John Gruber's Markdown.
+# Documentation: https://python-markdown.github.io/
+# GitHub: https://github.com/Python-Markdown/markdown/
+# PyPI: https://pypi.org/project/Markdown/
-Documentation: https://python-markdown.github.io/
-GitHub: https://github.com/Python-Markdown/markdown/
-PyPI: https://pypi.org/project/Markdown/
+# Started by Manfred Stienstra (http://www.dwerg.net/).
+# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+# Currently maintained by Waylan Limberg (https://github.com/waylan),
+# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
-Started by Manfred Stienstra (http://www.dwerg.net/).
-Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
-Currently maintained by Waylan Limberg (https://github.com/waylan),
-Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
-Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
-Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
-Copyright 2004 Manfred Stienstra (the original version)
+# License: BSD (see LICENSE.md for details).
-License: BSD (see LICENSE.md for details).
"""
+Markdown accepts an [`Extension`][markdown.extensions.Extension] instance for each extension. Therefore, each extension
+must define a class that extends [`Extension`][markdown.extensions.Extension] and overrides the
+[`extendMarkdown`][markdown.extensions.Extension.extendMarkdown] method. Within this class one can manage configuration
+options for their extension and attach the various processors and patterns which make up an extension to the
+[`Markdown`][markdown.Markdown] instance.
+"""
+
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any, Iterable, Mapping
from ..util import parseBoolValue
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
+
class Extension:
""" Base class for extensions to subclass. """
- # Default config -- to be overridden by a subclass
- # Must be of the following format:
- # {
- # 'key': ['value', 'description']
- # }
- # Note that Extension.setConfig will raise a KeyError
- # if a default is not set here.
- config = {}
+ config: Mapping[str, list] = {}
+ """
+ Default configuration for an extension.
+
+ This attribute is to be defined in a subclass and must be of the following format:
+
+ ``` python
+ config = {
+ 'key': ['value', 'description']
+ }
+ ```
+
+ Note that [`setConfig`][markdown.extensions.Extension.setConfig] will raise a [`KeyError`][]
+ if a default is not set for each option.
+ """
def __init__(self, **kwargs):
""" Initiate Extension and set up configs. """
self.setConfigs(kwargs)
- def getConfig(self, key, default=''):
- """ Return a setting for the given key or an empty string. """
+ def getConfig(self, key: str, default: Any = '') -> Any:
+ """
+ Return a single configuration option value.
+
+ Arguments:
+ key: The configuration option name.
+ default: Default value to return if key is not set.
+
+ Returns:
+ Value of stored configuration option.
+ """
if key in self.config:
return self.config[key][0]
else:
return default
- def getConfigs(self):
- """ Return all configs settings as a dict. """
+ def getConfigs(self) -> dict[str, Any]:
+ """
+ Return all configuration options.
+
+ Returns:
+ All configuration options.
+ """
return {key: self.getConfig(key) for key in self.config.keys()}
- def getConfigInfo(self):
- """ Return all config descriptions as a list of tuples. """
+ def getConfigInfo(self) -> list[tuple[str, str]]:
+ """
+ Return descriptions of all configuration options.
+
+ Returns:
+ All descriptions of configuration options.
+ """
return [(key, self.config[key][1]) for key in self.config.keys()]
- def setConfig(self, key, value):
- """ Set a config setting for `key` with the given `value`. """
+ def setConfig(self, key: str, value: Any) -> None:
+ """
+ Set a configuration option.
+
+ If the corresponding default value set in [`config`][markdown.extensions.Extension.config]
+ is a `bool` value or `None`, then `value` is passed through
+ [`parseBoolValue`][markdown.util.parseBoolValue] before being stored.
+
+ Arguments:
+ key: Name of configuration option to set.
+ value: Value to assign to option.
+
+ Raises:
+ KeyError: If `key` is not known.
+ """
if isinstance(self.config[key][0], bool):
value = parseBoolValue(value)
if self.config[key][0] is None:
value = parseBoolValue(value, preserve_none=True)
self.config[key][0] = value
- def setConfigs(self, items):
- """ Set multiple config settings given a dict or list of tuples. """
+ def setConfigs(self, items: Mapping[str, Any] | Iterable[tuple[str, Any]]) -> None:
+ """
+ Loop through a collection of configuration options, passing each to
+ [`setConfig`][markdown.extensions.Extension.setConfig].
+
+ Arguments:
+ items: Collection of configuration options.
+
+ Raises:
+ KeyError: for any unknown key.
+ """
if hasattr(items, 'items'):
# it's a dict
items = items.items()
for key, value in items:
self.setConfig(key, value)
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
"""
Add the various processors and patterns to the Markdown Instance.
This method must be overridden by every extension.
- Keyword arguments:
-
- * md: The Markdown instance.
+ Arguments:
+ md: The Markdown instance.
"""
raise NotImplementedError(
diff --git a/libs/markdown/extensions/abbr.py b/libs/markdown/extensions/abbr.py
index 9879314f5..738368afe 100644
--- a/libs/markdown/extensions/abbr.py
+++ b/libs/markdown/extensions/abbr.py
@@ -1,20 +1,26 @@
-'''
-Abbreviation Extension for Python-Markdown
-==========================================
+# Abbreviation Extension for Python-Markdown
+# ==========================================
-This extension adds abbreviation handling to Python-Markdown.
+# This extension adds abbreviation handling to Python-Markdown.
+
+# See https://Python-Markdown.github.io/extensions/abbreviations
+# for documentation.
-See <https://Python-Markdown.github.io/extensions/abbreviations>
-for documentation.
+# Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/)
+# and [Seemant Kulleen](http://www.kulleen.org/)
-Oringinal code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/) and
- [Seemant Kulleen](http://www.kulleen.org/)
+# All changes Copyright 2008-2014 The Python Markdown Project
-All changes Copyright 2008-2014 The Python Markdown Project
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
+
+"""
+This extension adds abbreviation handling to Python-Markdown.
-License: [BSD](https://opensource.org/licenses/bsd-license.php)
+See the [documentation](https://Python-Markdown.github.io/extensions/abbreviations)
+for details.
+"""
-'''
+from __future__ import annotations
from . import Extension
from ..blockprocessors import BlockProcessor
@@ -28,7 +34,7 @@ class AbbrExtension(Extension):
""" Abbreviation Extension for Python-Markdown. """
def extendMarkdown(self, md):
- """ Insert AbbrPreprocessor before ReferencePreprocessor. """
+ """ Insert `AbbrPreprocessor` before `ReferencePreprocessor`. """
md.parser.blockprocessors.register(AbbrPreprocessor(md.parser), 'abbr', 16)
@@ -37,15 +43,15 @@ class AbbrPreprocessor(BlockProcessor):
RE = re.compile(r'^[*]\[(?P<abbr>[^\]]*)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
- def test(self, parent, block):
+ def test(self, parent: etree.Element, block: str) -> bool:
return True
- def run(self, parent, blocks):
- '''
+ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
+ """
Find and remove all Abbreviation references from the text.
- Each reference is set as a new AbbrPattern in the markdown instance.
+ Each reference is set as a new `AbbrPattern` in the markdown instance.
- '''
+ """
block = blocks.pop(0)
m = self.RE.search(block)
if m:
@@ -65,8 +71,8 @@ class AbbrPreprocessor(BlockProcessor):
blocks.insert(0, block)
return False
- def _generate_pattern(self, text):
- '''
+ def _generate_pattern(self, text: str) -> str:
+ """
Given a string, returns a regex pattern to match that string.
'HTML' -> r'(?P<abbr>[H][T][M][L])'
@@ -74,7 +80,7 @@ class AbbrPreprocessor(BlockProcessor):
Note: we force each char as a literal match (in brackets) as we don't
know what they will be beforehand.
- '''
+ """
chars = list(text)
for i in range(len(chars)):
chars[i] = r'[%s]' % chars[i]
@@ -84,11 +90,11 @@ class AbbrPreprocessor(BlockProcessor):
class AbbrInlineProcessor(InlineProcessor):
""" Abbreviation inline pattern. """
- def __init__(self, pattern, title):
+ def __init__(self, pattern: str, title: str):
super().__init__(pattern)
self.title = title
- def handleMatch(self, m, data):
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]:
abbr = etree.Element('abbr')
abbr.text = AtomicString(m.group('abbr'))
abbr.set('title', self.title)
diff --git a/libs/markdown/extensions/admonition.py b/libs/markdown/extensions/admonition.py
index cb8d9015c..01c2316d2 100644
--- a/libs/markdown/extensions/admonition.py
+++ b/libs/markdown/extensions/admonition.py
@@ -1,26 +1,39 @@
-"""
-Admonition extension for Python-Markdown
-========================================
+# Admonition extension for Python-Markdown
+# ========================================
-Adds rST-style admonitions. Inspired by [rST][] feature with the same name.
+# Adds rST-style admonitions. Inspired by [rST][] feature with the same name.
+
+# [rST]: http://docutils.sourceforge.net/docs/ref/rst/directives.html#specific-admonitions
-[rST]: http://docutils.sourceforge.net/docs/ref/rst/directives.html#specific-admonitions # noqa
+# See https://Python-Markdown.github.io/extensions/admonition
+# for documentation.
-See <https://Python-Markdown.github.io/extensions/admonition>
-for documentation.
+# Original code Copyright [Tiago Serafim](https://www.tiagoserafim.com/).
-Original code Copyright [Tiago Serafim](https://www.tiagoserafim.com/).
+# All changes Copyright The Python Markdown Project
-All changes Copyright The Python Markdown Project
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
-License: [BSD](https://opensource.org/licenses/bsd-license.php)
"""
+Adds rST-style admonitions. Inspired by [rST][] feature with the same name.
+
+[rST]: http://docutils.sourceforge.net/docs/ref/rst/directives.html#specific-admonitions
+
+See the [documentation](https://Python-Markdown.github.io/extensions/admonition)
+for details.
+"""
+
+from __future__ import annotations
from . import Extension
from ..blockprocessors import BlockProcessor
import xml.etree.ElementTree as etree
import re
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import blockparser
class AdmonitionExtension(Extension):
@@ -40,15 +53,15 @@ class AdmonitionProcessor(BlockProcessor):
RE = re.compile(r'(?:^|\n)!!! ?([\w\-]+(?: +[\w\-]+)*)(?: +"(.*?)")? *(?:\n|$)')
RE_SPACES = re.compile(' +')
- def __init__(self, parser):
+ def __init__(self, parser: blockparser.BlockParser):
"""Initialization."""
super().__init__(parser)
- self.current_sibling = None
- self.content_indention = 0
+ self.current_sibling: etree.Element | None = None
+ self.content_indent = 0
- def parse_content(self, parent, block):
+ def parse_content(self, parent: etree.Element, block: str) -> tuple[etree.Element | None, str, str]:
"""Get sibling admonition.
Retrieve the appropriate sibling element. This can get tricky when
@@ -69,23 +82,23 @@ class AdmonitionProcessor(BlockProcessor):
sibling = self.lastChild(parent)
- if sibling is None or sibling.get('class', '').find(self.CLASSNAME) == -1:
+ if sibling is None or sibling.tag != 'div' or sibling.get('class', '').find(self.CLASSNAME) == -1:
sibling = None
else:
# If the last child is a list and the content is sufficiently indented
# to be under it, then the content's sibling is in the list.
last_child = self.lastChild(sibling)
indent = 0
- while last_child:
+ while last_child is not None:
if (
- sibling and block.startswith(' ' * self.tab_length * 2) and
- last_child and last_child.tag in ('ul', 'ol', 'dl')
+ sibling is not None and block.startswith(' ' * self.tab_length * 2) and
+ last_child is not None and last_child.tag in ('ul', 'ol', 'dl')
):
- # The expectation is that we'll find an <li> or <dt>.
+ # The expectation is that we'll find an `<li>` or `<dt>`.
# We should get its last child as well.
sibling = self.lastChild(last_child)
- last_child = self.lastChild(sibling) if sibling else None
+ last_child = self.lastChild(sibling) if sibling is not None else None
# Context has been lost at this point, so we must adjust the
# text's indentation level so it will be evaluated correctly
@@ -106,14 +119,14 @@ class AdmonitionProcessor(BlockProcessor):
return sibling, block, the_rest
- def test(self, parent, block):
+ def test(self, parent: etree.Element, block: str) -> bool:
if self.RE.search(block):
return True
else:
return self.parse_content(parent, block)[0] is not None
- def run(self, parent, blocks):
+ def run(self, parent: etree.Element, blocks: list[str]) -> None:
block = blocks.pop(0)
m = self.RE.search(block)
@@ -151,11 +164,11 @@ class AdmonitionProcessor(BlockProcessor):
# list for future processing.
blocks.insert(0, theRest)
- def get_class_and_title(self, match):
+ def get_class_and_title(self, match: re.Match[str]) -> tuple[str, str | None]:
klass, title = match.group(1).lower(), match.group(2)
klass = self.RE_SPACES.sub(' ', klass)
if title is None:
- # no title was provided, use the capitalized classname as title
+ # no title was provided, use the capitalized class name as title
# e.g.: `!!! note` will render
# `<p class="admonition-title">Note</p>`
title = klass.split(' ', 1)[0].capitalize()
diff --git a/libs/markdown/extensions/attr_list.py b/libs/markdown/extensions/attr_list.py
index 9a675519c..7ce3f9925 100644
--- a/libs/markdown/extensions/attr_list.py
+++ b/libs/markdown/extensions/attr_list.py
@@ -1,26 +1,38 @@
-"""
-Attribute List Extension for Python-Markdown
-============================================
+# Attribute List Extension for Python-Markdown
+# ============================================
-Adds attribute list syntax. Inspired by
-[maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
-feature of the same name.
+# Adds attribute list syntax. Inspired by
+# [Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
+# feature of the same name.
-See <https://Python-Markdown.github.io/extensions/attr_list>
-for documentation.
+# See https://Python-Markdown.github.io/extensions/attr_list
+# for documentation.
-Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).
+# Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).
-All changes Copyright 2011-2014 The Python Markdown Project
+# All changes Copyright 2011-2014 The Python Markdown Project
-License: [BSD](https://opensource.org/licenses/bsd-license.php)
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
"""
+ Adds attribute list syntax. Inspired by
+[Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
+feature of the same name.
+
+See the [documentation](https://Python-Markdown.github.io/extensions/attr_list)
+for details.
+"""
+
+from __future__ import annotations
+from typing import TYPE_CHECKING
from . import Extension
from ..treeprocessors import Treeprocessor
import re
+if TYPE_CHECKING: # pragma: no cover
+ from xml.etree.ElementTree import Element
+
def _handle_double_quote(s, t):
k, v = t.split('=', 1)
@@ -53,12 +65,12 @@ _scanner = re.Scanner([
])
-def get_attrs(str):
+def get_attrs(str: str) -> list[tuple[str, str]]:
""" Parse attribute list and return a list of attribute tuples. """
return _scanner.scan(str)[0]
-def isheader(elem):
+def isheader(elem: Element) -> bool:
return elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
@@ -74,36 +86,36 @@ class AttrListTreeprocessor(Treeprocessor):
r'\uf900-\ufdcf\ufdf0-\ufffd'
r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')
- def run(self, doc):
+ def run(self, doc: Element) -> None:
for elem in doc.iter():
if self.md.is_block_level(elem.tag):
- # Block level: check for attrs on last line of text
+ # Block level: check for `attrs` on last line of text
RE = self.BLOCK_RE
if isheader(elem) or elem.tag in ['dt', 'td', 'th']:
- # header, def-term, or table cell: check for attrs at end of element
+ # header, def-term, or table cell: check for attributes at end of element
RE = self.HEADER_RE
if len(elem) and elem.tag == 'li':
- # special case list items. children may include a ul or ol.
+ # special case list items. children may include a `ul` or `ol`.
pos = None
- # find the ul or ol position
+ # find the `ul` or `ol` position
for i, child in enumerate(elem):
if child.tag in ['ul', 'ol']:
pos = i
break
if pos is None and elem[-1].tail:
- # use tail of last child. no ul or ol.
+ # use tail of last child. no `ul` or `ol`.
m = RE.search(elem[-1].tail)
if m:
self.assign_attrs(elem, m.group(1))
elem[-1].tail = elem[-1].tail[:m.start()]
elif pos is not None and pos > 0 and elem[pos-1].tail:
- # use tail of last child before ul or ol
+ # use tail of last child before `ul` or `ol`
m = RE.search(elem[pos-1].tail)
if m:
self.assign_attrs(elem, m.group(1))
elem[pos-1].tail = elem[pos-1].tail[:m.start()]
elif elem.text:
- # use text. ul is first child.
+ # use text. `ul` is first child.
m = RE.search(elem.text)
if m:
self.assign_attrs(elem, m.group(1))
@@ -127,15 +139,15 @@ class AttrListTreeprocessor(Treeprocessor):
# clean up trailing #s
elem.text = elem.text.rstrip('#').rstrip()
else:
- # inline: check for attrs at start of tail
+ # inline: check for `attrs` at start of tail
if elem.tail:
m = self.INLINE_RE.match(elem.tail)
if m:
self.assign_attrs(elem, m.group(1))
elem.tail = elem.tail[m.end():]
- def assign_attrs(self, elem, attrs):
- """ Assign attrs to element. """
+ def assign_attrs(self, elem: Element, attrs: str) -> None:
+ """ Assign `attrs` to element. """
for k, v in get_attrs(attrs):
if k == '.':
# add to class
@@ -145,10 +157,10 @@ class AttrListTreeprocessor(Treeprocessor):
else:
elem.set('class', v)
else:
- # assign attr k with v
+ # assign attribute `k` with `v`
elem.set(self.sanitize_name(k), v)
- def sanitize_name(self, name):
+ def sanitize_name(self, name: str) -> str:
"""
Sanitize name as 'an XML Name, minus the ":"'.
See https://www.w3.org/TR/REC-xml-names/#NT-NCName
@@ -157,6 +169,7 @@ class AttrListTreeprocessor(Treeprocessor):
class AttrListExtension(Extension):
+ """ Attribute List extension for Python-Markdown """
def extendMarkdown(self, md):
md.treeprocessors.register(AttrListTreeprocessor(md), 'attr_list', 8)
md.registerExtension(self)
diff --git a/libs/markdown/extensions/codehilite.py b/libs/markdown/extensions/codehilite.py
index a54ba21c0..92e7d8f2b 100644
--- a/libs/markdown/extensions/codehilite.py
+++ b/libs/markdown/extensions/codehilite.py
@@ -1,23 +1,33 @@
-"""
-CodeHilite Extension for Python-Markdown
-========================================
+# CodeHilite Extension for Python-Markdown
+# ========================================
-Adds code/syntax highlighting to standard Python-Markdown code blocks.
+# Adds code/syntax highlighting to standard Python-Markdown code blocks.
-See <https://Python-Markdown.github.io/extensions/code_hilite>
-for documentation.
+# See https://Python-Markdown.github.io/extensions/code_hilite
+# for documentation.
-Original code Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/).
+# Original code Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/).
-All changes Copyright 2008-2014 The Python Markdown Project
+# All changes Copyright 2008-2014 The Python Markdown Project
-License: [BSD](https://opensource.org/licenses/bsd-license.php)
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
"""
+Adds code/syntax highlighting to standard Python-Markdown code blocks.
+
+See the [documentation](https://Python-Markdown.github.io/extensions/code_hilite)
+for details.
+"""
+
+from __future__ import annotations
from . import Extension
from ..treeprocessors import Treeprocessor
from ..util import parseBoolValue
+from typing import TYPE_CHECKING, Callable, Any
+
+if TYPE_CHECKING: # pragma: no cover
+ import xml.etree.ElementTree as etree
try: # pragma: no cover
from pygments import highlight
@@ -29,11 +39,11 @@ except ImportError: # pragma: no cover
pygments = False
-def parse_hl_lines(expr):
+def parse_hl_lines(expr: str) -> list[int]:
"""Support our syntax for emphasizing certain lines of code.
- expr should be like '1 2' to emphasize lines 1 and 2 of a code block.
- Returns a list of ints, the line numbers to emphasize.
+ `expr` should be like '1 2' to emphasize lines 1 and 2 of a code block.
+ Returns a list of integers, the line numbers to emphasize.
"""
if not expr:
return []
@@ -50,82 +60,84 @@ class CodeHilite:
Determine language of source code, and pass it on to the Pygments highlighter.
Usage:
- code = CodeHilite(src=some_code, lang='python')
- html = code.hilite()
-
- Arguments:
- * src: Source string or any object with a .readline attribute.
-
- * lang: String name of Pygments lexer to use for highlighting. Default: `None`.
-
- * guess_lang: Auto-detect which lexer to use. Ignored if `lang` is set to a valid
- value. Default: `True`.
-
- * use_pygments: Pass code to pygments for code highlighting. If `False`, the code is
- instead wrapped for highlighting by a JavaScript library. Default: `True`.
-
- * pygments_formatter: The name of a Pygments formatter or a formatter class used for
- highlighting the code blocks. Default: `html`.
- * linenums: An alias to Pygments `linenos` formatter option. Default: `None`.
+ ```python
+ code = CodeHilite(src=some_code, lang='python')
+ html = code.hilite()
+ ```
- * css_class: An alias to Pygments `cssclass` formatter option. Default: 'codehilite'.
-
- * lang_prefix: Prefix prepended to the language. Default: "language-".
+ Arguments:
+ src: Source string or any object with a `.readline` attribute.
+
+ Keyword arguments:
+ lang (str): String name of Pygments lexer to use for highlighting. Default: `None`.
+ guess_lang (bool): Auto-detect which lexer to use.
+ Ignored if `lang` is set to a valid value. Default: `True`.
+ use_pygments (bool): Pass code to Pygments for code highlighting. If `False`, the code is
+ instead wrapped for highlighting by a JavaScript library. Default: `True`.
+ pygments_formatter (str): The name of a Pygments formatter or a formatter class used for
+ highlighting the code blocks. Default: `html`.
+ linenums (bool): An alias to Pygments `linenos` formatter option. Default: `None`.
+ css_class (str): An alias to Pygments `cssclass` formatter option. Default: 'codehilite'.
+ lang_prefix (str): Prefix prepended to the language. Default: "language-".
Other Options:
+
Any other options are accepted and passed on to the lexer and formatter. Therefore,
valid options include any options which are accepted by the `html` formatter or
whichever lexer the code's language uses. Note that most lexers do not have any
options. However, a few have very useful options, such as PHP's `startinline` option.
Any invalid options are ignored without error.
- Formatter options: https://pygments.org/docs/formatters/#HtmlFormatter
- Lexer Options: https://pygments.org/docs/lexers/
+ * **Formatter options**: <https://pygments.org/docs/formatters/#HtmlFormatter>
+ * **Lexer Options**: <https://pygments.org/docs/lexers/>
Additionally, when Pygments is enabled, the code's language is passed to the
formatter as an extra option `lang_str`, whose value is `{lang_prefix}{lang}`.
- This option has no effect to the Pygments's builtin formatters.
+ This option has no effect on Pygments' builtin formatters.
Advanced Usage:
- code = CodeHilite(
- src = some_code,
- lang = 'php',
- startinline = True, # Lexer option. Snippet does not start with `<?php`.
- linenostart = 42, # Formatter option. Snippet starts on line 42.
- hl_lines = [45, 49, 50], # Formatter option. Highlight lines 45, 49, and 50.
- linenos = 'inline' # Formatter option. Avoid alignment problems.
- )
- html = code.hilite()
+
+ ```python
+ code = CodeHilite(
+ src = some_code,
+ lang = 'php',
+ startinline = True, # Lexer option. Snippet does not start with `<?php`.
+ linenostart = 42, # Formatter option. Snippet starts on line 42.
+ hl_lines = [45, 49, 50], # Formatter option. Highlight lines 45, 49, and 50.
+ linenos = 'inline' # Formatter option. Avoid alignment problems.
+ )
+ html = code.hilite()
+ ```
"""
- def __init__(self, src, **options):
+ def __init__(self, src: str, **options):
self.src = src
- self.lang = options.pop('lang', None)
- self.guess_lang = options.pop('guess_lang', True)
- self.use_pygments = options.pop('use_pygments', True)
- self.lang_prefix = options.pop('lang_prefix', 'language-')
- self.pygments_formatter = options.pop('pygments_formatter', 'html')
+ self.lang: str | None = options.pop('lang', None)
+ self.guess_lang: bool = options.pop('guess_lang', True)
+ self.use_pygments: bool = options.pop('use_pygments', True)
+ self.lang_prefix: str = options.pop('lang_prefix', 'language-')
+ self.pygments_formatter: str | Callable = options.pop('pygments_formatter', 'html')
if 'linenos' not in options:
options['linenos'] = options.pop('linenums', None)
if 'cssclass' not in options:
options['cssclass'] = options.pop('css_class', 'codehilite')
if 'wrapcode' not in options:
- # Override pygments default
+ # Override Pygments default
options['wrapcode'] = True
# Disallow use of `full` option
options['full'] = False
self.options = options
- def hilite(self, shebang=True):
+ def hilite(self, shebang: bool = True) -> str:
"""
- Pass code to the [Pygments](http://pygments.pocoo.org/) highliter with
- optional line numbers. The output should then be styled with css to
+ Pass code to the [Pygments](https://pygments.org/) highlighter with
+ optional line numbers. The output should then be styled with CSS to
your liking. No styles are applied by default - only styling hooks
- (i.e.: <span class="k">).
+ (i.e.: `<span class="k">`).
returns : A string of html.
@@ -160,7 +172,7 @@ class CodeHilite:
formatter = self.pygments_formatter(lang_str=lang_str, **self.options)
return highlight(self.src, lexer, formatter)
else:
- # just escape and build markup usable by JS highlighting libs
+ # just escape and build markup usable by JavaScript highlighting libraries
txt = self.src.replace('&', '&amp;')
txt = txt.replace('<', '&lt;')
txt = txt.replace('>', '&gt;')
@@ -179,17 +191,17 @@ class CodeHilite:
txt
)
- def _parseHeader(self):
+ def _parseHeader(self) -> None:
"""
Determines language of a code block from shebang line and whether the
- said line should be removed or left in place. If the sheband line
+ said line should be removed or left in place. If the shebang line
contains a path (even a single /) then it is assumed to be a real
shebang line and left alone. However, if no path is given
- (e.i.: #!python or :::python) then it is assumed to be a mock shebang
+ (i.e.: `#!python` or `:::python`) then it is assumed to be a mock shebang
for language identification of a code fragment and removed from the
code block prior to processing for code highlighting. When a mock
- shebang (e.i: #!python) is found, line numbering is turned on. When
- colons are found in place of a shebang (e.i.: :::python), line
+ shebang (i.e.: `#!python`) is found, line numbering is turned on. When
+ colons are found in place of a shebang (i.e.: `:::python`), line
numbering is left in the current state - off by default.
Also parses optional list of highlight lines, like:
@@ -241,7 +253,9 @@ class CodeHilite:
class HiliteTreeprocessor(Treeprocessor):
""" Highlight source code in code blocks. """
- def code_unescape(self, text):
+ config: dict[str, Any]
+
+ def code_unescape(self, text: str) -> str:
"""Unescape code."""
text = text.replace("&lt;", "<")
text = text.replace("&gt;", ">")
@@ -250,59 +264,62 @@ class HiliteTreeprocessor(Treeprocessor):
text = text.replace("&amp;", "&")
return text
- def run(self, root):
- """ Find code blocks and store in htmlStash. """
+ def run(self, root: etree.Element) -> None:
+ """ Find code blocks and store in `htmlStash`. """
blocks = root.iter('pre')
for block in blocks:
if len(block) == 1 and block[0].tag == 'code':
local_config = self.config.copy()
+ text = block[0].text
+ if text is None:
+ continue
code = CodeHilite(
- self.code_unescape(block[0].text),
+ self.code_unescape(text),
tab_length=self.md.tab_length,
style=local_config.pop('pygments_style', 'default'),
**local_config
)
placeholder = self.md.htmlStash.store(code.hilite())
- # Clear codeblock in etree instance
+ # Clear code block in `etree` instance
block.clear()
- # Change to p element which will later
+ # Change to `p` element which will later
# be removed when inserting raw html
block.tag = 'p'
block.text = placeholder
class CodeHiliteExtension(Extension):
- """ Add source code highlighting to markdown codeblocks. """
+ """ Add source code highlighting to markdown code blocks. """
def __init__(self, **kwargs):
# define default configs
self.config = {
- 'linenums': [None,
- "Use lines numbers. True|table|inline=yes, False=no, None=auto"],
- 'guess_lang': [True,
- "Automatic language detection - Default: True"],
- 'css_class': ["codehilite",
- "Set class name for wrapper <div> - "
- "Default: codehilite"],
- 'pygments_style': ['default',
- 'Pygments HTML Formatter Style '
- '(Colorscheme) - Default: default'],
- 'noclasses': [False,
- 'Use inline styles instead of CSS classes - '
- 'Default false'],
- 'use_pygments': [True,
- 'Use Pygments to Highlight code blocks. '
- 'Disable if using a JavaScript library. '
- 'Default: True'],
+ 'linenums': [
+ None, "Use lines numbers. True|table|inline=yes, False=no, None=auto. Default: `None`."
+ ],
+ 'guess_lang': [
+ True, "Automatic language detection - Default: `True`."
+ ],
+ 'css_class': [
+ "codehilite", "Set class name for wrapper <div> - Default: `codehilite`."
+ ],
+ 'pygments_style': [
+ 'default', 'Pygments HTML Formatter Style (Colorscheme). Default: `default`.'
+ ],
+ 'noclasses': [
+ False, 'Use inline styles instead of CSS classes - Default `False`.'
+ ],
+ 'use_pygments': [
+ True, 'Highlight code blocks with pygments. Disable if using a JavaScript library. Default: `True`.'
+ ],
'lang_prefix': [
- 'language-',
- 'Prefix prepended to the language when use_pygments is false. Default: "language-"'
+ 'language-', 'Prefix prepended to the language when `use_pygments` is false. Default: `language-`.'
+ ],
+ 'pygments_formatter': [
+ 'html', 'Use a specific formatter for Pygments highlighting. Default: `html`.'
],
- 'pygments_formatter': ['html',
- 'Use a specific formatter for Pygments highlighting.'
- 'Default: "html"',
- ],
- }
+ }
+ """ Default configuration options. """
for key, value in kwargs.items():
if key in self.config:
@@ -311,14 +328,14 @@ class CodeHiliteExtension(Extension):
# manually set unknown keywords.
if isinstance(value, str):
try:
- # Attempt to parse str as a bool value
+ # Attempt to parse `str` as a boolean value
value = parseBoolValue(value, preserve_none=True)
except ValueError:
- pass # Assume it's not a bool value. Use as-is.
+ pass # Assume it's not a boolean value. Use as-is.
self.config[key] = [value, '']
def extendMarkdown(self, md):
- """ Add HilitePostprocessor to Markdown instance. """
+ """ Add `HilitePostprocessor` to Markdown instance. """
hiliter = HiliteTreeprocessor(md)
hiliter.config = self.getConfigs()
md.treeprocessors.register(hiliter, 'hilite', 30)
diff --git a/libs/markdown/extensions/def_list.py b/libs/markdown/extensions/def_list.py
index 17549f031..5324bf193 100644
--- a/libs/markdown/extensions/def_list.py
+++ b/libs/markdown/extensions/def_list.py
@@ -1,20 +1,26 @@
-"""
-Definition List Extension for Python-Markdown
-=============================================
+# Definition List Extension for Python-Markdown
+# =============================================
-Adds parsing of Definition Lists to Python-Markdown.
+# Adds parsing of Definition Lists to Python-Markdown.
+
+# See https://Python-Markdown.github.io/extensions/definition_lists
+# for documentation.
-See <https://Python-Markdown.github.io/extensions/definition_lists>
-for documentation.
+# Original code Copyright 2008 [Waylan Limberg](http://achinghead.com)
-Original code Copyright 2008 [Waylan Limberg](http://achinghead.com)
+# All changes Copyright 2008-2014 The Python Markdown Project
-All changes Copyright 2008-2014 The Python Markdown Project
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
-License: [BSD](https://opensource.org/licenses/bsd-license.php)
+"""
+Adds parsing of Definition Lists to Python-Markdown.
+See the [documentation](https://Python-Markdown.github.io/extensions/definition_lists)
+for details.
"""
+from __future__ import annotations
+
from . import Extension
from ..blockprocessors import BlockProcessor, ListIndentProcessor
import xml.etree.ElementTree as etree
@@ -27,10 +33,10 @@ class DefListProcessor(BlockProcessor):
RE = re.compile(r'(^|\n)[ ]{0,3}:[ ]{1,3}(.*?)(\n|$)')
NO_INDENT_RE = re.compile(r'^[ ]{0,3}[^ :]')
- def test(self, parent, block):
+ def test(self, parent: etree.Element, block: str) -> bool:
return bool(self.RE.search(block))
- def run(self, parent, blocks):
+ def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
raw_block = blocks.pop(0)
m = self.RE.search(raw_block)
@@ -89,10 +95,12 @@ class DefListIndentProcessor(ListIndentProcessor):
# Definition lists need to be aware of all list types
ITEM_TYPES = ['dd', 'li']
+ """ Include `dd` in list item types. """
LIST_TYPES = ['dl', 'ol', 'ul']
+ """ Include `dl` in list types. """
- def create_item(self, parent, block):
- """ Create a new dd or li (depending on parent) and parse the block with it as the parent. """
+ def create_item(self, parent: etree.Element, block: str) -> None:
+ """ Create a new `dd` or `li` (depending on parent) and parse the block with it as the parent. """
dd = etree.SubElement(parent, 'dd')
self.parser.parseBlocks(dd, [block])
@@ -102,7 +110,7 @@ class DefListExtension(Extension):
""" Add definition lists to Markdown. """
def extendMarkdown(self, md):
- """ Add an instance of DefListProcessor to BlockParser. """
+ """ Add an instance of `DefListProcessor` to `BlockParser`. """
md.parser.blockprocessors.register(DefListIndentProcessor(md.parser), 'defindent', 85)
md.parser.blockprocessors.register(DefListProcessor(md.parser), 'deflist', 25)
diff --git a/libs/markdown/extensions/extra.py b/libs/markdown/extensions/extra.py
index 909ba075a..74ebc192c 100644
--- a/libs/markdown/extensions/extra.py
+++ b/libs/markdown/extensions/extra.py
@@ -1,12 +1,22 @@
-"""
-Python-Markdown Extra Extension
-===============================
+# Python-Markdown Extra Extension
+# ===============================
+
+# A compilation of various Python-Markdown extensions that imitates
+# [PHP Markdown Extra](http://michelf.com/projects/php-markdown/extra/).
+
+# See https://Python-Markdown.github.io/extensions/extra
+# for documentation.
+
+# Copyright The Python Markdown Project
+
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
+"""
A compilation of various Python-Markdown extensions that imitates
[PHP Markdown Extra](http://michelf.com/projects/php-markdown/extra/).
Note that each of the individual extensions still need to be available
-on your PYTHONPATH. This extension simply wraps them all up as a
+on your `PYTHONPATH`. This extension simply wraps them all up as a
convenience so that only one extension needs to be listed when
initiating Markdown. See the documentation for each individual
extension for specifics about that extension.
@@ -20,15 +30,12 @@ under a different name. You could also edit the `extensions` global
variable defined below, but be aware that such changes may be lost
when you upgrade to any future version of Python-Markdown.
-See <https://Python-Markdown.github.io/extensions/extra>
-for documentation.
-
-Copyright The Python Markdown Project
-
-License: [BSD](https://opensource.org/licenses/bsd-license.php)
-
+See the [documentation](https://Python-Markdown.github.io/extensions/extra)
+for details.
"""
+from __future__ import annotations
+
from . import Extension
extensions = [
@@ -40,13 +47,14 @@ extensions = [
'abbr',
'md_in_html'
]
+""" The list of included extensions. """
class ExtraExtension(Extension):
""" Add various extensions to Markdown class."""
def __init__(self, **kwargs):
- """ config is a dumb holder which gets passed to actual ext later. """
+ """ `config` is a dumb holder which gets passed to the actual extension later. """
self.config = kwargs
def extendMarkdown(self, md):
diff --git a/libs/markdown/extensions/fenced_code.py b/libs/markdown/extensions/fenced_code.py
index 409166ad8..da1a9be1e 100644
--- a/libs/markdown/extensions/fenced_code.py
+++ b/libs/markdown/extensions/fenced_code.py
@@ -1,20 +1,25 @@
-"""
-Fenced Code Extension for Python Markdown
-=========================================
+# Fenced Code Extension for Python Markdown
+# =========================================
-This extension adds Fenced Code Blocks to Python-Markdown.
+# This extension adds Fenced Code Blocks to Python-Markdown.
-See <https://Python-Markdown.github.io/extensions/fenced_code_blocks>
-for documentation.
+# See https://Python-Markdown.github.io/extensions/fenced_code_blocks
+# for documentation.
-Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/).
+# Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/).
+# All changes Copyright 2008-2014 The Python Markdown Project
-All changes Copyright 2008-2014 The Python Markdown Project
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
-License: [BSD](https://opensource.org/licenses/bsd-license.php)
"""
+This extension adds Fenced Code Blocks to Python-Markdown.
+See the [documentation](https://Python-Markdown.github.io/extensions/fenced_code_blocks)
+for details.
+"""
+
+from __future__ import annotations
from textwrap import dedent
from . import Extension
@@ -24,6 +29,10 @@ from .attr_list import get_attrs, AttrListExtension
from ..util import parseBoolValue
from ..serializers import _escape_attrib_html
import re
+from typing import TYPE_CHECKING, Any, Iterable
+
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
class FencedCodeExtension(Extension):
@@ -31,16 +40,19 @@ class FencedCodeExtension(Extension):
self.config = {
'lang_prefix': ['language-', 'Prefix prepended to the language. Default: "language-"']
}
+ """ Default configuration options. """
super().__init__(**kwargs)
def extendMarkdown(self, md):
- """ Add FencedBlockPreprocessor to the Markdown instance. """
+ """ Add `FencedBlockPreprocessor` to the Markdown instance. """
md.registerExtension(self)
md.preprocessors.register(FencedBlockPreprocessor(md, self.getConfigs()), 'fenced_code_block', 25)
class FencedBlockPreprocessor(Preprocessor):
+ """ Find and extract fenced code blocks. """
+
FENCED_BLOCK_RE = re.compile(
dedent(r'''
(?P<fence>^(?:~{3,}|`{3,}))[ ]* # opening fence
@@ -54,13 +66,13 @@ class FencedBlockPreprocessor(Preprocessor):
re.MULTILINE | re.DOTALL | re.VERBOSE
)
- def __init__(self, md, config):
+ def __init__(self, md: Markdown, config: dict[str, Any]):
super().__init__(md)
self.config = config
self.checked_for_deps = False
- self.codehilite_conf = {}
+ self.codehilite_conf: dict[str, Any] = {}
self.use_attr_list = False
- # List of options to convert to bool values
+ # List of options to convert to boolean values
self.bool_options = [
'linenums',
'guess_lang',
@@ -68,8 +80,8 @@ class FencedBlockPreprocessor(Preprocessor):
'use_pygments'
]
- def run(self, lines):
- """ Match and store Fenced Code Blocks in the HtmlStash. """
+ def run(self, lines: list[str]) -> list[str]:
+ """ Match and store Fenced Code Blocks in the `HtmlStash`. """
# Check for dependent extensions
if not self.checked_for_deps:
@@ -94,16 +106,16 @@ class FencedBlockPreprocessor(Preprocessor):
if m.group('lang'):
lang = m.group('lang')
if m.group('hl_lines'):
- # Support hl_lines outside of attrs for backward-compatibility
+ # Support `hl_lines` outside of `attrs` for backward-compatibility
config['hl_lines'] = parse_hl_lines(m.group('hl_lines'))
- # If config is not empty, then the codehighlite extension
+ # If `config` is not empty, then the `codehilite` extension
# is enabled, so we call it to highlight the code
if self.codehilite_conf and self.codehilite_conf['use_pygments'] and config.get('use_pygments', True):
local_config = self.codehilite_conf.copy()
local_config.update(config)
- # Combine classes with cssclass. Ensure cssclass is at end
- # as pygments appends a suffix under certain circumstances.
+ # Combine classes with `cssclass`. Ensure `cssclass` is at end
+ # as Pygments appends a suffix under certain circumstances.
# Ignore ID as Pygments does not offer an option to set it.
if classes:
local_config['css_class'] = '{} {}'.format(
@@ -128,9 +140,9 @@ class FencedBlockPreprocessor(Preprocessor):
if id:
id_attr = f' id="{_escape_attrib_html(id)}"'
if self.use_attr_list and config and not config.get('use_pygments', False):
- # Only assign key/value pairs to code element if attr_list ext is enabled, key/value pairs
- # were defined on the code block, and the `use_pygments` key was not set to True. The
- # `use_pygments` key could be either set to False or not defined. It is omitted from output.
+ # Only assign key/value pairs to code element if `attr_list` extension is enabled, key/value
+ # pairs were defined on the code block, and the `use_pygments` key was not set to `True`. The
+ # `use_pygments` key could be either set to `False` or not defined. It is omitted from output.
kv_pairs = ''.join(
f' {k}="{_escape_attrib_html(v)}"' for k, v in config.items() if k != 'use_pygments'
)
@@ -143,8 +155,8 @@ class FencedBlockPreprocessor(Preprocessor):
break
return text.split("\n")
- def handle_attrs(self, attrs):
- """ Return tuple: (id, [list, of, classes], {configs}) """
+ def handle_attrs(self, attrs: Iterable[tuple[str, str]]) -> tuple[str, list[str], dict[str, Any]]:
+ """ Return tuple: `(id, [list, of, classes], {configs})` """
id = ''
classes = []
configs = {}
@@ -161,7 +173,7 @@ class FencedBlockPreprocessor(Preprocessor):
configs[k] = v
return id, classes, configs
- def _escape(self, txt):
+ def _escape(self, txt: str) -> str:
""" basic html escaping """
txt = txt.replace('&', '&amp;')
txt = txt.replace('<', '&lt;')
diff --git a/libs/markdown/extensions/footnotes.py b/libs/markdown/extensions/footnotes.py
index 96ed5c25d..30c081138 100644
--- a/libs/markdown/extensions/footnotes.py
+++ b/libs/markdown/extensions/footnotes.py
@@ -1,18 +1,24 @@
-"""
-Footnotes Extension for Python-Markdown
-=======================================
+# Footnotes Extension for Python-Markdown
+# =======================================
-Adds footnote handling to Python-Markdown.
+# Adds footnote handling to Python-Markdown.
-See <https://Python-Markdown.github.io/extensions/footnotes>
-for documentation.
+# See https://Python-Markdown.github.io/extensions/footnotes
+# for documentation.
-Copyright The Python Markdown Project
+# Copyright The Python Markdown Project
-License: [BSD](https://opensource.org/licenses/bsd-license.php)
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
+
+"""
+Adds footnote handling to Python-Markdown.
+See the [documentation](https://Python-Markdown.github.io/extensions/footnotes)
+for details.
"""
+from __future__ import annotations
+
from . import Extension
from ..blockprocessors import BlockProcessor
from ..inlinepatterns import InlineProcessor
@@ -36,36 +42,34 @@ class FootnoteExtension(Extension):
""" Setup configs. """
self.config = {
- 'PLACE_MARKER':
- ["///Footnotes Go Here///",
- "The text string that marks where the footnotes go"],
- 'UNIQUE_IDS':
- [False,
- "Avoid name collisions across "
- "multiple calls to reset()."],
- "BACKLINK_TEXT":
- ["&#8617;",
- "The text string that links from the footnote "
- "to the reader's place."],
- "SUPERSCRIPT_TEXT":
- ["{}",
- "The text string that links from the reader's place "
- "to the footnote."],
- "BACKLINK_TITLE":
- ["Jump back to footnote %d in the text",
- "The text string used for the title HTML attribute "
- "of the backlink. %d will be replaced by the "
- "footnote number."],
- "SEPARATOR":
- [":",
- "Footnote separator."]
+ 'PLACE_MARKER': [
+ '///Footnotes Go Here///', 'The text string that marks where the footnotes go'
+ ],
+ 'UNIQUE_IDS': [
+ False, 'Avoid name collisions across multiple calls to `reset()`.'
+ ],
+ 'BACKLINK_TEXT': [
+ '&#8617;', "The text string that links from the footnote to the reader's place."
+ ],
+ 'SUPERSCRIPT_TEXT': [
+ '{}', "The text string that links from the reader's place to the footnote."
+ ],
+ 'BACKLINK_TITLE': [
+ 'Jump back to footnote %d in the text',
+ 'The text string used for the title HTML attribute of the backlink. '
+ '%d will be replaced by the footnote number.'
+ ],
+ 'SEPARATOR': [
+ ':', 'Footnote separator.'
+ ]
}
+ """ Default configuration options. """
super().__init__(**kwargs)
# In multiple invocations, emit links that don't get tangled.
self.unique_prefix = 0
- self.found_refs = {}
- self.used_refs = set()
+ self.found_refs: dict[str, int] = {}
+ self.used_refs: set[str] = set()
self.reset()
@@ -74,34 +78,34 @@ class FootnoteExtension(Extension):
md.registerExtension(self)
self.parser = md.parser
self.md = md
- # Insert a blockprocessor before ReferencePreprocessor
+ # Insert a `blockprocessor` before `ReferencePreprocessor`
md.parser.blockprocessors.register(FootnoteBlockProcessor(self), 'footnote', 17)
- # Insert an inline pattern before ImageReferencePattern
+ # Insert an inline pattern before `ImageReferencePattern`
FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah
md.inlinePatterns.register(FootnoteInlineProcessor(FOOTNOTE_RE, self), 'footnote', 175)
# Insert a tree-processor that would actually add the footnote div
- # This must be before all other treeprocessors (i.e., inline and
- # codehilite) so they can run on the the contents of the div.
+ # This must be before all other tree-processors (i.e., `inline` and
+ # `codehilite`) so they can run on the contents of the div.
md.treeprocessors.register(FootnoteTreeprocessor(self), 'footnote', 50)
# Insert a tree-processor that will run after inline is done.
# In this tree-processor we want to check our duplicate footnote tracker
- # And add additional backrefs to the footnote pointing back to the
+ # And add additional `backrefs` to the footnote pointing back to the
# duplicated references.
md.treeprocessors.register(FootnotePostTreeprocessor(self), 'footnote-duplicate', 15)
# Insert a postprocessor after amp_substitute processor
md.postprocessors.register(FootnotePostprocessor(self), 'footnote', 25)
- def reset(self):
+ def reset(self) -> None:
""" Clear footnotes on reset, and prepare for distinct document. """
- self.footnotes = OrderedDict()
+ self.footnotes: OrderedDict[str, str] = OrderedDict()
self.unique_prefix += 1
self.found_refs = {}
self.used_refs = set()
- def unique_ref(self, reference, found=False):
+ def unique_ref(self, reference: str, found: bool = False) -> str:
""" Get a unique reference if there are duplicates. """
if not found:
return reference
@@ -122,7 +126,9 @@ class FootnoteExtension(Extension):
self.found_refs[original_ref] = 1
return reference
- def findFootnotesPlaceholder(self, root):
+ def findFootnotesPlaceholder(
+ self, root: etree.Element
+ ) -> tuple[etree.Element, etree.Element, bool] | None:
""" Return ElementTree Element that contains Footnote placeholder. """
def finder(element):
for child in element:
@@ -140,30 +146,30 @@ class FootnoteExtension(Extension):
res = finder(root)
return res
- def setFootnote(self, id, text):
+ def setFootnote(self, id: str, text: str) -> None:
""" Store a footnote for later retrieval. """
self.footnotes[id] = text
- def get_separator(self):
+ def get_separator(self) -> str:
""" Get the footnote separator. """
return self.getConfig("SEPARATOR")
- def makeFootnoteId(self, id):
+ def makeFootnoteId(self, id: str) -> str:
""" Return footnote link id. """
if self.getConfig("UNIQUE_IDS"):
return 'fn%s%d-%s' % (self.get_separator(), self.unique_prefix, id)
else:
return 'fn{}{}'.format(self.get_separator(), id)
- def makeFootnoteRefId(self, id, found=False):
+ def makeFootnoteRefId(self, id: str, found: bool = False) -> str:
""" Return footnote back-link id. """
if self.getConfig("UNIQUE_IDS"):
return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found)
else:
return self.unique_ref('fnref{}{}'.format(self.get_separator(), id), found)
- def makeFootnotesDiv(self, root):
- """ Return div of footnotes as et Element. """
+ def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None:
+ """ Return `div` of footnotes as `etree` Element. """
if not list(self.footnotes.keys()):
return None
@@ -180,9 +186,9 @@ class FootnoteExtension(Extension):
for index, id in enumerate(self.footnotes.keys(), start=1):
li = etree.SubElement(ol, "li")
li.set("id", self.makeFootnoteId(id))
- # Parse footnote with surrogate parent as li cannot be used.
- # List block handlers have special logic to deal with li.
- # When we are done parsing, we will copy everything over to li.
+ # Parse footnote with surrogate parent as `li` cannot be used.
+ # List block handlers have special logic to deal with `li`.
+ # When we are done parsing, we will copy everything over to `li`.
self.parser.parseChunk(surrogate_parent, self.footnotes[id])
for el in list(surrogate_parent):
li.append(el)
@@ -212,14 +218,14 @@ class FootnoteBlockProcessor(BlockProcessor):
RE = re.compile(r'^[ ]{0,3}\[\^([^\]]*)\]:[ ]*(.*)$', re.MULTILINE)
- def __init__(self, footnotes):
+ def __init__(self, footnotes: FootnoteExtension):
super().__init__(footnotes.parser)
self.footnotes = footnotes
- def test(self, parent, block):
+ def test(self, parent: etree.Element, block: str) -> bool:
return True
- def run(self, parent, blocks):
+ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
""" Find, set, and remove footnote definitions. """
block = blocks.pop(0)
m = self.RE.search(block)
@@ -255,10 +261,11 @@ class FootnoteBlockProcessor(BlockProcessor):
blocks.insert(0, block)
return False
- def detectTabbed(self, blocks):
- """ Find indented text and remove indent before further proccesing.
+ def detectTabbed(self, blocks: list[str]) -> list[str]:
+ """ Find indented text and remove indent before further processing.
- Returns: a list of blocks with indentation removed.
+ Returns:
+ A list of blocks with indentation removed.
"""
fn_blocks = []
while blocks:
@@ -283,7 +290,7 @@ class FootnoteBlockProcessor(BlockProcessor):
break
return fn_blocks
- def detab(self, block):
+ def detab(self, block: str) -> str:
""" Remove one level of indent from a block.
Preserve lazily indented blocks by only removing indent from indented lines.
@@ -296,13 +303,13 @@ class FootnoteBlockProcessor(BlockProcessor):
class FootnoteInlineProcessor(InlineProcessor):
- """ InlinePattern for footnote markers in a document's body text. """
+ """ `InlineProcessor` for footnote markers in a document's body text. """
- def __init__(self, pattern, footnotes):
+ def __init__(self, pattern: str, footnotes: FootnoteExtension):
super().__init__(pattern)
self.footnotes = footnotes
- def handleMatch(self, m, data):
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]:
id = m.group(1)
if id in self.footnotes.footnotes.keys():
sup = etree.Element("sup")
@@ -321,11 +328,11 @@ class FootnoteInlineProcessor(InlineProcessor):
class FootnotePostTreeprocessor(Treeprocessor):
""" Amend footnote div with duplicates. """
- def __init__(self, footnotes):
+ def __init__(self, footnotes: FootnoteExtension):
self.footnotes = footnotes
- def add_duplicates(self, li, duplicates):
- """ Adjust current li and add the duplicates: fnref2, fnref3, etc. """
+ def add_duplicates(self, li: etree.Element, duplicates: int) -> None:
+ """ Adjust current `li` and add the duplicates: `fnref2`, `fnref3`, etc. """
for link in li.iter('a'):
# Find the link that needs to be duplicated.
if link.attrib.get('class', '') == 'footnote-backref':
@@ -344,13 +351,13 @@ class FootnotePostTreeprocessor(Treeprocessor):
el.append(link)
break
- def get_num_duplicates(self, li):
+ def get_num_duplicates(self, li: etree.Element) -> int:
""" Get the number of duplicate refs of the footnote. """
fn, rest = li.attrib.get('id', '').split(self.footnotes.get_separator(), 1)
link_id = '{}ref{}{}'.format(fn, self.footnotes.get_separator(), rest)
return self.footnotes.found_refs.get(link_id, 0)
- def handle_duplicates(self, parent):
+ def handle_duplicates(self, parent: etree.Element) -> None:
""" Find duplicate footnotes and format and add the duplicates. """
for li in list(parent):
# Check number of duplicates footnotes and insert
@@ -359,7 +366,7 @@ class FootnotePostTreeprocessor(Treeprocessor):
if count > 1:
self.add_duplicates(li, count)
- def run(self, root):
+ def run(self, root: etree.Element) -> None:
""" Crawl the footnote div and add missing duplicate footnotes. """
self.offset = 0
for div in root.iter('div'):
@@ -374,10 +381,10 @@ class FootnotePostTreeprocessor(Treeprocessor):
class FootnoteTreeprocessor(Treeprocessor):
""" Build and append footnote div to end of document. """
- def __init__(self, footnotes):
+ def __init__(self, footnotes: FootnoteExtension):
self.footnotes = footnotes
- def run(self, root):
+ def run(self, root: etree.Element) -> None:
footnotesDiv = self.footnotes.makeFootnotesDiv(root)
if footnotesDiv is not None:
result = self.footnotes.findFootnotesPlaceholder(root)
@@ -396,10 +403,10 @@ class FootnoteTreeprocessor(Treeprocessor):
class FootnotePostprocessor(Postprocessor):
""" Replace placeholders with html entities. """
- def __init__(self, footnotes):
+ def __init__(self, footnotes: FootnoteExtension):
self.footnotes = footnotes
- def run(self, text):
+ def run(self, text: str) -> str:
text = text.replace(
FN_BACKLINK_TEXT, self.footnotes.getConfig("BACKLINK_TEXT")
)
@@ -407,5 +414,5 @@ class FootnotePostprocessor(Postprocessor):
def makeExtension(**kwargs): # pragma: no cover
- """ Return an instance of the FootnoteExtension """
+ """ Return an instance of the `FootnoteExtension` """
return FootnoteExtension(**kwargs)
diff --git a/libs/markdown/extensions/legacy_attrs.py b/libs/markdown/extensions/legacy_attrs.py
index 445aba111..6641e6ea6 100644
--- a/libs/markdown/extensions/legacy_attrs.py
+++ b/libs/markdown/extensions/legacy_attrs.py
@@ -1,46 +1,49 @@
-"""
-Python Markdown
-
-A Python implementation of John Gruber's Markdown.
+# Python Markdown
-Documentation: https://python-markdown.github.io/
-GitHub: https://github.com/Python-Markdown/markdown/
-PyPI: https://pypi.org/project/Markdown/
+# A Python implementation of John Gruber's Markdown.
-Started by Manfred Stienstra (http://www.dwerg.net/).
-Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
-Currently maintained by Waylan Limberg (https://github.com/waylan),
-Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+# Documentation: https://python-markdown.github.io/
+# GitHub: https://github.com/Python-Markdown/markdown/
+# PyPI: https://pypi.org/project/Markdown/
-Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
-Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
-Copyright 2004 Manfred Stienstra (the original version)
+# Started by Manfred Stienstra (http://www.dwerg.net/).
+# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+# Currently maintained by Waylan Limberg (https://github.com/waylan),
+# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
-License: BSD (see LICENSE.md for details).
+# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
-Legacy Attributes Extension
-===========================
+# License: BSD (see LICENSE.md for details).
+"""
An extension to Python Markdown which implements legacy attributes.
Prior to Python-Markdown version 3.0, the Markdown class had an `enable_attributes`
keyword which was on by default and provided for attributes to be defined for elements
using the format `{@key=value}`. This extension is provided as a replacement for
-backward compatibility. New documents should be authored using attr_lists. However,
-numerious documents exist which have been using the old attribute format for many
+backward compatibility. New documents should be authored using `attr_lists`. However,
+numerous documents exist which have been using the old attribute format for many
years. This extension can be used to continue to render those documents correctly.
"""
+from __future__ import annotations
+
import re
from markdown.treeprocessors import Treeprocessor, isString
from markdown.extensions import Extension
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING: # pragma: no cover
+ import xml.etree.ElementTree as etree
ATTR_RE = re.compile(r'\{@([^\}]*)=([^\}]*)}') # {@id=123}
class LegacyAttrs(Treeprocessor):
- def run(self, doc):
+ def run(self, doc: etree.Element) -> None:
"""Find and set values of attributes ({@key=value}). """
for el in doc.iter():
alt = el.get('alt', None)
@@ -51,15 +54,16 @@ class LegacyAttrs(Treeprocessor):
if el.tail and isString(el.tail):
el.tail = self.handleAttributes(el, el.tail)
- def handleAttributes(self, el, txt):
+ def handleAttributes(self, el: etree.Element, txt: str) -> str:
""" Set attributes and return text without definitions. """
- def attributeCallback(match):
+ def attributeCallback(match: re.Match[str]):
el.set(match.group(1), match.group(2).replace('\n', ' '))
return ATTR_RE.sub(attributeCallback, txt)
class LegacyAttrExtension(Extension):
def extendMarkdown(self, md):
+ """ Add `LegacyAttrs` to Markdown instance. """
md.treeprocessors.register(LegacyAttrs(md), 'legacyattrs', 15)
diff --git a/libs/markdown/extensions/legacy_em.py b/libs/markdown/extensions/legacy_em.py
index 360988b6d..a6f67b7ef 100644
--- a/libs/markdown/extensions/legacy_em.py
+++ b/libs/markdown/extensions/legacy_em.py
@@ -1,14 +1,17 @@
-'''
-Legacy Em Extension for Python-Markdown
-=======================================
+# Legacy Em Extension for Python-Markdown
+# =======================================
-This extension provides legacy behavior for _connected_words_.
+# This extension provides legacy behavior for _connected_words_.
+
+# Copyright 2015-2018 The Python Markdown Project
-Copyright 2015-2018 The Python Markdown Project
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
-License: [BSD](https://opensource.org/licenses/bsd-license.php)
+"""
+This extension provides legacy behavior for _connected_words_.
+"""
-'''
+from __future__ import annotations
from . import Extension
from ..inlinepatterns import UnderscoreProcessor, EmStrongItem, EM_STRONG2_RE, STRONG_EM2_RE
@@ -45,5 +48,5 @@ class LegacyEmExtension(Extension):
def makeExtension(**kwargs): # pragma: no cover
- """ Return an instance of the LegacyEmExtension """
+ """ Return an instance of the `LegacyEmExtension` """
return LegacyEmExtension(**kwargs)
diff --git a/libs/markdown/extensions/md_in_html.py b/libs/markdown/extensions/md_in_html.py
index ec7dcba0e..64b84a5f4 100644
--- a/libs/markdown/extensions/md_in_html.py
+++ b/libs/markdown/extensions/md_in_html.py
@@ -1,18 +1,25 @@
-"""
-Python-Markdown Markdown in HTML Extension
-===============================
+# Python-Markdown Markdown in HTML Extension
+# ===============================
-An implementation of [PHP Markdown Extra](http://michelf.com/projects/php-markdown/extra/)'s
-parsing of Markdown syntax in raw HTML.
+# An implementation of [PHP Markdown Extra](http://michelf.com/projects/php-markdown/extra/)'s
+# parsing of Markdown syntax in raw HTML.
-See <https://Python-Markdown.github.io/extensions/raw_html>
-for documentation.
+# See https://Python-Markdown.github.io/extensions/raw_html
+# for documentation.
-Copyright The Python Markdown Project
+# Copyright The Python Markdown Project
-License: [BSD](https://opensource.org/licenses/bsd-license.php)
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
"""
+An implementation of [PHP Markdown Extra](http://michelf.com/projects/php-markdown/extra/)'s
+parsing of Markdown syntax in raw HTML.
+
+See the [documentation](https://Python-Markdown.github.io/extensions/raw_html)
+for details.
+"""
+
+from __future__ import annotations
from . import Extension
from ..blockprocessors import BlockProcessor
@@ -21,14 +28,19 @@ from ..postprocessors import RawHtmlPostprocessor
from .. import util
from ..htmlparser import HTMLExtractor, blank_line_re
import xml.etree.ElementTree as etree
+from typing import TYPE_CHECKING, Literal, Mapping
+
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
class HTMLExtractorExtra(HTMLExtractor):
"""
- Override HTMLExtractor and create etree Elements for any elements which should have content parsed as Markdown.
+ Override `HTMLExtractor` and create `etree` `Elements` for any elements which should have content parsed as
+ Markdown.
"""
- def __init__(self, md, *args, **kwargs):
+ def __init__(self, md: Markdown, *args, **kwargs):
# All block-level tags.
self.block_level_tags = set(md.block_level_elements.copy())
# Block-level tags in which the content only gets span level parsing
@@ -46,9 +58,9 @@ class HTMLExtractorExtra(HTMLExtractor):
def reset(self):
"""Reset this instance. Loses all unprocessed data."""
- self.mdstack = [] # When markdown=1, stack contains a list of tags
+ self.mdstack: list[str] = [] # When markdown=1, stack contains a list of tags
self.treebuilder = etree.TreeBuilder()
- self.mdstate = [] # one of 'block', 'span', 'off', or None
+ self.mdstate: list[Literal['block', 'span', 'off', None]] = []
super().reset()
def close(self):
@@ -56,17 +68,17 @@ class HTMLExtractorExtra(HTMLExtractor):
super().close()
# Handle any unclosed tags.
if self.mdstack:
- # Close the outermost parent. handle_endtag will close all unclosed children.
+ # Close the outermost parent. `handle_endtag` will close all unclosed children.
self.handle_endtag(self.mdstack[0])
- def get_element(self):
- """ Return element from treebuilder and reset treebuilder for later use. """
+ def get_element(self) -> etree.Element:
+ """ Return element from `treebuilder` and reset `treebuilder` for later use. """
element = self.treebuilder.close()
self.treebuilder = etree.TreeBuilder()
return element
- def get_state(self, tag, attrs):
- """ Return state from tag and `markdown` attr. One of 'block', 'span', or 'off'. """
+ def get_state(self, tag, attrs: Mapping[str, str]) -> Literal['block', 'span', 'off', None]:
+ """ Return state from tag and `markdown` attribute. One of 'block', 'span', or 'off'. """
md_attr = attrs.get('markdown', '0')
if md_attr == 'markdown':
# `<tag markdown>` is the same as `<tag markdown='1'>`.
@@ -100,7 +112,7 @@ class HTMLExtractorExtra(HTMLExtractor):
return
if tag in self.block_level_tags and (self.at_line_start() or self.intail):
- # Valueless attr (ex: `<tag checked>`) results in `[('checked', None)]`.
+ # Valueless attribute (ex: `<tag checked>`) results in `[('checked', None)]`.
# Convert to `{'checked': 'checked'}`.
attrs = {key: value if value is not None else key for key, value in attrs}
state = self.get_state(tag, attrs)
@@ -157,7 +169,7 @@ class HTMLExtractorExtra(HTMLExtractor):
# Check if element has a tail
if not blank_line_re.match(
self.rawdata[self.line_offset + self.offset + len(self.get_endtag_text(tag)):]):
- # More content exists after endtag.
+ # More content exists after `endtag`.
self.intail = True
else:
# Treat orphan closing tag as a span level tag.
@@ -207,20 +219,20 @@ class HTMLExtractorExtra(HTMLExtractor):
else:
self.handle_data(self.md.htmlStash.store(data))
- def parse_pi(self, i):
+ def parse_pi(self, i: int) -> int:
if self.at_line_start() or self.intail or self.mdstack:
- # The same override exists in HTMLExtractor without the check
- # for mdstack. Therefore, use HTMLExtractor's parent instead.
+ # The same override exists in `HTMLExtractor` without the check
+ # for `mdstack`. Therefore, use parent of `HTMLExtractor` instead.
return super(HTMLExtractor, self).parse_pi(i)
# This is not the beginning of a raw block so treat as plain data
# and avoid consuming any tags which may follow (see #1066).
self.handle_data('<?')
return i + 2
- def parse_html_declaration(self, i):
+ def parse_html_declaration(self, i: int) -> int:
if self.at_line_start() or self.intail or self.mdstack:
- # The same override exists in HTMLExtractor without the check
- # for mdstack. Therefore, use HTMLExtractor's parent instead.
+ # The same override exists in `HTMLExtractor` without the check
+ # for `mdstack`. Therefore, use parent of `HTMLExtractor` instead.
return super(HTMLExtractor, self).parse_html_declaration(i)
# This is not the beginning of a raw block so treat as plain data
# and avoid consuming any tags which may follow (see #1066).
@@ -231,7 +243,7 @@ class HTMLExtractorExtra(HTMLExtractor):
class HtmlBlockPreprocessor(Preprocessor):
"""Remove html blocks from the text and store them for later retrieval."""
- def run(self, lines):
+ def run(self, lines: list[str]) -> list[str]:
source = '\n'.join(lines)
parser = HTMLExtractorExtra(self.md)
parser.feed(source)
@@ -240,19 +252,19 @@ class HtmlBlockPreprocessor(Preprocessor):
class MarkdownInHtmlProcessor(BlockProcessor):
- """Process Markdown Inside HTML Blocks which have been stored in the HtmlStash."""
+ """Process Markdown Inside HTML Blocks which have been stored in the `HtmlStash`."""
- def test(self, parent, block):
- # ALways return True. `run` will return `False` it not a valid match.
+ def test(self, parent: etree.Element, block: str) -> bool:
+ # Always return True. `run` will return `False` if not a valid match.
return True
- def parse_element_content(self, element):
+ def parse_element_content(self, element: etree.Element) -> None:
"""
- Recursively parse the text content of an etree Element as Markdown.
+ Recursively parse the text content of an `etree` Element as Markdown.
Any block level elements generated from the Markdown will be inserted as children of the element in place
of the text content. All `markdown` attributes are removed. For any elements in which Markdown parsing has
- been disabled, the text content of it and its chidlren are wrapped in an `AtomicString`.
+ been disabled, the text content of it and its children are wrapped in an `AtomicString`.
"""
md_attr = element.attrib.pop('markdown', 'off')
@@ -301,7 +313,7 @@ class MarkdownInHtmlProcessor(BlockProcessor):
element.insert(0, child)
elif md_attr == 'span':
- # Span level parsing will be handled by inlineprocessors.
+ # Span level parsing will be handled by inline processors.
# Walk children here to remove any `markdown` attributes.
for child in list(element):
self.parse_element_content(child)
@@ -316,7 +328,7 @@ class MarkdownInHtmlProcessor(BlockProcessor):
if child.tail:
child.tail = util.AtomicString(child.tail)
- def run(self, parent, blocks):
+ def run(self, parent: etree.Element, blocks: list[str]) -> bool:
m = util.HTML_PLACEHOLDER_RE.match(blocks[0])
if m:
index = int(m.group(1))
@@ -329,15 +341,15 @@ class MarkdownInHtmlProcessor(BlockProcessor):
# Cleanup stash. Replace element with empty string to avoid confusing postprocessor.
self.parser.md.htmlStash.rawHtmlBlocks.pop(index)
self.parser.md.htmlStash.rawHtmlBlocks.insert(index, '')
- # Confirm the match to the blockparser.
+ # Confirm the match to the `blockparser`.
return True
# No match found.
return False
class MarkdownInHTMLPostprocessor(RawHtmlPostprocessor):
- def stash_to_string(self, text):
- """ Override default to handle any etree elements still in the stash. """
+ def stash_to_string(self, text: str | etree.Element) -> str:
+ """ Override default to handle any `etree` elements still in the stash. """
if isinstance(text, etree.Element):
return self.md.serializer(text)
else:
@@ -352,7 +364,7 @@ class MarkdownInHtmlExtension(Extension):
# Replace raw HTML preprocessor
md.preprocessors.register(HtmlBlockPreprocessor(md), 'html_block', 20)
- # Add blockprocessor which handles the placeholders for etree elements
+ # Add `blockprocessor` which handles the placeholders for `etree` elements
md.parser.blockprocessors.register(
MarkdownInHtmlProcessor(md.parser), 'markdown_block', 105
)
diff --git a/libs/markdown/extensions/meta.py b/libs/markdown/extensions/meta.py
index 10dee1184..cb703399b 100644
--- a/libs/markdown/extensions/meta.py
+++ b/libs/markdown/extensions/meta.py
@@ -1,24 +1,31 @@
-"""
-Meta Data Extension for Python-Markdown
-=======================================
+# Meta Data Extension for Python-Markdown
+# =======================================
-This extension adds Meta Data handling to markdown.
+# This extension adds Meta Data handling to markdown.
+
+# See https://Python-Markdown.github.io/extensions/meta_data
+# for documentation.
-See <https://Python-Markdown.github.io/extensions/meta_data>
-for documentation.
+# Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com).
-Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com).
+# All changes Copyright 2008-2014 The Python Markdown Project
-All changes Copyright 2008-2014 The Python Markdown Project
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
-License: [BSD](https://opensource.org/licenses/bsd-license.php)
+"""
+This extension adds Meta Data handling to markdown.
+See the [documentation](https://Python-Markdown.github.io/extensions/meta_data)
+for details.
"""
+from __future__ import annotations
+
from . import Extension
from ..preprocessors import Preprocessor
import re
import logging
+from typing import Any
log = logging.getLogger('MARKDOWN')
@@ -33,21 +40,21 @@ class MetaExtension (Extension):
""" Meta-Data extension for Python-Markdown. """
def extendMarkdown(self, md):
- """ Add MetaPreprocessor to Markdown instance. """
+ """ Add `MetaPreprocessor` to Markdown instance. """
md.registerExtension(self)
self.md = md
md.preprocessors.register(MetaPreprocessor(md), 'meta', 27)
- def reset(self):
+ def reset(self) -> None:
self.md.Meta = {}
class MetaPreprocessor(Preprocessor):
""" Get Meta-Data. """
- def run(self, lines):
+ def run(self, lines: list[str]) -> list[str]:
""" Parse Meta-Data and store in Markdown.Meta. """
- meta = {}
+ meta: dict[str, Any] = {}
key = None
if lines and BEGIN_RE.match(lines[0]):
lines.pop(0)
diff --git a/libs/markdown/extensions/nl2br.py b/libs/markdown/extensions/nl2br.py
index 6c7491bca..177df1ee4 100644
--- a/libs/markdown/extensions/nl2br.py
+++ b/libs/markdown/extensions/nl2br.py
@@ -1,21 +1,28 @@
-"""
-NL2BR Extension
-===============
+# `NL2BR` Extension
+# ===============
-A Python-Markdown extension to treat newlines as hard breaks; like
-GitHub-flavored Markdown does.
+# A Python-Markdown extension to treat newlines as hard breaks; like
+# GitHub-flavored Markdown does.
+
+# See https://Python-Markdown.github.io/extensions/nl2br
+# for documentation.
-See <https://Python-Markdown.github.io/extensions/nl2br>
-for documentation.
+# Original code Copyright 2011 [Brian Neal](https://deathofagremmie.com/)
-Oringinal code Copyright 2011 [Brian Neal](https://deathofagremmie.com/)
+# All changes Copyright 2011-2014 The Python Markdown Project
-All changes Copyright 2011-2014 The Python Markdown Project
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
-License: [BSD](https://opensource.org/licenses/bsd-license.php)
+"""
+A Python-Markdown extension to treat newlines as hard breaks; like
+GitHub-flavored Markdown does.
+See the [documentation](https://Python-Markdown.github.io/extensions/nl2br)
+for details.
"""
+from __future__ import annotations
+
from . import Extension
from ..inlinepatterns import SubstituteTagInlineProcessor
@@ -25,6 +32,7 @@ BR_RE = r'\n'
class Nl2BrExtension(Extension):
def extendMarkdown(self, md):
+ """ Add a `SubstituteTagInlineProcessor` to Markdown. """
br_tag = SubstituteTagInlineProcessor(BR_RE, 'br')
md.inlinePatterns.register(br_tag, 'nl', 5)
diff --git a/libs/markdown/extensions/sane_lists.py b/libs/markdown/extensions/sane_lists.py
index e27eb1803..be421f943 100644
--- a/libs/markdown/extensions/sane_lists.py
+++ b/libs/markdown/extensions/sane_lists.py
@@ -1,41 +1,56 @@
-"""
-Sane List Extension for Python-Markdown
-=======================================
+# Sane List Extension for Python-Markdown
+# =======================================
-Modify the behavior of Lists in Python-Markdown to act in a sane manor.
+# Modify the behavior of Lists in Python-Markdown to act in a sane manner.
-See <https://Python-Markdown.github.io/extensions/sane_lists>
-for documentation.
+# See https://Python-Markdown.github.io/extensions/sane_lists
+# for documentation.
-Original code Copyright 2011 [Waylan Limberg](http://achinghead.com)
+# Original code Copyright 2011 [Waylan Limberg](http://achinghead.com)
-All changes Copyright 2011-2014 The Python Markdown Project
+# All changes Copyright 2011-2014 The Python Markdown Project
-License: [BSD](https://opensource.org/licenses/bsd-license.php)
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
"""
+Modify the behavior of Lists in Python-Markdown to act in a sane manner.
+
+See [documentation](https://Python-Markdown.github.io/extensions/sane_lists)
+for details.
+"""
+
+from __future__ import annotations
from . import Extension
from ..blockprocessors import OListProcessor, UListProcessor
import re
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING: # pragma: no cover
+ from .. import blockparser
class SaneOListProcessor(OListProcessor):
+ """ Override `SIBLING_TAGS` to not include `ul` and set `LAZY_OL` to `False`. """
SIBLING_TAGS = ['ol']
+ """ Exclude `ul` from list of siblings. """
LAZY_OL = False
+ """ Disable lazy list behavior. """
- def __init__(self, parser):
+ def __init__(self, parser: blockparser.BlockParser):
super().__init__(parser)
self.CHILD_RE = re.compile(r'^[ ]{0,%d}((\d+\.))[ ]+(.*)' %
(self.tab_length - 1))
class SaneUListProcessor(UListProcessor):
+ """ Override `SIBLING_TAGS` to not include `ol`. """
SIBLING_TAGS = ['ul']
+ """ Exclude `ol` from list of siblings. """
- def __init__(self, parser):
+ def __init__(self, parser: blockparser.BlockParser):
super().__init__(parser)
self.CHILD_RE = re.compile(r'^[ ]{0,%d}(([*+-]))[ ]+(.*)' %
(self.tab_length - 1))
diff --git a/libs/markdown/extensions/smarty.py b/libs/markdown/extensions/smarty.py
index c4bfd58a0..0ce7772a7 100644
--- a/libs/markdown/extensions/smarty.py
+++ b/libs/markdown/extensions/smarty.py
@@ -1,90 +1,102 @@
-'''
-Smarty extension for Python-Markdown
-====================================
+# Smarty extension for Python-Markdown
+# ====================================
-Adds conversion of ASCII dashes, quotes and ellipses to their HTML
-entity equivalents.
+# Adds conversion of ASCII dashes, quotes and ellipses to their HTML
+# entity equivalents.
+
+# See https://Python-Markdown.github.io/extensions/smarty
+# for documentation.
-See <https://Python-Markdown.github.io/extensions/smarty>
-for documentation.
+# Author: 2013, Dmitry Shachnev <[email protected]>
-Author: 2013, Dmitry Shachnev <[email protected]>
+# All changes Copyright 2013-2014 The Python Markdown Project
-All changes Copyright 2013-2014 The Python Markdown Project
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
-License: [BSD](https://opensource.org/licenses/bsd-license.php)
+# SmartyPants license:
-SmartyPants license:
+# Copyright (c) 2003 John Gruber <https://daringfireball.net/>
+# All rights reserved.
- Copyright (c) 2003 John Gruber <https://daringfireball.net/>
- All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
+# * Neither the name "SmartyPants" nor the names of its contributors
+# may be used to endorse or promote products derived from this
+# software without specific prior written permission.
- * Neither the name "SmartyPants" nor the names of its contributors
- may be used to endorse or promote products derived from this
- software without specific prior written permission.
+# This software is provided by the copyright holders and contributors "as
+# is" and any express or implied warranties, including, but not limited
+# to, the implied warranties of merchantability and fitness for a
+# particular purpose are disclaimed. In no event shall the copyright
+# owner or contributors be liable for any direct, indirect, incidental,
+# special, exemplary, or consequential damages (including, but not
+# limited to, procurement of substitute goods or services; loss of use,
+# data, or profits; or business interruption) however caused and on any
+# theory of liability, whether in contract, strict liability, or tort
+# (including negligence or otherwise) arising in any way out of the use
+# of this software, even if advised of the possibility of such damage.
- This software is provided by the copyright holders and contributors "as
- is" and any express or implied warranties, including, but not limited
- to, the implied warranties of merchantability and fitness for a
- particular purpose are disclaimed. In no event shall the copyright
- owner or contributors be liable for any direct, indirect, incidental,
- special, exemplary, or consequential damages (including, but not
- limited to, procurement of substitute goods or services; loss of use,
- data, or profits; or business interruption) however caused and on any
- theory of liability, whether in contract, strict liability, or tort
- (including negligence or otherwise) arising in any way out of the use
- of this software, even if advised of the possibility of such damage.
+# `smartypants.py` license:
-smartypants.py license:
+# `smartypants.py` is a derivative work of SmartyPants.
+# Copyright (c) 2004, 2007 Chad Miller <http://web.chad.org/>
- smartypants.py is a derivative work of SmartyPants.
- Copyright (c) 2004, 2007 Chad Miller <http://web.chad.org/>
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
+# This software is provided by the copyright holders and contributors "as
+# is" and any express or implied warranties, including, but not limited
+# to, the implied warranties of merchantability and fitness for a
+# particular purpose are disclaimed. In no event shall the copyright
+# owner or contributors be liable for any direct, indirect, incidental,
+# special, exemplary, or consequential damages (including, but not
+# limited to, procurement of substitute goods or services; loss of use,
+# data, or profits; or business interruption) however caused and on any
+# theory of liability, whether in contract, strict liability, or tort
+# (including negligence or otherwise) arising in any way out of the use
+# of this software, even if advised of the possibility of such damage.
- This software is provided by the copyright holders and contributors "as
- is" and any express or implied warranties, including, but not limited
- to, the implied warranties of merchantability and fitness for a
- particular purpose are disclaimed. In no event shall the copyright
- owner or contributors be liable for any direct, indirect, incidental,
- special, exemplary, or consequential damages (including, but not
- limited to, procurement of substitute goods or services; loss of use,
- data, or profits; or business interruption) however caused and on any
- theory of liability, whether in contract, strict liability, or tort
- (including negligence or otherwise) arising in any way out of the use
- of this software, even if advised of the possibility of such damage.
+"""
+Adds conversion of ASCII dashes, quotes and ellipses to their HTML
+entity equivalents.
-'''
+See the [documentation](https://Python-Markdown.github.io/extensions/smarty)
+for details.
+"""
+from __future__ import annotations
from . import Extension
from ..inlinepatterns import HtmlInlineProcessor, HTML_RE
from ..treeprocessors import InlineProcessor
from ..util import Registry
+from typing import TYPE_CHECKING, Sequence
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
+ from .. import inlinepatterns
+ import re
+ import xml.etree.ElementTree as etree
# Constants for quote education.
punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
@@ -95,7 +107,7 @@ openingQuotesBase = (
r'(\s' # a whitespace char
r'|&nbsp;' # or a non-breaking space entity
r'|--' # or dashes
- r'|–|—' # or unicode
+ r'|–|—' # or Unicode
r'|&[mn]dash;' # or named dash entities
r'|&#8211;|&#8212;' # or decimal entities
r')'
@@ -139,7 +151,7 @@ openingSingleQuotesRegex = r"%s'(?=\w)" % openingQuotesBase
# Single closing quotes:
closingSingleQuotesRegex = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass
-closingSingleQuotesRegex2 = r"(?<=%s)'(\s|s\b)" % closeClass
+closingSingleQuotesRegex2 = r"'(\s|s\b)"
# All remaining quotes should be opening ones
remainingSingleQuotesRegex = r"'"
@@ -149,13 +161,13 @@ HTML_STRICT_RE = HTML_RE + r'(?!\>)'
class SubstituteTextPattern(HtmlInlineProcessor):
- def __init__(self, pattern, replace, md):
+ def __init__(self, pattern: str, replace: Sequence[int | str | etree.Element], md: Markdown):
""" Replaces matches with some text. """
HtmlInlineProcessor.__init__(self, pattern)
self.replace = replace
self.md = md
- def handleMatch(self, m, data):
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]:
result = ''
for part in self.replace:
if isinstance(part, int):
@@ -166,6 +178,7 @@ class SubstituteTextPattern(HtmlInlineProcessor):
class SmartyExtension(Extension):
+ """ Add Smarty to Markdown. """
def __init__(self, **kwargs):
self.config = {
'smart_quotes': [True, 'Educate quotes'],
@@ -174,18 +187,25 @@ class SmartyExtension(Extension):
'smart_ellipses': [True, 'Educate ellipses'],
'substitutions': [{}, 'Overwrite default substitutions'],
}
+ """ Default configuration options. """
super().__init__(**kwargs)
- self.substitutions = dict(substitutions)
+ self.substitutions: dict[str, str] = dict(substitutions)
self.substitutions.update(self.getConfig('substitutions', default={}))
- def _addPatterns(self, md, patterns, serie, priority):
+ def _addPatterns(
+ self,
+ md: Markdown,
+ patterns: Sequence[tuple[str, Sequence[int | str | etree.Element]]],
+ serie: str,
+ priority: int,
+ ):
for ind, pattern in enumerate(patterns):
pattern += (md,)
pattern = SubstituteTextPattern(*pattern)
name = 'smarty-%s-%d' % (serie, ind)
self.inlinePatterns.register(pattern, name, priority-ind)
- def educateDashes(self, md):
+ def educateDashes(self, md: Markdown) -> None:
emDashesPattern = SubstituteTextPattern(
r'(?<!-)---(?!-)', (self.substitutions['mdash'],), md
)
@@ -195,13 +215,13 @@ class SmartyExtension(Extension):
self.inlinePatterns.register(emDashesPattern, 'smarty-em-dashes', 50)
self.inlinePatterns.register(enDashesPattern, 'smarty-en-dashes', 45)
- def educateEllipses(self, md):
+ def educateEllipses(self, md: Markdown) -> None:
ellipsesPattern = SubstituteTextPattern(
r'(?<!\.)\.{3}(?!\.)', (self.substitutions['ellipsis'],), md
)
self.inlinePatterns.register(ellipsesPattern, 'smarty-ellipses', 10)
- def educateAngledQuotes(self, md):
+ def educateAngledQuotes(self, md: Markdown) -> None:
leftAngledQuotePattern = SubstituteTextPattern(
r'\<\<', (self.substitutions['left-angle-quote'],), md
)
@@ -211,7 +231,7 @@ class SmartyExtension(Extension):
self.inlinePatterns.register(leftAngledQuotePattern, 'smarty-left-angle-quotes', 40)
self.inlinePatterns.register(rightAngledQuotePattern, 'smarty-right-angle-quotes', 35)
- def educateQuotes(self, md):
+ def educateQuotes(self, md: Markdown) -> None:
lsquo = self.substitutions['left-single-quote']
rsquo = self.substitutions['right-single-quote']
ldquo = self.substitutions['left-double-quote']
@@ -235,14 +255,14 @@ class SmartyExtension(Extension):
def extendMarkdown(self, md):
configs = self.getConfigs()
- self.inlinePatterns = Registry()
+ self.inlinePatterns: Registry[inlinepatterns.InlineProcessor] = Registry()
if configs['smart_ellipses']:
self.educateEllipses(md)
if configs['smart_quotes']:
self.educateQuotes(md)
if configs['smart_angled_quotes']:
self.educateAngledQuotes(md)
- # Override HTML_RE from inlinepatterns.py so that it does not
+ # Override `HTML_RE` from `inlinepatterns.py` so that it does not
# process tags with duplicate closing quotes.
md.inlinePatterns.register(HtmlInlineProcessor(HTML_STRICT_RE, md), 'html', 90)
if configs['smart_dashes']:
diff --git a/libs/markdown/extensions/tables.py b/libs/markdown/extensions/tables.py
index c8b1024a5..6e2fa1742 100644
--- a/libs/markdown/extensions/tables.py
+++ b/libs/markdown/extensions/tables.py
@@ -1,24 +1,35 @@
-"""
-Tables Extension for Python-Markdown
-====================================
+# Tables Extension for Python-Markdown
+# ====================================
-Added parsing of tables to Python-Markdown.
+# Added parsing of tables to Python-Markdown.
-See <https://Python-Markdown.github.io/extensions/tables>
-for documentation.
+# See https://Python-Markdown.github.io/extensions/tables
+# for documentation.
-Original code Copyright 2009 [Waylan Limberg](http://achinghead.com)
+# Original code Copyright 2009 [Waylan Limberg](http://achinghead.com)
-All changes Copyright 2008-2014 The Python Markdown Project
+# All changes Copyright 2008-2014 The Python Markdown Project
-License: [BSD](https://opensource.org/licenses/bsd-license.php)
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
+
+"""
+Added parsing of tables to Python-Markdown.
+See the [documentation](https://Python-Markdown.github.io/extensions/tables)
+for details.
"""
+from __future__ import annotations
+
from . import Extension
from ..blockprocessors import BlockProcessor
import xml.etree.ElementTree as etree
import re
+from typing import TYPE_CHECKING, Any, Sequence
+
+if TYPE_CHECKING: # pragma: no cover
+ from .. import blockparser
+
PIPE_NONE = 0
PIPE_LEFT = 1
PIPE_RIGHT = 2
@@ -30,14 +41,14 @@ class TableProcessor(BlockProcessor):
RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')
- def __init__(self, parser, config):
- self.border = False
- self.separator = ''
+ def __init__(self, parser: blockparser.BlockParser, config: dict[str, Any]):
+ self.border: bool | int = False
+ self.separator: Sequence[str] = ''
self.config = config
super().__init__(parser)
- def test(self, parent, block):
+ def test(self, parent: etree.Element, block: str) -> bool:
"""
Ensure first two rows (column header and separator row) are valid table rows.
@@ -73,14 +84,14 @@ class TableProcessor(BlockProcessor):
return is_table
- def run(self, parent, blocks):
+ def run(self, parent: etree.Element, blocks: list[str]) -> None:
""" Parse a table block and build table. """
block = blocks.pop(0).split('\n')
header = block[0].strip(' ')
rows = [] if len(block) < 3 else block[2:]
# Get alignment of columns
- align = []
+ align: list[str | None] = []
for c in self.separator:
c = c.strip(' ')
if c.startswith(':') and c.endswith(':'):
@@ -104,7 +115,7 @@ class TableProcessor(BlockProcessor):
for row in rows:
self._build_row(row.strip(' '), tbody, align)
- def _build_empty_row(self, parent, align):
+ def _build_empty_row(self, parent: etree.Element, align: Sequence[str | None]) -> None:
"""Build an empty row."""
tr = etree.SubElement(parent, 'tr')
count = len(align)
@@ -112,7 +123,7 @@ class TableProcessor(BlockProcessor):
etree.SubElement(tr, 'td')
count -= 1
- def _build_row(self, row, parent, align):
+ def _build_row(self, row: str, parent: etree.Element, align: Sequence[str | None]) -> None:
""" Given a row of text, build table cells. """
tr = etree.SubElement(parent, 'tr')
tag = 'td'
@@ -133,7 +144,7 @@ class TableProcessor(BlockProcessor):
else:
c.set('style', f'text-align: {a};')
- def _split_row(self, row):
+ def _split_row(self, row: str) -> list[str]:
""" split a row of text into list of cells. """
if self.border:
if row.startswith('|'):
@@ -141,7 +152,7 @@ class TableProcessor(BlockProcessor):
row = self.RE_END_BORDER.sub('', row)
return self._split(row)
- def _split(self, row):
+ def _split(self, row: str) -> list[str]:
""" split a row of text with some code into a list of cells. """
elements = []
pipes = []
@@ -221,11 +232,12 @@ class TableExtension(Extension):
self.config = {
'use_align_attribute': [False, 'True to use align attribute instead of style.'],
}
+ """ Default configuration options. """
super().__init__(**kwargs)
def extendMarkdown(self, md):
- """ Add an instance of TableProcessor to BlockParser. """
+ """ Add an instance of `TableProcessor` to `BlockParser`. """
if '|' not in md.ESCAPED_CHARS:
md.ESCAPED_CHARS.append('|')
processor = TableProcessor(md.parser, self.getConfigs())
diff --git a/libs/markdown/extensions/toc.py b/libs/markdown/extensions/toc.py
index 1ded18d63..a17d7241c 100644
--- a/libs/markdown/extensions/toc.py
+++ b/libs/markdown/extensions/toc.py
@@ -1,18 +1,24 @@
-"""
-Table of Contents Extension for Python-Markdown
-===============================================
+# Table of Contents Extension for Python-Markdown
+# ===============================================
+
+# See https://Python-Markdown.github.io/extensions/toc
+# for documentation.
-See <https://Python-Markdown.github.io/extensions/toc>
-for documentation.
+# Original code Copyright 2008 [Jack Miller](https://codezen.org/)
-Oringinal code Copyright 2008 [Jack Miller](https://codezen.org/)
+# All changes Copyright 2008-2014 The Python Markdown Project
-All changes Copyright 2008-2014 The Python Markdown Project
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
-License: [BSD](https://opensource.org/licenses/bsd-license.php)
+"""
+Add table of contents support to Python-Markdown.
+See the [documentation](https://Python-Markdown.github.io/extensions/toc)
+for details.
"""
+from __future__ import annotations
+
from . import Extension
from ..treeprocessors import Treeprocessor
from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE, AtomicString
@@ -21,19 +27,23 @@ import re
import html
import unicodedata
import xml.etree.ElementTree as etree
+from typing import TYPE_CHECKING, Any, Iterator, MutableSet
+
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
-def slugify(value, separator, unicode=False):
+def slugify(value: str, separator: str, unicode: bool = False) -> str:
""" Slugify a string, to make it URL friendly. """
if not unicode:
- # Replace Extended Latin characters with ASCII, i.e. žlutý → zluty
+ # Replace Extended Latin characters with ASCII, i.e. `žlutý` => `zluty`
value = unicodedata.normalize('NFKD', value)
value = value.encode('ascii', 'ignore').decode('ascii')
value = re.sub(r'[^\w\s-]', '', value).strip().lower()
return re.sub(r'[{}\s]+'.format(separator), separator, value)
-def slugify_unicode(value, separator):
+def slugify_unicode(value: str, separator: str) -> str:
""" Slugify a string, to make it URL friendly while preserving Unicode characters. """
return slugify(value, separator, unicode=True)
@@ -41,7 +51,7 @@ def slugify_unicode(value, separator):
IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$')
-def unique(id, ids):
+def unique(id: str, ids: MutableSet[str]) -> str:
""" Ensure id is unique in set of ids. Append '_1', '_2'... if not """
while id in ids or not id:
m = IDCOUNT_RE.match(id)
@@ -53,7 +63,7 @@ def unique(id, ids):
return id
-def get_name(el):
+def get_name(el: etree.Element) -> str:
"""Get title name."""
text = []
@@ -65,9 +75,9 @@ def get_name(el):
return ''.join(text).strip()
-def stashedHTML2text(text, md, strip_entities=True):
+def stashedHTML2text(text: str, md: Markdown, strip_entities: bool = True) -> str:
""" Extract raw HTML from stash, reduce to plain text and swap with placeholder. """
- def _html_sub(m):
+ def _html_sub(m: re.Match[str]) -> str:
""" Substitute raw html with plain text. """
try:
raw = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
@@ -82,7 +92,7 @@ def stashedHTML2text(text, md, strip_entities=True):
return HTML_PLACEHOLDER_RE.sub(_html_sub, text)
-def unescape(text):
+def unescape(text: str) -> str:
""" Unescape escaped text. """
c = UnescapeTreeprocessor()
return c.unescape(text)
@@ -90,14 +100,16 @@ def unescape(text):
def nest_toc_tokens(toc_list):
"""Given an unsorted list with errors and skips, return a nested one.
- [{'level': 1}, {'level': 2}]
- =>
- [{'level': 1, 'children': [{'level': 2, 'children': []}]}]
+
+ [{'level': 1}, {'level': 2}]
+ =>
+ [{'level': 1, 'children': [{'level': 2, 'children': []}]}]
A wrong list is also converted:
- [{'level': 2}, {'level': 1}]
- =>
- [{'level': 2, 'children': []}, {'level': 1, 'children': []}]
+
+ [{'level': 2}, {'level': 1}]
+ =>
+ [{'level': 2, 'children': []}, {'level': 1, 'children': []}]
"""
ordered_list = []
@@ -152,22 +164,26 @@ def nest_toc_tokens(toc_list):
class TocTreeprocessor(Treeprocessor):
- def __init__(self, md, config):
+ """ Step through document and build TOC. """
+
+ def __init__(self, md: Markdown, config: dict[str, Any]):
super().__init__(md)
- self.marker = config["marker"]
- self.title = config["title"]
+ self.marker: str = config["marker"]
+ self.title: str = config["title"]
self.base_level = int(config["baselevel"]) - 1
self.slugify = config["slugify"]
self.sep = config["separator"]
self.toc_class = config["toc_class"]
- self.use_anchors = parseBoolValue(config["anchorlink"])
- self.anchorlink_class = config["anchorlink_class"]
+ self.title_class: str = config["title_class"]
+ self.use_anchors: bool = parseBoolValue(config["anchorlink"])
+ self.anchorlink_class: str = config["anchorlink_class"]
self.use_permalinks = parseBoolValue(config["permalink"], False)
if self.use_permalinks is None:
self.use_permalinks = config["permalink"]
- self.permalink_class = config["permalink_class"]
- self.permalink_title = config["permalink_title"]
+ self.permalink_class: str = config["permalink_class"]
+ self.permalink_title: str = config["permalink_title"]
+ self.permalink_leading: bool | None = parseBoolValue(config["permalink_leading"], False)
self.header_rgx = re.compile("[Hh][123456]")
if isinstance(config["toc_depth"], str) and '-' in config["toc_depth"]:
self.toc_top, self.toc_bottom = [int(x) for x in config["toc_depth"].split('-')]
@@ -175,46 +191,46 @@ class TocTreeprocessor(Treeprocessor):
self.toc_top = 1
self.toc_bottom = int(config["toc_depth"])
- def iterparent(self, node):
- ''' Iterator wrapper to get allowed parent and child all at once. '''
+ def iterparent(self, node: etree.Element) -> Iterator[tuple[etree.Element, etree.Element]]:
+ """ Iterator wrapper to get allowed parent and child all at once. """
# We do not allow the marker inside a header as that
- # would causes an enless loop of placing a new TOC
+ # would cause an endless loop of placing a new TOC
# inside previously generated TOC.
for child in node:
if not self.header_rgx.match(child.tag) and child.tag not in ['pre', 'code']:
yield node, child
yield from self.iterparent(child)
- def replace_marker(self, root, elem):
- ''' Replace marker with elem. '''
+ def replace_marker(self, root: etree.Element, elem: etree.Element) -> None:
+ """ Replace marker with elem. """
for (p, c) in self.iterparent(root):
text = ''.join(c.itertext()).strip()
if not text:
continue
# To keep the output from screwing up the
- # validation by putting a <div> inside of a <p>
- # we actually replace the <p> in its entirety.
+ # validation by putting a `<div>` inside of a `<p>`
+ # we actually replace the `<p>` in its entirety.
- # The <p> element may contain more than a single text content
- # (nl2br can introduce a <br>). In this situation, c.text returns
+ # The `<p>` element may contain more than a single text content
+ # (`nl2br` can introduce a `<br>`). In this situation, `c.text` returns
# the very first content, ignore children contents or tail content.
- # len(c) == 0 is here to ensure there is only text in the <p>.
+ # `len(c) == 0` is here to ensure there is only text in the `<p>`.
if c.text and c.text.strip() == self.marker and len(c) == 0:
for i in range(len(p)):
if p[i] == c:
p[i] = elem
break
- def set_level(self, elem):
- ''' Adjust header level according to base level. '''
+ def set_level(self, elem: etree.Element) -> None:
+ """ Adjust header level according to base level. """
level = int(elem.tag[-1]) + self.base_level
if level > 6:
level = 6
elem.tag = 'h%d' % level
- def add_anchor(self, c, elem_id): # @ReservedAssignment
+ def add_anchor(self, c: etree.Element, elem_id: str) -> None:
anchor = etree.Element("a")
anchor.text = c.text
anchor.attrib["href"] = "#" + elem_id
@@ -226,7 +242,7 @@ class TocTreeprocessor(Treeprocessor):
c.remove(c[0])
c.append(anchor)
- def add_permalink(self, c, elem_id):
+ def add_permalink(self, c: etree.Element, elem_id: str) -> None:
permalink = etree.Element("a")
permalink.text = ("%spara;" % AMP_SUBSTITUTE
if self.use_permalinks is True
@@ -235,9 +251,14 @@ class TocTreeprocessor(Treeprocessor):
permalink.attrib["class"] = self.permalink_class
if self.permalink_title:
permalink.attrib["title"] = self.permalink_title
- c.append(permalink)
+ if self.permalink_leading:
+ permalink.tail = c.text
+ c.text = ""
+ c.insert(0, permalink)
+ else:
+ c.append(permalink)
- def build_toc_div(self, toc_list):
+ def build_toc_div(self, toc_list: list) -> etree.Element:
""" Return a string div given a toc list. """
div = etree.Element("div")
div.attrib["class"] = self.toc_class
@@ -245,10 +266,11 @@ class TocTreeprocessor(Treeprocessor):
# Add title to the div
if self.title:
header = etree.SubElement(div, "span")
- header.attrib["class"] = "toctitle"
+ if self.title_class:
+ header.attrib["class"] = self.title_class
header.text = self.title
- def build_etree_ul(toc_list, parent):
+ def build_etree_ul(toc_list: list, parent: etree.Element) -> etree.Element:
ul = etree.SubElement(parent, "ul")
for item in toc_list:
# List item link, to be inserted into the toc div
@@ -267,7 +289,7 @@ class TocTreeprocessor(Treeprocessor):
return div
- def run(self, doc):
+ def run(self, doc: etree.Element) -> None:
# Get a list of id attributes
used_ids = set()
for el in doc.iter():
@@ -289,10 +311,10 @@ class TocTreeprocessor(Treeprocessor):
toc_tokens.append({
'level': int(el.tag[-1]),
'id': el.attrib["id"],
- 'name': stashedHTML2text(
+ 'name': unescape(stashedHTML2text(
code_escape(el.attrib.get('data-toc-label', text)),
self.md, strip_entities=False
- )
+ ))
})
# Remove the data-toc-label attribute as it is no longer needed
@@ -323,59 +345,65 @@ class TocExtension(Extension):
def __init__(self, **kwargs):
self.config = {
- "marker": ['[TOC]',
- 'Text to find and replace with Table of Contents - '
- 'Set to an empty string to disable. Defaults to "[TOC]"'],
- "title": ["",
- "Title to insert into TOC <div> - "
- "Defaults to an empty string"],
- "toc_class": ['toc',
- 'CSS class(es) used for the link. '
- 'Defaults to "toclink"'],
- "anchorlink": [False,
- "True if header should be a self link - "
- "Defaults to False"],
- "anchorlink_class": ['toclink',
- 'CSS class(es) used for the link. '
- 'Defaults to "toclink"'],
- "permalink": [0,
- "True or link text if a Sphinx-style permalink should "
- "be added - Defaults to False"],
- "permalink_class": ['headerlink',
- 'CSS class(es) used for the link. '
- 'Defaults to "headerlink"'],
- "permalink_title": ["Permanent link",
- "Title attribute of the permalink - "
- "Defaults to 'Permanent link'"],
- "baselevel": ['1', 'Base level for headers.'],
- "slugify": [slugify,
- "Function to generate anchors based on header text - "
- "Defaults to the headerid ext's slugify function."],
- 'separator': ['-', 'Word separator. Defaults to "-".'],
- "toc_depth": [6,
- 'Define the range of section levels to include in'
- 'the Table of Contents. A single integer (b) defines'
- 'the bottom section level (<h1>..<hb>) only.'
- 'A string consisting of two digits separated by a hyphen'
- 'in between ("2-5"), define the top (t) and the'
- 'bottom (b) (<ht>..<hb>). Defaults to `6` (bottom).'],
+ 'marker': [
+ '[TOC]',
+ 'Text to find and replace with Table of Contents. Set to an empty string to disable. '
+ 'Default: `[TOC]`.'
+ ],
+ 'title': [
+ '', 'Title to insert into TOC `<div>`. Default: an empty string.'
+ ],
+ 'title_class': [
+ 'toctitle', 'CSS class used for the title. Default: `toctitle`.'
+ ],
+ 'toc_class': [
+ 'toc', 'CSS class(es) used for the link. Default: `toclink`.'
+ ],
+ 'anchorlink': [
+ False, 'True if header should be a self link. Default: `False`.'
+ ],
+ 'anchorlink_class': [
+ 'toclink', 'CSS class(es) used for the link. Defaults: `toclink`.'
+ ],
+ 'permalink': [
+ 0, 'True or link text if a Sphinx-style permalink should be added. Default: `False`.'
+ ],
+ 'permalink_class': [
+ 'headerlink', 'CSS class(es) used for the link. Default: `headerlink`.'
+ ],
+ 'permalink_title': [
+ 'Permanent link', 'Title attribute of the permalink. Default: `Permanent link`.'
+ ],
+ 'permalink_leading': [
+ False,
+ 'True if permalinks should be placed at start of the header, rather than end. Default: False.'
+ ],
+ 'baselevel': ['1', 'Base level for headers. Default: `1`.'],
+ 'slugify': [
+ slugify, 'Function to generate anchors based on header text. Default: `slugify`.'
+ ],
+ 'separator': ['-', 'Word separator. Default: `-`.'],
+ 'toc_depth': [
+ 6,
+ 'Define the range of section levels to include in the Table of Contents. A single integer '
+ '(b) defines the bottom section level (<h1>..<hb>) only. A string consisting of two digits '
+ 'separated by a hyphen in between (`2-5`) defines the top (t) and the bottom (b) (<ht>..<hb>). '
+ 'Default: `6` (bottom).'
+ ],
}
+ """ Default configuration options. """
super().__init__(**kwargs)
def extendMarkdown(self, md):
+ """ Add TOC tree processor to Markdown. """
md.registerExtension(self)
self.md = md
self.reset()
tocext = self.TreeProcessorClass(md, self.getConfigs())
- # Headerid ext is set to '>prettify'. With this set to '_end',
- # it should always come after headerid ext (and honor ids assigned
- # by the header id extension) if both are used. Same goes for
- # attr_list extension. This must come last because we don't want
- # to redefine ids after toc is created. But we do want toc prettified.
md.treeprocessors.register(tocext, 'toc', 5)
- def reset(self):
+ def reset(self) -> None:
self.md.toc = ''
self.md.toc_tokens = []
diff --git a/libs/markdown/extensions/wikilinks.py b/libs/markdown/extensions/wikilinks.py
index cddee7ad7..3f3cbe2dd 100644
--- a/libs/markdown/extensions/wikilinks.py
+++ b/libs/markdown/extensions/wikilinks.py
@@ -1,33 +1,41 @@
-'''
-WikiLinks Extension for Python-Markdown
-======================================
+# WikiLinks Extension for Python-Markdown
+# ======================================
-Converts [[WikiLinks]] to relative links.
+# Converts [[WikiLinks]] to relative links.
-See <https://Python-Markdown.github.io/extensions/wikilinks>
-for documentation.
+# See https://Python-Markdown.github.io/extensions/wikilinks
+# for documentation.
-Original code Copyright [Waylan Limberg](http://achinghead.com/).
+# Original code Copyright [Waylan Limberg](http://achinghead.com/).
-All changes Copyright The Python Markdown Project
+# All changes Copyright The Python Markdown Project
-License: [BSD](https://opensource.org/licenses/bsd-license.php)
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
-'''
+"""
+Converts `[[WikiLinks]]` to relative links.
+
+See the [documentation](https://Python-Markdown.github.io/extensions/wikilinks)
+for details.
+"""
+
+from __future__ import annotations
from . import Extension
from ..inlinepatterns import InlineProcessor
import xml.etree.ElementTree as etree
import re
+from typing import Any
-def build_url(label, base, end):
- """ Build a url from the label, a base, and an end. """
+def build_url(label: str, base: str, end: str) -> str:
+ """ Build a URL from the label, a base, and an end. """
clean_label = re.sub(r'([ ]+_)|(_[ ]+)|([ ]+)', '_', label)
return '{}{}{}'.format(base, clean_label, end)
class WikiLinkExtension(Extension):
+ """ Add inline processor to Markdown. """
def __init__(self, **kwargs):
self.config = {
@@ -36,7 +44,7 @@ class WikiLinkExtension(Extension):
'html_class': ['wikilink', 'CSS hook. Leave blank for none.'],
'build_url': [build_url, 'Callable formats URL from label.'],
}
-
+ """ Default configuration options. """
super().__init__(**kwargs)
def extendMarkdown(self, md):
@@ -50,11 +58,13 @@ class WikiLinkExtension(Extension):
class WikiLinksInlineProcessor(InlineProcessor):
- def __init__(self, pattern, config):
+ """ Build link from `wikilink`. """
+
+ def __init__(self, pattern: str, config: dict[str, Any]):
super().__init__(pattern)
self.config = config
- def handleMatch(self, m, data):
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | str, int, int]:
if m.group(1).strip():
base_url, end_url, html_class = self._getMeta()
label = m.group(1).strip()
@@ -68,8 +78,8 @@ class WikiLinksInlineProcessor(InlineProcessor):
a = ''
return a, m.start(0), m.end(0)
- def _getMeta(self):
- """ Return meta data or config data. """
+ def _getMeta(self) -> tuple[str, str, str]:
+ """ Return meta data or `config` data. """
base_url = self.config['base_url']
end_url = self.config['end_url']
html_class = self.config['html_class']
diff --git a/libs/markdown/htmlparser.py b/libs/markdown/htmlparser.py
index 3512d1a77..33b918d54 100644
--- a/libs/markdown/htmlparser.py
+++ b/libs/markdown/htmlparser.py
@@ -1,27 +1,37 @@
-"""
-Python Markdown
+# Python Markdown
+
+# A Python implementation of John Gruber's Markdown.
-A Python implementation of John Gruber's Markdown.
+# Documentation: https://python-markdown.github.io/
+# GitHub: https://github.com/Python-Markdown/markdown/
+# PyPI: https://pypi.org/project/Markdown/
-Documentation: https://python-markdown.github.io/
-GitHub: https://github.com/Python-Markdown/markdown/
-PyPI: https://pypi.org/project/Markdown/
+# Started by Manfred Stienstra (http://www.dwerg.net/).
+# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+# Currently maintained by Waylan Limberg (https://github.com/waylan),
+# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
-Started by Manfred Stienstra (http://www.dwerg.net/).
-Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
-Currently maintained by Waylan Limberg (https://github.com/waylan),
-Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
-Copyright 2007-2020 The Python Markdown Project (v. 1.7 and later)
-Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
-Copyright 2004 Manfred Stienstra (the original version)
+# License: BSD (see LICENSE.md for details).
-License: BSD (see LICENSE.md for details).
"""
+This module imports a copy of [`html.parser.HTMLParser`][] and modifies it heavily through monkey-patches.
+A copy is imported rather than the module being directly imported as this ensures that the user can import
+and use the unmodified library for their own needs.
+"""
+
+from __future__ import annotations
import re
import importlib.util
import sys
+from typing import TYPE_CHECKING, Sequence
+
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
# Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it.
@@ -31,15 +41,15 @@ htmlparser = importlib.util.module_from_spec(spec)
spec.loader.exec_module(htmlparser)
sys.modules['htmlparser'] = htmlparser
-# Monkeypatch HTMLParser to only accept `?>` to close Processing Instructions.
+# Monkeypatch `HTMLParser` to only accept `?>` to close Processing Instructions.
htmlparser.piclose = re.compile(r'\?>')
-# Monkeypatch HTMLParser to only recognize entity references with a closing semicolon.
+# Monkeypatch `HTMLParser` to only recognize entity references with a closing semicolon.
htmlparser.entityref = re.compile(r'&([a-zA-Z][-.a-zA-Z0-9]*);')
-# Monkeypatch HTMLParser to no longer support partial entities. We are always feeding a complete block,
-# so the 'incomplete' functionality is unnecessary. As the entityref regex is run right before incomplete,
+# Monkeypatch `HTMLParser` to no longer support partial entities. We are always feeding a complete block,
+# so the 'incomplete' functionality is unnecessary. As the `entityref` regex is run right before incomplete,
# and the two regex are the same, then incomplete will simply never match and we avoid the logic within.
htmlparser.incomplete = htmlparser.entityref
-# Monkeypatch HTMLParser to not accept a backtick in a tag name, attribute name, or bare value.
+# Monkeypatch `HTMLParser` to not accept a backtick in a tag name, attribute name, or bare value.
htmlparser.locatestarttagend_tolerant = re.compile(r"""
<[a-zA-Z][^`\t\n\r\f />\x00]* # tag name <= added backtick here
(?:[\s/]* # optional whitespace before attribute name
@@ -65,17 +75,20 @@ class HTMLExtractor(htmlparser.HTMLParser):
"""
Extract raw HTML from text.
- The raw HTML is stored in the `htmlStash` of the Markdown instance passed
- to `md` and the remaining text is stored in `cleandoc` as a list of strings.
+ The raw HTML is stored in the [`htmlStash`][markdown.util.HtmlStash] of the
+ [`Markdown`][markdown.Markdown] instance passed to `md` and the remaining text
+ is stored in `cleandoc` as a list of strings.
"""
- def __init__(self, md, *args, **kwargs):
+ def __init__(self, md: Markdown, *args, **kwargs):
if 'convert_charrefs' not in kwargs:
kwargs['convert_charrefs'] = False
# Block tags that should contain no content (self closing)
self.empty_tags = set(['hr'])
+ self.lineno_start_cache = [0]
+
# This calls self.reset
super().__init__(*args, **kwargs)
self.md = md
@@ -84,9 +97,11 @@ class HTMLExtractor(htmlparser.HTMLParser):
"""Reset this instance. Loses all unprocessed data."""
self.inraw = False
self.intail = False
- self.stack = [] # When inraw==True, stack contains a list of tags
- self._cache = []
- self.cleandoc = []
+ self.stack: list[str] = [] # When `inraw==True`, stack contains a list of tags
+ self._cache: list[str] = []
+ self.cleandoc: list[str] = []
+ self.lineno_start_cache = [0]
+
super().reset()
def close(self):
@@ -105,19 +120,19 @@ class HTMLExtractor(htmlparser.HTMLParser):
self._cache = []
@property
- def line_offset(self):
- """Returns char index in self.rawdata for the start of the current line. """
- if self.lineno > 1 and '\n' in self.rawdata:
- m = re.match(r'([^\n]*\n){{{}}}'.format(self.lineno-1), self.rawdata)
- if m:
- return m.end()
- else: # pragma: no cover
- # Value of self.lineno must exceed total number of lines.
- # Find index of beginning of last line.
- return self.rawdata.rfind('\n')
- return 0
-
- def at_line_start(self):
+ def line_offset(self) -> int:
+ """Returns char index in `self.rawdata` for the start of the current line. """
+ for ii in range(len(self.lineno_start_cache)-1, self.lineno-1):
+ last_line_start_pos = self.lineno_start_cache[ii]
+ lf_pos = self.rawdata.find('\n', last_line_start_pos)
+ if lf_pos == -1:
+ # No more newlines found. Use end of raw data as start of line beyond end.
+ lf_pos = len(self.rawdata)
+ self.lineno_start_cache.append(lf_pos+1)
+
+ return self.lineno_start_cache[self.lineno-1]
+
+ def at_line_start(self) -> bool:
"""
Returns True if current position is at start of line.
@@ -130,7 +145,7 @@ class HTMLExtractor(htmlparser.HTMLParser):
# Confirm up to first 3 chars are whitespace
return self.rawdata[self.line_offset:self.line_offset + self.offset].strip() == ''
- def get_endtag_text(self, tag):
+ def get_endtag_text(self, tag: str) -> str:
"""
Returns the text of the end tag.
@@ -145,7 +160,7 @@ class HTMLExtractor(htmlparser.HTMLParser):
# Failed to extract from raw data. Assume well formed and lowercase.
return '</{}>'.format(tag)
- def handle_starttag(self, tag, attrs):
+ def handle_starttag(self, tag: str, attrs: Sequence[tuple[str, str]]):
# Handle tags that should always be empty and do not specify a closing tag
if tag in self.empty_tags:
self.handle_startendtag(tag, attrs)
@@ -166,7 +181,7 @@ class HTMLExtractor(htmlparser.HTMLParser):
# This is presumably a standalone tag in a code span (see #1036).
self.clear_cdata_mode()
- def handle_endtag(self, tag):
+ def handle_endtag(self, tag: str):
text = self.get_endtag_text(tag)
if self.inraw:
@@ -182,7 +197,7 @@ class HTMLExtractor(htmlparser.HTMLParser):
# Preserve blank line and end of raw block.
self._cache.append('\n')
else:
- # More content exists after endtag.
+ # More content exists after `endtag`.
self.intail = True
# Reset stack.
self.inraw = False
@@ -193,7 +208,7 @@ class HTMLExtractor(htmlparser.HTMLParser):
else:
self.cleandoc.append(text)
- def handle_data(self, data):
+ def handle_data(self, data: str):
if self.intail and '\n' in data:
self.intail = False
if self.inraw:
@@ -201,7 +216,7 @@ class HTMLExtractor(htmlparser.HTMLParser):
else:
self.cleandoc.append(data)
- def handle_empty_tag(self, data, is_block):
+ def handle_empty_tag(self, data: str, is_block: bool):
""" Handle empty tags (`<data>`). """
if self.inraw or self.intail:
# Append this to the existing raw block
@@ -224,29 +239,29 @@ class HTMLExtractor(htmlparser.HTMLParser):
else:
self.cleandoc.append(data)
- def handle_startendtag(self, tag, attrs):
+ def handle_startendtag(self, tag: str, attrs):
self.handle_empty_tag(self.get_starttag_text(), is_block=self.md.is_block_level(tag))
- def handle_charref(self, name):
+ def handle_charref(self, name: str):
self.handle_empty_tag('&#{};'.format(name), is_block=False)
- def handle_entityref(self, name):
+ def handle_entityref(self, name: str):
self.handle_empty_tag('&{};'.format(name), is_block=False)
- def handle_comment(self, data):
+ def handle_comment(self, data: str):
self.handle_empty_tag('<!--{}-->'.format(data), is_block=True)
- def handle_decl(self, data):
+ def handle_decl(self, data: str):
self.handle_empty_tag('<!{}>'.format(data), is_block=True)
- def handle_pi(self, data):
+ def handle_pi(self, data: str):
self.handle_empty_tag('<?{}?>'.format(data), is_block=True)
- def unknown_decl(self, data):
+ def unknown_decl(self, data: str):
end = ']]>' if data.startswith('CDATA[') else ']>'
self.handle_empty_tag('<![{}{}'.format(data, end), is_block=True)
- def parse_pi(self, i):
+ def parse_pi(self, i: int) -> int:
if self.at_line_start() or self.intail:
return super().parse_pi(i)
# This is not the beginning of a raw block so treat as plain data
@@ -254,7 +269,7 @@ class HTMLExtractor(htmlparser.HTMLParser):
self.handle_data('<?')
return i + 2
- def parse_html_declaration(self, i):
+ def parse_html_declaration(self, i: int) -> int:
if self.at_line_start() or self.intail:
return super().parse_html_declaration(i)
# This is not the beginning of a raw block so treat as plain data
@@ -262,17 +277,26 @@ class HTMLExtractor(htmlparser.HTMLParser):
self.handle_data('<!')
return i + 2
+ def parse_bogus_comment(self, i: int, report: int = 0) -> int:
+ # Override the default behavior so that bogus comments get passed
+ # through unaltered by setting `report` to `0` (see #1425).
+ pos = super().parse_bogus_comment(i, report)
+ if pos == -1: # pragma: no cover
+ return -1
+ self.handle_empty_tag(self.rawdata[i:pos], is_block=False)
+ return pos
+
# The rest has been copied from base class in standard lib to address #1036.
- # As __startag_text is private, all references to it must be in this subclass.
- # The last few lines of parse_starttag are reversed so that handle_starttag
- # can override cdata_mode in certain situations (in a code span).
- __starttag_text = None
+ # As `__starttag_text` is private, all references to it must be in this subclass.
+ # The last few lines of `parse_starttag` are reversed so that `handle_starttag`
+ # can override `cdata_mode` in certain situations (in a code span).
+ __starttag_text: str | None = None
- def get_starttag_text(self):
- """Return full source of start tag: '<...>'."""
+ def get_starttag_text(self) -> str:
+ """Return full source of start tag: `<...>`."""
return self.__starttag_text
- def parse_starttag(self, i): # pragma: no cover
+ def parse_starttag(self, i: int) -> int: # pragma: no cover
self.__starttag_text = None
endpos = self.check_for_whole_start_tag(i)
if endpos < 0:
@@ -280,7 +304,7 @@ class HTMLExtractor(htmlparser.HTMLParser):
rawdata = self.rawdata
self.__starttag_text = rawdata[i:endpos]
- # Now parse the data between i+1 and j into a tag and attrs
+ # Now parse the data between `i+1` and `j` into a tag and `attrs`
attrs = []
match = htmlparser.tagfind_tolerant.match(rawdata, i+1)
assert match, 'unexpected call to parse_starttag()'
@@ -313,10 +337,10 @@ class HTMLExtractor(htmlparser.HTMLParser):
self.handle_data(rawdata[i:endpos])
return endpos
if end.endswith('/>'):
- # XHTML-style empty tag: <span attr="value" />
+ # XHTML-style empty tag: `<span attr="value" />`
self.handle_startendtag(tag, attrs)
else:
- # *** set cdata_mode first so we can override it in handle_starttag (see #1036) ***
+ # *** set `cdata_mode` first so we can override it in `handle_starttag` (see #1036) ***
if tag in self.CDATA_CONTENT_ELEMENTS:
self.set_cdata_mode(tag)
self.handle_starttag(tag, attrs)
diff --git a/libs/markdown/inlinepatterns.py b/libs/markdown/inlinepatterns.py
index eb313bd40..3d366ad96 100644
--- a/libs/markdown/inlinepatterns.py
+++ b/libs/markdown/inlinepatterns.py
@@ -1,77 +1,74 @@
-"""
-Python Markdown
+# Python Markdown
-A Python implementation of John Gruber's Markdown.
+# A Python implementation of John Gruber's Markdown.
-Documentation: https://python-markdown.github.io/
-GitHub: https://github.com/Python-Markdown/markdown/
-PyPI: https://pypi.org/project/Markdown/
+# Documentation: https://python-markdown.github.io/
+# GitHub: https://github.com/Python-Markdown/markdown/
+# PyPI: https://pypi.org/project/Markdown/
-Started by Manfred Stienstra (http://www.dwerg.net/).
-Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
-Currently maintained by Waylan Limberg (https://github.com/waylan),
-Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+# Started by Manfred Stienstra (http://www.dwerg.net/).
+# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+# Currently maintained by Waylan Limberg (https://github.com/waylan),
+# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
-Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
-Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
-Copyright 2004 Manfred Stienstra (the original version)
+# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
-License: BSD (see LICENSE.md for details).
+# License: BSD (see LICENSE.md for details).
-INLINE PATTERNS
-=============================================================================
+"""
+In version 3.0, a new, more flexible inline processor was added, [`markdown.inlinepatterns.InlineProcessor`][]. The
+original inline patterns, which inherit from [`markdown.inlinepatterns.Pattern`][] or one of its children are still
+supported, though users are encouraged to migrate.
-Inline patterns such as *emphasis* are handled by means of auxiliary
-objects, one per pattern. Pattern objects must be instances of classes
-that extend markdown.Pattern. Each pattern object uses a single regular
-expression and needs support the following methods:
+The new `InlineProcessor` provides two major enhancements to `Patterns`:
- pattern.getCompiledRegExp() # returns a regular expression
+1. Inline Processors no longer need to match the entire block, so regular expressions no longer need to start with
+ `r'^(.*?)'` and end with `r'(.*?)%'`. This runs faster. The returned [`Match`][re.Match] object will only contain
+ what is explicitly matched in the pattern, and extension pattern groups now start with `m.group(1)`.
- pattern.handleMatch(m) # takes a match object and returns
- # an ElementTree element or just plain text
+2. The `handleMatch` method now takes an additional input called `data`, which is the entire block under analysis,
+ not just what is matched with the specified pattern. The method now returns the element *and* the indexes relative
+ to `data` that the return element is replacing (usually `m.start(0)` and `m.end(0)`). If the boundaries are
+ returned as `None`, it is assumed that the match did not take place, and nothing will be altered in `data`.
-All of python markdown's built-in patterns subclass from Pattern,
-but you can add additional patterns that don't.
+ This allows handling of more complex constructs than regular expressions can handle, e.g., matching nested
+ brackets, and explicit control of the span "consumed" by the processor.
-Also note that all the regular expressions used by inline must
-capture the whole block. For this reason, they all start with
-'^(.*)' and end with '(.*)!'. In case with built-in expression
-Pattern takes care of adding the "^(.*)" and "(.*)!".
+"""
-Finally, the order in which regular expressions are applied is very
-important - e.g. if we first replace http://.../ links with <a> tags
-and _then_ try to replace inline html, we would end up with a mess.
-So, we apply the expressions in the following order:
+from __future__ import annotations
-* escape and backticks have to go before everything else, so
- that we can preempt any markdown patterns by escaping them.
+from . import util
+from typing import TYPE_CHECKING, Any, Collection, NamedTuple
+import re
+import xml.etree.ElementTree as etree
+from html import entities
-* then we handle auto-links (must be done before inline html)
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
-* then we handle inline HTML. At this point we will simply
- replace all inline HTML strings with a placeholder and add
- the actual HTML to a hash.
-* then inline images (must be done before links)
+def build_inlinepatterns(md: Markdown, **kwargs: Any) -> util.Registry[InlineProcessor]:
+ """
+ Build the default set of inline patterns for Markdown.
-* then bracketed links, first regular then reference-style
+ The order in which processors and/or patterns are applied is very important - e.g. if we first replace
+ `http://.../` links with `<a>` tags and _then_ try to replace inline HTML, we would end up with a mess. So, we
+ apply the expressions in the following order:
-* finally we apply strong and emphasis
-"""
+ * backticks and escaped characters have to be handled before everything else so that we can preempt any markdown
+ patterns by escaping them;
-from . import util
-from collections import namedtuple
-import re
-import xml.etree.ElementTree as etree
-try: # pragma: no cover
- from html import entities
-except ImportError: # pragma: no cover
- import htmlentitydefs as entities
+ * then we handle the various types of links (auto-links must be handled before inline HTML);
+
+ * then we handle inline HTML. At this point we will simply replace all inline HTML strings with a placeholder
+ and add the actual HTML to a stash;
+ * finally we apply strong, emphasis, etc.
-def build_inlinepatterns(md, **kwargs):
- """ Build the default set of inline patterns for Markdown. """
+ """
inlinePatterns = util.Registry()
inlinePatterns.register(BacktickInlineProcessor(BACKTICK_RE), 'backtick', 190)
inlinePatterns.register(EscapeInlineProcessor(ESCAPE_RE, md), 'escape', 180)
@@ -98,81 +95,80 @@ def build_inlinepatterns(md, **kwargs):
return inlinePatterns
-"""
-The actual regular expressions for patterns
------------------------------------------------------------------------------
-"""
+# The actual regular expressions for patterns
+# -----------------------------------------------------------------------------
NOIMG = r'(?<!\!)'
+""" Match not an image. Partial regular expression which matches if not preceded by `!`. """
-# `e=f()` or ``e=f("`")``
BACKTICK_RE = r'(?:(?<!\\)((?:\\{2})+)(?=`+)|(?<!\\)(`+)(.+?)(?<!`)\2(?!`))'
+""" Match backtick quoted string (`` `e=f()` `` or ``` ``e=f("`")`` ```). """
-# \<
ESCAPE_RE = r'\\(.)'
+""" Match a backslash escaped character (`\\<` or `\\*`). """
-# *emphasis*
EMPHASIS_RE = r'(\*)([^\*]+)\1'
+""" Match emphasis with an asterisk (`*emphasis*`). """
-# **strong**
STRONG_RE = r'(\*{2})(.+?)\1'
+""" Match strong with an asterisk (`**strong**`). """
-# __smart__strong__
SMART_STRONG_RE = r'(?<!\w)(_{2})(?!_)(.+?)(?<!_)\1(?!\w)'
+""" Match strong with underscore while ignoring middle word underscores (`__smart__strong__`). """
-# _smart_emphasis_
SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\1(?!\w)'
+""" Match emphasis with underscore while ignoring middle word underscores (`_smart_emphasis_`). """
-# __strong _em__
SMART_STRONG_EM_RE = r'(?<!\w)(\_)\1(?!\1)(.+?)(?<!\w)\1(?!\1)(.+?)\1{3}(?!\w)'
+""" Match strong emphasis with underscores (`__strong _em__`). """
-# ***strongem*** or ***em*strong**
EM_STRONG_RE = r'(\*)\1{2}(.+?)\1(.*?)\1{2}'
+""" Match emphasis strong with asterisk (`***strongem***` or `***em*strong**`). """
-# ___strongem___ or ___em_strong__
EM_STRONG2_RE = r'(_)\1{2}(.+?)\1(.*?)\1{2}'
+""" Match emphasis strong with underscores (`___emstrong___` or `___em_strong__`). """
-# ***strong**em*
STRONG_EM_RE = r'(\*)\1{2}(.+?)\1{2}(.*?)\1'
+""" Match strong emphasis with asterisk (`***strong**em*`). """
-# ___strong__em_
STRONG_EM2_RE = r'(_)\1{2}(.+?)\1{2}(.*?)\1'
+""" Match strong emphasis with underscores (`___strong__em_`). """
-# **strong*em***
STRONG_EM3_RE = r'(\*)\1(?!\1)([^*]+?)\1(?!\1)(.+?)\1{3}'
+""" Match strong emphasis with asterisk (`**strong*em***`). """
-# [text](url) or [text](<url>) or [text](url "title")
LINK_RE = NOIMG + r'\['
+""" Match start of in-line link (`[text](url)` or `[text](<url>)` or `[text](url "title")`). """
-# ![alttxt](http://x.com/) or ![alttxt](<http://x.com/>)
IMAGE_LINK_RE = r'\!\['
+""" Match start of in-line image link (`![alttxt](url)` or `![alttxt](<url>)`). """
-# [Google][3]
REFERENCE_RE = LINK_RE
+""" Match start of reference link (`[Label][3]`). """
-# ![alt text][2]
IMAGE_REFERENCE_RE = IMAGE_LINK_RE
+""" Match start of image reference (`![alt text][2]`). """
-# stand-alone * or _
-NOT_STRONG_RE = r'((^|\s)(\*|_)(\s|$))'
+NOT_STRONG_RE = r'((^|(?<=\s))(\*{1,3}|_{1,3})(?=\s|$))'
+""" Match a stand-alone `*` or `_`. """
-# <http://www.123.com>
AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^<>]*)>'
+""" Match an automatic link (`<http://www.example.com>`). """
AUTOMAIL_RE = r'<([^<> !]+@[^@<> ]+)>'
+""" Match an automatic email link (`<[email protected]>`). """
-# <...>
HTML_RE = r'(<(\/?[a-zA-Z][^<>@ ]*( [^<>]*)?|!--(?:(?!<!--|-->).)*--)>)'
+""" Match an HTML tag (`<...>`). """
-# "&#38;" (decimal) or "&#x26;" (hex) or "&amp;" (named)
ENTITY_RE = r'(&(?:\#[0-9]+|\#x[0-9a-fA-F]+|[a-zA-Z0-9]+);)'
+""" Match an HTML entity (`&#38;` (decimal) or `&#x26;` (hex) or `&amp;` (named)). """
-# two spaces at end of line
LINE_BREAK_RE = r' \n'
+""" Match two spaces at end of line. """
-def dequote(string):
+def dequote(string: str) -> str:
"""Remove quotes from around a string."""
if ((string.startswith('"') and string.endswith('"')) or
(string.startswith("'") and string.endswith("'"))):
@@ -181,28 +177,55 @@ def dequote(string):
return string
-class EmStrongItem(namedtuple('EmStrongItem', ['pattern', 'builder', 'tags'])):
+class EmStrongItem(NamedTuple):
"""Emphasis/strong pattern item."""
+ pattern: re.Pattern[str]
+ builder: str
+ tags: str
-"""
-The pattern classes
------------------------------------------------------------------------------
-"""
+# The pattern classes
+# -----------------------------------------------------------------------------
class Pattern: # pragma: no cover
- """Base class that inline patterns subclass. """
+ """
+ Base class that inline patterns subclass.
+
+ Inline patterns are handled by means of `Pattern` subclasses, one per regular expression.
+ Each pattern object uses a single regular expression and must support the following methods:
+ [`getCompiledRegExp`][markdown.inlinepatterns.Pattern.getCompiledRegExp] and
+ [`handleMatch`][markdown.inlinepatterns.Pattern.handleMatch].
+
+ All the regular expressions used by `Pattern` subclasses must capture the whole block. For this
+ reason, they all start with `^(.*)` and end with `(.*)$`. When passing a regular expression on
+ class initialization, the `^(.*)` and `(.*)$` are added automatically and the regular expression
+ is pre-compiled.
+
+ It is strongly suggested that the newer style [`markdown.inlinepatterns.InlineProcessor`][], which
+ uses a more efficient and flexible search approach, be used instead. However, the older style
+ `Pattern` remains for backward compatibility with many existing third-party extensions.
+
+ """
+
+ ANCESTOR_EXCLUDES: Collection[str] = tuple()
+ """
+ A collection of elements which are undesirable ancestors. The processor will be skipped if it
+ would cause the content to be a descendant of one of the listed tag names.
+ """
- ANCESTOR_EXCLUDES = tuple()
+ compiled_re: re.Pattern[str]
+ md: Markdown | None
- def __init__(self, pattern, md=None):
+ def __init__(self, pattern: str, md: Markdown | None = None):
"""
Create an instant of an inline pattern.
- Keyword arguments:
+ Arguments:
+ pattern: A regular expression that matches a pattern.
+ md: An optional pointer to the instance of `markdown.Markdown`; it is available as
+ `self.md` on the class instance.
- * pattern: A regular expression that matches a pattern
"""
self.pattern = pattern
@@ -211,27 +234,28 @@ class Pattern: # pragma: no cover
self.md = md
- def getCompiledRegExp(self):
+ def getCompiledRegExp(self) -> re.Pattern:
""" Return a compiled regular expression. """
return self.compiled_re
- def handleMatch(self, m):
+ def handleMatch(self, m: re.Match[str]) -> etree.Element | str:
"""Return a ElementTree element from the given match.
Subclasses should override this method.
- Keyword arguments:
+ Arguments:
+ m: A match object containing a match of the pattern.
- * m: A re match object containing a match of the pattern.
+ Returns: An ElementTree Element object.
"""
pass # pragma: no cover
- def type(self):
+ def type(self) -> str:
""" Return class name, to define pattern type """
return self.__class__.__name__
- def unescape(self, text):
+ def unescape(self, text: str) -> str:
""" Return unescaped text given text with an inline placeholder. """
try:
stash = self.md.treeprocessors['inline'].stashed_nodes
@@ -245,36 +269,38 @@ class Pattern: # pragma: no cover
if isinstance(value, str):
return value
else:
- # An etree Element - return text content only
+ # An `etree` Element - return text content only
return ''.join(value.itertext())
return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
class InlineProcessor(Pattern):
"""
- Base class that inline patterns subclass.
+ Base class that inline processors subclass.
This is the newer style inline processor that uses a more
efficient and flexible search approach.
+
"""
- def __init__(self, pattern, md=None):
+ def __init__(self, pattern: str, md: Markdown | None = None):
"""
- Create an instant of an inline pattern.
-
- Keyword arguments:
+ Create an instance of an inline processor.
- * pattern: A regular expression that matches a pattern
+ Arguments:
+ pattern: A regular expression that matches a pattern.
+ md: An optional pointer to the instance of `markdown.Markdown`; it is available as
+ `self.md` on the class instance.
"""
self.pattern = pattern
self.compiled_re = re.compile(pattern, re.DOTALL | re.UNICODE)
- # Api for Markdown to pass safe_mode into instance
+ # API for Markdown to pass `safe_mode` into instance
self.safe_mode = False
self.md = md
- def handleMatch(self, m, data):
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | str | None, int | None, int | None]:
"""Return a ElementTree element from the given match and the
start and end index of the matched text.
@@ -283,37 +309,45 @@ class InlineProcessor(Pattern):
Subclasses should override this method.
- Keyword arguments:
-
- * m: A re match object containing a match of the pattern.
- * data: The buffer current under analysis
+ Arguments:
+ m: A re match object containing a match of the pattern.
+ data: The buffer currently under analysis.
Returns:
-
- * el: The ElementTree element, text or None.
- * start: The start of the region that has been matched or None.
- * end: The end of the region that has been matched or None.
+ el: The ElementTree element, text or None.
+ start: The start of the region that has been matched or None.
+ end: The end of the region that has been matched or None.
"""
pass # pragma: no cover
class SimpleTextPattern(Pattern): # pragma: no cover
- """ Return a simple text of group(2) of a Pattern. """
- def handleMatch(self, m):
+ """ Return a simple text of `group(2)` of a Pattern. """
+ def handleMatch(self, m: re.Match[str]) -> str:
+ """ Return string content of `group(2)` of a matching pattern. """
return m.group(2)
class SimpleTextInlineProcessor(InlineProcessor):
- """ Return a simple text of group(1) of a Pattern. """
- def handleMatch(self, m, data):
+ """ Return a simple text of `group(1)` of a Pattern. """
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]:
+ """ Return string content of `group(1)` of a matching pattern. """
return m.group(1), m.start(0), m.end(0)
class EscapeInlineProcessor(InlineProcessor):
""" Return an escaped character. """
- def handleMatch(self, m, data):
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str | None, int, int]:
+ """
+ If the character matched by `group(1)` of a pattern is in [`ESCAPED_CHARS`][markdown.Markdown.ESCAPED_CHARS]
+ then return the integer representing the character's Unicode code point (as returned by [`ord`][]) wrapped
+ in [`util.STX`][markdown.util.STX] and [`util.ETX`][markdown.util.ETX].
+
+ If the matched character is not in [`ESCAPED_CHARS`][markdown.Markdown.ESCAPED_CHARS], then return `None`.
+ """
+
char = m.group(1)
if char in self.md.ESCAPED_CHARS:
return '{}{}{}'.format(util.STX, ord(char), util.ETX), m.start(0), m.end(0)
@@ -323,15 +357,28 @@ class EscapeInlineProcessor(InlineProcessor):
class SimpleTagPattern(Pattern): # pragma: no cover
"""
- Return element of type `tag` with a text attribute of group(3)
+ Return element of type `tag` with a text attribute of `group(3)`
of a Pattern.
"""
- def __init__(self, pattern, tag):
+ def __init__(self, pattern: str, tag: str):
+ """
+ Create an instance of a simple tag pattern.
+
+ Arguments:
+ pattern: A regular expression that matches a pattern.
+ tag: Tag of element.
+
+ """
Pattern.__init__(self, pattern)
self.tag = tag
+ """ The tag of the rendered element. """
- def handleMatch(self, m):
+ def handleMatch(self, m: re.Match[str]) -> etree.Element:
+ """
+ Return [`Element`][xml.etree.ElementTree.Element] of type `tag` with the string in `group(3)` of a
+ matching pattern as the Element's text.
+ """
el = etree.Element(self.tag)
el.text = m.group(3)
return el
@@ -339,15 +386,28 @@ class SimpleTagPattern(Pattern): # pragma: no cover
class SimpleTagInlineProcessor(InlineProcessor):
"""
- Return element of type `tag` with a text attribute of group(2)
+ Return element of type `tag` with a text attribute of `group(2)`
of a Pattern.
"""
- def __init__(self, pattern, tag):
+ def __init__(self, pattern: str, tag: str):
+ """
+ Create an instance of a simple tag processor.
+
+ Arguments:
+ pattern: A regular expression that matches a pattern.
+ tag: Tag of element.
+
+ """
InlineProcessor.__init__(self, pattern)
self.tag = tag
+ """ The tag of the rendered element. """
- def handleMatch(self, m, data): # pragma: no cover
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: # pragma: no cover
+ """
+ Return [`Element`][xml.etree.ElementTree.Element] of type `tag` with the string in `group(2)` of a
+ matching pattern as the Element's text.
+ """
el = etree.Element(self.tag)
el.text = m.group(2)
return el, m.start(0), m.end(0)
@@ -355,24 +415,35 @@ class SimpleTagInlineProcessor(InlineProcessor):
class SubstituteTagPattern(SimpleTagPattern): # pragma: no cover
""" Return an element of type `tag` with no children. """
- def handleMatch(self, m):
+ def handleMatch(self, m: re.Match[str]) -> etree.Element:
+ """ Return empty [`Element`][xml.etree.ElementTree.Element] of type `tag`. """
return etree.Element(self.tag)
class SubstituteTagInlineProcessor(SimpleTagInlineProcessor):
""" Return an element of type `tag` with no children. """
- def handleMatch(self, m, data):
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]:
+ """ Return empty [`Element`][xml.etree.ElementTree.Element] of type `tag`. """
return etree.Element(self.tag), m.start(0), m.end(0)
class BacktickInlineProcessor(InlineProcessor):
- """ Return a `<code>` element containing the matching text. """
- def __init__(self, pattern):
+ """ Return a `<code>` element containing the escaped matching text. """
+ def __init__(self, pattern: str):
InlineProcessor.__init__(self, pattern)
self.ESCAPED_BSLASH = '{}{}{}'.format(util.STX, ord('\\'), util.ETX)
self.tag = 'code'
+ """ The tag of the rendered element. """
- def handleMatch(self, m, data):
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | str, int, int]:
+ """
+ If the match contains `group(3)` of a pattern, then return a `code`
+ [`Element`][xml.etree.ElementTree.Element] which contains HTML escaped text (with
+ [`code_escape`][markdown.util.code_escape]) as an [`AtomicString`][markdown.util.AtomicString].
+
+ If the match does not contain `group(3)` then return the text of `group(1)` backslash escaped.
+
+ """
if m.group(3):
el = etree.Element(self.tag)
el.text = util.AtomicString(util.code_escape(m.group(3).strip()))
@@ -387,7 +458,12 @@ class DoubleTagPattern(SimpleTagPattern): # pragma: no cover
Useful for strong emphasis etc.
"""
- def handleMatch(self, m):
+ def handleMatch(self, m: re.Match[str]) -> etree.Element:
+ """
+ Return [`Element`][xml.etree.ElementTree.Element] in following format:
+ `<tag1><tag2>group(3)</tag2>group(4)</tag1>` where `group(4)` is optional.
+
+ """
tag1, tag2 = self.tag.split(",")
el1 = etree.Element(tag1)
el2 = etree.SubElement(el1, tag2)
@@ -403,7 +479,12 @@ class DoubleTagInlineProcessor(SimpleTagInlineProcessor):
Useful for strong emphasis etc.
"""
- def handleMatch(self, m, data): # pragma: no cover
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: # pragma: no cover
+ """
+ Return [`Element`][xml.etree.ElementTree.Element] in following format:
+ `<tag1><tag2>group(2)</tag2>group(3)</tag1>` where `group(3)` is optional.
+
+ """
tag1, tag2 = self.tag.split(",")
el1 = etree.Element(tag1)
el2 = etree.SubElement(el1, tag2)
@@ -415,19 +496,20 @@ class DoubleTagInlineProcessor(SimpleTagInlineProcessor):
class HtmlInlineProcessor(InlineProcessor):
""" Store raw inline html and return a placeholder. """
- def handleMatch(self, m, data):
- rawhtml = self.unescape(m.group(1))
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]:
+ """ Store the text of `group(1)` of a pattern and return a placeholder string. """
+ rawhtml = self.backslash_unescape(self.unescape(m.group(1)))
place_holder = self.md.htmlStash.store(rawhtml)
return place_holder, m.start(0), m.end(0)
- def unescape(self, text):
+ def unescape(self, text: str) -> str:
""" Return unescaped text given text with an inline placeholder. """
try:
stash = self.md.treeprocessors['inline'].stashed_nodes
except KeyError: # pragma: no cover
return text
- def get_stash(m):
+ def get_stash(m: re.Match[str]) -> str:
id = m.group(1)
value = stash.get(id)
if value is not None:
@@ -438,6 +520,18 @@ class HtmlInlineProcessor(InlineProcessor):
return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
+ def backslash_unescape(self, text: str) -> str:
+ """ Return text with backslash escapes undone (backslashes are restored). """
+ try:
+ RE = self.md.treeprocessors['unescape'].RE
+ except KeyError: # pragma: no cover
+ return text
+
+ def _unescape(m: re.Match[str]) -> str:
+ return chr(int(m.group(1)))
+
+ return RE.sub(_unescape, text)
+
class AsteriskProcessor(InlineProcessor):
"""Emphasis processor for handling strong and em matches inside asterisks."""
@@ -449,15 +543,16 @@ class AsteriskProcessor(InlineProcessor):
EmStrongItem(re.compile(STRONG_RE, re.DOTALL | re.UNICODE), 'single', 'strong'),
EmStrongItem(re.compile(EMPHASIS_RE, re.DOTALL | re.UNICODE), 'single', 'em')
]
+ """ The various strong and emphasis patterns handled by this processor. """
- def build_single(self, m, tag, idx):
+ def build_single(self, m: re.Match[str], tag: str, idx: int) -> etree.Element:
"""Return single tag."""
el1 = etree.Element(tag)
text = m.group(2)
self.parse_sub_patterns(text, el1, None, idx)
return el1
- def build_double(self, m, tags, idx):
+ def build_double(self, m: re.Match[str], tags: str, idx: int) -> etree.Element:
"""Return double tag."""
tag1, tag2 = tags.split(",")
@@ -471,7 +566,7 @@ class AsteriskProcessor(InlineProcessor):
self.parse_sub_patterns(text, el1, el2, idx)
return el1
- def build_double2(self, m, tags, idx):
+ def build_double2(self, m: re.Match[str], tags: str, idx: int) -> etree.Element:
"""Return double tags (variant 2): `<strong>text <em>text</em></strong>`."""
tag1, tag2 = tags.split(",")
@@ -484,22 +579,19 @@ class AsteriskProcessor(InlineProcessor):
self.parse_sub_patterns(text, el2, None, idx)
return el1
- def parse_sub_patterns(self, data, parent, last, idx):
+ def parse_sub_patterns(
+ self, data: str, parent: etree.Element, last: etree.Element | None, idx: int
+ ) -> None:
"""
Parses sub patterns.
- `data` (`str`):
- text to evaluate.
+ `data`: text to evaluate.
- `parent` (`etree.Element`):
- Parent to attach text and sub elements to.
+ `parent`: Parent to attach text and sub elements to.
- `last` (`etree.Element`):
- Last appended child to parent. Can also be None if parent has no children.
-
- `idx` (`int`):
- Current pattern index that was used to evaluate the parent.
+ `last`: Last appended child to parent. Can also be None if parent has no children.
+ `idx`: Current pattern index that was used to evaluate the parent.
"""
offset = 0
@@ -548,7 +640,7 @@ class AsteriskProcessor(InlineProcessor):
else:
parent.text = text
- def build_element(self, m, builder, tags, index):
+ def build_element(self, m: re.Match[str], builder: str, tags: str, index: int) -> etree.Element:
"""Element builder."""
if builder == 'double2':
@@ -558,7 +650,7 @@ class AsteriskProcessor(InlineProcessor):
else:
return self.build_single(m, tags, index)
- def handleMatch(self, m, data):
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]:
"""Parse patterns."""
el = None
@@ -585,6 +677,7 @@ class UnderscoreProcessor(AsteriskProcessor):
EmStrongItem(re.compile(SMART_STRONG_RE, re.DOTALL | re.UNICODE), 'single', 'strong'),
EmStrongItem(re.compile(SMART_EMPHASIS_RE, re.DOTALL | re.UNICODE), 'single', 'em')
]
+ """ The various strong and emphasis patterns handled by this processor. """
class LinkInlineProcessor(InlineProcessor):
@@ -592,7 +685,8 @@ class LinkInlineProcessor(InlineProcessor):
RE_LINK = re.compile(r'''\(\s*(?:(<[^<>]*>)\s*(?:('[^']*'|"[^"]*")\s*)?\))?''', re.DOTALL | re.UNICODE)
RE_TITLE_CLEAN = re.compile(r'\s')
- def handleMatch(self, m, data):
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]:
+ """ Return an `a` [`Element`][xml.etree.ElementTree.Element] or `(None, None, None)`. """
text, index, handled = self.getText(data, m.end(0))
if not handled:
@@ -612,11 +706,11 @@ class LinkInlineProcessor(InlineProcessor):
return el, m.start(0), index
- def getLink(self, data, index):
+ def getLink(self, data: str, index: int) -> tuple[str, str | None, int, bool]:
"""Parse data between `()` of `[Text]()` allowing recursive `()`. """
href = ''
- title = None
+ title: str | None = None
handled = False
m = self.RE_LINK.match(data, pos=index)
@@ -636,7 +730,7 @@ class LinkInlineProcessor(InlineProcessor):
last_bracket = -1
# Primary (first found) quote tracking.
- quote = None
+ quote: str | None = None
start_quote = -1
exit_quote = -1
ignore_matches = False
@@ -710,7 +804,7 @@ class LinkInlineProcessor(InlineProcessor):
if c != ' ':
last = c
- # We have a scenario: [test](link"notitle)
+ # We have a scenario: `[test](link"notitle)`
# When we enter a string, we stop tracking bracket resolution in the main counter,
# but we do keep a backup counter up until we discover where we might resolve all brackets
# if the title string fails to resolve.
@@ -728,7 +822,7 @@ class LinkInlineProcessor(InlineProcessor):
return href, title, index, handled
- def getText(self, data, index):
+ def getText(self, data: str, index: int) -> tuple[str, int, bool]:
"""Parse the content between `[]` of the start of an image or link
resolving nested square brackets.
@@ -749,9 +843,10 @@ class LinkInlineProcessor(InlineProcessor):
class ImageInlineProcessor(LinkInlineProcessor):
- """ Return a img element from the given match. """
+ """ Return an `img` element from the given match. """
- def handleMatch(self, m, data):
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]:
+ """ Return an `img` [`Element`][xml.etree.ElementTree.Element] or `(None, None, None)`. """
text, index, handled = self.getText(data, m.end(0))
if not handled:
return None, None, None
@@ -777,7 +872,11 @@ class ReferenceInlineProcessor(LinkInlineProcessor):
RE_LINK = re.compile(r'\s?\[([^\]]*)\]', re.DOTALL | re.UNICODE)
- def handleMatch(self, m, data):
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]:
+ """
+ Return [`Element`][xml.etree.ElementTree.Element] returned by `makeTag` method or `(None, None, None)`.
+
+ """
text, index, handled = self.getText(data, m.end(0))
if not handled:
return None, None, None
@@ -786,7 +885,7 @@ class ReferenceInlineProcessor(LinkInlineProcessor):
if not handled:
return None, None, None
- # Clean up linebreaks in id
+ # Clean up line breaks in id
id = self.NEWLINE_CLEANUP_RE.sub(' ', id)
if id not in self.md.references: # ignore undefined refs
return None, m.start(0), end
@@ -795,11 +894,11 @@ class ReferenceInlineProcessor(LinkInlineProcessor):
return self.makeTag(href, title, text), m.start(0), end
- def evalId(self, data, index, text):
+ def evalId(self, data: str, index: int, text: str) -> tuple[str | None, int, bool]:
"""
- Evaluate the id portion of [ref][id].
+ Evaluate the id portion of `[ref][id]`.
- If [ref][] use [ref].
+ If `[ref][]` use `[ref]`.
"""
m = self.RE_LINK.match(data, pos=index)
if not m:
@@ -811,7 +910,8 @@ class ReferenceInlineProcessor(LinkInlineProcessor):
id = text.lower()
return id, end, True
- def makeTag(self, href, title, text):
+ def makeTag(self, href: str, title: str, text: str) -> etree.Element:
+ """ Return an `a` [`Element`][xml.etree.ElementTree.Element]. """
el = etree.Element('a')
el.set('href', href)
@@ -823,16 +923,17 @@ class ReferenceInlineProcessor(LinkInlineProcessor):
class ShortReferenceInlineProcessor(ReferenceInlineProcessor):
- """Short form of reference: [google]. """
- def evalId(self, data, index, text):
- """Evaluate the id from of [ref] """
+ """Short form of reference: `[google]`. """
+ def evalId(self, data: str, index: int, text: str) -> tuple[str, int, bool]:
+ """Evaluate the id of `[ref]`. """
return text.lower(), index, True
class ImageReferenceInlineProcessor(ReferenceInlineProcessor):
- """ Match to a stored reference and return img element. """
- def makeTag(self, href, title, text):
+ """ Match to a stored reference and return `img` element. """
+ def makeTag(self, href: str, title: str, text: str) -> etree.Element:
+ """ Return an `img` [`Element`][xml.etree.ElementTree.Element]. """
el = etree.Element("img")
el.set("src", href)
if title:
@@ -842,16 +943,17 @@ class ImageReferenceInlineProcessor(ReferenceInlineProcessor):
class ShortImageReferenceInlineProcessor(ImageReferenceInlineProcessor):
- """ Short form of inage reference: ![ref]. """
- def evalId(self, data, index, text):
- """Evaluate the id from of [ref] """
+ """ Short form of image reference: `![ref]`. """
+ def evalId(self, data: str, index: int, text: str) -> tuple[str, int, bool]:
+ """Evaluate the id of `[ref]`. """
return text.lower(), index, True
class AutolinkInlineProcessor(InlineProcessor):
- """ Return a link Element given an autolink (`<http://example/com>`). """
- def handleMatch(self, m, data):
+ """ Return a link Element given an auto-link (`<http://example.com>`). """
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]:
+ """ Return an `a` [`Element`][xml.etree.ElementTree.Element] of `group(1)`. """
el = etree.Element("a")
el.set('href', self.unescape(m.group(1)))
el.text = util.AtomicString(m.group(1))
@@ -860,15 +962,16 @@ class AutolinkInlineProcessor(InlineProcessor):
class AutomailInlineProcessor(InlineProcessor):
"""
- Return a mailto link Element given an automail link (`<[email protected]>`).
+ Return a `mailto` link Element given an auto-mail link (`<[email protected]>`).
"""
- def handleMatch(self, m, data):
+ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]:
+ """ Return an [`Element`][xml.etree.ElementTree.Element] containing a `mailto` link of `group(1)`. """
el = etree.Element('a')
email = self.unescape(m.group(1))
if email.startswith("mailto:"):
email = email[len("mailto:"):]
- def codepoint2name(code):
+ def codepoint2name(code: int) -> str:
"""Return entity definition by code, or the code if not defined."""
entity = entities.codepoint2name.get(code)
if entity:
diff --git a/libs/markdown/postprocessors.py b/libs/markdown/postprocessors.py
index 498f7e892..7f5ede90c 100644
--- a/libs/markdown/postprocessors.py
+++ b/libs/markdown/postprocessors.py
@@ -1,38 +1,43 @@
-"""
-Python Markdown
+# Python Markdown
-A Python implementation of John Gruber's Markdown.
+# A Python implementation of John Gruber's Markdown.
-Documentation: https://python-markdown.github.io/
-GitHub: https://github.com/Python-Markdown/markdown/
-PyPI: https://pypi.org/project/Markdown/
+# Documentation: https://python-markdown.github.io/
+# GitHub: https://github.com/Python-Markdown/markdown/
+# PyPI: https://pypi.org/project/Markdown/
-Started by Manfred Stienstra (http://www.dwerg.net/).
-Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
-Currently maintained by Waylan Limberg (https://github.com/waylan),
-Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+# Started by Manfred Stienstra (http://www.dwerg.net/).
+# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+# Currently maintained by Waylan Limberg (https://github.com/waylan),
+# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
-Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
-Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
-Copyright 2004 Manfred Stienstra (the original version)
+# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
-License: BSD (see LICENSE.md for details).
+# License: BSD (see LICENSE.md for details).
-POST-PROCESSORS
-=============================================================================
+"""
-Markdown also allows post-processors, which are similar to preprocessors in
-that they need to implement a "run" method. However, they are run after core
-processing.
+Post-processors run on the text of the entire document after it has been serialized into a string.
+Postprocessors should be used to work with the text just before output. Usually, they are used to add
+back sections that were extracted in a preprocessor, fix up outgoing encodings, or wrap the whole
+document.
"""
+from __future__ import annotations
+
from collections import OrderedDict
+from typing import TYPE_CHECKING, Any
from . import util
import re
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
+
-def build_postprocessors(md, **kwargs):
+def build_postprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Postprocessor]:
""" Build the default postprocessors for Markdown. """
postprocessors = util.Registry()
postprocessors.register(RawHtmlPostprocessor(md), 'raw_html', 30)
@@ -44,16 +49,16 @@ class Postprocessor(util.Processor):
"""
Postprocessors are run after the ElementTree it converted back into text.
- Each Postprocessor implements a "run" method that takes a pointer to a
+ Each Postprocessor implements a `run` method that takes a pointer to a
text string, modifies it as necessary and returns a text string.
- Postprocessors must extend markdown.Postprocessor.
+ Postprocessors must extend `Postprocessor`.
"""
- def run(self, text):
+ def run(self, text: str) -> str:
"""
- Subclasses of Postprocessor should implement a `run` method, which
+ Subclasses of `Postprocessor` should implement a `run` method, which
takes the html document as a single text string and returns a
(possibly modified) string.
@@ -66,7 +71,7 @@ class RawHtmlPostprocessor(Postprocessor):
BLOCK_LEVEL_REGEX = re.compile(r'^\<\/?([^ >]+)')
- def run(self, text):
+ def run(self, text: str) -> str:
""" Iterate over html stash and restore html. """
replacements = OrderedDict()
for i in range(self.md.htmlStash.html_counter):
@@ -76,7 +81,7 @@ class RawHtmlPostprocessor(Postprocessor):
self.md.htmlStash.get_placeholder(i))] = html
replacements[self.md.htmlStash.get_placeholder(i)] = html
- def substitute_match(m):
+ def substitute_match(m: re.Match[str]) -> str:
key = m.group(0)
if key not in replacements:
@@ -99,16 +104,17 @@ class RawHtmlPostprocessor(Postprocessor):
else:
return self.run(processed_text)
- def isblocklevel(self, html):
+ def isblocklevel(self, html: str) -> bool:
+ """ Check if a block of HTML is block-level. """
m = self.BLOCK_LEVEL_REGEX.match(html)
if m:
if m.group(1)[0] in ('!', '?', '@', '%'):
- # Comment, php etc...
+ # Comment, PHP etc...
return True
return self.md.is_block_level(m.group(1))
return False
- def stash_to_string(self, text):
+ def stash_to_string(self, text: str) -> str:
""" Convert a stashed object to a string. """
return str(text)
@@ -116,22 +122,22 @@ class RawHtmlPostprocessor(Postprocessor):
class AndSubstitutePostprocessor(Postprocessor):
""" Restore valid entities """
- def run(self, text):
+ def run(self, text: str) -> str:
text = text.replace(util.AMP_SUBSTITUTE, "&")
return text
@util.deprecated(
- "This class will be removed in the future; "
- "use 'treeprocessors.UnescapeTreeprocessor' instead."
+ "This class is deprecated and will be removed in the future; "
+ "use [`UnescapeTreeprocessor`][markdown.treeprocessors.UnescapeTreeprocessor] instead."
)
class UnescapePostprocessor(Postprocessor):
- """ Restore escaped chars """
+ """ Restore escaped chars. """
RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX))
- def unescape(self, m):
+ def unescape(self, m: re.Match[str]) -> str:
return chr(int(m.group(1)))
- def run(self, text):
+ def run(self, text: str) -> str:
return self.RE.sub(self.unescape, text)
diff --git a/libs/markdown/preprocessors.py b/libs/markdown/preprocessors.py
index e1023c59a..0f63cdd36 100644
--- a/libs/markdown/preprocessors.py
+++ b/libs/markdown/preprocessors.py
@@ -1,37 +1,41 @@
-"""
-Python Markdown
-
-A Python implementation of John Gruber's Markdown.
+# Python Markdown
-Documentation: https://python-markdown.github.io/
-GitHub: https://github.com/Python-Markdown/markdown/
-PyPI: https://pypi.org/project/Markdown/
+# A Python implementation of John Gruber's Markdown.
-Started by Manfred Stienstra (http://www.dwerg.net/).
-Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
-Currently maintained by Waylan Limberg (https://github.com/waylan),
-Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+# Documentation: https://python-markdown.github.io/
+# GitHub: https://github.com/Python-Markdown/markdown/
+# PyPI: https://pypi.org/project/Markdown/
-Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
-Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
-Copyright 2004 Manfred Stienstra (the original version)
+# Started by Manfred Stienstra (http://www.dwerg.net/).
+# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+# Currently maintained by Waylan Limberg (https://github.com/waylan),
+# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
-License: BSD (see LICENSE.md for details).
+# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
-PRE-PROCESSORS
-=============================================================================
+# License: BSD (see LICENSE.md for details).
-Preprocessors work on source text before we start doing anything too
-complicated.
"""
+Preprocessors work on source text before it is broken down into its individual parts.
+This is an excellent place to clean up bad characters or to extract portions for later
+processing that the parser may otherwise choke on.
+"""
+
+from __future__ import annotations
+from typing import TYPE_CHECKING, Any
from . import util
from .htmlparser import HTMLExtractor
import re
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
-def build_preprocessors(md, **kwargs):
- """ Build the default set of preprocessors used by Markdown. """
+
+def build_preprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Preprocessor]:
+ """ Build and return the default set of preprocessors used by Markdown. """
preprocessors = util.Registry()
preprocessors.register(NormalizeWhitespace(md), 'normalize_whitespace', 30)
preprocessors.register(HtmlBlockPreprocessor(md), 'html_block', 20)
@@ -42,16 +46,16 @@ class Preprocessor(util.Processor):
"""
Preprocessors are run after the text is broken into lines.
- Each preprocessor implements a "run" method that takes a pointer to a
+ Each preprocessor implements a `run` method that takes a pointer to a
list of lines of the document, modifies it as necessary and returns
either the same pointer or a pointer to a new list.
- Preprocessors must extend markdown.Preprocessor.
+ Preprocessors must extend `Preprocessor`.
"""
- def run(self, lines):
+ def run(self, lines: list[str]) -> list[str]:
"""
- Each subclass of Preprocessor should override the `run` method, which
+ Each subclass of `Preprocessor` should override the `run` method, which
takes the document as a list of strings split by newlines and returns
the (possibly modified) list of lines.
@@ -62,7 +66,7 @@ class Preprocessor(util.Processor):
class NormalizeWhitespace(Preprocessor):
""" Normalize whitespace for consistent parsing. """
- def run(self, lines):
+ def run(self, lines: list[str]) -> list[str]:
source = '\n'.join(lines)
source = source.replace(util.STX, "").replace(util.ETX, "")
source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
@@ -72,9 +76,14 @@ class NormalizeWhitespace(Preprocessor):
class HtmlBlockPreprocessor(Preprocessor):
- """Remove html blocks from the text and store them for later retrieval."""
+ """
+ Remove html blocks from the text and store them for later retrieval.
+
+ The raw HTML is stored in the [`htmlStash`][markdown.util.HtmlStash] of the
+ [`Markdown`][markdown.Markdown] instance.
+ """
- def run(self, lines):
+ def run(self, lines: list[str]) -> list[str]:
source = '\n'.join(lines)
parser = HTMLExtractor(self.md)
parser.feed(source)
diff --git a/libs/markdown/serializers.py b/libs/markdown/serializers.py
index 59bab18df..573b26483 100644
--- a/libs/markdown/serializers.py
+++ b/libs/markdown/serializers.py
@@ -1,6 +1,4 @@
-# markdown/searializers.py
-#
-# Add x/html serialization to Elementree
+# Add x/html serialization to `ElementTree`
# Taken from ElementTree 1.3 preview with slight modifications
#
# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.
@@ -36,30 +34,34 @@
# OF THIS SOFTWARE.
# --------------------------------------------------------------------
+"""
+Python-Markdown provides two serializers which render [`ElementTree.Element`][xml.etree.ElementTree.Element]
+objects to a string of HTML. Both functions wrap the same underlying code with only a few minor
+differences as outlined below:
+
+1. Empty (self-closing) tags are rendered as `<tag>` for HTML and as `<tag />` for XHTML.
+2. Boolean attributes are rendered as `attrname` for HTML and as `attrname="attrname"` for XHTML.
+"""
+
+from __future__ import annotations
from xml.etree.ElementTree import ProcessingInstruction
-from xml.etree.ElementTree import Comment, ElementTree, QName
+from xml.etree.ElementTree import Comment, ElementTree, Element, QName, HTML_EMPTY
import re
+from typing import Callable, Literal, NoReturn
__all__ = ['to_html_string', 'to_xhtml_string']
-HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
- "img", "input", "isindex", "link", "meta", "param")
RE_AMP = re.compile(r'&(?!(?:\#[0-9]+|\#x[0-9a-f]+|[0-9a-z]+);)', re.I)
-try:
- HTML_EMPTY = set(HTML_EMPTY)
-except NameError: # pragma: no cover
- pass
-
-def _raise_serialization_error(text): # pragma: no cover
+def _raise_serialization_error(text: str) -> NoReturn: # pragma: no cover
raise TypeError(
"cannot serialize {!r} (type {})".format(text, type(text).__name__)
)
-def _escape_cdata(text):
+def _escape_cdata(text) -> str:
# escape character data
try:
# it's worth avoiding do-nothing calls for strings that are
@@ -77,7 +79,7 @@ def _escape_cdata(text):
_raise_serialization_error(text)
-def _escape_attrib(text):
+def _escape_attrib(text: str) -> str:
# escape attribute value
try:
if "&" in text:
@@ -96,7 +98,7 @@ def _escape_attrib(text):
_raise_serialization_error(text)
-def _escape_attrib_html(text):
+def _escape_attrib_html(text: str) -> str:
# escape attribute value
try:
if "&" in text:
@@ -113,7 +115,7 @@ def _escape_attrib_html(text):
_raise_serialization_error(text)
-def _serialize_html(write, elem, format):
+def _serialize_html(write: Callable[[str], None], elem: Element, format: Literal["html", "xhtml"]) -> None:
tag = elem.tag
text = elem.text
if tag is Comment:
@@ -128,7 +130,7 @@ def _serialize_html(write, elem, format):
else:
namespace_uri = None
if isinstance(tag, QName):
- # QNAME objects store their data as a string: `{uri}tag`
+ # `QName` objects store their data as a string: `{uri}tag`
if tag.text[:1] == "{":
namespace_uri, tag = tag.text[1:].split("}", 1)
else:
@@ -139,10 +141,10 @@ def _serialize_html(write, elem, format):
items = sorted(items) # lexical order
for k, v in items:
if isinstance(k, QName):
- # Assume a text only QName
+ # Assume a text only `QName`
k = k.text
if isinstance(v, QName):
- # Assume a text only QName
+ # Assume a text only `QName`
v = v.text
else:
v = _escape_attrib_html(v)
@@ -170,9 +172,9 @@ def _serialize_html(write, elem, format):
write(_escape_cdata(elem.tail))
-def _write_html(root, format="html"):
+def _write_html(root: Element, format: Literal["html", "xhtml"] = "html") -> str:
assert root is not None
- data = []
+ data: list[str] = []
write = data.append
_serialize_html(write, root, format)
return "".join(data)
@@ -181,9 +183,12 @@ def _write_html(root, format="html"):
# --------------------------------------------------------------------
# public functions
-def to_html_string(element):
+
+def to_html_string(element: Element) -> str:
+ """ Serialize element and its children to a string of HTML5. """
return _write_html(ElementTree(element).getroot(), format="html")
-def to_xhtml_string(element):
+def to_xhtml_string(element: Element) -> str:
+ """ Serialize element and its children to a string of XHTML. """
return _write_html(ElementTree(element).getroot(), format="xhtml")
diff --git a/libs/markdown/test_tools.py b/libs/markdown/test_tools.py
index 2ce0e74f7..895e44ec5 100644
--- a/libs/markdown/test_tools.py
+++ b/libs/markdown/test_tools.py
@@ -1,28 +1,31 @@
-"""
-Python Markdown
+# Python Markdown
-A Python implementation of John Gruber's Markdown.
+# A Python implementation of John Gruber's Markdown.
-Documentation: https://python-markdown.github.io/
-GitHub: https://github.com/Python-Markdown/markdown/
-PyPI: https://pypi.org/project/Markdown/
+# Documentation: https://python-markdown.github.io/
+# GitHub: https://github.com/Python-Markdown/markdown/
+# PyPI: https://pypi.org/project/Markdown/
-Started by Manfred Stienstra (http://www.dwerg.net/).
-Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
-Currently maintained by Waylan Limberg (https://github.com/waylan),
-Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+# Started by Manfred Stienstra (http://www.dwerg.net/).
+# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+# Currently maintained by Waylan Limberg (https://github.com/waylan),
+# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
-Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
-Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
-Copyright 2004 Manfred Stienstra (the original version)
+# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
-License: BSD (see LICENSE.md for details).
-"""
+# License: BSD (see LICENSE.md for details).
+
+""" A collection of tools for testing the Markdown code base and extensions. """
+
+from __future__ import annotations
import os
import sys
import unittest
import textwrap
+from typing import Any
from . import markdown, Markdown, util
try:
@@ -35,30 +38,31 @@ __all__ = ['TestCase', 'LegacyTestCase', 'Kwargs']
class TestCase(unittest.TestCase):
"""
- A unittest.TestCase subclass with helpers for testing Markdown output.
+ A [`unittest.TestCase`][] subclass with helpers for testing Markdown output.
- Define `default_kwargs` as a dict of keywords to pass to Markdown for each
+ Define `default_kwargs` as a `dict` of keywords to pass to Markdown for each
test. The defaults can be overridden on individual tests.
The `assertMarkdownRenders` method accepts the source text, the expected
output, and any keywords to pass to Markdown. The `default_kwargs` are used
except where overridden by `kwargs`. The output and expected output are passed
- to `TestCase.assertMultiLineEqual`. An AssertionError is raised with a diff
+ to `TestCase.assertMultiLineEqual`. An `AssertionError` is raised with a diff
if the actual output does not equal the expected output.
The `dedent` method is available to dedent triple-quoted strings if
necessary.
- In all other respects, behaves as unittest.TestCase.
+ In all other respects, behaves as `unittest.TestCase`.
"""
- default_kwargs = {}
+ default_kwargs: dict[str, Any] = {}
+ """ Default options to pass to Markdown for each test. """
def assertMarkdownRenders(self, source, expected, expected_attrs=None, **kwargs):
"""
Test that source Markdown text renders to expected output with given keywords.
- `expected_attrs` accepts a dict. Each key should be the name of an attribute
+ `expected_attrs` accepts a `dict`. Each key should be the name of an attribute
on the `Markdown` instance and the value should be the expected value after
the source text is parsed by Markdown. After the expected output is tested,
the expected value for each attribute is compared against the actual
@@ -80,7 +84,7 @@ class TestCase(unittest.TestCase):
"""
# TODO: If/when actual output ends with a newline, then use:
- # return textwrap.dedent(text.strip('/n'))
+ # return textwrap.dedent(text.strip('\n'))
return textwrap.dedent(text).strip()
@@ -93,10 +97,12 @@ class recursionlimit:
Example usage:
- with recursionlimit(20):
- # test code here
+ ``` python
+ with recursionlimit(20):
+ # test code here
+ ```
- See https://stackoverflow.com/a/50120316/866026
+ See <https://stackoverflow.com/a/50120316/866026>.
"""
def __init__(self, limit):
@@ -116,12 +122,12 @@ class recursionlimit:
class Kwargs(dict):
- """ A dict like class for holding keyword arguments. """
+ """ A `dict` like class for holding keyword arguments. """
pass
def _normalize_whitespace(text):
- """ Normalize whitespace for a string of html using tidylib. """
+ """ Normalize whitespace for a string of HTML using `tidylib`. """
output, errors = tidylib.tidy_fragment(text, options={
'drop_empty_paras': 0,
'fix_backslash': 0,
@@ -189,22 +195,20 @@ class LegacyTestMeta(type):
class LegacyTestCase(unittest.TestCase, metaclass=LegacyTestMeta):
"""
- A `unittest.TestCase` subclass for running Markdown's legacy file-based tests.
+ A [`unittest.TestCase`][] subclass for running Markdown's legacy file-based tests.
A subclass should define various properties which point to a directory of
text-based test files and define various behaviors/defaults for those tests.
The following properties are supported:
- location: A path to the directory of test files. An absolute path is preferred.
- exclude: A list of tests to exclude. Each test name should comprise the filename
- without an extension.
- normalize: A boolean value indicating if the HTML should be normalized.
- Default: `False`.
- input_ext: A string containing the file extension of input files. Default: `.txt`.
- ouput_ext: A string containing the file extension of expected output files.
- Default: `html`.
- default_kwargs: A `Kwargs` instance which stores the default set of keyword
- arguments for all test files in the directory.
+ Attributes:
+ location (str): A path to the directory of test files. An absolute path is preferred.
+ exclude (list[str]): A list of tests to exclude. Each test name should comprise the filename
+ without an extension.
+ normalize (bool): A boolean value indicating if the HTML should be normalized. Default: `False`.
+ input_ext (str): A string containing the file extension of input files. Default: `.txt`.
+ output_ext (str): A string containing the file extension of expected output files. Default: `html`.
+ default_kwargs (Kwargs[str, Any]): The default set of keyword arguments for all test files in the directory.
In addition, properties can be defined for each individual set of test files within
the directory. The property should be given the name of the file without the file
@@ -214,7 +218,7 @@ class LegacyTestCase(unittest.TestCase, metaclass=LegacyTestMeta):
test file. The keyword arguments will "update" the `default_kwargs`.
When the class instance is created, it will walk the given directory and create
- a separate unitttest for each set of test files using the naming scheme:
- `test_filename`. One unittest will be run for each set of input and output files.
+ a separate `unittest` for each set of test files using the naming scheme:
+ `test_filename`. One `unittest` will be run for each set of input and output files.
"""
pass
diff --git a/libs/markdown/treeprocessors.py b/libs/markdown/treeprocessors.py
index e9f48ca11..83630999e 100644
--- a/libs/markdown/treeprocessors.py
+++ b/libs/markdown/treeprocessors.py
@@ -1,32 +1,43 @@
-"""
-Python Markdown
+# Python Markdown
+
+# A Python implementation of John Gruber's Markdown.
-A Python implementation of John Gruber's Markdown.
+# Documentation: https://python-markdown.github.io/
+# GitHub: https://github.com/Python-Markdown/markdown/
+# PyPI: https://pypi.org/project/Markdown/
-Documentation: https://python-markdown.github.io/
-GitHub: https://github.com/Python-Markdown/markdown/
-PyPI: https://pypi.org/project/Markdown/
+# Started by Manfred Stienstra (http://www.dwerg.net/).
+# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+# Currently maintained by Waylan Limberg (https://github.com/waylan),
+# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
-Started by Manfred Stienstra (http://www.dwerg.net/).
-Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
-Currently maintained by Waylan Limberg (https://github.com/waylan),
-Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
-Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
-Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
-Copyright 2004 Manfred Stienstra (the original version)
+# License: BSD (see LICENSE.md for details).
-License: BSD (see LICENSE.md for details).
"""
+Tree processors manipulate the tree created by block processors. They can even create an entirely
+new `ElementTree` object. This is an excellent place for creating summaries, adding collected
+references, or last minute adjustments.
+
+"""
+
+from __future__ import annotations
import re
import xml.etree.ElementTree as etree
+from typing import TYPE_CHECKING, Any
from . import util
from . import inlinepatterns
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
-def build_treeprocessors(md, **kwargs):
- """ Build the default treeprocessors for Markdown. """
+
+def build_treeprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Treeprocessor]:
+ """ Build the default `treeprocessors` for Markdown. """
treeprocessors = util.Registry()
treeprocessors.register(InlineProcessor(md), 'inline', 20)
treeprocessors.register(PrettifyTreeprocessor(md), 'prettify', 10)
@@ -34,8 +45,8 @@ def build_treeprocessors(md, **kwargs):
return treeprocessors
-def isString(s):
- """ Check if it's string """
+def isString(s: object) -> bool:
+ """ Return `True` if object is a string but not an [`AtomicString`][markdown.util.AtomicString]. """
if not isinstance(s, util.AtomicString):
return isinstance(s, str)
return False
@@ -43,31 +54,30 @@ def isString(s):
class Treeprocessor(util.Processor):
"""
- Treeprocessors are run on the ElementTree object before serialization.
+ `Treeprocessor`s are run on the `ElementTree` object before serialization.
- Each Treeprocessor implements a "run" method that takes a pointer to an
- ElementTree, modifies it as necessary and returns an ElementTree
- object.
+ Each `Treeprocessor` implements a `run` method that takes a pointer to an
+ `Element` and modifies it as necessary.
- Treeprocessors must extend markdown.Treeprocessor.
+ `Treeprocessor`s must extend `markdown.treeprocessors.Treeprocessor`.
"""
- def run(self, root):
+ def run(self, root: etree.Element) -> etree.Element | None:
"""
- Subclasses of Treeprocessor should implement a `run` method, which
- takes a root ElementTree. This method can return another ElementTree
- object, and the existing root ElementTree will be replaced, or it can
- modify the current tree and return None.
+ Subclasses of `Treeprocessor` should implement a `run` method, which
+ takes a root `Element`. This method can return another `Element`
+ object, and the existing root `Element` will be replaced, or it can
+ modify the current tree and return `None`.
"""
pass # pragma: no cover
class InlineProcessor(Treeprocessor):
"""
- A Treeprocessor that traverses a tree, applying inline patterns.
+ A `Treeprocessor` that traverses a tree, applying inline patterns.
"""
- def __init__(self, md):
+ def __init__(self, md: Markdown):
self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
self.__placeholder_suffix = util.ETX
self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
@@ -75,24 +85,24 @@ class InlineProcessor(Treeprocessor):
self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
self.md = md
self.inlinePatterns = md.inlinePatterns
- self.ancestors = []
+ self.ancestors: list[str] = []
- def __makePlaceholder(self, type):
+ def __makePlaceholder(self, type: str) -> tuple[str, str]:
""" Generate a placeholder """
id = "%04d" % len(self.stashed_nodes)
hash = util.INLINE_PLACEHOLDER % id
return hash, id
- def __findPlaceholder(self, data, index):
+ def __findPlaceholder(self, data: str, index: int) -> tuple[str | None, int]:
"""
- Extract id from data string, start from index
-
- Keyword arguments:
+ Extract id from data string, start from index.
- * data: string
- * index: index, from which we start search
+ Arguments:
+ data: String.
+ index: Index, from which we start search.
- Returns: placeholder id and string index, after the found placeholder.
+ Returns:
+ Placeholder id and string index, after the found placeholder.
"""
m = self.__placeholder_re.search(data, index)
@@ -101,23 +111,22 @@ class InlineProcessor(Treeprocessor):
else:
return None, index + 1
- def __stashNode(self, node, type):
- """ Add node to stash """
+ def __stashNode(self, node: etree.Element | str, type: str) -> str:
+ """ Add node to stash. """
placeholder, id = self.__makePlaceholder(type)
self.stashed_nodes[id] = node
return placeholder
- def __handleInline(self, data, patternIndex=0):
+ def __handleInline(self, data: str, patternIndex: int = 0) -> str:
"""
- Process string with inline patterns and replace it
- with placeholders
-
- Keyword arguments:
+ Process string with inline patterns and replace it with placeholders.
- * data: A line of Markdown text
- * patternIndex: The index of the inlinePattern to start with
+ Arguments:
+ data: A line of Markdown text.
+ patternIndex: The index of the `inlinePattern` to start with.
- Returns: String with placeholders.
+ Returns:
+ String with placeholders.
"""
if not isinstance(data, util.AtomicString):
@@ -131,18 +140,15 @@ class InlineProcessor(Treeprocessor):
patternIndex += 1
return data
- def __processElementText(self, node, subnode, isText=True):
+ def __processElementText(self, node: etree.Element, subnode: etree.Element, isText: bool = True) -> None:
"""
- Process placeholders in Element.text or Element.tail
- of Elements popped from self.stashed_nodes.
-
- Keywords arguments:
-
- * node: parent node
- * subnode: processing node
- * isText: bool variable, True - it's text, False - it's tail
+ Process placeholders in `Element.text` or `Element.tail`
+ of Elements popped from `self.stashed_nodes`.
- Returns: None
+ Arguments:
+ node: Parent node.
+ subnode: Processing node.
+ isText: Boolean variable, True - it's text, False - it's a tail.
"""
if isText:
@@ -163,19 +169,25 @@ class InlineProcessor(Treeprocessor):
for newChild in childResult:
node.insert(pos, newChild[0])
- def __processPlaceholders(self, data, parent, isText=True):
+ def __processPlaceholders(
+ self,
+ data: str | None,
+ parent: etree.Element,
+ isText: bool = True
+ ) -> list[tuple[etree.Element, list[str]]]:
"""
- Process string with placeholders and generate ElementTree tree.
-
- Keyword arguments:
+ Process string with placeholders and generate `ElementTree` tree.
- * data: string with placeholders instead of ElementTree elements.
- * parent: Element, which contains processing inline data
+ Arguments:
+ data: String with placeholders instead of `ElementTree` elements.
+ parent: Element, which contains processing inline data.
+ isText: Boolean variable, True - it's text, False - it's a tail.
- Returns: list with ElementTree elements with applied inline patterns.
+ Returns:
+ List with `ElementTree` elements with applied inline patterns.
"""
- def linkText(text):
+ def linkText(text: str | None) -> None:
if text:
if result:
if result[-1][0].tail:
@@ -206,7 +218,7 @@ class InlineProcessor(Treeprocessor):
text = data[strartIndex:index]
linkText(text)
- if not isString(node): # it's Element
+ if not isinstance(node, str): # it's Element
for child in [node] + list(node):
if child.tail:
if child.tail.strip():
@@ -231,26 +243,32 @@ class InlineProcessor(Treeprocessor):
else:
text = data[strartIndex:]
if isinstance(data, util.AtomicString):
- # We don't want to loose the AtomicString
+ # We don't want to lose the `AtomicString`
text = util.AtomicString(text)
linkText(text)
data = ""
return result
- def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
+ def __applyPattern(
+ self,
+ pattern: inlinepatterns.Pattern,
+ data: str,
+ patternIndex: int,
+ startIndex: int = 0
+ ) -> tuple[str, bool, int]:
"""
Check if the line fits the pattern, create the necessary
- elements, add it to stashed_nodes.
-
- Keyword arguments:
+ elements, add it to `stashed_nodes`.
- * data: the text to be processed
- * pattern: the pattern to be checked
- * patternIndex: index of current pattern
- * startIndex: string index, from which we start searching
+ Arguments:
+ data: The text to be processed.
+ pattern: The pattern to be checked.
+ patternIndex: Index of current pattern.
+ startIndex: String index, from which we start searching.
- Returns: String with placeholders instead of ElementTree elements.
+ Returns:
+ String with placeholders instead of `ElementTree` elements.
"""
new_style = isinstance(pattern, inlinepatterns.InlineProcessor)
@@ -261,7 +279,7 @@ class InlineProcessor(Treeprocessor):
if new_style:
match = None
- # Since handleMatch may reject our first match,
+ # Since `handleMatch` may reject our first match,
# we iterate over the buffer looking for matches
# until we can't find any more.
for match in pattern.getCompiledRegExp().finditer(data, startIndex):
@@ -286,7 +304,7 @@ class InlineProcessor(Treeprocessor):
if node is None:
return data, True, end
- if not isString(node):
+ if not isinstance(node, str):
if not isinstance(node.text, util.AtomicString):
# We need to process current node too
for child in [node] + list(node):
@@ -312,7 +330,7 @@ class InlineProcessor(Treeprocessor):
match.group(1),
placeholder, match.groups()[-1]), True, 0
- def __build_ancestors(self, parent, parents):
+ def __build_ancestors(self, parent: etree.Element | None, parents: list[str]) -> None:
"""Build the ancestor list."""
ancestors = []
while parent is not None:
@@ -322,25 +340,25 @@ class InlineProcessor(Treeprocessor):
ancestors.reverse()
parents.extend(ancestors)
- def run(self, tree, ancestors=None):
+ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree.Element:
"""Apply inline patterns to a parsed Markdown tree.
- Iterate over ElementTree, find elements with inline tag, apply inline
- patterns and append newly created Elements to tree. If you don't
- want to process your data with inline patterns, instead of normal
- string, use subclass AtomicString:
+ Iterate over `Element`, find elements with inline tag, apply inline
+ patterns and append newly created Elements to tree. To avoid further
+ processing of string with inline patterns, instead of normal string,
+ use subclass [`AtomicString`][markdown.util.AtomicString]:
- node.text = markdown.AtomicString("This will not be processed.")
+ node.text = markdown.util.AtomicString("This will not be processed.")
Arguments:
+ tree: `Element` object, representing Markdown tree.
+ ancestors: List of parent tag names that precede the tree node (if needed).
- * tree: ElementTree object, representing Markdown tree.
- * ancestors: List of parent tag names that precede the tree node (if needed).
-
- Returns: ElementTree object with applied inline patterns.
+ Returns:
+ An element tree object with applied inline patterns.
"""
- self.stashed_nodes = {}
+ self.stashed_nodes: dict[str, etree.Element | str] = {}
# Ensure a valid parent list, but copy passed in lists
# to ensure we don't have the user accidentally change it on us.
@@ -395,10 +413,10 @@ class InlineProcessor(Treeprocessor):
class PrettifyTreeprocessor(Treeprocessor):
- """ Add linebreaks to the html document. """
+ """ Add line breaks to the html document. """
- def _prettifyETree(self, elem):
- """ Recursively add linebreaks to ElementTree children. """
+ def _prettifyETree(self, elem: etree.Element) -> None:
+ """ Recursively add line breaks to `ElementTree` children. """
i = "\n"
if self.md.is_block_level(elem.tag) and elem.tag not in ['code', 'pre']:
@@ -411,12 +429,12 @@ class PrettifyTreeprocessor(Treeprocessor):
if not elem.tail or not elem.tail.strip():
elem.tail = i
- def run(self, root):
- """ Add linebreaks to ElementTree root object. """
+ def run(self, root: etree.Element) -> None:
+ """ Add line breaks to `Element` object and its children. """
self._prettifyETree(root)
- # Do <br />'s separately as they are often in the middle of
- # inline content and missed by _prettifyETree.
+ # Do `<br />`'s separately as they are often in the middle of
+ # inline content and missed by `_prettifyETree`.
brs = root.iter('br')
for br in brs:
if not br.tail or not br.tail.strip():
@@ -438,13 +456,13 @@ class UnescapeTreeprocessor(Treeprocessor):
RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX))
- def _unescape(self, m):
+ def _unescape(self, m: re.Match[str]) -> str:
return chr(int(m.group(1)))
- def unescape(self, text):
+ def unescape(self, text: str) -> str:
return self.RE.sub(self._unescape, text)
- def run(self, root):
+ def run(self, root: etree.Element) -> None:
""" Loop over all elements and unescape all text. """
for elem in root.iter():
# Unescape text content
diff --git a/libs/markdown/util.py b/libs/markdown/util.py
index e6b08e5ef..b4642023e 100644
--- a/libs/markdown/util.py
+++ b/libs/markdown/util.py
@@ -1,30 +1,41 @@
-"""
-Python Markdown
+# Python Markdown
+
+# A Python implementation of John Gruber's Markdown.
-A Python implementation of John Gruber's Markdown.
+# Documentation: https://python-markdown.github.io/
+# GitHub: https://github.com/Python-Markdown/markdown/
+# PyPI: https://pypi.org/project/Markdown/
-Documentation: https://python-markdown.github.io/
-GitHub: https://github.com/Python-Markdown/markdown/
-PyPI: https://pypi.org/project/Markdown/
+# Started by Manfred Stienstra (http://www.dwerg.net/).
+# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+# Currently maintained by Waylan Limberg (https://github.com/waylan),
+# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
-Started by Manfred Stienstra (http://www.dwerg.net/).
-Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
-Currently maintained by Waylan Limberg (https://github.com/waylan),
-Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
-Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
-Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
-Copyright 2004 Manfred Stienstra (the original version)
+# License: BSD (see LICENSE.md for details).
-License: BSD (see LICENSE.md for details).
"""
+This module contains various constants, classes and functions which get referenced and used
+throughout the code base.
+"""
+
+from __future__ import annotations
import re
import sys
import warnings
-from collections import namedtuple
from functools import wraps, lru_cache
from itertools import count
+from typing import TYPE_CHECKING, Generic, Iterator, NamedTuple, TypeVar, TypedDict, overload
+
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
+ import xml.etree.ElementTree as etree
+
+_T = TypeVar('_T')
"""
@@ -33,7 +44,7 @@ Constants you might want to modify
"""
-BLOCK_LEVEL_ELEMENTS = [
+BLOCK_LEVEL_ELEMENTS: list[str] = [
# Elements which are invalid to wrap in a `<p>` tag.
# See https://w3c.github.io/html/grouping-content.html#the-p-element
'address', 'article', 'aside', 'blockquote', 'details', 'div', 'dl',
@@ -41,27 +52,39 @@ BLOCK_LEVEL_ELEMENTS = [
'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'main', 'menu', 'nav', 'ol',
'p', 'pre', 'section', 'table', 'ul',
# Other elements which Markdown should not be mucking up the contents of.
- 'canvas', 'colgroup', 'dd', 'body', 'dt', 'group', 'iframe', 'li', 'legend',
+ 'canvas', 'colgroup', 'dd', 'body', 'dt', 'group', 'html', 'iframe', 'li', 'legend',
'math', 'map', 'noscript', 'output', 'object', 'option', 'progress', 'script',
- 'style', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'video'
+ 'style', 'summary', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'video'
]
+"""
+List of HTML tags which get treated as block-level elements. Same as the `block_level_elements`
+attribute of the [`Markdown`][markdown.Markdown] class. Generally one should use the
+attribute on the class. This remains for compatibility with older extensions.
+"""
# Placeholders
-STX = '\u0002' # Use STX ("Start of text") for start-of-placeholder
-ETX = '\u0003' # Use ETX ("End of text") for end-of-placeholder
+STX = '\u0002'
+""" "Start of Text" marker for placeholder templates. """
+ETX = '\u0003'
+""" "End of Text" marker for placeholder templates. """
INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
+""" Prefix for inline placeholder template. """
INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
+""" Placeholder template for stashed inline text. """
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')
+""" Regular Expression which matches inline placeholders. """
AMP_SUBSTITUTE = STX+"amp"+ETX
+""" Placeholder template for HTML entities. """
HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX
+""" Placeholder template for raw HTML. """
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
+""" Regular expression which matches HTML placeholders. """
TAG_PLACEHOLDER = STX + "hzzhzkh:%s" + ETX
+""" Placeholder template for tags. """
-"""
-Constants you probably do not need to change
------------------------------------------------------------------------------
-"""
+# Constants you probably do not need to change
+# -----------------------------------------------------------------------------
RTL_BIDI_RANGES = (
('\u0590', '\u07FF'),
@@ -72,30 +95,32 @@ RTL_BIDI_RANGES = (
)
-"""
-AUXILIARY GLOBAL FUNCTIONS
-=============================================================================
-"""
+# AUXILIARY GLOBAL FUNCTIONS
+# =============================================================================
@lru_cache(maxsize=None)
def get_installed_extensions():
+ """ Return all entry_points in the `markdown.extensions` group. """
if sys.version_info >= (3, 10):
from importlib import metadata
- else: # <PY310 use backport
+ else: # `<PY310` use backport
import importlib_metadata as metadata
# Only load extension entry_points once.
return metadata.entry_points(group='markdown.extensions')
-def deprecated(message, stacklevel=2):
+def deprecated(message: str, stacklevel: int = 2):
"""
- Raise a DeprecationWarning when wrapped function/method is called.
+ Raise a [`DeprecationWarning`][] when wrapped function/method is called.
Usage:
- @deprecated("This method will be removed in version X; use Y instead.")
- def some_method()"
- pass
+
+ ```python
+ @deprecated("This method will be removed in version X; use Y instead.")
+ def some_method():
+ pass
+ ```
"""
def wrapper(func):
@wraps(func)
@@ -110,11 +135,11 @@ def deprecated(message, stacklevel=2):
return wrapper
-def parseBoolValue(value, fail_on_errors=True, preserve_none=False):
- """Parses a string representing bool value. If parsing was successful,
- returns True or False. If preserve_none=True, returns True, False,
- or None. If parsing was not successful, raises ValueError, or, if
- fail_on_errors=False, returns None."""
+def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None:
+ """Parses a string representing a boolean value. If parsing was successful,
+ returns `True` or `False`. If `preserve_none=True`, returns `True`, `False`,
+ or `None`. If parsing was not successful, raises `ValueError`, or, if
+ `fail_on_errors=False`, returns `None`."""
if not isinstance(value, str):
if preserve_none and value is None:
return value
@@ -129,8 +154,8 @@ def parseBoolValue(value, fail_on_errors=True, preserve_none=False):
raise ValueError('Cannot parse bool value: %r' % value)
-def code_escape(text):
- """Escape code."""
+def code_escape(text: str) -> str:
+ """HTML escape a string of code."""
if "&" in text:
text = text.replace("&", "&amp;")
if "<" in text:
@@ -140,7 +165,7 @@ def code_escape(text):
return text
-def _get_stack_depth(size=2):
+def _get_stack_depth(size: int = 2) -> int:
"""Get current stack depth, performantly.
"""
frame = sys._getframe(size)
@@ -151,15 +176,13 @@ def _get_stack_depth(size=2):
return size
-def nearing_recursion_limit():
+def nearing_recursion_limit() -> bool:
"""Return true if current stack depth is within 100 of maximum limit."""
return sys.getrecursionlimit() - _get_stack_depth() < 100
-"""
-MISC AUXILIARY CLASSES
-=============================================================================
-"""
+# MISC AUXILIARY CLASSES
+# =============================================================================
class AtomicString(str):
@@ -168,10 +191,27 @@ class AtomicString(str):
class Processor:
- def __init__(self, md=None):
+ """ The base class for all processors.
+
+ Attributes:
+ Processor.md: The `Markdown` instance passed in at initialization.
+
+ Arguments:
+ md: The `Markdown` instance this processor is a part of.
+
+ """
+ def __init__(self, md: Markdown | None = None):
self.md = md
+if TYPE_CHECKING: # pragma: no cover
+ class TagData(TypedDict):
+ tag: str
+ attrs: dict[str, str]
+ left_index: int
+ right_index: int
+
+
class HtmlStash:
"""
This class is used for stashing HTML objects that we extract
@@ -179,23 +219,23 @@ class HtmlStash:
"""
def __init__(self):
- """ Create a HtmlStash. """
+ """ Create an `HtmlStash`. """
self.html_counter = 0 # for counting inline html segments
- self.rawHtmlBlocks = []
+ self.rawHtmlBlocks: list[str | etree.Element] = []
self.tag_counter = 0
- self.tag_data = [] # list of dictionaries in the order tags appear
+ self.tag_data: list[TagData] = [] # list of dictionaries in the order tags appear
- def store(self, html):
+ def store(self, html: str | etree.Element) -> str:
"""
Saves an HTML segment for later reinsertion. Returns a
placeholder string that needs to be inserted into the
document.
Keyword arguments:
+ html: An HTML segment.
- * html: an html segment
-
- Returns : a placeholder string
+ Returns:
+ A placeholder string.
"""
self.rawHtmlBlocks.append(html)
@@ -203,30 +243,33 @@ class HtmlStash:
self.html_counter += 1
return placeholder
- def reset(self):
+ def reset(self) -> None:
+ """ Clear the stash. """
self.html_counter = 0
self.rawHtmlBlocks = []
- def get_placeholder(self, key):
+ def get_placeholder(self, key: int) -> str:
return HTML_PLACEHOLDER % key
- def store_tag(self, tag, attrs, left_index, right_index):
+ def store_tag(self, tag: str, attrs: dict[str, str], left_index: int, right_index: int) -> str:
"""Store tag data and return a placeholder."""
self.tag_data.append({'tag': tag, 'attrs': attrs,
'left_index': left_index,
'right_index': right_index})
placeholder = TAG_PLACEHOLDER % str(self.tag_counter)
- self.tag_counter += 1 # equal to the tag's index in self.tag_data
+ self.tag_counter += 1 # equal to the tag's index in `self.tag_data`
return placeholder
# Used internally by `Registry` for each item in its sorted list.
# Provides an easier to read API when editing the code later.
# For example, `item.name` is more clear than `item[0]`.
-_PriorityItem = namedtuple('PriorityItem', ['name', 'priority'])
+class _PriorityItem(NamedTuple):
+ name: str
+ priority: float
-class Registry:
+class Registry(Generic[_T]):
"""
A priority sorted registry.
@@ -267,25 +310,33 @@ class Registry:
"""
def __init__(self):
- self._data = {}
- self._priority = []
+ self._data: dict[str, _T] = {}
+ self._priority: list[_PriorityItem] = []
self._is_sorted = False
- def __contains__(self, item):
+ def __contains__(self, item: str | _T) -> bool:
if isinstance(item, str):
# Check if an item exists by this name.
return item in self._data.keys()
# Check if this instance exists.
return item in self._data.values()
- def __iter__(self):
+ def __iter__(self) -> Iterator[_T]:
self._sort()
return iter([self._data[k] for k, p in self._priority])
- def __getitem__(self, key):
+ @overload
+ def __getitem__(self, key: str | int) -> _T: # pragma: no cover
+ ...
+
+ @overload
+ def __getitem__(self, key: slice) -> Registry[_T]: # pragma: no cover
+ ...
+
+ def __getitem__(self, key: str | int | slice) -> _T | Registry[_T]:
self._sort()
if isinstance(key, slice):
- data = Registry()
+ data: Registry[_T] = Registry()
for k, p in self._priority[key]:
data.register(self._data[k], k, p)
return data
@@ -293,13 +344,13 @@ class Registry:
return self._data[self._priority[key].name]
return self._data[key]
- def __len__(self):
+ def __len__(self) -> int:
return len(self._priority)
def __repr__(self):
return '<{}({})>'.format(self.__class__.__name__, list(self))
- def get_index_for_name(self, name):
+ def get_index_for_name(self, name: str) -> int:
"""
Return the index of the given name.
"""
@@ -310,15 +361,14 @@ class Registry:
)
raise ValueError('No item named "{}" exists.'.format(name))
- def register(self, item, name, priority):
+ def register(self, item: _T, name: str, priority: float) -> None:
"""
Add an item to the registry with the given name and priority.
- Parameters:
-
- * `item`: The item being registered.
- * `name`: A string used to reference the item.
- * `priority`: An integer or float used to sort against all items.
+ Arguments:
+ item: The item being registered.
+ name: A string used to reference the item.
+ priority: An integer or float used to sort against all items.
If an item is registered with a "name" which already exists, the
existing item is replaced with the new item. Treat carefully as the
@@ -333,11 +383,11 @@ class Registry:
self._data[name] = item
self._priority.append(_PriorityItem(name, priority))
- def deregister(self, name, strict=True):
+ def deregister(self, name: str, strict: bool = True) -> None:
"""
Remove an item from the registry.
- Set `strict=False` to fail silently.
+ Set `strict=False` to fail silently. Otherwise a [`ValueError`][] is raised for an unknown `name`.
"""
try:
index = self.get_index_for_name(name)
@@ -347,7 +397,7 @@ class Registry:
if strict:
raise
- def _sort(self):
+ def _sort(self) -> None:
"""
Sort the registry by priority from highest to lowest.