| author | Michiel van Baak Jansen <[email protected]> | 2021-04-13 06:02:29 +0200 |
|---|---|---|
| committer | GitHub <[email protected]> | 2021-04-13 00:02:29 -0400 |
| commit | 4a0932b5d3052867f7f92984300d2ab4ec54fb0d (patch) | |
| tree | 030c4b361e4df81f28ecd04301cc0e69c5fbbba0 /libs/commonmark | |
| parent | 8e91beed83e6b5a4bec680d15b226a77ff3e224e (diff) | |
| download | bazarr-4a0932b5d3052867f7f92984300d2ab4ec54fb0d.tar.gz, bazarr-4a0932b5d3052867f7f92984300d2ab4ec54fb0d.zip | |
Update ffsubsync and srt modules
* Update ffsubsync to 0.4.11
* Update srt to 3.4.1
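
Both bumps touch bazarr's subtitle handling: srt parses and composes .srt files, while ffsubsync aligns subtitle timing against the media. As a point of reference only, a minimal round-trip with the upstream srt package looks like the sketch below; the calls are taken from the srt package's own documentation, not from this diff.

```python
# Hedged sketch (not from this diff): a typical round-trip with the srt module
# that this commit bumps to 3.4.1. srt.parse() and srt.compose() are the
# upstream package's documented entry points; treat the exact usage here as
# an illustration rather than code used by bazarr itself.
import srt

raw = """1
00:00:01,000 --> 00:00:04,000
Hello there.
"""

subs = list(srt.parse(raw))        # parse SRT text into Subtitle objects
subs[0].content = "Hello, world."  # edit a cue
print(srt.compose(subs))           # serialize back to SRT text
```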
Diffstat (limited to 'libs/commonmark')

| Mode | File | Lines |
|---|---|---|
| -rw-r--r-- | libs/commonmark/__init__.py | 8 |
| -rw-r--r-- | libs/commonmark/blocks.py | 908 |
| -rw-r--r-- | libs/commonmark/cmark.py | 53 |
| -rw-r--r-- | libs/commonmark/common.py | 113 |
| -rw-r--r-- | libs/commonmark/dump.py | 108 |
| -rw-r--r-- | libs/commonmark/entitytrans.py | 2353 |
| -rw-r--r-- | libs/commonmark/inlines.py | 882 |
| -rw-r--r-- | libs/commonmark/main.py | 41 |
| -rw-r--r-- | libs/commonmark/node.py | 179 |
| -rw-r--r-- | libs/commonmark/normalize_reference.py | 165 |
| -rw-r--r-- | libs/commonmark/render/__init__.py | 0 |
| -rw-r--r-- | libs/commonmark/render/html.py | 228 |
| -rw-r--r-- | libs/commonmark/render/renderer.py | 43 |
| -rw-r--r-- | libs/commonmark/render/rst.py | 159 |
| -rw-r--r-- | libs/commonmark/tests/__init__.py | 0 |
| -rw-r--r-- | libs/commonmark/tests/rst_tests.py | 172 |
| -rw-r--r-- | libs/commonmark/tests/run_spec_tests.py | 242 |
| -rw-r--r-- | libs/commonmark/tests/unit_tests.py | 157 |

18 files changed, 5811 insertions, 0 deletions
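
The diff below vendors the commonmark package under libs/commonmark. For orientation, the sketch that follows exercises the public API surfaced by the included __init__.py and used by cmark.py (the commonmark() one-shot helper, Parser, HtmlRenderer, and dumpJSON); it assumes the vendored libs/ directory is importable, for example because it is already on sys.path.

```python
# Minimal sketch of the vendored commonmark API, mirroring the calls made in
# libs/commonmark/cmark.py. Assumes the vendored libs/ directory is importable
# (for example, already placed on sys.path by the application).
import commonmark

md = "# Title\n\nSome *emphasised* text."

# One-shot helper re-exported by commonmark/__init__.py
print(commonmark.commonmark(md))

# Equivalent two-step form: parse to an AST, then render it
parser = commonmark.Parser()
ast = parser.parse(md)
print(commonmark.HtmlRenderer().render(ast))

# The AST can also be dumped as JSON for inspection
print(commonmark.dumpJSON(ast))
```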
diff --git a/libs/commonmark/__init__.py b/libs/commonmark/__init__.py new file mode 100644 index 000000000..1c2193c5d --- /dev/null +++ b/libs/commonmark/__init__.py @@ -0,0 +1,8 @@ +# flake8: noqa +from __future__ import unicode_literals, absolute_import + +from commonmark.main import commonmark +from commonmark.dump import dumpAST, dumpJSON +from commonmark.blocks import Parser +from commonmark.render.html import HtmlRenderer +from commonmark.render.rst import ReStructuredTextRenderer diff --git a/libs/commonmark/blocks.py b/libs/commonmark/blocks.py new file mode 100644 index 000000000..aeec21da1 --- /dev/null +++ b/libs/commonmark/blocks.py @@ -0,0 +1,908 @@ +from __future__ import absolute_import, unicode_literals + +import re +from commonmark import common +from commonmark.common import unescape_string +from commonmark.inlines import InlineParser +from commonmark.node import Node + + +CODE_INDENT = 4 +reHtmlBlockOpen = [ + re.compile(r'.'), # dummy for 0 + re.compile(r'^<(?:script|pre|style)(?:\s|>|$)', re.IGNORECASE), + re.compile(r'^<!--'), + re.compile(r'^<[?]'), + re.compile(r'^<![A-Z]'), + re.compile(r'^<!\[CDATA\['), + re.compile( + r'^<[/]?(?:address|article|aside|base|basefont|blockquote|body|' + r'caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|' + r'fieldset|figcaption|figure|footer|form|frame|frameset|h1|head|' + r'header|hr|html|iframe|legend|li|link|main|menu|menuitem|' + r'nav|noframes|ol|optgroup|option|p|param|section|source|title|' + r'summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)' + r'(?:\s|[/]?[>]|$)', + re.IGNORECASE), + re.compile( + '^(?:' + common.OPENTAG + '|' + common.CLOSETAG + ')\\s*$', + re.IGNORECASE), +] +reHtmlBlockClose = [ + re.compile(r'.'), # dummy for 0 + re.compile(r'<\/(?:script|pre|style)>', re.IGNORECASE), + re.compile(r'-->'), + re.compile(r'\?>'), + re.compile(r'>'), + re.compile(r'\]\]>'), +] +reThematicBreak = re.compile( + r'^(?:(?:\*[ \t]*){3,}|(?:_[ \t]*){3,}|(?:-[ \t]*){3,})[ \t]*$') +reMaybeSpecial = re.compile(r'^[#`~*+_=<>0-9-]') +reNonSpace = re.compile(r'[^ \t\f\v\r\n]') +reBulletListMarker = re.compile(r'^[*+-]') +reOrderedListMarker = re.compile(r'^(\d{1,9})([.)])') +reATXHeadingMarker = re.compile(r'^#{1,6}(?:[ \t]+|$)') +reCodeFence = re.compile(r'^`{3,}(?!.*`)|^~{3,}') +reClosingCodeFence = re.compile(r'^(?:`{3,}|~{3,})(?= *$)') +reSetextHeadingLine = re.compile(r'^(?:=+|-+)[ \t]*$') +reLineEnding = re.compile(r'\r\n|\n|\r') + + +def is_blank(s): + """Returns True if string contains only space characters.""" + return re.search(reNonSpace, s) is None + + +def is_space_or_tab(s): + return s in (' ', '\t') + + +def peek(ln, pos): + if pos < len(ln): + return ln[pos] + else: + return None + + +def ends_with_blank_line(block): + """ Returns true if block ends with a blank line, + descending if needed into lists and sublists.""" + while block: + if block.last_line_blank: + return True + if not block.last_line_checked and \ + block.t in ('list', 'item'): + block.last_line_checked = True + block = block.last_child + else: + block.last_line_checked = True + break + + return False + + +def parse_list_marker(parser, container): + """ Parse a list marker and return data on the marker (type, + start, delimiter, bullet character, padding) or None.""" + rest = parser.current_line[parser.next_nonspace:] + data = { + 'type': None, + 'tight': True, # lists are tight by default + 'bullet_char': None, + 'start': None, + 'delimiter': None, + 'padding': None, + 'marker_offset': parser.indent, + } + if parser.indent >= 4: + 
return None + m = re.search(reBulletListMarker, rest) + m2 = re.search(reOrderedListMarker, rest) + if m: + data['type'] = 'bullet' + data['bullet_char'] = m.group()[0] + elif m2 and (container.t != 'paragraph' or m2.group(1) == '1'): + m = m2 + data['type'] = 'ordered' + data['start'] = int(m.group(1)) + data['delimiter'] = m.group(2) + else: + return None + + # make sure we have spaces after + nextc = peek(parser.current_line, parser.next_nonspace + len(m.group())) + if not (nextc is None or nextc == '\t' or nextc == ' '): + return None + + # if it interrupts paragraph, make sure first line isn't blank + if container.t == 'paragraph' and \ + not re.search( + reNonSpace, + parser.current_line[parser.next_nonspace + len(m.group()):]): + return None + + # we've got a match! advance offset and calculate padding + parser.advance_next_nonspace() # to start of marker + parser.advance_offset(len(m.group()), True) # to end of marker + spaces_start_col = parser.column + spaces_start_offset = parser.offset + while True: + parser.advance_offset(1, True) + nextc = peek(parser.current_line, parser.offset) + if parser.column - spaces_start_col < 5 and \ + is_space_or_tab(nextc): + pass + else: + break + blank_item = peek(parser.current_line, parser.offset) is None + spaces_after_marker = parser.column - spaces_start_col + if spaces_after_marker >= 5 or \ + spaces_after_marker < 1 or \ + blank_item: + data['padding'] = len(m.group()) + 1 + parser.column = spaces_start_col + parser.offset = spaces_start_offset + if is_space_or_tab(peek(parser.current_line, parser.offset)): + parser.advance_offset(1, True) + else: + data['padding'] = len(m.group()) + spaces_after_marker + + return data + + +def lists_match(list_data, item_data): + """ + Returns True if the two list items are of the same type, + with the same delimiter and bullet character. This is used + in agglomerating list items into lists. 
+ """ + return list_data.get('type') == item_data.get('type') and \ + list_data.get('delimiter') == item_data.get('delimiter') and \ + list_data.get('bullet_char') == item_data.get('bullet_char') + + +class Block(object): + accepts_lines = None + + @staticmethod + def continue_(parser=None, container=None): + return + + @staticmethod + def finalize(parser=None, block=None): + return + + @staticmethod + def can_contain(t): + return + + +class Document(Block): + accepts_lines = False + + @staticmethod + def continue_(parser=None, container=None): + return 0 + + @staticmethod + def finalize(parser=None, block=None): + return + + @staticmethod + def can_contain(t): + return t != 'item' + + +class List(Block): + accepts_lines = False + + @staticmethod + def continue_(parser=None, container=None): + return 0 + + @staticmethod + def finalize(parser=None, block=None): + item = block.first_child + while item: + # check for non-final list item ending with blank line: + if ends_with_blank_line(item) and item.nxt: + block.list_data['tight'] = False + break + # recurse into children of list item, to see if there are + # spaces between any of them: + subitem = item.first_child + while subitem: + if ends_with_blank_line(subitem) and \ + (item.nxt or subitem.nxt): + block.list_data['tight'] = False + break + subitem = subitem.nxt + item = item.nxt + + @staticmethod + def can_contain(t): + return t == 'item' + + +class BlockQuote(Block): + accepts_lines = False + + @staticmethod + def continue_(parser=None, container=None): + ln = parser.current_line + if not parser.indented and peek(ln, parser.next_nonspace) == '>': + parser.advance_next_nonspace() + parser.advance_offset(1, False) + if is_space_or_tab(peek(ln, parser.offset)): + parser.advance_offset(1, True) + else: + return 1 + return 0 + + @staticmethod + def finalize(parser=None, block=None): + return + + @staticmethod + def can_contain(t): + return t != 'item' + + +class Item(Block): + accepts_lines = False + + @staticmethod + def continue_(parser=None, container=None): + if parser.blank: + if container.first_child is None: + # Blank line after empty list item + return 1 + else: + parser.advance_next_nonspace() + elif parser.indent >= (container.list_data['marker_offset'] + + container.list_data['padding']): + parser.advance_offset( + container.list_data['marker_offset'] + + container.list_data['padding'], True) + else: + return 1 + return 0 + + @staticmethod + def finalize(parser=None, block=None): + return + + @staticmethod + def can_contain(t): + return t != 'item' + + +class Heading(Block): + accepts_lines = False + + @staticmethod + def continue_(parser=None, container=None): + # A heading can never container > 1 line, so fail to match: + return 1 + + @staticmethod + def finalize(parser=None, block=None): + return + + @staticmethod + def can_contain(t): + return False + + +class ThematicBreak(Block): + accepts_lines = False + + @staticmethod + def continue_(parser=None, container=None): + # A thematic break can never container > 1 line, so fail to match: + return 1 + + @staticmethod + def finalize(parser=None, block=None): + return + + @staticmethod + def can_contain(t): + return False + + +class CodeBlock(Block): + accepts_lines = True + + @staticmethod + def continue_(parser=None, container=None): + ln = parser.current_line + indent = parser.indent + if container.is_fenced: + match = indent <= 3 and \ + len(ln) >= parser.next_nonspace + 1 and \ + ln[parser.next_nonspace] == container.fence_char and \ + re.search(reClosingCodeFence, 
ln[parser.next_nonspace:]) + if match and len(match.group()) >= container.fence_length: + # closing fence - we're at end of line, so we can return + parser.finalize(container, parser.line_number) + return 2 + else: + # skip optional spaces of fence offset + i = container.fence_offset + while i > 0 and is_space_or_tab(peek(ln, parser.offset)): + parser.advance_offset(1, True) + i -= 1 + else: + # indented + if indent >= CODE_INDENT: + parser.advance_offset(CODE_INDENT, True) + elif parser.blank: + parser.advance_next_nonspace() + else: + return 1 + return 0 + + @staticmethod + def finalize(parser=None, block=None): + if block.is_fenced: + # first line becomes info string + content = block.string_content + newline_pos = content.index('\n') + first_line = content[0:newline_pos] + rest = content[newline_pos + 1:] + block.info = unescape_string(first_line.strip()) + block.literal = rest + else: + # indented + block.literal = re.sub(r'(\n *)+$', '\n', block.string_content) + + block.string_content = None + + @staticmethod + def can_contain(t): + return False + + +class HtmlBlock(Block): + accepts_lines = True + + @staticmethod + def continue_(parser=None, container=None): + if parser.blank and (container.html_block_type == 6 or + container.html_block_type == 7): + return 1 + else: + return 0 + + @staticmethod + def finalize(parser=None, block=None): + block.literal = re.sub(r'(\n *)+$', '', block.string_content) + # allow GC + block.string_content = None + + @staticmethod + def can_contain(t): + return False + + +class Paragraph(Block): + accepts_lines = True + + @staticmethod + def continue_(parser=None, container=None): + return 1 if parser.blank else 0 + + @staticmethod + def finalize(parser=None, block=None): + has_reference_defs = False + + # try parsing the beginning as link reference definitions: + while peek(block.string_content, 0) == '[': + pos = parser.inline_parser.parseReference( + block.string_content, parser.refmap) + if not pos: + break + block.string_content = block.string_content[pos:] + has_reference_defs = True + if has_reference_defs and is_blank(block.string_content): + block.unlink() + + @staticmethod + def can_contain(t): + return False + + +class BlockStarts(object): + """Block start functions. 
+ + Return values: + 0 = no match + 1 = matched container, keep going + 2 = matched leaf, no more block starts + """ + METHODS = [ + 'block_quote', + 'atx_heading', + 'fenced_code_block', + 'html_block', + 'setext_heading', + 'thematic_break', + 'list_item', + 'indented_code_block', + ] + + @staticmethod + def block_quote(parser, container=None): + if not parser.indented and \ + peek(parser.current_line, parser.next_nonspace) == '>': + parser.advance_next_nonspace() + parser.advance_offset(1, False) + # optional following space + if is_space_or_tab(peek(parser.current_line, parser.offset)): + parser.advance_offset(1, True) + parser.close_unmatched_blocks() + parser.add_child('block_quote', parser.next_nonspace) + return 1 + + return 0 + + @staticmethod + def atx_heading(parser, container=None): + if not parser.indented: + m = re.search(reATXHeadingMarker, + parser.current_line[parser.next_nonspace:]) + if m: + parser.advance_next_nonspace() + parser.advance_offset(len(m.group()), False) + parser.close_unmatched_blocks() + container = parser.add_child('heading', parser.next_nonspace) + # number of #s + container.level = len(m.group().strip()) + # remove trailing ###s: + container.string_content = re.sub( + r'[ \t]+#+[ \t]*$', '', re.sub( + r'^[ \t]*#+[ \t]*$', + '', + parser.current_line[parser.offset:])) + parser.advance_offset( + len(parser.current_line) - parser.offset, False) + return 2 + + return 0 + + @staticmethod + def fenced_code_block(parser, container=None): + if not parser.indented: + m = re.search( + reCodeFence, + parser.current_line[parser.next_nonspace:]) + if m: + fence_length = len(m.group()) + parser.close_unmatched_blocks() + container = parser.add_child( + 'code_block', parser.next_nonspace) + container.is_fenced = True + container.fence_length = fence_length + container.fence_char = m.group()[0] + container.fence_offset = parser.indent + parser.advance_next_nonspace() + parser.advance_offset(fence_length, False) + return 2 + + return 0 + + @staticmethod + def html_block(parser, container=None): + if not parser.indented and \ + peek(parser.current_line, parser.next_nonspace) == '<': + s = parser.current_line[parser.next_nonspace:] + + for block_type in range(1, 8): + if re.search(reHtmlBlockOpen[block_type], s) and \ + (block_type < 7 or container.t != 'paragraph'): + parser.close_unmatched_blocks() + # We don't adjust parser.offset; + # spaces are part of the HTML block: + b = parser.add_child('html_block', parser.offset) + b.html_block_type = block_type + return 2 + return 0 + + @staticmethod + def setext_heading(parser, container=None): + if not parser.indented and container.t == 'paragraph': + m = re.search( + reSetextHeadingLine, + parser.current_line[parser.next_nonspace:]) + if m: + parser.close_unmatched_blocks() + # resolve reference link definitiosn + while peek(container.string_content, 0) == '[': + pos = parser.inline_parser.parseReference( + container.string_content, parser.refmap) + if not pos: + break + container.string_content = container.string_content[pos:] + if container.string_content: + heading = Node('heading', container.sourcepos) + heading.level = 1 if m.group()[0] == '=' else 2 + heading.string_content = container.string_content + container.insert_after(heading) + container.unlink() + parser.tip = heading + parser.advance_offset( + len(parser.current_line) - parser.offset, False) + return 2 + else: + return 0 + + return 0 + + @staticmethod + def thematic_break(parser, container=None): + if not parser.indented and re.search( + reThematicBreak, 
parser.current_line[parser.next_nonspace:]): + parser.close_unmatched_blocks() + parser.add_child('thematic_break', parser.next_nonspace) + parser.advance_offset( + len(parser.current_line) - parser.offset, False) + return 2 + return 0 + + @staticmethod + def list_item(parser, container=None): + if (not parser.indented or container.t == 'list'): + data = parse_list_marker(parser, container) + if data: + parser.close_unmatched_blocks() + + # add the list if needed + if parser.tip.t != 'list' or \ + not lists_match(container.list_data, data): + container = parser.add_child('list', parser.next_nonspace) + container.list_data = data + + # add the list item + container = parser.add_child('item', parser.next_nonspace) + container.list_data = data + return 1 + + return 0 + + @staticmethod + def indented_code_block(parser, container=None): + if parser.indented and \ + parser.tip.t != 'paragraph' and \ + not parser.blank: + # indented code + parser.advance_offset(CODE_INDENT, True) + parser.close_unmatched_blocks() + parser.add_child('code_block', parser.offset) + return 2 + + return 0 + + +class Parser(object): + def __init__(self, options={}): + self.doc = Node('document', [[1, 1], [0, 0]]) + self.block_starts = BlockStarts() + self.tip = self.doc + self.oldtip = self.doc + self.current_line = '' + self.line_number = 0 + self.offset = 0 + self.column = 0 + self.next_nonspace = 0 + self.next_nonspace_column = 0 + self.indent = 0 + self.indented = False + self.blank = False + self.partially_consumed_tab = False + self.all_closed = True + self.last_matched_container = self.doc + self.refmap = {} + self.last_line_length = 0 + self.inline_parser = InlineParser(options) + self.options = options + + def add_line(self): + """ Add a line to the block at the tip. We assume the tip + can accept lines -- that check should be done before calling this.""" + if self.partially_consumed_tab: + # Skip over tab + self.offset += 1 + # Add space characters + chars_to_tab = 4 - (self.column % 4) + self.tip.string_content += (' ' * chars_to_tab) + self.tip.string_content += (self.current_line[self.offset:] + '\n') + + def add_child(self, tag, offset): + """ Add block of type tag as a child of the tip. 
If the tip can't + accept children, close and finalize it and try its parent, + and so on til we find a block that can accept children.""" + while not self.blocks[self.tip.t].can_contain(tag): + self.finalize(self.tip, self.line_number - 1) + + column_number = offset + 1 + new_block = Node(tag, [[self.line_number, column_number], [0, 0]]) + new_block.string_content = '' + self.tip.append_child(new_block) + self.tip = new_block + return new_block + + def close_unmatched_blocks(self): + """Finalize and close any unmatched blocks.""" + if not self.all_closed: + while self.oldtip != self.last_matched_container: + parent = self.oldtip.parent + self.finalize(self.oldtip, self.line_number - 1) + self.oldtip = parent + self.all_closed = True + + def find_next_nonspace(self): + current_line = self.current_line + i = self.offset + cols = self.column + + try: + c = current_line[i] + except IndexError: + c = '' + while c != '': + if c == ' ': + i += 1 + cols += 1 + elif c == '\t': + i += 1 + cols += (4 - (cols % 4)) + else: + break + + try: + c = current_line[i] + except IndexError: + c = '' + + self.blank = (c == '\n' or c == '\r' or c == '') + self.next_nonspace = i + self.next_nonspace_column = cols + self.indent = self.next_nonspace_column - self.column + self.indented = self.indent >= CODE_INDENT + + def advance_next_nonspace(self): + self.offset = self.next_nonspace + self.column = self.next_nonspace_column + self.partially_consumed_tab = False + + def advance_offset(self, count, columns): + current_line = self.current_line + try: + c = current_line[self.offset] + except IndexError: + c = None + while count > 0 and c is not None: + if c == '\t': + chars_to_tab = 4 - (self.column % 4) + if columns: + self.partially_consumed_tab = chars_to_tab > count + chars_to_advance = min(count, chars_to_tab) + self.column += chars_to_advance + self.offset += 0 if self.partially_consumed_tab else 1 + count -= chars_to_advance + else: + self.partially_consumed_tab = False + self.column += chars_to_tab + self.offset += 1 + count -= 1 + else: + self.partially_consumed_tab = False + self.offset += 1 + # assume ascii; block starts are ascii + self.column += 1 + count -= 1 + try: + c = current_line[self.offset] + except IndexError: + c = None + + def incorporate_line(self, ln): + """Analyze a line of text and update the document appropriately. + + We parse markdown text by calling this on each line of input, + then finalizing the document. + """ + all_matched = True + + container = self.doc + self.oldtip = self.tip + self.offset = 0 + self.column = 0 + self.blank = False + self.partially_consumed_tab = False + self.line_number += 1 + + # replace NUL characters for security + if re.search(r'\u0000', ln) is not None: + ln = re.sub(r'\0', '\uFFFD', ln) + + self.current_line = ln + + # For each containing block, try to parse the associated line start. + # Bail out on failure: container will point to the last matching block. + # Set all_matched to false if not all containers match. 
+ while True: + last_child = container.last_child + if not (last_child and last_child.is_open): + break + container = last_child + + self.find_next_nonspace() + + rv = self.blocks[container.t].continue_(self, container) + if rv == 0: + # we've matched, keep going + pass + elif rv == 1: + # we've failed to match a block + all_matched = False + elif rv == 2: + # we've hit end of line for fenced code close and can return + self.last_line_length = len(ln) + return + else: + raise ValueError( + 'continue_ returned illegal value, must be 0, 1, or 2') + + if not all_matched: + # back up to last matching block + container = container.parent + break + + self.all_closed = (container == self.oldtip) + self.last_matched_container = container + + matched_leaf = container.t != 'paragraph' and \ + self.blocks[container.t].accepts_lines + starts = self.block_starts + starts_len = len(starts.METHODS) + # Unless last matched container is a code block, try new container + # starts, adding children to the last matched container: + while not matched_leaf: + self.find_next_nonspace() + + # this is a little performance optimization: + if not self.indented and \ + not re.search(reMaybeSpecial, ln[self.next_nonspace:]): + self.advance_next_nonspace() + break + + i = 0 + while i < starts_len: + res = getattr(starts, starts.METHODS[i])(self, container) + if res == 1: + container = self.tip + break + elif res == 2: + container = self.tip + matched_leaf = True + break + else: + i += 1 + + if i == starts_len: + # nothing matched + self.advance_next_nonspace() + break + + # What remains at the offset is a text line. Add the text to the + # appropriate container. + if not self.all_closed and not self.blank and \ + self.tip.t == 'paragraph': + # lazy paragraph continuation + self.add_line() + else: + # not a lazy continuation + # finalize any blocks not matched + self.close_unmatched_blocks() + if self.blank and container.last_child: + container.last_child.last_line_blank = True + + t = container.t + + # Block quote lines are never blank as they start with > + # and we don't count blanks in fenced code for purposes of + # tight/loose lists or breaking out of lists. We also + # don't set last_line_blank on an empty list item, or if we + # just closed a fenced block. + last_line_blank = self.blank and \ + not (t == 'block_quote' or + (t == 'code_block' and container.is_fenced) or + (t == 'item' and + not container.first_child and + container.sourcepos[0][0] == self.line_number)) + + # propagate last_line_blank up through parents: + cont = container + while cont: + cont.last_line_blank = last_line_blank + cont = cont.parent + + if self.blocks[t].accepts_lines: + self.add_line() + # if HtmlBlock, check for end condition + if t == 'html_block' and \ + container.html_block_type >= 1 and \ + container.html_block_type <= 5 and \ + re.search( + reHtmlBlockClose[container.html_block_type], + self.current_line[self.offset:]): + self.finalize(container, self.line_number) + elif self.offset < len(ln) and not self.blank: + # create a paragraph container for one line + container = self.add_child('paragraph', self.offset) + self.advance_next_nonspace() + self.add_line() + + self.last_line_length = len(ln) + + def finalize(self, block, line_number): + """ Finalize a block. Close it and do any necessary postprocessing, + e.g. creating string_content from strings, setting the 'tight' + or 'loose' status of a list, and parsing the beginnings + of paragraphs for reference definitions. 
Reset the tip to the + parent of the closed block.""" + above = block.parent + block.is_open = False + block.sourcepos[1] = [line_number, self.last_line_length] + + self.blocks[block.t].finalize(self, block) + + self.tip = above + + def process_inlines(self, block): + """ + Walk through a block & children recursively, parsing string content + into inline content where appropriate. + """ + walker = block.walker() + self.inline_parser.refmap = self.refmap + self.inline_parser.options = self.options + event = walker.nxt() + while event is not None: + node = event['node'] + t = node.t + if not event['entering'] and (t == 'paragraph' or t == 'heading'): + self.inline_parser.parse(node) + event = walker.nxt() + + def parse(self, my_input): + """ The main parsing function. Returns a parsed document AST.""" + self.doc = Node('document', [[1, 1], [0, 0]]) + self.tip = self.doc + self.refmap = {} + self.line_number = 0 + self.last_line_length = 0 + self.offset = 0 + self.column = 0 + self.last_matched_container = self.doc + self.current_line = '' + lines = re.split(reLineEnding, my_input) + length = len(lines) + if len(my_input) > 0 and my_input[-1] == '\n': + # ignore last blank line created by final newline + length -= 1 + for i in range(length): + self.incorporate_line(lines[i]) + while (self.tip): + self.finalize(self.tip, length) + self.process_inlines(self.doc) + return self.doc + + +CAMEL_RE = re.compile("(.)([A-Z](?:[a-z]+|(?<=[a-z0-9].)))") +Parser.blocks = dict( + (CAMEL_RE.sub(r'\1_\2', cls.__name__).lower(), cls) + for cls in Block.__subclasses__()) diff --git a/libs/commonmark/cmark.py b/libs/commonmark/cmark.py new file mode 100644 index 000000000..c6bd438a9 --- /dev/null +++ b/libs/commonmark/cmark.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python +from __future__ import unicode_literals +import argparse +import sys +import commonmark + + +def main(): + parser = argparse.ArgumentParser( + description="Process Markdown according to " + "the CommonMark specification.") + if sys.version_info < (3, 0): + reload(sys) # noqa + sys.setdefaultencoding('utf-8') + parser.add_argument( + 'infile', + nargs="?", + type=argparse.FileType('r'), + default=sys.stdin, + help="Input Markdown file to parse, defaults to STDIN") + parser.add_argument( + '-o', + nargs="?", + type=argparse.FileType('w'), + default=sys.stdout, + help="Output HTML/JSON file, defaults to STDOUT") + parser.add_argument('-a', action="store_true", help="Print formatted AST") + parser.add_argument('-aj', action="store_true", help="Output JSON AST") + args = parser.parse_args() + parser = commonmark.Parser() + f = args.infile + o = args.o + lines = [] + for line in f: + lines.append(line) + data = "".join(lines) + ast = parser.parse(data) + if not args.a and not args.aj: + renderer = commonmark.HtmlRenderer() + o.write(renderer.render(ast)) + exit() + if args.a: + # print ast + commonmark.dumpAST(ast) + exit() + + # o.write(ast.to_JSON()) + o.write(commonmark.dumpJSON(ast)) + exit() + + +if __name__ == '__main__': + main() diff --git a/libs/commonmark/common.py b/libs/commonmark/common.py new file mode 100644 index 000000000..b15a8b604 --- /dev/null +++ b/libs/commonmark/common.py @@ -0,0 +1,113 @@ +from __future__ import absolute_import, unicode_literals + +import re +import sys + +try: + from urllib.parse import quote +except ImportError: + from urllib import quote + +if sys.version_info >= (3, 0): + if sys.version_info >= (3, 4): + import html + HTMLunescape = html.unescape + else: + from .entitytrans import _unescape + HTMLunescape = 
_unescape +else: + from commonmark import entitytrans + HTMLunescape = entitytrans._unescape + +ENTITY = '&(?:#x[a-f0-9]{1,6}|#[0-9]{1,7}|[a-z][a-z0-9]{1,31});' + +TAGNAME = '[A-Za-z][A-Za-z0-9-]*' +ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*' +UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+" +SINGLEQUOTEDVALUE = "'[^']*'" +DOUBLEQUOTEDVALUE = '"[^"]*"' +ATTRIBUTEVALUE = "(?:" + UNQUOTEDVALUE + "|" + SINGLEQUOTEDVALUE + \ + "|" + DOUBLEQUOTEDVALUE + ")" +ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")" +ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)" +OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>" +CLOSETAG = "</" + TAGNAME + "\\s*[>]" +HTMLCOMMENT = '<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->' +PROCESSINGINSTRUCTION = "[<][?].*?[?][>]" +DECLARATION = "<![A-Z]+" + "\\s+[^>]*>" +CDATA = '<!\\[CDATA\\[[\\s\\S]*?\\]\\]>' +HTMLTAG = "(?:" + OPENTAG + "|" + CLOSETAG + "|" + HTMLCOMMENT + "|" + \ + PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")" +reHtmlTag = re.compile('^' + HTMLTAG, re.IGNORECASE) +reBackslashOrAmp = re.compile(r'[\\&]') +ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]' +reEntityOrEscapedChar = re.compile( + '\\\\' + ESCAPABLE + '|' + ENTITY, re.IGNORECASE) +XMLSPECIAL = '[&<>"]' +reXmlSpecial = re.compile(XMLSPECIAL) + + +def unescape_char(s): + if s[0] == '\\': + return s[1] + else: + return HTMLunescape(s) + + +def unescape_string(s): + """Replace entities and backslash escapes with literal characters.""" + if re.search(reBackslashOrAmp, s): + return re.sub( + reEntityOrEscapedChar, + lambda m: unescape_char(m.group()), + s) + else: + return s + + +def normalize_uri(uri): + try: + return quote(uri.encode('utf-8'), safe=str('/@:+?=&()%#*,')) + except UnicodeDecodeError: + # Python 2 also throws a UnicodeDecodeError, complaining about + # the width of the "safe" string. Removing this parameter + # solves the issue, but yields overly aggressive quoting, but we + # can correct those errors manually. + s = quote(uri.encode('utf-8')) + s = re.sub(r'%40', '@', s) + s = re.sub(r'%3A', ':', s) + s = re.sub(r'%2B', '+', s) + s = re.sub(r'%3F', '?', s) + s = re.sub(r'%3D', '=', s) + s = re.sub(r'%26', '&', s) + s = re.sub(r'%28', '(', s) + s = re.sub(r'%29', ')', s) + s = re.sub(r'%25', '%', s) + s = re.sub(r'%23', '#', s) + s = re.sub(r'%2A', '*', s) + s = re.sub(r'%2C', ',', s) + return s + + +UNSAFE_MAP = { + '&': '&', + '<': '<', + '>': '>', + '"': '"', +} + + +def replace_unsafe_char(s): + return UNSAFE_MAP.get(s, s) + + +def escape_xml(s): + if s is None: + return '' + if re.search(reXmlSpecial, s): + return re.sub( + reXmlSpecial, + lambda m: replace_unsafe_char(m.group()), + s) + else: + return s diff --git a/libs/commonmark/dump.py b/libs/commonmark/dump.py new file mode 100644 index 000000000..d83f259e7 --- /dev/null +++ b/libs/commonmark/dump.py @@ -0,0 +1,108 @@ +from __future__ import absolute_import, unicode_literals + +from builtins import str +import json +from commonmark.node import is_container + + +def prepare(obj, topnode=False): + """Walk the complete AST, only returning needed data. + + This removes circular references and allows us to output + JSON. 
+ """ + a = [] + for subnode, entered in obj.walker(): + rep = { + 'type': subnode.t, + } + if subnode.literal: + rep['literal'] = subnode.literal + + if subnode.string_content: + rep['string_content'] = subnode.string_content + + if subnode.title: + rep['title'] = subnode.title + + if subnode.info: + rep['info'] = subnode.info + + if subnode.destination: + rep['destination'] = subnode.destination + + if subnode.list_data: + rep['list_data'] = subnode.list_data + + if is_container(subnode): + rep['children'] = [] + + if entered and len(a) > 0: + if a[-1]['children']: + a[-1]['children'].append(rep) + else: + a[-1]['children'] = [rep] + else: + a.append(rep) + return a + + +def dumpJSON(obj): + """Output AST in JSON form, this is destructive of block.""" + prepared = prepare(obj) + return json.dumps(prepared, indent=4, sort_keys=True) + + +def dumpAST(obj, ind=0, topnode=False): + """Print out a block/entire AST.""" + indChar = ("\t" * ind) + "-> " if ind else "" + print(indChar + "[" + obj.t + "]") + if not obj.title == "": + print("\t" + indChar + "Title: " + (obj.title or '')) + if not obj.info == "": + print("\t" + indChar + "Info: " + (obj.info or '')) + if not obj.destination == "": + print("\t" + indChar + "Destination: " + (obj.destination or '')) + if obj.is_open: + print("\t" + indChar + "Open: " + str(obj.is_open)) + if obj.last_line_blank: + print( + "\t" + indChar + "Last line blank: " + str(obj.last_line_blank)) + if obj.sourcepos: + print("\t" + indChar + "Sourcepos: " + str(obj.sourcepos)) + if not obj.string_content == "": + print("\t" + indChar + "String content: " + (obj.string_content or '')) + if not obj.info == "": + print("\t" + indChar + "Info: " + (obj.info or '')) + if not obj.literal == "": + print("\t" + indChar + "Literal: " + (obj.literal or '')) + if obj.list_data.get('type'): + print("\t" + indChar + "List Data: ") + print("\t\t" + indChar + "[type] = " + obj.list_data.get('type')) + if obj.list_data.get('bullet_char'): + print( + "\t\t" + indChar + "[bullet_char] = " + + obj.list_data['bullet_char']) + if obj.list_data.get('start'): + print( + "\t\t" + indChar + "[start] = " + + str(obj.list_data.get('start'))) + if obj.list_data.get('delimiter'): + print( + "\t\t" + indChar + "[delimiter] = " + + obj.list_data.get('delimiter')) + if obj.list_data.get('padding'): + print( + "\t\t" + indChar + "[padding] = " + + str(obj.list_data.get('padding'))) + if obj.list_data.get('marker_offset'): + print( + "\t\t" + indChar + "[marker_offset] = " + + str(obj.list_data.get('marker_offset'))) + if obj.walker: + print("\t" + indChar + "Children:") + walker = obj.walker() + nxt = walker.nxt() + while nxt is not None and topnode is False: + dumpAST(nxt['node'], ind + 2, topnode=True) + nxt = walker.nxt() diff --git a/libs/commonmark/entitytrans.py b/libs/commonmark/entitytrans.py new file mode 100644 index 000000000..71c8025bd --- /dev/null +++ b/libs/commonmark/entitytrans.py @@ -0,0 +1,2353 @@ +""" +Python 3.4 HTML5 entity unescaping for all! 
+ +Based on +https://hg.python.org/cpython/file/500d3d6f22ff/Lib/html/__init__.py +""" +from __future__ import unicode_literals +import sys +import re as _re + +__all__ = ['_unescape'] + +_html5 = { + 'Aacute': '\xc1', + 'aacute': '\xe1', + 'Aacute;': '\xc1', + 'aacute;': '\xe1', + 'Abreve;': '\u0102', + 'abreve;': '\u0103', + 'ac;': '\u223e', + 'acd;': '\u223f', + 'acE;': '\u223e\u0333', + 'Acirc': '\xc2', + 'acirc': '\xe2', + 'Acirc;': '\xc2', + 'acirc;': '\xe2', + 'acute': '\xb4', + 'acute;': '\xb4', + 'Acy;': '\u0410', + 'acy;': '\u0430', + 'AElig': '\xc6', + 'aelig': '\xe6', + 'AElig;': '\xc6', + 'aelig;': '\xe6', + 'af;': '\u2061', + 'Afr;': '\U0001d504', + 'afr;': '\U0001d51e', + 'Agrave': '\xc0', + 'agrave': '\xe0', + 'Agrave;': '\xc0', + 'agrave;': '\xe0', + 'alefsym;': '\u2135', + 'aleph;': '\u2135', + 'Alpha;': '\u0391', + 'alpha;': '\u03b1', + 'Amacr;': '\u0100', + 'amacr;': '\u0101', + 'amalg;': '\u2a3f', + 'AMP': '&', + 'amp': '&', + 'AMP;': '&', + 'amp;': '&', + 'And;': '\u2a53', + 'and;': '\u2227', + 'andand;': '\u2a55', + 'andd;': '\u2a5c', + 'andslope;': '\u2a58', + 'andv;': '\u2a5a', + 'ang;': '\u2220', + 'ange;': '\u29a4', + 'angle;': '\u2220', + 'angmsd;': '\u2221', + 'angmsdaa;': '\u29a8', + 'angmsdab;': '\u29a9', + 'angmsdac;': '\u29aa', + 'angmsdad;': '\u29ab', + 'angmsdae;': '\u29ac', + 'angmsdaf;': '\u29ad', + 'angmsdag;': '\u29ae', + 'angmsdah;': '\u29af', + 'angrt;': '\u221f', + 'angrtvb;': '\u22be', + 'angrtvbd;': '\u299d', + 'angsph;': '\u2222', + 'angst;': '\xc5', + 'angzarr;': '\u237c', + 'Aogon;': '\u0104', + 'aogon;': '\u0105', + 'Aopf;': '\U0001d538', + 'aopf;': '\U0001d552', + 'ap;': '\u2248', + 'apacir;': '\u2a6f', + 'apE;': '\u2a70', + 'ape;': '\u224a', + 'apid;': '\u224b', + 'apos;': "'", + 'ApplyFunction;': '\u2061', + 'approx;': '\u2248', + 'approxeq;': '\u224a', + 'Aring': '\xc5', + 'aring': '\xe5', + 'Aring;': '\xc5', + 'aring;': '\xe5', + 'Ascr;': '\U0001d49c', + 'ascr;': '\U0001d4b6', + 'Assign;': '\u2254', + 'ast;': '*', + 'asymp;': '\u2248', + 'asympeq;': '\u224d', + 'Atilde': '\xc3', + 'atilde': '\xe3', + 'Atilde;': '\xc3', + 'atilde;': '\xe3', + 'Auml': '\xc4', + 'auml': '\xe4', + 'Auml;': '\xc4', + 'auml;': '\xe4', + 'awconint;': '\u2233', + 'awint;': '\u2a11', + 'backcong;': '\u224c', + 'backepsilon;': '\u03f6', + 'backprime;': '\u2035', + 'backsim;': '\u223d', + 'backsimeq;': '\u22cd', + 'Backslash;': '\u2216', + 'Barv;': '\u2ae7', + 'barvee;': '\u22bd', + 'Barwed;': '\u2306', + 'barwed;': '\u2305', + 'barwedge;': '\u2305', + 'bbrk;': '\u23b5', + 'bbrktbrk;': '\u23b6', + 'bcong;': '\u224c', + 'Bcy;': '\u0411', + 'bcy;': '\u0431', + 'bdquo;': '\u201e', + 'becaus;': '\u2235', + 'Because;': '\u2235', + 'because;': '\u2235', + 'bemptyv;': '\u29b0', + 'bepsi;': '\u03f6', + 'bernou;': '\u212c', + 'Bernoullis;': '\u212c', + 'Beta;': '\u0392', + 'beta;': '\u03b2', + 'beth;': '\u2136', + 'between;': '\u226c', + 'Bfr;': '\U0001d505', + 'bfr;': '\U0001d51f', + 'bigcap;': '\u22c2', + 'bigcirc;': '\u25ef', + 'bigcup;': '\u22c3', + 'bigodot;': '\u2a00', + 'bigoplus;': '\u2a01', + 'bigotimes;': '\u2a02', + 'bigsqcup;': '\u2a06', + 'bigstar;': '\u2605', + 'bigtriangledown;': '\u25bd', + 'bigtriangleup;': '\u25b3', + 'biguplus;': '\u2a04', + 'bigvee;': '\u22c1', + 'bigwedge;': '\u22c0', + 'bkarow;': '\u290d', + 'blacklozenge;': '\u29eb', + 'blacksquare;': '\u25aa', + 'blacktriangle;': '\u25b4', + 'blacktriangledown;': '\u25be', + 'blacktriangleleft;': '\u25c2', + 'blacktriangleright;': '\u25b8', + 'blank;': '\u2423', + 'blk12;': '\u2592', + 'blk14;': 
'\u2591', + 'blk34;': '\u2593', + 'block;': '\u2588', + 'bne;': '=\u20e5', + 'bnequiv;': '\u2261\u20e5', + 'bNot;': '\u2aed', + 'bnot;': '\u2310', + 'Bopf;': '\U0001d539', + 'bopf;': '\U0001d553', + 'bot;': '\u22a5', + 'bottom;': '\u22a5', + 'bowtie;': '\u22c8', + 'boxbox;': '\u29c9', + 'boxDL;': '\u2557', + 'boxDl;': '\u2556', + 'boxdL;': '\u2555', + 'boxdl;': '\u2510', + 'boxDR;': '\u2554', + 'boxDr;': '\u2553', + 'boxdR;': '\u2552', + 'boxdr;': '\u250c', + 'boxH;': '\u2550', + 'boxh;': '\u2500', + 'boxHD;': '\u2566', + 'boxHd;': '\u2564', + 'boxhD;': '\u2565', + 'boxhd;': '\u252c', + 'boxHU;': '\u2569', + 'boxHu;': '\u2567', + 'boxhU;': '\u2568', + 'boxhu;': '\u2534', + 'boxminus;': '\u229f', + 'boxplus;': '\u229e', + 'boxtimes;': '\u22a0', + 'boxUL;': '\u255d', + 'boxUl;': '\u255c', + 'boxuL;': '\u255b', + 'boxul;': '\u2518', + 'boxUR;': '\u255a', + 'boxUr;': '\u2559', + 'boxuR;': '\u2558', + 'boxur;': '\u2514', + 'boxV;': '\u2551', + 'boxv;': '\u2502', + 'boxVH;': '\u256c', + 'boxVh;': '\u256b', + 'boxvH;': '\u256a', + 'boxvh;': '\u253c', + 'boxVL;': '\u2563', + 'boxVl;': '\u2562', + 'boxvL;': '\u2561', + 'boxvl;': '\u2524', + 'boxVR;': '\u2560', + 'boxVr;': '\u255f', + 'boxvR;': '\u255e', + 'boxvr;': '\u251c', + 'bprime;': '\u2035', + 'Breve;': '\u02d8', + 'breve;': '\u02d8', + 'brvbar': '\xa6', + 'brvbar;': '\xa6', + 'Bscr;': '\u212c', + 'bscr;': '\U0001d4b7', + 'bsemi;': '\u204f', + 'bsim;': '\u223d', + 'bsime;': '\u22cd', + 'bsol;': '\\', + 'bsolb;': '\u29c5', + 'bsolhsub;': '\u27c8', + 'bull;': '\u2022', + 'bullet;': '\u2022', + 'bump;': '\u224e', + 'bumpE;': '\u2aae', + 'bumpe;': '\u224f', + 'Bumpeq;': '\u224e', + 'bumpeq;': '\u224f', + 'Cacute;': '\u0106', + 'cacute;': '\u0107', + 'Cap;': '\u22d2', + 'cap;': '\u2229', + 'capand;': '\u2a44', + 'capbrcup;': '\u2a49', + 'capcap;': '\u2a4b', + 'capcup;': '\u2a47', + 'capdot;': '\u2a40', + 'CapitalDifferentialD;': '\u2145', + 'caps;': '\u2229\ufe00', + 'caret;': '\u2041', + 'caron;': '\u02c7', + 'Cayleys;': '\u212d', + 'ccaps;': '\u2a4d', + 'Ccaron;': '\u010c', + 'ccaron;': '\u010d', + 'Ccedil': '\xc7', + 'ccedil': '\xe7', + 'Ccedil;': '\xc7', + 'ccedil;': '\xe7', + 'Ccirc;': '\u0108', + 'ccirc;': '\u0109', + 'Cconint;': '\u2230', + 'ccups;': '\u2a4c', + 'ccupssm;': '\u2a50', + 'Cdot;': '\u010a', + 'cdot;': '\u010b', + 'cedil': '\xb8', + 'cedil;': '\xb8', + 'Cedilla;': '\xb8', + 'cemptyv;': '\u29b2', + 'cent': '\xa2', + 'cent;': '\xa2', + 'CenterDot;': '\xb7', + 'centerdot;': '\xb7', + 'Cfr;': '\u212d', + 'cfr;': '\U0001d520', + 'CHcy;': '\u0427', + 'chcy;': '\u0447', + 'check;': '\u2713', + 'checkmark;': '\u2713', + 'Chi;': '\u03a7', + 'chi;': '\u03c7', + 'cir;': '\u25cb', + 'circ;': '\u02c6', + 'circeq;': '\u2257', + 'circlearrowleft;': '\u21ba', + 'circlearrowright;': '\u21bb', + 'circledast;': '\u229b', + 'circledcirc;': '\u229a', + 'circleddash;': '\u229d', + 'CircleDot;': '\u2299', + 'circledR;': '\xae', + 'circledS;': '\u24c8', + 'CircleMinus;': '\u2296', + 'CirclePlus;': '\u2295', + 'CircleTimes;': '\u2297', + 'cirE;': '\u29c3', + 'cire;': '\u2257', + 'cirfnint;': '\u2a10', + 'cirmid;': '\u2aef', + 'cirscir;': '\u29c2', + 'ClockwiseContourIntegral;': '\u2232', + 'CloseCurlyDoubleQuote;': '\u201d', + 'CloseCurlyQuote;': '\u2019', + 'clubs;': '\u2663', + 'clubsuit;': '\u2663', + 'Colon;': '\u2237', + 'colon;': ':', + 'Colone;': '\u2a74', + 'colone;': '\u2254', + 'coloneq;': '\u2254', + 'comma;': ',', + 'commat;': '@', + 'comp;': '\u2201', + 'compfn;': '\u2218', + 'complement;': '\u2201', + 'complexes;': '\u2102', + 'cong;': 
'\u2245', + 'congdot;': '\u2a6d', + 'Congruent;': '\u2261', + 'Conint;': '\u222f', + 'conint;': '\u222e', + 'ContourIntegral;': '\u222e', + 'Copf;': '\u2102', + 'copf;': '\U0001d554', + 'coprod;': '\u2210', + 'Coproduct;': '\u2210', + 'COPY': '\xa9', + 'copy': '\xa9', + 'COPY;': '\xa9', + 'copy;': '\xa9', + 'copysr;': '\u2117', + 'CounterClockwiseContourIntegral;': '\u2233', + 'crarr;': '\u21b5', + 'Cross;': '\u2a2f', + 'cross;': '\u2717', + 'Cscr;': '\U0001d49e', + 'cscr;': '\U0001d4b8', + 'csub;': '\u2acf', + 'csube;': '\u2ad1', + 'csup;': '\u2ad0', + 'csupe;': '\u2ad2', + 'ctdot;': '\u22ef', + 'cudarrl;': '\u2938', + 'cudarrr;': '\u2935', + 'cuepr;': '\u22de', + 'cuesc;': '\u22df', + 'cularr;': '\u21b6', + 'cularrp;': '\u293d', + 'Cup;': '\u22d3', + 'cup;': '\u222a', + 'cupbrcap;': '\u2a48', + 'CupCap;': '\u224d', + 'cupcap;': '\u2a46', + 'cupcup;': '\u2a4a', + 'cupdot;': '\u228d', + 'cupor;': '\u2a45', + 'cups;': '\u222a\ufe00', + 'curarr;': '\u21b7', + 'curarrm;': '\u293c', + 'curlyeqprec;': '\u22de', + 'curlyeqsucc;': '\u22df', + 'curlyvee;': '\u22ce', + 'curlywedge;': '\u22cf', + 'curren': '\xa4', + 'curren;': '\xa4', + 'curvearrowleft;': '\u21b6', + 'curvearrowright;': '\u21b7', + 'cuvee;': '\u22ce', + 'cuwed;': '\u22cf', + 'cwconint;': '\u2232', + 'cwint;': '\u2231', + 'cylcty;': '\u232d', + 'Dagger;': '\u2021', + 'dagger;': '\u2020', + 'daleth;': '\u2138', + 'Darr;': '\u21a1', + 'dArr;': '\u21d3', + 'darr;': '\u2193', + 'dash;': '\u2010', + 'Dashv;': '\u2ae4', + 'dashv;': '\u22a3', + 'dbkarow;': '\u290f', + 'dblac;': '\u02dd', + 'Dcaron;': '\u010e', + 'dcaron;': '\u010f', + 'Dcy;': '\u0414', + 'dcy;': '\u0434', + 'DD;': '\u2145', + 'dd;': '\u2146', + 'ddagger;': '\u2021', + 'ddarr;': '\u21ca', + 'DDotrahd;': '\u2911', + 'ddotseq;': '\u2a77', + 'deg': '\xb0', + 'deg;': '\xb0', + 'Del;': '\u2207', + 'Delta;': '\u0394', + 'delta;': '\u03b4', + 'demptyv;': '\u29b1', + 'dfisht;': '\u297f', + 'Dfr;': '\U0001d507', + 'dfr;': '\U0001d521', + 'dHar;': '\u2965', + 'dharl;': '\u21c3', + 'dharr;': '\u21c2', + 'DiacriticalAcute;': '\xb4', + 'DiacriticalDot;': '\u02d9', + 'DiacriticalDoubleAcute;': '\u02dd', + 'DiacriticalGrave;': '`', + 'DiacriticalTilde;': '\u02dc', + 'diam;': '\u22c4', + 'Diamond;': '\u22c4', + 'diamond;': '\u22c4', + 'diamondsuit;': '\u2666', + 'diams;': '\u2666', + 'die;': '\xa8', + 'DifferentialD;': '\u2146', + 'digamma;': '\u03dd', + 'disin;': '\u22f2', + 'div;': '\xf7', + 'divide': '\xf7', + 'divide;': '\xf7', + 'divideontimes;': '\u22c7', + 'divonx;': '\u22c7', + 'DJcy;': '\u0402', + 'djcy;': '\u0452', + 'dlcorn;': '\u231e', + 'dlcrop;': '\u230d', + 'dollar;': '$', + 'Dopf;': '\U0001d53b', + 'dopf;': '\U0001d555', + 'Dot;': '\xa8', + 'dot;': '\u02d9', + 'DotDot;': '\u20dc', + 'doteq;': '\u2250', + 'doteqdot;': '\u2251', + 'DotEqual;': '\u2250', + 'dotminus;': '\u2238', + 'dotplus;': '\u2214', + 'dotsquare;': '\u22a1', + 'doublebarwedge;': '\u2306', + 'DoubleContourIntegral;': '\u222f', + 'DoubleDot;': '\xa8', + 'DoubleDownArrow;': '\u21d3', + 'DoubleLeftArrow;': '\u21d0', + 'DoubleLeftRightArrow;': '\u21d4', + 'DoubleLeftTee;': '\u2ae4', + 'DoubleLongLeftArrow;': '\u27f8', + 'DoubleLongLeftRightArrow;': '\u27fa', + 'DoubleLongRightArrow;': '\u27f9', + 'DoubleRightArrow;': '\u21d2', + 'DoubleRightTee;': '\u22a8', + 'DoubleUpArrow;': '\u21d1', + 'DoubleUpDownArrow;': '\u21d5', + 'DoubleVerticalBar;': '\u2225', + 'DownArrow;': '\u2193', + 'Downarrow;': '\u21d3', + 'downarrow;': '\u2193', + 'DownArrowBar;': '\u2913', + 'DownArrowUpArrow;': '\u21f5', + 'DownBreve;': 
'\u0311', + 'downdownarrows;': '\u21ca', + 'downharpoonleft;': '\u21c3', + 'downharpoonright;': '\u21c2', + 'DownLeftRightVector;': '\u2950', + 'DownLeftTeeVector;': '\u295e', + 'DownLeftVector;': '\u21bd', + 'DownLeftVectorBar;': '\u2956', + 'DownRightTeeVector;': '\u295f', + 'DownRightVector;': '\u21c1', + 'DownRightVectorBar;': '\u2957', + 'DownTee;': '\u22a4', + 'DownTeeArrow;': '\u21a7', + 'drbkarow;': '\u2910', + 'drcorn;': '\u231f', + 'drcrop;': '\u230c', + 'Dscr;': '\U0001d49f', + 'dscr;': '\U0001d4b9', + 'DScy;': '\u0405', + 'dscy;': '\u0455', + 'dsol;': '\u29f6', + 'Dstrok;': '\u0110', + 'dstrok;': '\u0111', + 'dtdot;': '\u22f1', + 'dtri;': '\u25bf', + 'dtrif;': '\u25be', + 'duarr;': '\u21f5', + 'duhar;': '\u296f', + 'dwangle;': '\u29a6', + 'DZcy;': '\u040f', + 'dzcy;': '\u045f', + 'dzigrarr;': '\u27ff', + 'Eacute': '\xc9', + 'eacute': '\xe9', + 'Eacute;': '\xc9', + 'eacute;': '\xe9', + 'easter;': '\u2a6e', + 'Ecaron;': '\u011a', + 'ecaron;': '\u011b', + 'ecir;': '\u2256', + 'Ecirc': '\xca', + 'ecirc': '\xea', + 'Ecirc;': '\xca', + 'ecirc;': '\xea', + 'ecolon;': '\u2255', + 'Ecy;': '\u042d', + 'ecy;': '\u044d', + 'eDDot;': '\u2a77', + 'Edot;': '\u0116', + 'eDot;': '\u2251', + 'edot;': '\u0117', + 'ee;': '\u2147', + 'efDot;': '\u2252', + 'Efr;': '\U0001d508', + 'efr;': '\U0001d522', + 'eg;': '\u2a9a', + 'Egrave': '\xc8', + 'egrave': '\xe8', + 'Egrave;': '\xc8', + 'egrave;': '\xe8', + 'egs;': '\u2a96', + 'egsdot;': '\u2a98', + 'el;': '\u2a99', + 'Element;': '\u2208', + 'elinters;': '\u23e7', + 'ell;': '\u2113', + 'els;': '\u2a95', + 'elsdot;': '\u2a97', + 'Emacr;': '\u0112', + 'emacr;': '\u0113', + 'empty;': '\u2205', + 'emptyset;': '\u2205', + 'EmptySmallSquare;': '\u25fb', + 'emptyv;': '\u2205', + 'EmptyVerySmallSquare;': '\u25ab', + 'emsp13;': '\u2004', + 'emsp14;': '\u2005', + 'emsp;': '\u2003', + 'ENG;': '\u014a', + 'eng;': '\u014b', + 'ensp;': '\u2002', + 'Eogon;': '\u0118', + 'eogon;': '\u0119', + 'Eopf;': '\U0001d53c', + 'eopf;': '\U0001d556', + 'epar;': '\u22d5', + 'eparsl;': '\u29e3', + 'eplus;': '\u2a71', + 'epsi;': '\u03b5', + 'Epsilon;': '\u0395', + 'epsilon;': '\u03b5', + 'epsiv;': '\u03f5', + 'eqcirc;': '\u2256', + 'eqcolon;': '\u2255', + 'eqsim;': '\u2242', + 'eqslantgtr;': '\u2a96', + 'eqslantless;': '\u2a95', + 'Equal;': '\u2a75', + 'equals;': '=', + 'EqualTilde;': '\u2242', + 'equest;': '\u225f', + 'Equilibrium;': '\u21cc', + 'equiv;': '\u2261', + 'equivDD;': '\u2a78', + 'eqvparsl;': '\u29e5', + 'erarr;': '\u2971', + 'erDot;': '\u2253', + 'Escr;': '\u2130', + 'escr;': '\u212f', + 'esdot;': '\u2250', + 'Esim;': '\u2a73', + 'esim;': '\u2242', + 'Eta;': '\u0397', + 'eta;': '\u03b7', + 'ETH': '\xd0', + 'eth': '\xf0', + 'ETH;': '\xd0', + 'eth;': '\xf0', + 'Euml': '\xcb', + 'euml': '\xeb', + 'Euml;': '\xcb', + 'euml;': '\xeb', + 'euro;': '\u20ac', + 'excl;': '!', + 'exist;': '\u2203', + 'Exists;': '\u2203', + 'expectation;': '\u2130', + 'ExponentialE;': '\u2147', + 'exponentiale;': '\u2147', + 'fallingdotseq;': '\u2252', + 'Fcy;': '\u0424', + 'fcy;': '\u0444', + 'female;': '\u2640', + 'ffilig;': '\ufb03', + 'fflig;': '\ufb00', + 'ffllig;': '\ufb04', + 'Ffr;': '\U0001d509', + 'ffr;': '\U0001d523', + 'filig;': '\ufb01', + 'FilledSmallSquare;': '\u25fc', + 'FilledVerySmallSquare;': '\u25aa', + 'fjlig;': 'fj', + 'flat;': '\u266d', + 'fllig;': '\ufb02', + 'fltns;': '\u25b1', + 'fnof;': '\u0192', + 'Fopf;': '\U0001d53d', + 'fopf;': '\U0001d557', + 'ForAll;': '\u2200', + 'forall;': '\u2200', + 'fork;': '\u22d4', + 'forkv;': '\u2ad9', + 'Fouriertrf;': '\u2131', + 'fpartint;': 
'\u2a0d', + 'frac12': '\xbd', + 'frac12;': '\xbd', + 'frac13;': '\u2153', + 'frac14': '\xbc', + 'frac14;': '\xbc', + 'frac15;': '\u2155', + 'frac16;': '\u2159', + 'frac18;': '\u215b', + 'frac23;': '\u2154', + 'frac25;': '\u2156', + 'frac34': '\xbe', + 'frac34;': '\xbe', + 'frac35;': '\u2157', + 'frac38;': '\u215c', + 'frac45;': '\u2158', + 'frac56;': '\u215a', + 'frac58;': '\u215d', + 'frac78;': '\u215e', + 'frasl;': '\u2044', + 'frown;': '\u2322', + 'Fscr;': '\u2131', + 'fscr;': '\U0001d4bb', + 'gacute;': '\u01f5', + 'Gamma;': '\u0393', + 'gamma;': '\u03b3', + 'Gammad;': '\u03dc', + 'gammad;': '\u03dd', + 'gap;': '\u2a86', + 'Gbreve;': '\u011e', + 'gbreve;': '\u011f', + 'Gcedil;': '\u0122', + 'Gcirc;': '\u011c', + 'gcirc;': '\u011d', + 'Gcy;': '\u0413', + 'gcy;': '\u0433', + 'Gdot;': '\u0120', + 'gdot;': '\u0121', + 'gE;': '\u2267', + 'ge;': '\u2265', + 'gEl;': '\u2a8c', + 'gel;': '\u22db', + 'geq;': '\u2265', + 'geqq;': '\u2267', + 'geqslant;': '\u2a7e', + 'ges;': '\u2a7e', + 'gescc;': '\u2aa9', + 'gesdot;': '\u2a80', + 'gesdoto;': '\u2a82', + 'gesdotol;': '\u2a84', + 'gesl;': '\u22db\ufe00', + 'gesles;': '\u2a94', + 'Gfr;': '\U0001d50a', + 'gfr;': '\U0001d524', + 'Gg;': '\u22d9', + 'gg;': '\u226b', + 'ggg;': '\u22d9', + 'gimel;': '\u2137', + 'GJcy;': '\u0403', + 'gjcy;': '\u0453', + 'gl;': '\u2277', + 'gla;': '\u2aa5', + 'glE;': '\u2a92', + 'glj;': '\u2aa4', + 'gnap;': '\u2a8a', + 'gnapprox;': '\u2a8a', + 'gnE;': '\u2269', + 'gne;': '\u2a88', + 'gneq;': '\u2a88', + 'gneqq;': '\u2269', + 'gnsim;': '\u22e7', + 'Gopf;': '\U0001d53e', + 'gopf;': '\U0001d558', + 'grave;': '`', + 'GreaterEqual;': '\u2265', + 'GreaterEqualLess;': '\u22db', + 'GreaterFullEqual;': '\u2267', + 'GreaterGreater;': '\u2aa2', + 'GreaterLess;': '\u2277', + 'GreaterSlantEqual;': '\u2a7e', + 'GreaterTilde;': '\u2273', + 'Gscr;': '\U0001d4a2', + 'gscr;': '\u210a', + 'gsim;': '\u2273', + 'gsime;': '\u2a8e', + 'gsiml;': '\u2a90', + 'GT': '>', + 'gt': '>', + 'GT;': '>', + 'Gt;': '\u226b', + 'gt;': '>', + 'gtcc;': '\u2aa7', + 'gtcir;': '\u2a7a', + 'gtdot;': '\u22d7', + 'gtlPar;': '\u2995', + 'gtquest;': '\u2a7c', + 'gtrapprox;': '\u2a86', + 'gtrarr;': '\u2978', + 'gtrdot;': '\u22d7', + 'gtreqless;': '\u22db', + 'gtreqqless;': '\u2a8c', + 'gtrless;': '\u2277', + 'gtrsim;': '\u2273', + 'gvertneqq;': '\u2269\ufe00', + 'gvnE;': '\u2269\ufe00', + 'Hacek;': '\u02c7', + 'hairsp;': '\u200a', + 'half;': '\xbd', + 'hamilt;': '\u210b', + 'HARDcy;': '\u042a', + 'hardcy;': '\u044a', + 'hArr;': '\u21d4', + 'harr;': '\u2194', + 'harrcir;': '\u2948', + 'harrw;': '\u21ad', + 'Hat;': '^', + 'hbar;': '\u210f', + 'Hcirc;': '\u0124', + 'hcirc;': '\u0125', + 'hearts;': '\u2665', + 'heartsuit;': '\u2665', + 'hellip;': '\u2026', + 'hercon;': '\u22b9', + 'Hfr;': '\u210c', + 'hfr;': '\U0001d525', + 'HilbertSpace;': '\u210b', + 'hksearow;': '\u2925', + 'hkswarow;': '\u2926', + 'hoarr;': '\u21ff', + 'homtht;': '\u223b', + 'hookleftarrow;': '\u21a9', + 'hookrightarrow;': '\u21aa', + 'Hopf;': '\u210d', + 'hopf;': '\U0001d559', + 'horbar;': '\u2015', + 'HorizontalLine;': '\u2500', + 'Hscr;': '\u210b', + 'hscr;': '\U0001d4bd', + 'hslash;': '\u210f', + 'Hstrok;': '\u0126', + 'hstrok;': '\u0127', + 'HumpDownHump;': '\u224e', + 'HumpEqual;': '\u224f', + 'hybull;': '\u2043', + 'hyphen;': '\u2010', + 'Iacute': '\xcd', + 'iacute': '\xed', + 'Iacute;': '\xcd', + 'iacute;': '\xed', + 'ic;': '\u2063', + 'Icirc': '\xce', + 'icirc': '\xee', + 'Icirc;': '\xce', + 'icirc;': '\xee', + 'Icy;': '\u0418', + 'icy;': '\u0438', + 'Idot;': '\u0130', + 'IEcy;': '\u0415', + 
'iecy;': '\u0435', + 'iexcl': '\xa1', + 'iexcl;': '\xa1', + 'iff;': '\u21d4', + 'Ifr;': '\u2111', + 'ifr;': '\U0001d526', + 'Igrave': '\xcc', + 'igrave': '\xec', + 'Igrave;': '\xcc', + 'igrave;': '\xec', + 'ii;': '\u2148', + 'iiiint;': '\u2a0c', + 'iiint;': '\u222d', + 'iinfin;': '\u29dc', + 'iiota;': '\u2129', + 'IJlig;': '\u0132', + 'ijlig;': '\u0133', + 'Im;': '\u2111', + 'Imacr;': '\u012a', + 'imacr;': '\u012b', + 'image;': '\u2111', + 'ImaginaryI;': '\u2148', + 'imagline;': '\u2110', + 'imagpart;': '\u2111', + 'imath;': '\u0131', + 'imof;': '\u22b7', + 'imped;': '\u01b5', + 'Implies;': '\u21d2', + 'in;': '\u2208', + 'incare;': '\u2105', + 'infin;': '\u221e', + 'infintie;': '\u29dd', + 'inodot;': '\u0131', + 'Int;': '\u222c', + 'int;': '\u222b', + 'intcal;': '\u22ba', + 'integers;': '\u2124', + 'Integral;': '\u222b', + 'intercal;': '\u22ba', + 'Intersection;': '\u22c2', + 'intlarhk;': '\u2a17', + 'intprod;': '\u2a3c', + 'InvisibleComma;': '\u2063', + 'InvisibleTimes;': '\u2062', + 'IOcy;': '\u0401', + 'iocy;': '\u0451', + 'Iogon;': '\u012e', + 'iogon;': '\u012f', + 'Iopf;': '\U0001d540', + 'iopf;': '\U0001d55a', + 'Iota;': '\u0399', + 'iota;': '\u03b9', + 'iprod;': '\u2a3c', + 'iquest': '\xbf', + 'iquest;': '\xbf', + 'Iscr;': '\u2110', + 'iscr;': '\U0001d4be', + 'isin;': '\u2208', + 'isindot;': '\u22f5', + 'isinE;': '\u22f9', + 'isins;': '\u22f4', + 'isinsv;': '\u22f3', + 'isinv;': '\u2208', + 'it;': '\u2062', + 'Itilde;': '\u0128', + 'itilde;': '\u0129', + 'Iukcy;': '\u0406', + 'iukcy;': '\u0456', + 'Iuml': '\xcf', + 'iuml': '\xef', + 'Iuml;': '\xcf', + 'iuml;': '\xef', + 'Jcirc;': '\u0134', + 'jcirc;': '\u0135', + 'Jcy;': '\u0419', + 'jcy;': '\u0439', + 'Jfr;': '\U0001d50d', + 'jfr;': '\U0001d527', + 'jmath;': '\u0237', + 'Jopf;': '\U0001d541', + 'jopf;': '\U0001d55b', + 'Jscr;': '\U0001d4a5', + 'jscr;': '\U0001d4bf', + 'Jsercy;': '\u0408', + 'jsercy;': '\u0458', + 'Jukcy;': '\u0404', + 'jukcy;': '\u0454', + 'Kappa;': '\u039a', + 'kappa;': '\u03ba', + 'kappav;': '\u03f0', + 'Kcedil;': '\u0136', + 'kcedil;': '\u0137', + 'Kcy;': '\u041a', + 'kcy;': '\u043a', + 'Kfr;': '\U0001d50e', + 'kfr;': '\U0001d528', + 'kgreen;': '\u0138', + 'KHcy;': '\u0425', + 'khcy;': '\u0445', + 'KJcy;': '\u040c', + 'kjcy;': '\u045c', + 'Kopf;': '\U0001d542', + 'kopf;': '\U0001d55c', + 'Kscr;': '\U0001d4a6', + 'kscr;': '\U0001d4c0', + 'lAarr;': '\u21da', + 'Lacute;': '\u0139', + 'lacute;': '\u013a', + 'laemptyv;': '\u29b4', + 'lagran;': '\u2112', + 'Lambda;': '\u039b', + 'lambda;': '\u03bb', + 'Lang;': '\u27ea', + 'lang;': '\u27e8', + 'langd;': '\u2991', + 'langle;': '\u27e8', + 'lap;': '\u2a85', + 'Laplacetrf;': '\u2112', + 'laquo': '\xab', + 'laquo;': '\xab', + 'Larr;': '\u219e', + 'lArr;': '\u21d0', + 'larr;': '\u2190', + 'larrb;': '\u21e4', + 'larrbfs;': '\u291f', + 'larrfs;': '\u291d', + 'larrhk;': '\u21a9', + 'larrlp;': '\u21ab', + 'larrpl;': '\u2939', + 'larrsim;': '\u2973', + 'larrtl;': '\u21a2', + 'lat;': '\u2aab', + 'lAtail;': '\u291b', + 'latail;': '\u2919', + 'late;': '\u2aad', + 'lates;': '\u2aad\ufe00', + 'lBarr;': '\u290e', + 'lbarr;': '\u290c', + 'lbbrk;': '\u2772', + 'lbrace;': '{', + 'lbrack;': '[', + 'lbrke;': '\u298b', + 'lbrksld;': '\u298f', + 'lbrkslu;': '\u298d', + 'Lcaron;': '\u013d', + 'lcaron;': '\u013e', + 'Lcedil;': '\u013b', + 'lcedil;': '\u013c', + 'lceil;': '\u2308', + 'lcub;': '{', + 'Lcy;': '\u041b', + 'lcy;': '\u043b', + 'ldca;': '\u2936', + 'ldquo;': '\u201c', + 'ldquor;': '\u201e', + 'ldrdhar;': '\u2967', + 'ldrushar;': '\u294b', + 'ldsh;': '\u21b2', + 'lE;': '\u2266', + 
'le;': '\u2264', + 'LeftAngleBracket;': '\u27e8', + 'LeftArrow;': '\u2190', + 'Leftarrow;': '\u21d0', + 'leftarrow;': '\u2190', + 'LeftArrowBar;': '\u21e4', + 'LeftArrowRightArrow;': '\u21c6', + 'leftarrowtail;': '\u21a2', + 'LeftCeiling;': '\u2308', + 'LeftDoubleBracket;': '\u27e6', + 'LeftDownTeeVector;': '\u2961', + 'LeftDownVector;': '\u21c3', + 'LeftDownVectorBar;': '\u2959', + 'LeftFloor;': '\u230a', + 'leftharpoondown;': '\u21bd', + 'leftharpoonup;': '\u21bc', + 'leftleftarrows;': '\u21c7', + 'LeftRightArrow;': '\u2194', + 'Leftrightarrow;': '\u21d4', + 'leftrightarrow;': '\u2194', + 'leftrightarrows;': '\u21c6', + 'leftrightharpoons;': '\u21cb', + 'leftrightsquigarrow;': '\u21ad', + 'LeftRightVector;': '\u294e', + 'LeftTee;': '\u22a3', + 'LeftTeeArrow;': '\u21a4', + 'LeftTeeVector;': '\u295a', + 'leftthreetimes;': '\u22cb', + 'LeftTriangle;': '\u22b2', + 'LeftTriangleBar;': '\u29cf', + 'LeftTriangleEqual;': '\u22b4', + 'LeftUpDownVector;': '\u2951', + 'LeftUpTeeVector;': '\u2960', + 'LeftUpVector;': '\u21bf', + 'LeftUpVectorBar;': '\u2958', + 'LeftVector;': '\u21bc', + 'LeftVectorBar;': '\u2952', + 'lEg;': '\u2a8b', + 'leg;': '\u22da', + 'leq;': '\u2264', + 'leqq;': '\u2266', + 'leqslant;': '\u2a7d', + 'les;': '\u2a7d', + 'lescc;': '\u2aa8', + 'lesdot;': '\u2a7f', + 'lesdoto;': '\u2a81', + 'lesdotor;': '\u2a83', + 'lesg;': '\u22da\ufe00', + 'lesges;': '\u2a93', + 'lessapprox;': '\u2a85', + 'lessdot;': '\u22d6', + 'lesseqgtr;': '\u22da', + 'lesseqqgtr;': '\u2a8b', + 'LessEqualGreater;': '\u22da', + 'LessFullEqual;': '\u2266', + 'LessGreater;': '\u2276', + 'lessgtr;': '\u2276', + 'LessLess;': '\u2aa1', + 'lesssim;': '\u2272', + 'LessSlantEqual;': '\u2a7d', + 'LessTilde;': '\u2272', + 'lfisht;': '\u297c', + 'lfloor;': '\u230a', + 'Lfr;': '\U0001d50f', + 'lfr;': '\U0001d529', + 'lg;': '\u2276', + 'lgE;': '\u2a91', + 'lHar;': '\u2962', + 'lhard;': '\u21bd', + 'lharu;': '\u21bc', + 'lharul;': '\u296a', + 'lhblk;': '\u2584', + 'LJcy;': '\u0409', + 'ljcy;': '\u0459', + 'Ll;': '\u22d8', + 'll;': '\u226a', + 'llarr;': '\u21c7', + 'llcorner;': '\u231e', + 'Lleftarrow;': '\u21da', + 'llhard;': '\u296b', + 'lltri;': '\u25fa', + 'Lmidot;': '\u013f', + 'lmidot;': '\u0140', + 'lmoust;': '\u23b0', + 'lmoustache;': '\u23b0', + 'lnap;': '\u2a89', + 'lnapprox;': '\u2a89', + 'lnE;': '\u2268', + 'lne;': '\u2a87', + 'lneq;': '\u2a87', + 'lneqq;': '\u2268', + 'lnsim;': '\u22e6', + 'loang;': '\u27ec', + 'loarr;': '\u21fd', + 'lobrk;': '\u27e6', + 'LongLeftArrow;': '\u27f5', + 'Longleftarrow;': '\u27f8', + 'longleftarrow;': '\u27f5', + 'LongLeftRightArrow;': '\u27f7', + 'Longleftrightarrow;': '\u27fa', + 'longleftrightarrow;': '\u27f7', + 'longmapsto;': '\u27fc', + 'LongRightArrow;': '\u27f6', + 'Longrightarrow;': '\u27f9', + 'longrightarrow;': '\u27f6', + 'looparrowleft;': '\u21ab', + 'looparrowright;': '\u21ac', + 'lopar;': '\u2985', + 'Lopf;': '\U0001d543', + 'lopf;': '\U0001d55d', + 'loplus;': '\u2a2d', + 'lotimes;': '\u2a34', + 'lowast;': '\u2217', + 'lowbar;': '_', + 'LowerLeftArrow;': '\u2199', + 'LowerRightArrow;': '\u2198', + 'loz;': '\u25ca', + 'lozenge;': '\u25ca', + 'lozf;': '\u29eb', + 'lpar;': '(', + 'lparlt;': '\u2993', + 'lrarr;': '\u21c6', + 'lrcorner;': '\u231f', + 'lrhar;': '\u21cb', + 'lrhard;': '\u296d', + 'lrm;': '\u200e', + 'lrtri;': '\u22bf', + 'lsaquo;': '\u2039', + 'Lscr;': '\u2112', + 'lscr;': '\U0001d4c1', + 'Lsh;': '\u21b0', + 'lsh;': '\u21b0', + 'lsim;': '\u2272', + 'lsime;': '\u2a8d', + 'lsimg;': '\u2a8f', + 'lsqb;': '[', + 'lsquo;': '\u2018', + 'lsquor;': '\u201a', + 
'Lstrok;': '\u0141', + 'lstrok;': '\u0142', + 'LT': '<', + 'lt': '<', + 'LT;': '<', + 'Lt;': '\u226a', + 'lt;': '<', + 'ltcc;': '\u2aa6', + 'ltcir;': '\u2a79', + 'ltdot;': '\u22d6', + 'lthree;': '\u22cb', + 'ltimes;': '\u22c9', + 'ltlarr;': '\u2976', + 'ltquest;': '\u2a7b', + 'ltri;': '\u25c3', + 'ltrie;': '\u22b4', + 'ltrif;': '\u25c2', + 'ltrPar;': '\u2996', + 'lurdshar;': '\u294a', + 'luruhar;': '\u2966', + 'lvertneqq;': '\u2268\ufe00', + 'lvnE;': '\u2268\ufe00', + 'macr': '\xaf', + 'macr;': '\xaf', + 'male;': '\u2642', + 'malt;': '\u2720', + 'maltese;': '\u2720', + 'Map;': '\u2905', + 'map;': '\u21a6', + 'mapsto;': '\u21a6', + 'mapstodown;': '\u21a7', + 'mapstoleft;': '\u21a4', + 'mapstoup;': '\u21a5', + 'marker;': '\u25ae', + 'mcomma;': '\u2a29', + 'Mcy;': '\u041c', + 'mcy;': '\u043c', + 'mdash;': '\u2014', + 'mDDot;': '\u223a', + 'measuredangle;': '\u2221', + 'MediumSpace;': '\u205f', + 'Mellintrf;': '\u2133', + 'Mfr;': '\U0001d510', + 'mfr;': '\U0001d52a', + 'mho;': '\u2127', + 'micro': '\xb5', + 'micro;': '\xb5', + 'mid;': '\u2223', + 'midast;': '*', + 'midcir;': '\u2af0', + 'middot': '\xb7', + 'middot;': '\xb7', + 'minus;': '\u2212', + 'minusb;': '\u229f', + 'minusd;': '\u2238', + 'minusdu;': '\u2a2a', + 'MinusPlus;': '\u2213', + 'mlcp;': '\u2adb', + 'mldr;': '\u2026', + 'mnplus;': '\u2213', + 'models;': '\u22a7', + 'Mopf;': '\U0001d544', + 'mopf;': '\U0001d55e', + 'mp;': '\u2213', + 'Mscr;': '\u2133', + 'mscr;': '\U0001d4c2', + 'mstpos;': '\u223e', + 'Mu;': '\u039c', + 'mu;': '\u03bc', + 'multimap;': '\u22b8', + 'mumap;': '\u22b8', + 'nabla;': '\u2207', + 'Nacute;': '\u0143', + 'nacute;': '\u0144', + 'nang;': '\u2220\u20d2', + 'nap;': '\u2249', + 'napE;': '\u2a70\u0338', + 'napid;': '\u224b\u0338', + 'napos;': '\u0149', + 'napprox;': '\u2249', + 'natur;': '\u266e', + 'natural;': '\u266e', + 'naturals;': '\u2115', + # 'nbsp': '\xa0', + 'nbsp;': '\xa0', + 'nbump;': '\u224e\u0338', + 'nbumpe;': '\u224f\u0338', + 'ncap;': '\u2a43', + 'Ncaron;': '\u0147', + 'ncaron;': '\u0148', + 'Ncedil;': '\u0145', + 'ncedil;': '\u0146', + 'ncong;': '\u2247', + 'ncongdot;': '\u2a6d\u0338', + 'ncup;': '\u2a42', + 'Ncy;': '\u041d', + 'ncy;': '\u043d', + 'ndash;': '\u2013', + 'ne;': '\u2260', + 'nearhk;': '\u2924', + 'neArr;': '\u21d7', + 'nearr;': '\u2197', + 'nearrow;': '\u2197', + 'nedot;': '\u2250\u0338', + 'NegativeMediumSpace;': '\u200b', + 'NegativeThickSpace;': '\u200b', + 'NegativeThinSpace;': '\u200b', + 'NegativeVeryThinSpace;': '\u200b', + 'nequiv;': '\u2262', + 'nesear;': '\u2928', + 'nesim;': '\u2242\u0338', + 'NestedGreaterGreater;': '\u226b', + 'NestedLessLess;': '\u226a', + 'NewLine;': '\n', + 'nexist;': '\u2204', + 'nexists;': '\u2204', + 'Nfr;': '\U0001d511', + 'nfr;': '\U0001d52b', + 'ngE;': '\u2267\u0338', + 'nge;': '\u2271', + 'ngeq;': '\u2271', + 'ngeqq;': '\u2267\u0338', + 'ngeqslant;': '\u2a7e\u0338', + 'nges;': '\u2a7e\u0338', + 'nGg;': '\u22d9\u0338', + 'ngsim;': '\u2275', + 'nGt;': '\u226b\u20d2', + 'ngt;': '\u226f', + 'ngtr;': '\u226f', + 'nGtv;': '\u226b\u0338', + 'nhArr;': '\u21ce', + 'nharr;': '\u21ae', + 'nhpar;': '\u2af2', + 'ni;': '\u220b', + 'nis;': '\u22fc', + 'nisd;': '\u22fa', + 'niv;': '\u220b', + 'NJcy;': '\u040a', + 'njcy;': '\u045a', + 'nlArr;': '\u21cd', + 'nlarr;': '\u219a', + 'nldr;': '\u2025', + 'nlE;': '\u2266\u0338', + 'nle;': '\u2270', + 'nLeftarrow;': '\u21cd', + 'nleftarrow;': '\u219a', + 'nLeftrightarrow;': '\u21ce', + 'nleftrightarrow;': '\u21ae', + 'nleq;': '\u2270', + 'nleqq;': '\u2266\u0338', + 'nleqslant;': '\u2a7d\u0338', + 'nles;': 
'\u2a7d\u0338', + 'nless;': '\u226e', + 'nLl;': '\u22d8\u0338', + 'nlsim;': '\u2274', + 'nLt;': '\u226a\u20d2', + 'nlt;': '\u226e', + 'nltri;': '\u22ea', + 'nltrie;': '\u22ec', + 'nLtv;': '\u226a\u0338', + 'nmid;': '\u2224', + 'NoBreak;': '\u2060', + 'NonBreakingSpace;': '\xa0', + 'Nopf;': '\u2115', + 'nopf;': '\U0001d55f', + 'not': '\xac', + 'Not;': '\u2aec', + 'not;': '\xac', + 'NotCongruent;': '\u2262', + 'NotCupCap;': '\u226d', + 'NotDoubleVerticalBar;': '\u2226', + 'NotElement;': '\u2209', + 'NotEqual;': '\u2260', + 'NotEqualTilde;': '\u2242\u0338', + 'NotExists;': '\u2204', + 'NotGreater;': '\u226f', + 'NotGreaterEqual;': '\u2271', + 'NotGreaterFullEqual;': '\u2267\u0338', + 'NotGreaterGreater;': '\u226b\u0338', + 'NotGreaterLess;': '\u2279', + 'NotGreaterSlantEqual;': '\u2a7e\u0338', + 'NotGreaterTilde;': '\u2275', + 'NotHumpDownHump;': '\u224e\u0338', + 'NotHumpEqual;': '\u224f\u0338', + 'notin;': '\u2209', + 'notindot;': '\u22f5\u0338', + 'notinE;': '\u22f9\u0338', + 'notinva;': '\u2209', + 'notinvb;': '\u22f7', + 'notinvc;': '\u22f6', + 'NotLeftTriangle;': '\u22ea', + 'NotLeftTriangleBar;': '\u29cf\u0338', + 'NotLeftTriangleEqual;': '\u22ec', + 'NotLess;': '\u226e', + 'NotLessEqual;': '\u2270', + 'NotLessGreater;': '\u2278', + 'NotLessLess;': '\u226a\u0338', + 'NotLessSlantEqual;': '\u2a7d\u0338', + 'NotLessTilde;': '\u2274', + 'NotNestedGreaterGreater;': '\u2aa2\u0338', + 'NotNestedLessLess;': '\u2aa1\u0338', + 'notni;': '\u220c', + 'notniva;': '\u220c', + 'notnivb;': '\u22fe', + 'notnivc;': '\u22fd', + 'NotPrecedes;': '\u2280', + 'NotPrecedesEqual;': '\u2aaf\u0338', + 'NotPrecedesSlantEqual;': '\u22e0', + 'NotReverseElement;': '\u220c', + 'NotRightTriangle;': '\u22eb', + 'NotRightTriangleBar;': '\u29d0\u0338', + 'NotRightTriangleEqual;': '\u22ed', + 'NotSquareSubset;': '\u228f\u0338', + 'NotSquareSubsetEqual;': '\u22e2', + 'NotSquareSuperset;': '\u2290\u0338', + 'NotSquareSupersetEqual;': '\u22e3', + 'NotSubset;': '\u2282\u20d2', + 'NotSubsetEqual;': '\u2288', + 'NotSucceeds;': '\u2281', + 'NotSucceedsEqual;': '\u2ab0\u0338', + 'NotSucceedsSlantEqual;': '\u22e1', + 'NotSucceedsTilde;': '\u227f\u0338', + 'NotSuperset;': '\u2283\u20d2', + 'NotSupersetEqual;': '\u2289', + 'NotTilde;': '\u2241', + 'NotTildeEqual;': '\u2244', + 'NotTildeFullEqual;': '\u2247', + 'NotTildeTilde;': '\u2249', + 'NotVerticalBar;': '\u2224', + 'npar;': '\u2226', + 'nparallel;': '\u2226', + 'nparsl;': '\u2afd\u20e5', + 'npart;': '\u2202\u0338', + 'npolint;': '\u2a14', + 'npr;': '\u2280', + 'nprcue;': '\u22e0', + 'npre;': '\u2aaf\u0338', + 'nprec;': '\u2280', + 'npreceq;': '\u2aaf\u0338', + 'nrArr;': '\u21cf', + 'nrarr;': '\u219b', + 'nrarrc;': '\u2933\u0338', + 'nrarrw;': '\u219d\u0338', + 'nRightarrow;': '\u21cf', + 'nrightarrow;': '\u219b', + 'nrtri;': '\u22eb', + 'nrtrie;': '\u22ed', + 'nsc;': '\u2281', + 'nsccue;': '\u22e1', + 'nsce;': '\u2ab0\u0338', + 'Nscr;': '\U0001d4a9', + 'nscr;': '\U0001d4c3', + 'nshortmid;': '\u2224', + 'nshortparallel;': '\u2226', + 'nsim;': '\u2241', + 'nsime;': '\u2244', + 'nsimeq;': '\u2244', + 'nsmid;': '\u2224', + 'nspar;': '\u2226', + 'nsqsube;': '\u22e2', + 'nsqsupe;': '\u22e3', + 'nsub;': '\u2284', + 'nsubE;': '\u2ac5\u0338', + 'nsube;': '\u2288', + 'nsubset;': '\u2282\u20d2', + 'nsubseteq;': '\u2288', + 'nsubseteqq;': '\u2ac5\u0338', + 'nsucc;': '\u2281', + 'nsucceq;': '\u2ab0\u0338', + 'nsup;': '\u2285', + 'nsupE;': '\u2ac6\u0338', + 'nsupe;': '\u2289', + 'nsupset;': '\u2283\u20d2', + 'nsupseteq;': '\u2289', + 'nsupseteqq;': '\u2ac6\u0338', + 'ntgl;': '\u2279', + 
'Ntilde': '\xd1', + 'ntilde': '\xf1', + 'Ntilde;': '\xd1', + 'ntilde;': '\xf1', + 'ntlg;': '\u2278', + 'ntriangleleft;': '\u22ea', + 'ntrianglelefteq;': '\u22ec', + 'ntriangleright;': '\u22eb', + 'ntrianglerighteq;': '\u22ed', + 'Nu;': '\u039d', + 'nu;': '\u03bd', + 'num;': '#', + 'numero;': '\u2116', + 'numsp;': '\u2007', + 'nvap;': '\u224d\u20d2', + 'nVDash;': '\u22af', + 'nVdash;': '\u22ae', + 'nvDash;': '\u22ad', + 'nvdash;': '\u22ac', + 'nvge;': '\u2265\u20d2', + 'nvgt;': '>\u20d2', + 'nvHarr;': '\u2904', + 'nvinfin;': '\u29de', + 'nvlArr;': '\u2902', + 'nvle;': '\u2264\u20d2', + 'nvlt;': '<\u20d2', + 'nvltrie;': '\u22b4\u20d2', + 'nvrArr;': '\u2903', + 'nvrtrie;': '\u22b5\u20d2', + 'nvsim;': '\u223c\u20d2', + 'nwarhk;': '\u2923', + 'nwArr;': '\u21d6', + 'nwarr;': '\u2196', + 'nwarrow;': '\u2196', + 'nwnear;': '\u2927', + 'Oacute': '\xd3', + 'oacute': '\xf3', + 'Oacute;': '\xd3', + 'oacute;': '\xf3', + 'oast;': '\u229b', + 'ocir;': '\u229a', + 'Ocirc': '\xd4', + 'ocirc': '\xf4', + 'Ocirc;': '\xd4', + 'ocirc;': '\xf4', + 'Ocy;': '\u041e', + 'ocy;': '\u043e', + 'odash;': '\u229d', + 'Odblac;': '\u0150', + 'odblac;': '\u0151', + 'odiv;': '\u2a38', + 'odot;': '\u2299', + 'odsold;': '\u29bc', + 'OElig;': '\u0152', + 'oelig;': '\u0153', + 'ofcir;': '\u29bf', + 'Ofr;': '\U0001d512', + 'ofr;': '\U0001d52c', + 'ogon;': '\u02db', + 'Ograve': '\xd2', + 'ograve': '\xf2', + 'Ograve;': '\xd2', + 'ograve;': '\xf2', + 'ogt;': '\u29c1', + 'ohbar;': '\u29b5', + 'ohm;': '\u03a9', + 'oint;': '\u222e', + 'olarr;': '\u21ba', + 'olcir;': '\u29be', + 'olcross;': '\u29bb', + 'oline;': '\u203e', + 'olt;': '\u29c0', + 'Omacr;': '\u014c', + 'omacr;': '\u014d', + 'Omega;': '\u03a9', + 'omega;': '\u03c9', + 'Omicron;': '\u039f', + 'omicron;': '\u03bf', + 'omid;': '\u29b6', + 'ominus;': '\u2296', + 'Oopf;': '\U0001d546', + 'oopf;': '\U0001d560', + 'opar;': '\u29b7', + 'OpenCurlyDoubleQuote;': '\u201c', + 'OpenCurlyQuote;': '\u2018', + 'operp;': '\u29b9', + 'oplus;': '\u2295', + 'Or;': '\u2a54', + 'or;': '\u2228', + 'orarr;': '\u21bb', + 'ord;': '\u2a5d', + 'order;': '\u2134', + 'orderof;': '\u2134', + 'ordf': '\xaa', + 'ordf;': '\xaa', + 'ordm': '\xba', + 'ordm;': '\xba', + 'origof;': '\u22b6', + 'oror;': '\u2a56', + 'orslope;': '\u2a57', + 'orv;': '\u2a5b', + 'oS;': '\u24c8', + 'Oscr;': '\U0001d4aa', + 'oscr;': '\u2134', + 'Oslash': '\xd8', + 'oslash': '\xf8', + 'Oslash;': '\xd8', + 'oslash;': '\xf8', + 'osol;': '\u2298', + 'Otilde': '\xd5', + 'otilde': '\xf5', + 'Otilde;': '\xd5', + 'otilde;': '\xf5', + 'Otimes;': '\u2a37', + 'otimes;': '\u2297', + 'otimesas;': '\u2a36', + 'Ouml': '\xd6', + 'ouml': '\xf6', + 'Ouml;': '\xd6', + 'ouml;': '\xf6', + 'ovbar;': '\u233d', + 'OverBar;': '\u203e', + 'OverBrace;': '\u23de', + 'OverBracket;': '\u23b4', + 'OverParenthesis;': '\u23dc', + 'par;': '\u2225', + 'para': '\xb6', + 'para;': '\xb6', + 'parallel;': '\u2225', + 'parsim;': '\u2af3', + 'parsl;': '\u2afd', + 'part;': '\u2202', + 'PartialD;': '\u2202', + 'Pcy;': '\u041f', + 'pcy;': '\u043f', + 'percnt;': '%', + 'period;': '.', + 'permil;': '\u2030', + 'perp;': '\u22a5', + 'pertenk;': '\u2031', + 'Pfr;': '\U0001d513', + 'pfr;': '\U0001d52d', + 'Phi;': '\u03a6', + 'phi;': '\u03c6', + 'phiv;': '\u03d5', + 'phmmat;': '\u2133', + 'phone;': '\u260e', + 'Pi;': '\u03a0', + 'pi;': '\u03c0', + 'pitchfork;': '\u22d4', + 'piv;': '\u03d6', + 'planck;': '\u210f', + 'planckh;': '\u210e', + 'plankv;': '\u210f', + 'plus;': '+', + 'plusacir;': '\u2a23', + 'plusb;': '\u229e', + 'pluscir;': '\u2a22', + 'plusdo;': '\u2214', + 'plusdu;': 
'\u2a25', + 'pluse;': '\u2a72', + 'PlusMinus;': '\xb1', + 'plusmn': '\xb1', + 'plusmn;': '\xb1', + 'plussim;': '\u2a26', + 'plustwo;': '\u2a27', + 'pm;': '\xb1', + 'Poincareplane;': '\u210c', + 'pointint;': '\u2a15', + 'Popf;': '\u2119', + 'popf;': '\U0001d561', + 'pound': '\xa3', + 'pound;': '\xa3', + 'Pr;': '\u2abb', + 'pr;': '\u227a', + 'prap;': '\u2ab7', + 'prcue;': '\u227c', + 'prE;': '\u2ab3', + 'pre;': '\u2aaf', + 'prec;': '\u227a', + 'precapprox;': '\u2ab7', + 'preccurlyeq;': '\u227c', + 'Precedes;': '\u227a', + 'PrecedesEqual;': '\u2aaf', + 'PrecedesSlantEqual;': '\u227c', + 'PrecedesTilde;': '\u227e', + 'preceq;': '\u2aaf', + 'precnapprox;': '\u2ab9', + 'precneqq;': '\u2ab5', + 'precnsim;': '\u22e8', + 'precsim;': '\u227e', + 'Prime;': '\u2033', + 'prime;': '\u2032', + 'primes;': '\u2119', + 'prnap;': '\u2ab9', + 'prnE;': '\u2ab5', + 'prnsim;': '\u22e8', + 'prod;': '\u220f', + 'Product;': '\u220f', + 'profalar;': '\u232e', + 'profline;': '\u2312', + 'profsurf;': '\u2313', + 'prop;': '\u221d', + 'Proportion;': '\u2237', + 'Proportional;': '\u221d', + 'propto;': '\u221d', + 'prsim;': '\u227e', + 'prurel;': '\u22b0', + 'Pscr;': '\U0001d4ab', + 'pscr;': '\U0001d4c5', + 'Psi;': '\u03a8', + 'psi;': '\u03c8', + 'puncsp;': '\u2008', + 'Qfr;': '\U0001d514', + 'qfr;': '\U0001d52e', + 'qint;': '\u2a0c', + 'Qopf;': '\u211a', + 'qopf;': '\U0001d562', + 'qprime;': '\u2057', + 'Qscr;': '\U0001d4ac', + 'qscr;': '\U0001d4c6', + 'quaternions;': '\u210d', + 'quatint;': '\u2a16', + 'quest;': '?', + 'questeq;': '\u225f', + 'QUOT': '"', + 'quot': '"', + 'QUOT;': '"', + 'quot;': '"', + 'rAarr;': '\u21db', + 'race;': '\u223d\u0331', + 'Racute;': '\u0154', + 'racute;': '\u0155', + 'radic;': '\u221a', + 'raemptyv;': '\u29b3', + 'Rang;': '\u27eb', + 'rang;': '\u27e9', + 'rangd;': '\u2992', + 'range;': '\u29a5', + 'rangle;': '\u27e9', + 'raquo': '\xbb', + 'raquo;': '\xbb', + 'Rarr;': '\u21a0', + 'rArr;': '\u21d2', + 'rarr;': '\u2192', + 'rarrap;': '\u2975', + 'rarrb;': '\u21e5', + 'rarrbfs;': '\u2920', + 'rarrc;': '\u2933', + 'rarrfs;': '\u291e', + 'rarrhk;': '\u21aa', + 'rarrlp;': '\u21ac', + 'rarrpl;': '\u2945', + 'rarrsim;': '\u2974', + 'Rarrtl;': '\u2916', + 'rarrtl;': '\u21a3', + 'rarrw;': '\u219d', + 'rAtail;': '\u291c', + 'ratail;': '\u291a', + 'ratio;': '\u2236', + 'rationals;': '\u211a', + 'RBarr;': '\u2910', + 'rBarr;': '\u290f', + 'rbarr;': '\u290d', + 'rbbrk;': '\u2773', + 'rbrace;': '}', + 'rbrack;': ']', + 'rbrke;': '\u298c', + 'rbrksld;': '\u298e', + 'rbrkslu;': '\u2990', + 'Rcaron;': '\u0158', + 'rcaron;': '\u0159', + 'Rcedil;': '\u0156', + 'rcedil;': '\u0157', + 'rceil;': '\u2309', + 'rcub;': '}', + 'Rcy;': '\u0420', + 'rcy;': '\u0440', + 'rdca;': '\u2937', + 'rdldhar;': '\u2969', + 'rdquo;': '\u201d', + 'rdquor;': '\u201d', + 'rdsh;': '\u21b3', + 'Re;': '\u211c', + 'real;': '\u211c', + 'realine;': '\u211b', + 'realpart;': '\u211c', + 'reals;': '\u211d', + 'rect;': '\u25ad', + 'REG': '\xae', + 'reg': '\xae', + 'REG;': '\xae', + 'reg;': '\xae', + 'ReverseElement;': '\u220b', + 'ReverseEquilibrium;': '\u21cb', + 'ReverseUpEquilibrium;': '\u296f', + 'rfisht;': '\u297d', + 'rfloor;': '\u230b', + 'Rfr;': '\u211c', + 'rfr;': '\U0001d52f', + 'rHar;': '\u2964', + 'rhard;': '\u21c1', + 'rharu;': '\u21c0', + 'rharul;': '\u296c', + 'Rho;': '\u03a1', + 'rho;': '\u03c1', + 'rhov;': '\u03f1', + 'RightAngleBracket;': '\u27e9', + 'RightArrow;': '\u2192', + 'Rightarrow;': '\u21d2', + 'rightarrow;': '\u2192', + 'RightArrowBar;': '\u21e5', + 'RightArrowLeftArrow;': '\u21c4', + 'rightarrowtail;': '\u21a3', + 
'RightCeiling;': '\u2309', + 'RightDoubleBracket;': '\u27e7', + 'RightDownTeeVector;': '\u295d', + 'RightDownVector;': '\u21c2', + 'RightDownVectorBar;': '\u2955', + 'RightFloor;': '\u230b', + 'rightharpoondown;': '\u21c1', + 'rightharpoonup;': '\u21c0', + 'rightleftarrows;': '\u21c4', + 'rightleftharpoons;': '\u21cc', + 'rightrightarrows;': '\u21c9', + 'rightsquigarrow;': '\u219d', + 'RightTee;': '\u22a2', + 'RightTeeArrow;': '\u21a6', + 'RightTeeVector;': '\u295b', + 'rightthreetimes;': '\u22cc', + 'RightTriangle;': '\u22b3', + 'RightTriangleBar;': '\u29d0', + 'RightTriangleEqual;': '\u22b5', + 'RightUpDownVector;': '\u294f', + 'RightUpTeeVector;': '\u295c', + 'RightUpVector;': '\u21be', + 'RightUpVectorBar;': '\u2954', + 'RightVector;': '\u21c0', + 'RightVectorBar;': '\u2953', + 'ring;': '\u02da', + 'risingdotseq;': '\u2253', + 'rlarr;': '\u21c4', + 'rlhar;': '\u21cc', + 'rlm;': '\u200f', + 'rmoust;': '\u23b1', + 'rmoustache;': '\u23b1', + 'rnmid;': '\u2aee', + 'roang;': '\u27ed', + 'roarr;': '\u21fe', + 'robrk;': '\u27e7', + 'ropar;': '\u2986', + 'Ropf;': '\u211d', + 'ropf;': '\U0001d563', + 'roplus;': '\u2a2e', + 'rotimes;': '\u2a35', + 'RoundImplies;': '\u2970', + 'rpar;': ')', + 'rpargt;': '\u2994', + 'rppolint;': '\u2a12', + 'rrarr;': '\u21c9', + 'Rrightarrow;': '\u21db', + 'rsaquo;': '\u203a', + 'Rscr;': '\u211b', + 'rscr;': '\U0001d4c7', + 'Rsh;': '\u21b1', + 'rsh;': '\u21b1', + 'rsqb;': ']', + 'rsquo;': '\u2019', + 'rsquor;': '\u2019', + 'rthree;': '\u22cc', + 'rtimes;': '\u22ca', + 'rtri;': '\u25b9', + 'rtrie;': '\u22b5', + 'rtrif;': '\u25b8', + 'rtriltri;': '\u29ce', + 'RuleDelayed;': '\u29f4', + 'ruluhar;': '\u2968', + 'rx;': '\u211e', + 'Sacute;': '\u015a', + 'sacute;': '\u015b', + 'sbquo;': '\u201a', + 'Sc;': '\u2abc', + 'sc;': '\u227b', + 'scap;': '\u2ab8', + 'Scaron;': '\u0160', + 'scaron;': '\u0161', + 'sccue;': '\u227d', + 'scE;': '\u2ab4', + 'sce;': '\u2ab0', + 'Scedil;': '\u015e', + 'scedil;': '\u015f', + 'Scirc;': '\u015c', + 'scirc;': '\u015d', + 'scnap;': '\u2aba', + 'scnE;': '\u2ab6', + 'scnsim;': '\u22e9', + 'scpolint;': '\u2a13', + 'scsim;': '\u227f', + 'Scy;': '\u0421', + 'scy;': '\u0441', + 'sdot;': '\u22c5', + 'sdotb;': '\u22a1', + 'sdote;': '\u2a66', + 'searhk;': '\u2925', + 'seArr;': '\u21d8', + 'searr;': '\u2198', + 'searrow;': '\u2198', + 'sect': '\xa7', + 'sect;': '\xa7', + 'semi;': ';', + 'seswar;': '\u2929', + 'setminus;': '\u2216', + 'setmn;': '\u2216', + 'sext;': '\u2736', + 'Sfr;': '\U0001d516', + 'sfr;': '\U0001d530', + 'sfrown;': '\u2322', + 'sharp;': '\u266f', + 'SHCHcy;': '\u0429', + 'shchcy;': '\u0449', + 'SHcy;': '\u0428', + 'shcy;': '\u0448', + 'ShortDownArrow;': '\u2193', + 'ShortLeftArrow;': '\u2190', + 'shortmid;': '\u2223', + 'shortparallel;': '\u2225', + 'ShortRightArrow;': '\u2192', + 'ShortUpArrow;': '\u2191', + 'shy': '\xad', + 'shy;': '\xad', + 'Sigma;': '\u03a3', + 'sigma;': '\u03c3', + 'sigmaf;': '\u03c2', + 'sigmav;': '\u03c2', + 'sim;': '\u223c', + 'simdot;': '\u2a6a', + 'sime;': '\u2243', + 'simeq;': '\u2243', + 'simg;': '\u2a9e', + 'simgE;': '\u2aa0', + 'siml;': '\u2a9d', + 'simlE;': '\u2a9f', + 'simne;': '\u2246', + 'simplus;': '\u2a24', + 'simrarr;': '\u2972', + 'slarr;': '\u2190', + 'SmallCircle;': '\u2218', + 'smallsetminus;': '\u2216', + 'smashp;': '\u2a33', + 'smeparsl;': '\u29e4', + 'smid;': '\u2223', + 'smile;': '\u2323', + 'smt;': '\u2aaa', + 'smte;': '\u2aac', + 'smtes;': '\u2aac\ufe00', + 'SOFTcy;': '\u042c', + 'softcy;': '\u044c', + 'sol;': '/', + 'solb;': '\u29c4', + 'solbar;': '\u233f', + 'Sopf;': '\U0001d54a', + 
'sopf;': '\U0001d564', + 'spades;': '\u2660', + 'spadesuit;': '\u2660', + 'spar;': '\u2225', + 'sqcap;': '\u2293', + 'sqcaps;': '\u2293\ufe00', + 'sqcup;': '\u2294', + 'sqcups;': '\u2294\ufe00', + 'Sqrt;': '\u221a', + 'sqsub;': '\u228f', + 'sqsube;': '\u2291', + 'sqsubset;': '\u228f', + 'sqsubseteq;': '\u2291', + 'sqsup;': '\u2290', + 'sqsupe;': '\u2292', + 'sqsupset;': '\u2290', + 'sqsupseteq;': '\u2292', + 'squ;': '\u25a1', + 'Square;': '\u25a1', + 'square;': '\u25a1', + 'SquareIntersection;': '\u2293', + 'SquareSubset;': '\u228f', + 'SquareSubsetEqual;': '\u2291', + 'SquareSuperset;': '\u2290', + 'SquareSupersetEqual;': '\u2292', + 'SquareUnion;': '\u2294', + 'squarf;': '\u25aa', + 'squf;': '\u25aa', + 'srarr;': '\u2192', + 'Sscr;': '\U0001d4ae', + 'sscr;': '\U0001d4c8', + 'ssetmn;': '\u2216', + 'ssmile;': '\u2323', + 'sstarf;': '\u22c6', + 'Star;': '\u22c6', + 'star;': '\u2606', + 'starf;': '\u2605', + 'straightepsilon;': '\u03f5', + 'straightphi;': '\u03d5', + 'strns;': '\xaf', + 'Sub;': '\u22d0', + 'sub;': '\u2282', + 'subdot;': '\u2abd', + 'subE;': '\u2ac5', + 'sube;': '\u2286', + 'subedot;': '\u2ac3', + 'submult;': '\u2ac1', + 'subnE;': '\u2acb', + 'subne;': '\u228a', + 'subplus;': '\u2abf', + 'subrarr;': '\u2979', + 'Subset;': '\u22d0', + 'subset;': '\u2282', + 'subseteq;': '\u2286', + 'subseteqq;': '\u2ac5', + 'SubsetEqual;': '\u2286', + 'subsetneq;': '\u228a', + 'subsetneqq;': '\u2acb', + 'subsim;': '\u2ac7', + 'subsub;': '\u2ad5', + 'subsup;': '\u2ad3', + 'succ;': '\u227b', + 'succapprox;': '\u2ab8', + 'succcurlyeq;': '\u227d', + 'Succeeds;': '\u227b', + 'SucceedsEqual;': '\u2ab0', + 'SucceedsSlantEqual;': '\u227d', + 'SucceedsTilde;': '\u227f', + 'succeq;': '\u2ab0', + 'succnapprox;': '\u2aba', + 'succneqq;': '\u2ab6', + 'succnsim;': '\u22e9', + 'succsim;': '\u227f', + 'SuchThat;': '\u220b', + 'Sum;': '\u2211', + 'sum;': '\u2211', + 'sung;': '\u266a', + 'sup1': '\xb9', + 'sup1;': '\xb9', + 'sup2': '\xb2', + 'sup2;': '\xb2', + 'sup3': '\xb3', + 'sup3;': '\xb3', + 'Sup;': '\u22d1', + 'sup;': '\u2283', + 'supdot;': '\u2abe', + 'supdsub;': '\u2ad8', + 'supE;': '\u2ac6', + 'supe;': '\u2287', + 'supedot;': '\u2ac4', + 'Superset;': '\u2283', + 'SupersetEqual;': '\u2287', + 'suphsol;': '\u27c9', + 'suphsub;': '\u2ad7', + 'suplarr;': '\u297b', + 'supmult;': '\u2ac2', + 'supnE;': '\u2acc', + 'supne;': '\u228b', + 'supplus;': '\u2ac0', + 'Supset;': '\u22d1', + 'supset;': '\u2283', + 'supseteq;': '\u2287', + 'supseteqq;': '\u2ac6', + 'supsetneq;': '\u228b', + 'supsetneqq;': '\u2acc', + 'supsim;': '\u2ac8', + 'supsub;': '\u2ad4', + 'supsup;': '\u2ad6', + 'swarhk;': '\u2926', + 'swArr;': '\u21d9', + 'swarr;': '\u2199', + 'swarrow;': '\u2199', + 'swnwar;': '\u292a', + 'szlig': '\xdf', + 'szlig;': '\xdf', + 'Tab;': '\t', + 'target;': '\u2316', + 'Tau;': '\u03a4', + 'tau;': '\u03c4', + 'tbrk;': '\u23b4', + 'Tcaron;': '\u0164', + 'tcaron;': '\u0165', + 'Tcedil;': '\u0162', + 'tcedil;': '\u0163', + 'Tcy;': '\u0422', + 'tcy;': '\u0442', + 'tdot;': '\u20db', + 'telrec;': '\u2315', + 'Tfr;': '\U0001d517', + 'tfr;': '\U0001d531', + 'there4;': '\u2234', + 'Therefore;': '\u2234', + 'therefore;': '\u2234', + 'Theta;': '\u0398', + 'theta;': '\u03b8', + 'thetasym;': '\u03d1', + 'thetav;': '\u03d1', + 'thickapprox;': '\u2248', + 'thicksim;': '\u223c', + 'ThickSpace;': '\u205f\u200a', + 'thinsp;': '\u2009', + 'ThinSpace;': '\u2009', + 'thkap;': '\u2248', + 'thksim;': '\u223c', + 'THORN': '\xde', + 'thorn': '\xfe', + 'THORN;': '\xde', + 'thorn;': '\xfe', + 'Tilde;': '\u223c', + 'tilde;': '\u02dc', + 
'TildeEqual;': '\u2243', + 'TildeFullEqual;': '\u2245', + 'TildeTilde;': '\u2248', + 'times': '\xd7', + 'times;': '\xd7', + 'timesb;': '\u22a0', + 'timesbar;': '\u2a31', + 'timesd;': '\u2a30', + 'tint;': '\u222d', + 'toea;': '\u2928', + 'top;': '\u22a4', + 'topbot;': '\u2336', + 'topcir;': '\u2af1', + 'Topf;': '\U0001d54b', + 'topf;': '\U0001d565', + 'topfork;': '\u2ada', + 'tosa;': '\u2929', + 'tprime;': '\u2034', + 'TRADE;': '\u2122', + 'trade;': '\u2122', + 'triangle;': '\u25b5', + 'triangledown;': '\u25bf', + 'triangleleft;': '\u25c3', + 'trianglelefteq;': '\u22b4', + 'triangleq;': '\u225c', + 'triangleright;': '\u25b9', + 'trianglerighteq;': '\u22b5', + 'tridot;': '\u25ec', + 'trie;': '\u225c', + 'triminus;': '\u2a3a', + 'TripleDot;': '\u20db', + 'triplus;': '\u2a39', + 'trisb;': '\u29cd', + 'tritime;': '\u2a3b', + 'trpezium;': '\u23e2', + 'Tscr;': '\U0001d4af', + 'tscr;': '\U0001d4c9', + 'TScy;': '\u0426', + 'tscy;': '\u0446', + 'TSHcy;': '\u040b', + 'tshcy;': '\u045b', + 'Tstrok;': '\u0166', + 'tstrok;': '\u0167', + 'twixt;': '\u226c', + 'twoheadleftarrow;': '\u219e', + 'twoheadrightarrow;': '\u21a0', + 'Uacute': '\xda', + 'uacute': '\xfa', + 'Uacute;': '\xda', + 'uacute;': '\xfa', + 'Uarr;': '\u219f', + 'uArr;': '\u21d1', + 'uarr;': '\u2191', + 'Uarrocir;': '\u2949', + 'Ubrcy;': '\u040e', + 'ubrcy;': '\u045e', + 'Ubreve;': '\u016c', + 'ubreve;': '\u016d', + 'Ucirc': '\xdb', + 'ucirc': '\xfb', + 'Ucirc;': '\xdb', + 'ucirc;': '\xfb', + 'Ucy;': '\u0423', + 'ucy;': '\u0443', + 'udarr;': '\u21c5', + 'Udblac;': '\u0170', + 'udblac;': '\u0171', + 'udhar;': '\u296e', + 'ufisht;': '\u297e', + 'Ufr;': '\U0001d518', + 'ufr;': '\U0001d532', + 'Ugrave': '\xd9', + 'ugrave': '\xf9', + 'Ugrave;': '\xd9', + 'ugrave;': '\xf9', + 'uHar;': '\u2963', + 'uharl;': '\u21bf', + 'uharr;': '\u21be', + 'uhblk;': '\u2580', + 'ulcorn;': '\u231c', + 'ulcorner;': '\u231c', + 'ulcrop;': '\u230f', + 'ultri;': '\u25f8', + 'Umacr;': '\u016a', + 'umacr;': '\u016b', + 'uml': '\xa8', + 'uml;': '\xa8', + 'UnderBar;': '_', + 'UnderBrace;': '\u23df', + 'UnderBracket;': '\u23b5', + 'UnderParenthesis;': '\u23dd', + 'Union;': '\u22c3', + 'UnionPlus;': '\u228e', + 'Uogon;': '\u0172', + 'uogon;': '\u0173', + 'Uopf;': '\U0001d54c', + 'uopf;': '\U0001d566', + 'UpArrow;': '\u2191', + 'Uparrow;': '\u21d1', + 'uparrow;': '\u2191', + 'UpArrowBar;': '\u2912', + 'UpArrowDownArrow;': '\u21c5', + 'UpDownArrow;': '\u2195', + 'Updownarrow;': '\u21d5', + 'updownarrow;': '\u2195', + 'UpEquilibrium;': '\u296e', + 'upharpoonleft;': '\u21bf', + 'upharpoonright;': '\u21be', + 'uplus;': '\u228e', + 'UpperLeftArrow;': '\u2196', + 'UpperRightArrow;': '\u2197', + 'Upsi;': '\u03d2', + 'upsi;': '\u03c5', + 'upsih;': '\u03d2', + 'Upsilon;': '\u03a5', + 'upsilon;': '\u03c5', + 'UpTee;': '\u22a5', + 'UpTeeArrow;': '\u21a5', + 'upuparrows;': '\u21c8', + 'urcorn;': '\u231d', + 'urcorner;': '\u231d', + 'urcrop;': '\u230e', + 'Uring;': '\u016e', + 'uring;': '\u016f', + 'urtri;': '\u25f9', + 'Uscr;': '\U0001d4b0', + 'uscr;': '\U0001d4ca', + 'utdot;': '\u22f0', + 'Utilde;': '\u0168', + 'utilde;': '\u0169', + 'utri;': '\u25b5', + 'utrif;': '\u25b4', + 'uuarr;': '\u21c8', + 'Uuml': '\xdc', + 'uuml': '\xfc', + 'Uuml;': '\xdc', + 'uuml;': '\xfc', + 'uwangle;': '\u29a7', + 'vangrt;': '\u299c', + 'varepsilon;': '\u03f5', + 'varkappa;': '\u03f0', + 'varnothing;': '\u2205', + 'varphi;': '\u03d5', + 'varpi;': '\u03d6', + 'varpropto;': '\u221d', + 'vArr;': '\u21d5', + 'varr;': '\u2195', + 'varrho;': '\u03f1', + 'varsigma;': '\u03c2', + 'varsubsetneq;': '\u228a\ufe00', + 
'varsubsetneqq;': '\u2acb\ufe00', + 'varsupsetneq;': '\u228b\ufe00', + 'varsupsetneqq;': '\u2acc\ufe00', + 'vartheta;': '\u03d1', + 'vartriangleleft;': '\u22b2', + 'vartriangleright;': '\u22b3', + 'Vbar;': '\u2aeb', + 'vBar;': '\u2ae8', + 'vBarv;': '\u2ae9', + 'Vcy;': '\u0412', + 'vcy;': '\u0432', + 'VDash;': '\u22ab', + 'Vdash;': '\u22a9', + 'vDash;': '\u22a8', + 'vdash;': '\u22a2', + 'Vdashl;': '\u2ae6', + 'Vee;': '\u22c1', + 'vee;': '\u2228', + 'veebar;': '\u22bb', + 'veeeq;': '\u225a', + 'vellip;': '\u22ee', + 'Verbar;': '\u2016', + 'verbar;': '|', + 'Vert;': '\u2016', + 'vert;': '|', + 'VerticalBar;': '\u2223', + 'VerticalLine;': '|', + 'VerticalSeparator;': '\u2758', + 'VerticalTilde;': '\u2240', + 'VeryThinSpace;': '\u200a', + 'Vfr;': '\U0001d519', + 'vfr;': '\U0001d533', + 'vltri;': '\u22b2', + 'vnsub;': '\u2282\u20d2', + 'vnsup;': '\u2283\u20d2', + 'Vopf;': '\U0001d54d', + 'vopf;': '\U0001d567', + 'vprop;': '\u221d', + 'vrtri;': '\u22b3', + 'Vscr;': '\U0001d4b1', + 'vscr;': '\U0001d4cb', + 'vsubnE;': '\u2acb\ufe00', + 'vsubne;': '\u228a\ufe00', + 'vsupnE;': '\u2acc\ufe00', + 'vsupne;': '\u228b\ufe00', + 'Vvdash;': '\u22aa', + 'vzigzag;': '\u299a', + 'Wcirc;': '\u0174', + 'wcirc;': '\u0175', + 'wedbar;': '\u2a5f', + 'Wedge;': '\u22c0', + 'wedge;': '\u2227', + 'wedgeq;': '\u2259', + 'weierp;': '\u2118', + 'Wfr;': '\U0001d51a', + 'wfr;': '\U0001d534', + 'Wopf;': '\U0001d54e', + 'wopf;': '\U0001d568', + 'wp;': '\u2118', + 'wr;': '\u2240', + 'wreath;': '\u2240', + 'Wscr;': '\U0001d4b2', + 'wscr;': '\U0001d4cc', + 'xcap;': '\u22c2', + 'xcirc;': '\u25ef', + 'xcup;': '\u22c3', + 'xdtri;': '\u25bd', + 'Xfr;': '\U0001d51b', + 'xfr;': '\U0001d535', + 'xhArr;': '\u27fa', + 'xharr;': '\u27f7', + 'Xi;': '\u039e', + 'xi;': '\u03be', + 'xlArr;': '\u27f8', + 'xlarr;': '\u27f5', + 'xmap;': '\u27fc', + 'xnis;': '\u22fb', + 'xodot;': '\u2a00', + 'Xopf;': '\U0001d54f', + 'xopf;': '\U0001d569', + 'xoplus;': '\u2a01', + 'xotime;': '\u2a02', + 'xrArr;': '\u27f9', + 'xrarr;': '\u27f6', + 'Xscr;': '\U0001d4b3', + 'xscr;': '\U0001d4cd', + 'xsqcup;': '\u2a06', + 'xuplus;': '\u2a04', + 'xutri;': '\u25b3', + 'xvee;': '\u22c1', + 'xwedge;': '\u22c0', + 'Yacute': '\xdd', + 'yacute': '\xfd', + 'Yacute;': '\xdd', + 'yacute;': '\xfd', + 'YAcy;': '\u042f', + 'yacy;': '\u044f', + 'Ycirc;': '\u0176', + 'ycirc;': '\u0177', + 'Ycy;': '\u042b', + 'ycy;': '\u044b', + 'yen': '\xa5', + 'yen;': '\xa5', + 'Yfr;': '\U0001d51c', + 'yfr;': '\U0001d536', + 'YIcy;': '\u0407', + 'yicy;': '\u0457', + 'Yopf;': '\U0001d550', + 'yopf;': '\U0001d56a', + 'Yscr;': '\U0001d4b4', + 'yscr;': '\U0001d4ce', + 'YUcy;': '\u042e', + 'yucy;': '\u044e', + 'yuml': '\xff', + 'Yuml;': '\u0178', + 'yuml;': '\xff', + 'Zacute;': '\u0179', + 'zacute;': '\u017a', + 'Zcaron;': '\u017d', + 'zcaron;': '\u017e', + 'Zcy;': '\u0417', + 'zcy;': '\u0437', + 'Zdot;': '\u017b', + 'zdot;': '\u017c', + 'zeetrf;': '\u2128', + 'ZeroWidthSpace;': '\u200b', + 'Zeta;': '\u0396', + 'zeta;': '\u03b6', + 'Zfr;': '\u2128', + 'zfr;': '\U0001d537', + 'ZHcy;': '\u0416', + 'zhcy;': '\u0436', + 'zigrarr;': '\u21dd', + 'Zopf;': '\u2124', + 'zopf;': '\U0001d56b', + 'Zscr;': '\U0001d4b5', + 'zscr;': '\U0001d4cf', + 'zwj;': '\u200d', + 'zwnj;': '\u200c', +} + +_invalid_charrefs = { + 0x00: '\ufffd', # REPLACEMENT CHARACTER + 0x0d: '\r', # CARRIAGE RETURN + 0x80: '\u20ac', # EURO SIGN + 0x81: '\x81', # <control> + 0x82: '\u201a', # SINGLE LOW-9 QUOTATION MARK + 0x83: '\u0192', # LATIN SMALL LETTER F WITH HOOK + 0x84: '\u201e', # DOUBLE LOW-9 QUOTATION MARK + 0x85: '\u2026', # HORIZONTAL 
ELLIPSIS
+    0x86: '\u2020',  # DAGGER
+    0x87: '\u2021',  # DOUBLE DAGGER
+    0x88: '\u02c6',  # MODIFIER LETTER CIRCUMFLEX ACCENT
+    0x89: '\u2030',  # PER MILLE SIGN
+    0x8a: '\u0160',  # LATIN CAPITAL LETTER S WITH CARON
+    0x8b: '\u2039',  # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    0x8c: '\u0152',  # LATIN CAPITAL LIGATURE OE
+    0x8d: '\x8d',  # <control>
+    0x8e: '\u017d',  # LATIN CAPITAL LETTER Z WITH CARON
+    0x8f: '\x8f',  # <control>
+    0x90: '\x90',  # <control>
+    0x91: '\u2018',  # LEFT SINGLE QUOTATION MARK
+    0x92: '\u2019',  # RIGHT SINGLE QUOTATION MARK
+    0x93: '\u201c',  # LEFT DOUBLE QUOTATION MARK
+    0x94: '\u201d',  # RIGHT DOUBLE QUOTATION MARK
+    0x95: '\u2022',  # BULLET
+    0x96: '\u2013',  # EN DASH
+    0x97: '\u2014',  # EM DASH
+    0x98: '\u02dc',  # SMALL TILDE
+    0x99: '\u2122',  # TRADE MARK SIGN
+    0x9a: '\u0161',  # LATIN SMALL LETTER S WITH CARON
+    0x9b: '\u203a',  # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    0x9c: '\u0153',  # LATIN SMALL LIGATURE OE
+    0x9d: '\x9d',  # <control>
+    0x9e: '\u017e',  # LATIN SMALL LETTER Z WITH CARON
+    0x9f: '\u0178',  # LATIN CAPITAL LETTER Y WITH DIAERESIS
+}
+
+_invalid_codepoints = set([
+    # 0x0001 to 0x0008
+    0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+    # 0x000E to 0x001F
+    0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
+    0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+    # 0x007F to 0x009F
+    0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a,
+    0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
+    0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+    # 0xFDD0 to 0xFDEF
+    0xfdd0, 0xfdd1, 0xfdd2, 0xfdd3, 0xfdd4, 0xfdd5, 0xfdd6, 0xfdd7, 0xfdd8,
+    0xfdd9, 0xfdda, 0xfddb, 0xfddc, 0xfddd, 0xfdde, 0xfddf, 0xfde0, 0xfde1,
+    0xfde2, 0xfde3, 0xfde4, 0xfde5, 0xfde6, 0xfde7, 0xfde8, 0xfde9, 0xfdea,
+    0xfdeb, 0xfdec, 0xfded, 0xfdee, 0xfdef,
+    # others
+    0xb, 0xfffe, 0xffff, 0x1fffe, 0x1ffff, 0x2fffe, 0x2ffff, 0x3fffe, 0x3ffff,
+    0x4fffe, 0x4ffff, 0x5fffe, 0x5ffff, 0x6fffe, 0x6ffff, 0x7fffe, 0x7ffff,
+    0x8fffe, 0x8ffff, 0x9fffe, 0x9ffff, 0xafffe, 0xaffff, 0xbfffe, 0xbffff,
+    0xcfffe, 0xcffff, 0xdfffe, 0xdffff, 0xefffe, 0xeffff, 0xffffe, 0xfffff,
+    0x10fffe, 0x10ffff
+])
+
+
+def _replace_charref(s):
+    s = s.group(1)
+    if s[0] == '#':
+        # numeric charref
+        if s[1] in 'xX':
+            num = int(s[2:].rstrip(';'), 16)
+        else:
+            num = int(s[1:].rstrip(';'))
+        if num in _invalid_charrefs:
+            return _invalid_charrefs[num]
+        if 0xD800 <= num <= 0xDFFF or num > 0x10FFFF:
+            return '\uFFFD'
+        if num in _invalid_codepoints:
+            return ''
+        if sys.version_info >= (3, 0):
+            return chr(num)
+        else:
+            return unichr(num)  # noqa
+    else:
+        # named charref
+        if s in _html5:
+            return _html5[s]
+        # find the longest matching name (as defined by the standard)
+        for x in range(len(s)-1, 1, -1):
+            if s[:x] in _html5:
+                return _html5[s[:x]] + s[x:]
+        else:
+            return '&' + s
+
+
+_charref = _re.compile(r'&(#[0-9]+;?'
+                       r'|#[xX][0-9a-fA-F]+;?'
+                       r'|[^\t\n\f <&#;]{1,32};?)')
+
+
+def _unescape(s):
+    """
+    Convert all named and numeric character references (e.g. &gt;, &#62;,
+    &#x3e;) in the string s to the corresponding unicode characters.
+    This function uses the rules defined by the HTML 5 standard
+    for both valid and invalid character references, and the list of
+    HTML 5 named character references defined in html.entities.html5.
+ """ + if '&' not in s: + return s + return _charref.sub(_replace_charref, s) diff --git a/libs/commonmark/inlines.py b/libs/commonmark/inlines.py new file mode 100644 index 000000000..88a84cf45 --- /dev/null +++ b/libs/commonmark/inlines.py @@ -0,0 +1,882 @@ +from __future__ import absolute_import, unicode_literals, division + +import re +import sys +from commonmark import common +from commonmark.common import normalize_uri, unescape_string +from commonmark.node import Node +from commonmark.normalize_reference import normalize_reference + +if sys.version_info >= (3, 0): + if sys.version_info >= (3, 4): + import html + HTMLunescape = html.unescape + else: + from .entitytrans import _unescape + HTMLunescape = _unescape +else: + from commonmark import entitytrans + HTMLunescape = entitytrans._unescape + +# Some regexps used in inline parser: + +ESCAPED_CHAR = '\\\\' + common.ESCAPABLE + +rePunctuation = re.compile( + r'[!"#$%&\'()*+,\-./:;<=>?@\[\]\\^_`{|}~\xA1\xA7\xAB\xB6\xB7\xBB' + r'\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3' + r'\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F' + r'\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E' + r'\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12' + r'\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB' + r'\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736' + r'\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-' + r'\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F' + r'\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E' + r'\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5' + r'\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC' + r'\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42\u3001-\u3003\u3008-\u3011' + r'\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673' + r'\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E' + r'\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0' + r'\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63' + r'\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B' + r'\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-' + r'\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58' + r'\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD804[\uDC47-\uDC4D' + r'\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC9\uDDCD' + r'\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDCC6\uDDC1-\uDDD7' + r'\uDE41-\uDE43\uDF3C-\uDF3E]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F' + r'\uDEF5\uDF37-\uDF3B\uDF44]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]' +) + +reLinkTitle = re.compile( + '^(?:"(' + ESCAPED_CHAR + '|[^"\\x00])*"' + + '|' + + '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'' + + '|' + + '\\((' + ESCAPED_CHAR + '|[^()\\x00])*\\))') +reLinkDestinationBraces = re.compile(r'^(?:<(?:[^<>\n\\\x00]|\\.)*>)') + +reEscapable = re.compile('^' + common.ESCAPABLE) +reEntityHere = re.compile('^' + common.ENTITY, re.IGNORECASE) +reTicks = re.compile(r'`+') +reTicksHere = re.compile(r'^`+') +reEllipses = re.compile(r'\.\.\.') +reDash = re.compile(r'--+') +reEmailAutolink = re.compile( + r"^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9]" + r"(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?" 
+ r"(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>") +reAutolink = re.compile( + r'^<[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*>', + re.IGNORECASE) +reSpnl = re.compile(r'^ *(?:\n *)?') +reWhitespaceChar = re.compile(r'^^[ \t\n\x0b\x0c\x0d]') +reWhitespace = re.compile(r'[ \t\n\x0b\x0c\x0d]+') +reUnicodeWhitespaceChar = re.compile(r'^\s') +reFinalSpace = re.compile(r' *$') +reInitialSpace = re.compile(r'^ *') +reSpaceAtEndOfLine = re.compile(r'^ *(?:\n|$)') +reLinkLabel = re.compile(r'^\[(?:[^\\\[\]]|\\.){0,1000}\]') +# Matches a string of non-special characters. +reMain = re.compile(r'^[^\n`\[\]\\!<&*_\'"]+', re.MULTILINE) + + +def text(s): + node = Node('text', None) + node.literal = s + return node + + +def smart_dashes(chars): + en_count = 0 + em_count = 0 + if len(chars) % 3 == 0: + # If divisible by 3, use all em dashes + em_count = len(chars) // 3 + elif len(chars) % 2 == 0: + # If divisble by 2, use all en dashes + en_count = len(chars) // 2 + elif len(chars) % 3 == 2: + # if 2 extra dashes, use en dashfor last 2; + # em dashes for rest + en_count = 1 + em_count = (len(chars) - 2) // 3 + else: + # Use en dashes for last 4 hyphens; em dashes for rest + en_count = 2 + em_count = (len(chars) - 4) // 3 + return ('\u2014' * em_count) + ('\u2013' * en_count) + + +class InlineParser(object): + """INLINE PARSER + + These are methods of an InlineParser class, defined below. + An InlineParser keeps track of a subject (a string to be + parsed) and a position in that subject. + """ + + def __init__(self, options={}): + self.subject = '' + self.brackets = None + self.pos = 0 + self.refmap = {} + self.options = options + + def match(self, regexString): + """ + If regexString matches at current position in the subject, advance + position in subject and return the match; otherwise return None. + """ + match = re.search(regexString, self.subject[self.pos:]) + if match is None: + return None + else: + self.pos += match.end() + return match.group() + + def peek(self): + """ Returns the character at the current subject position, or None if + there are no more characters.""" + if self.pos < len(self.subject): + return self.subject[self.pos] + else: + return None + + def spnl(self): + """ Parse zero or more space characters, including at + most one newline.""" + self.match(reSpnl) + return True + + # All of the parsers below try to match something at the current position + # in the subject. If they succeed in matching anything, they + # push an inline matched, advancing the subject. + + def parseBackticks(self, block): + """ Attempt to parse backticks, adding either a backtick code span or a + literal sequence of backticks to the 'inlines' list.""" + ticks = self.match(reTicksHere) + if ticks is None: + return False + after_open_ticks = self.pos + matched = self.match(reTicks) + while matched is not None: + if matched == ticks: + node = Node('code', None) + contents = self.subject[after_open_ticks:self.pos-len(ticks)] \ + .replace('\n', ' ') + if contents.lstrip(' ') and contents[0] == contents[-1] == ' ': + node.literal = contents[1:-1] + else: + node.literal = contents + block.append_child(node) + return True + matched = self.match(reTicks) + # If we got here, we didn't match a closing backtick sequence. 
+ self.pos = after_open_ticks + block.append_child(text(ticks)) + return True + + def parseBackslash(self, block): + """ + Parse a backslash-escaped special character, adding either the + escaped character, a hard line break (if the backslash is followed + by a newline), or a literal backslash to the block's children. + Assumes current character is a backslash. + """ + subj = self.subject + self.pos += 1 + + try: + subjchar = subj[self.pos] + except IndexError: + subjchar = None + + if self.peek() == '\n': + self.pos += 1 + node = Node('linebreak', None) + block.append_child(node) + elif subjchar and re.search(reEscapable, subjchar): + block.append_child(text(subjchar)) + self.pos += 1 + else: + block.append_child(text('\\')) + + return True + + def parseAutolink(self, block): + """Attempt to parse an autolink (URL or email in pointy brackets).""" + m = self.match(reEmailAutolink) + + if m: + # email + dest = m[1:-1] + node = Node('link', None) + node.destination = normalize_uri('mailto:' + dest) + node.title = '' + node.append_child(text(dest)) + block.append_child(node) + return True + else: + m = self.match(reAutolink) + if m: + # link + dest = m[1:-1] + node = Node('link', None) + node.destination = normalize_uri(dest) + node.title = '' + node.append_child(text(dest)) + block.append_child(node) + return True + + return False + + def parseHtmlTag(self, block): + """Attempt to parse a raw HTML tag.""" + m = self.match(common.reHtmlTag) + if m is None: + return False + else: + node = Node('html_inline', None) + node.literal = m + block.append_child(node) + return True + + def scanDelims(self, c): + """ + Scan a sequence of characters == c, and return information about + the number of delimiters and whether they are positioned such that + they can open and/or close emphasis or strong emphasis. A utility + function for strong/emph parsing. 
+ """ + numdelims = 0 + startpos = self.pos + + if c == "'" or c == '"': + numdelims += 1 + self.pos += 1 + else: + while (self.peek() == c): + numdelims += 1 + self.pos += 1 + + if numdelims == 0: + return None + + c_before = '\n' if startpos == 0 else self.subject[startpos - 1] + + c_after = self.peek() + if c_after is None: + c_after = '\n' + + # Python 2 doesn't recognize '\xa0' as whitespace + after_is_whitespace = re.search(reUnicodeWhitespaceChar, c_after) or \ + c_after == '\xa0' + after_is_punctuation = re.search(rePunctuation, c_after) + before_is_whitespace = re.search( + reUnicodeWhitespaceChar, c_before) or \ + c_before == '\xa0' + before_is_punctuation = re.search(rePunctuation, c_before) + + left_flanking = not after_is_whitespace and \ + (not after_is_punctuation or + before_is_whitespace or + before_is_punctuation) + right_flanking = not before_is_whitespace and \ + (not before_is_punctuation or + after_is_whitespace or + after_is_punctuation) + if c == '_': + can_open = left_flanking and \ + (not right_flanking or before_is_punctuation) + can_close = right_flanking and \ + (not left_flanking or after_is_punctuation) + elif c == "'" or c == '"': + can_open = left_flanking and not right_flanking + can_close = right_flanking + else: + can_open = left_flanking + can_close = right_flanking + + self.pos = startpos + return { + 'numdelims': numdelims, + 'can_open': can_open, + 'can_close': can_close, + } + + def handleDelim(self, cc, block): + """Handle a delimiter marker for emphasis or a quote.""" + res = self.scanDelims(cc) + if not res: + return False + numdelims = res.get('numdelims') + startpos = self.pos + + self.pos += numdelims + if cc == "'": + contents = '\u2019' + elif cc == '"': + contents = '\u201C' + else: + contents = self.subject[startpos:self.pos] + node = text(contents) + block.append_child(node) + + # Add entry to stack for this opener + self.delimiters = { + 'cc': cc, + 'numdelims': numdelims, + 'origdelims': numdelims, + 'node': node, + 'previous': self.delimiters, + 'next': None, + 'can_open': res.get('can_open'), + 'can_close': res.get('can_close'), + } + if self.delimiters['previous'] is not None: + self.delimiters['previous']['next'] = self.delimiters + return True + + def removeDelimiter(self, delim): + if delim.get('previous') is not None: + delim['previous']['next'] = delim.get('next') + if delim.get('next') is None: + # Top of stack + self.delimiters = delim.get('previous') + else: + delim['next']['previous'] = delim.get('previous') + + @staticmethod + def removeDelimitersBetween(bottom, top): + if bottom.get('next') != top: + bottom['next'] = top + top['previous'] = bottom + + def processEmphasis(self, stack_bottom): + openers_bottom = { + '_': stack_bottom, + '*': stack_bottom, + "'": stack_bottom, + '"': stack_bottom, + } + odd_match = False + use_delims = 0 + + # Find first closer above stack_bottom + closer = self.delimiters + while closer is not None and closer.get('previous') != stack_bottom: + closer = closer.get('previous') + + # Move forward, looking for closers, and handling each + while closer is not None: + if not closer.get('can_close'): + closer = closer.get('next') + else: + # found emphasis closer. 
now look back for first + # matching opener: + opener = closer.get('previous') + opener_found = False + closercc = closer.get('cc') + while (opener is not None and opener != stack_bottom and + opener != openers_bottom[closercc]): + odd_match = (closer.get('can_open') or + opener.get('can_close')) and \ + closer['origdelims'] % 3 != 0 and \ + (opener['origdelims'] + + closer['origdelims']) % 3 == 0 + if opener.get('cc') == closercc and \ + opener.get('can_open') and \ + not odd_match: + opener_found = True + break + opener = opener.get('previous') + old_closer = closer + + if closercc == '*' or closercc == '_': + if not opener_found: + closer = closer.get('next') + else: + # Calculate actual number of delimiters used from + # closer + use_delims = 2 if ( + closer['numdelims'] >= 2 and + opener['numdelims'] >= 2) else 1 + + opener_inl = opener.get('node') + closer_inl = closer.get('node') + + # Remove used delimiters from stack elts and inlines + opener['numdelims'] -= use_delims + closer['numdelims'] -= use_delims + opener_inl.literal = opener_inl.literal[ + :len(opener_inl.literal) - use_delims] + closer_inl.literal = closer_inl.literal[ + :len(closer_inl.literal) - use_delims] + + # Build contents for new Emph element + if use_delims == 1: + emph = Node('emph', None) + else: + emph = Node('strong', None) + + tmp = opener_inl.nxt + while tmp and tmp != closer_inl: + nxt = tmp.nxt + tmp.unlink() + emph.append_child(tmp) + tmp = nxt + + opener_inl.insert_after(emph) + + # Remove elts between opener and closer in delimiters + # stack + self.removeDelimitersBetween(opener, closer) + + # If opener has 0 delims, remove it and the inline + if opener['numdelims'] == 0: + opener_inl.unlink() + self.removeDelimiter(opener) + + if closer['numdelims'] == 0: + closer_inl.unlink() + tempstack = closer['next'] + self.removeDelimiter(closer) + closer = tempstack + + elif closercc == "'": + closer['node'].literal = '\u2019' + if opener_found: + opener['node'].literal = '\u2018' + closer = closer['next'] + + elif closercc == '"': + closer['node'].literal = '\u201D' + if opener_found: + opener['node'].literal = '\u201C' + closer = closer['next'] + + if not opener_found and not odd_match: + # Set lower bound for future searches for openers: + # We don't do this with odd_match because a ** + # that doesn't match an earlier * might turn into + # an opener, and the * might be matched by something + # else. + openers_bottom[closercc] = old_closer['previous'] + if not old_closer['can_open']: + # We can remove a closer that can't be an opener, + # once we've seen there's no matching opener: + self.removeDelimiter(old_closer) + + # Remove all delimiters + while self.delimiters is not None and self.delimiters != stack_bottom: + self.removeDelimiter(self.delimiters) + + def parseLinkTitle(self): + """ + Attempt to parse link title (sans quotes), returning the string + or None if no match. + """ + title = self.match(reLinkTitle) + if title is None: + return None + else: + # chop off quotes from title and unescape: + return unescape_string(title[1:-1]) + + def parseLinkDestination(self): + """ + Attempt to parse link destination, returning the string or + None if no match. 
+ """ + res = self.match(reLinkDestinationBraces) + if res is None: + if self.peek() == '<': + return None + # TODO handrolled parser; res should be None or the string + savepos = self.pos + openparens = 0 + while True: + c = self.peek() + if c is None: + break + if c == '\\' and re.search( + reEscapable, self.subject[self.pos+1:self.pos+2]): + self.pos += 1 + if self.peek() is not None: + self.pos += 1 + elif c == '(': + self.pos += 1 + openparens += 1 + elif c == ')': + if openparens < 1: + break + else: + self.pos += 1 + openparens -= 1 + elif re.search(reWhitespaceChar, c): + break + else: + self.pos += 1 + if self.pos == savepos and c != ')': + return None + res = self.subject[savepos:self.pos] + return normalize_uri(unescape_string(res)) + else: + # chop off surrounding <..>: + return normalize_uri(unescape_string(res[1:-1])) + + def parseLinkLabel(self): + """ + Attempt to parse a link label, returning number of + characters parsed. + """ + # Note: our regex will allow something of form [..\]; + # we disallow it here rather than using lookahead in the regex: + m = self.match(reLinkLabel) + if m is None or len(m) > 1001: + return 0 + else: + return len(m) + + def parseOpenBracket(self, block): + """ + Add open bracket to delimiter stack and add a text node to + block's children. + """ + startpos = self.pos + self.pos += 1 + + node = text('[') + block.append_child(node) + + # Add entry to stack for this opener + self.addBracket(node, startpos, False) + return True + + def parseBang(self, block): + """ + If next character is [, and ! delimiter to delimiter stack and + add a text node to block's children. Otherwise just add a text + node. + """ + startpos = self.pos + self.pos += 1 + if self.peek() == '[': + self.pos += 1 + + node = text('![') + block.append_child(node) + + # Add entry to stack for this openeer + self.addBracket(node, startpos + 1, True) + else: + block.append_child(text('!')) + + return True + + def parseCloseBracket(self, block): + """ + Try to match close bracket against an opening in the delimiter + stack. Add either a link or image, or a plain [ character, + to block's children. If there is a matching delimiter, + remove it from the delimiter stack. + """ + title = None + matched = False + self.pos += 1 + startpos = self.pos + + # get last [ or ![ + opener = self.brackets + + if opener is None: + # no matched opener, just return a literal + block.append_child(text(']')) + return True + + if not opener.get('active'): + # no matched opener, just return a literal + block.append_child(text(']')) + # take opener off brackets stack + self.removeBracket() + return True + + # If we got here, opener is a potential opener + is_image = opener.get('image') + + # Check to see if we have a link/image + + savepos = self.pos + + # Inline link? + if self.peek() == '(': + self.pos += 1 + self.spnl() + dest = self.parseLinkDestination() + if dest is not None and self.spnl(): + # make sure there's a space before the title + if re.search(reWhitespaceChar, self.subject[self.pos-1]): + title = self.parseLinkTitle() + if self.spnl() and self.peek() == ')': + self.pos += 1 + matched = True + else: + self.pos = savepos + + if not matched: + # Next, see if there's a link label + beforelabel = self.pos + n = self.parseLinkLabel() + if n > 2: + reflabel = self.subject[beforelabel:beforelabel + n] + elif not opener.get('bracket_after'): + # Empty or missing second label means to use the first + # label as the reference. The reference must not + # contain a bracket. 
If we know there's a bracket, we + # don't even bother checking it. + reflabel = self.subject[opener.get('index'):startpos] + if n == 0: + # If shortcut reference link, rewind before spaces we skipped. + self.pos = savepos + + if reflabel: + # lookup rawlabel in refmap + link = self.refmap.get(normalize_reference(reflabel)) + if link: + dest = link['destination'] + title = link['title'] + matched = True + + if matched: + node = Node('image' if is_image else 'link', None) + + node.destination = dest + node.title = title or '' + tmp = opener.get('node').nxt + while tmp: + nxt = tmp.nxt + tmp.unlink() + node.append_child(tmp) + tmp = nxt + block.append_child(node) + self.processEmphasis(opener.get('previousDelimiter')) + self.removeBracket() + opener.get('node').unlink() + + # We remove this bracket and processEmphasis will remove + # later delimiters. + # Now, for a link, we also deactivate earlier link openers. + # (no links in links) + if not is_image: + opener = self.brackets + while opener is not None: + if not opener.get('image'): + # deactivate this opener + opener['active'] = False + opener = opener.get('previous') + + return True + else: + # no match + # remove this opener from stack + self.removeBracket() + self.pos = startpos + block.append_child(text(']')) + return True + + def addBracket(self, node, index, image): + if self.brackets is not None: + self.brackets['bracketAfter'] = True + + self.brackets = { + 'node': node, + 'previous': self.brackets, + 'previousDelimiter': self.delimiters, + 'index': index, + 'image': image, + 'active': True, + } + + def removeBracket(self): + self.brackets = self.brackets.get('previous') + + def parseEntity(self, block): + """Attempt to parse an entity.""" + m = self.match(reEntityHere) + if m: + block.append_child(text(HTMLunescape(m))) + return True + else: + return False + + def parseString(self, block): + """ + Parse a run of ordinary characters, or a single character with + a special meaning in markdown, as a plain string. + """ + m = self.match(reMain) + if m: + if self.options.get('smart'): + s = re.sub(reEllipses, '\u2026', m) + s = re.sub(reDash, lambda x: smart_dashes(x.group()), s) + block.append_child(text(s)) + else: + block.append_child(text(m)) + return True + else: + return False + + def parseNewline(self, block): + """ + Parse a newline. If it was preceded by two spaces, return a hard + line break; otherwise a soft line break. 
+ """ + # assume we're at a \n + self.pos += 1 + lastc = block.last_child + if lastc and lastc.t == 'text' and lastc.literal[-1] == ' ': + linebreak = len(lastc.literal) >= 2 and lastc.literal[-2] == ' ' + lastc.literal = re.sub(reFinalSpace, '', lastc.literal) + if linebreak: + node = Node('linebreak', None) + else: + node = Node('softbreak', None) + block.append_child(node) + else: + block.append_child(Node('softbreak', None)) + + # gobble leading spaces in next line + self.match(reInitialSpace) + return True + + def parseReference(self, s, refmap): + """Attempt to parse a link reference, modifying refmap.""" + self.subject = s + self.pos = 0 + startpos = self.pos + + # label: + match_chars = self.parseLinkLabel() + if match_chars == 0 or match_chars == 2: + return 0 + else: + rawlabel = self.subject[:match_chars] + + # colon: + if (self.peek() == ':'): + self.pos += 1 + else: + self.pos = startpos + return 0 + + # link url + self.spnl() + + dest = self.parseLinkDestination() + if dest is None: + self.pos = startpos + return 0 + + beforetitle = self.pos + self.spnl() + title = None + if self.pos != beforetitle: + title = self.parseLinkTitle() + if title is None: + title = '' + # rewind before spaces + self.pos = beforetitle + + # make sure we're at line end: + at_line_end = True + if self.match(reSpaceAtEndOfLine) is None: + if title == '': + at_line_end = False + else: + # the potential title we found is not at the line end, + # but it could still be a legal link reference if we + # discard the title + title == '' + # rewind before spaces + self.pos = beforetitle + # and instead check if the link URL is at the line end + at_line_end = self.match(reSpaceAtEndOfLine) is not None + + if not at_line_end: + self.pos = startpos + return 0 + + normlabel = normalize_reference(rawlabel) + if normlabel == '': + # label must contain non-whitespace characters + self.pos = startpos + return 0 + + if not refmap.get(normlabel): + refmap[normlabel] = { + 'destination': dest, + 'title': title + } + return (self.pos - startpos) + + def parseInline(self, block): + """ + Parse the next inline element in subject, advancing subject + position. + + On success, add the result to block's children and return True. + On failure, return False. + """ + res = False + c = self.peek() + if c is None: + return False + if c == '\n': + res = self.parseNewline(block) + elif c == '\\': + res = self.parseBackslash(block) + elif c == '`': + res = self.parseBackticks(block) + elif c == '*' or c == '_': + res = self.handleDelim(c, block) + elif c == "'" or c == '"': + res = self.options.get('smart') and self.handleDelim(c, block) + elif c == '[': + res = self.parseOpenBracket(block) + elif c == '!': + res = self.parseBang(block) + elif c == ']': + res = self.parseCloseBracket(block) + elif c == '<': + res = self.parseAutolink(block) or self.parseHtmlTag(block) + elif c == '&': + res = self.parseEntity(block) + else: + res = self.parseString(block) + + if not res: + self.pos += 1 + block.append_child(text(c)) + + return True + + def parseInlines(self, block): + """ + Parse string content in block into inline children, + using refmap to resolve references. 
+ """ + self.subject = block.string_content.strip() + self.pos = 0 + self.delimiters = None + self.brackets = None + while (self.parseInline(block)): + pass + # allow raw string to be garbage collected + block.string_content = None + self.processEmphasis(None) + + parse = parseInlines diff --git a/libs/commonmark/main.py b/libs/commonmark/main.py new file mode 100644 index 000000000..cffe63c36 --- /dev/null +++ b/libs/commonmark/main.py @@ -0,0 +1,41 @@ +# 2014 - Bibek Kafle & Roland Shoemaker +# 2015-2017 - Nikolas Nyby +# Port of @jgm's commonmark.js implementation of the CommonMark spec. + +# Basic usage: +# +# import commonmark +# parser = commonmark.Parser() +# renderer = commonmark.HtmlRenderer() +# print(renderer.render(parser.parse('Hello *world*'))) + +from __future__ import absolute_import, unicode_literals + +from commonmark.blocks import Parser +from commonmark.dump import dumpAST, dumpJSON +from commonmark.render.html import HtmlRenderer +from commonmark.render.rst import ReStructuredTextRenderer + + +def commonmark(text, format="html"): + """Render CommonMark into HTML, JSON or AST + Optional keyword arguments: + format: 'html' (default), 'json' or 'ast' + + >>> commonmark("*hello!*") + '<p><em>hello</em></p>\\n' + """ + parser = Parser() + ast = parser.parse(text) + if format not in ["html", "json", "ast", "rst"]: + raise ValueError("format must be 'html', 'json' or 'ast'") + if format == "html": + renderer = HtmlRenderer() + return renderer.render(ast) + if format == "json": + return dumpJSON(ast) + if format == "ast": + return dumpAST(ast) + if format == "rst": + renderer = ReStructuredTextRenderer() + return renderer.render(ast) diff --git a/libs/commonmark/node.py b/libs/commonmark/node.py new file mode 100644 index 000000000..4c0ed407e --- /dev/null +++ b/libs/commonmark/node.py @@ -0,0 +1,179 @@ +from __future__ import unicode_literals + +import re + + +reContainer = re.compile( + r'(document|block_quote|list|item|paragraph|' + r'heading|emph|strong|link|image|' + r'custom_inline|custom_block)') + + +def is_container(node): + return (re.search(reContainer, node.t) is not None) + + +class NodeWalker(object): + + def __init__(self, root): + self.current = root + self.root = root + self.entering = True + + def __next__(self): + cur = self.current + entering = self.entering + + if cur is None: + raise StopIteration + + container = is_container(cur) + + if entering and container: + if cur.first_child: + self.current = cur.first_child + self.entering = True + else: + # stay on node but exit + self.entering = False + elif cur == self.root: + self.current = None + elif cur.nxt is None: + self.current = cur.parent + self.entering = False + else: + self.current = cur.nxt + self.entering = True + + return cur, entering + + next = __next__ + + def __iter__(self): + return self + + def nxt(self): + """ for backwards compatibility """ + try: + cur, entering = next(self) + return { + 'entering': entering, + 'node': cur, + } + except StopIteration: + return None + + def resume_at(self, node, entering): + self.current = node + self.entering = (entering is True) + + +class Node(object): + def __init__(self, node_type, sourcepos): + self.t = node_type + self.parent = None + self.first_child = None + self.last_child = None + self.prv = None + self.nxt = None + self.sourcepos = sourcepos + self.last_line_blank = False + self.last_line_checked = False + self.is_open = True + self.string_content = '' + self.literal = None + self.list_data = {} + self.info = None + self.destination = None + 
self.title = None + self.is_fenced = False + self.fence_char = None + self.fence_length = 0 + self.fence_offset = None + self.level = None + self.on_enter = None + self.on_exit = None + + def __repr__(self): + return "Node {} [{}]".format(self.t, self.literal) + + def pretty(self): + from pprint import pprint + pprint(self.__dict__) + + def normalize(self): + prev = None + for curr, _ in self.walker(): + if prev is None: + prev = curr + continue + if prev.t == 'text' and curr.t == 'text': + prev.literal += curr.literal + curr.unlink() + else: + prev = curr + + def is_container(self): + return is_container(self) + + def append_child(self, child): + child.unlink() + child.parent = self + if self.last_child: + self.last_child.nxt = child + child.prv = self.last_child + self.last_child = child + else: + self.first_child = child + self.last_child = child + + def prepend_child(self, child): + child.unlink() + child.parent = self + if self.first_child: + self.first_child.prv = child + child.nxt = self.first_child + self.first_child = child + else: + self.first_child = child + self.last_child = child + + def unlink(self): + if self.prv: + self.prv.nxt = self.nxt + elif self.parent: + self.parent.first_child = self.nxt + + if self.nxt: + self.nxt.prv = self.prv + elif self.parent: + self.parent.last_child = self.prv + + self.parent = None + self.nxt = None + self.prv = None + + def insert_after(self, sibling): + sibling.unlink() + sibling.nxt = self.nxt + if sibling.nxt: + sibling.nxt.prv = sibling + sibling.prv = self + self.nxt = sibling + sibling.parent = self.parent + if not sibling.nxt: + sibling.parent.last_child = sibling + + def insert_before(self, sibling): + sibling.unlink() + sibling.prv = self.prv + if sibling.prv: + sibling.prv.nxt = sibling + sibling.nxt = self + self.prv = sibling + sibling.parent = self.parent + if not sibling.prv: + sibling.parent.first_child = sibling + + def walker(self): + return NodeWalker(self) diff --git a/libs/commonmark/normalize_reference.py b/libs/commonmark/normalize_reference.py new file mode 100644 index 000000000..d68a3b147 --- /dev/null +++ b/libs/commonmark/normalize_reference.py @@ -0,0 +1,165 @@ +"""Case-folding and whitespace normalization""" +# Unicode Case Folding table has been derived from the following work: +# +# CaseFolding-12.0.0.txt +# Date: 2019-01-22, 08:18:22 GMT +# (c) 2019 Unicode(R) Inc. +# Unicode and the Unicode Logo are registered trademarks +# of Unicode, Inc. in the U.S. and other countries. 
+# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ + +import re +import sys +from builtins import str, chr + +__all__ = ["normalize_reference"] + +if sys.version_info < (3,) and sys.maxunicode <= 0xffff: + # shim for Python 2.x UCS2 build + _unichr = chr + + def chr(cdp): + if 0x10000 <= cdp < 0x110000: + cdp -= 0x10000 + return (_unichr(0xd800 | (cdp >> 10)) + + _unichr(0xdc00 | (cdp & 0x3ff))) + return _unichr(cdp) + + +def _parse_table(tbl): + xlat = {} + cur_i, cur_j = -1, 0 + for entry in tbl.split(';'): + arr = entry.split(',') + info = [int(x, 36) if x else 0 for x in arr[0].split(':')] + arr = [int(x, 36) for x in arr[1:]] + assert not any(x in xlat for x in arr) + sfx = ''.join(map(chr, arr)) + streak, stride = 0, 1 + if len(info) == 2: + fdt, delta = info + elif len(info) == 3: + fdt, streak, delta = info + else: + fdt, streak, delta, stride = info + assert streak >= 0 and stride >= 1 + cur_i += fdt + 1 + cur_j -= delta + assert cur_j != 0 + i = cur_i + last = cur_i + streak + while i <= last: + # uniqueness and idempotency + assert i not in xlat and i + cur_j not in xlat + assert i not in arr + xlat[i] = chr(i + cur_j) + sfx + i += stride + return xlat + + +XLAT = _parse_table( + # ===== Start of Unicode Case Folding table ===== + '1t:p:-w;37:-kn;a:m:kn;n:6:;6:3w,37;w:1a:-31:2;1b:5k,lj;1:4:-5k:2;6:e::' + '2;f:-aa,32;:18:aa:2;19:3e;:4:-3e:2;5:7h;1:-da;:2:5t:2;3:-5p;:5p;1:1:-5' + 'o;1:5o;2:-26;:-3f;:-1;:5m;1:-5o;:-2;1:-4;:2;:5s;3:-5u;:-2;1:-1;:4:5x:2' + ';5:-61;:61;1:-61;2:61;1:-61;:61;1:1:-60;1:2:60:2;3:-62;:4:62:4;b:-1;:1' + ';1:-1;:1;1:-1;:g:1:2;i:g::2;h:av,lo;:-aw;:2:1:2;3:2q;:-15;:12:-1l:2;13' + ':3n;1:g:-3n:2;n:-8bu;:8bu;1:4k;:-8gb;2:8br;1:5g;:-7c;:-2;:8:1y:2;72:-3' + '7;16:2:37:2;5:;8:-37;6:26;1:2:1;3:-r;1:1:1;1:m,lk,ld;:g:9;h:8:;c:b,lk,' + 'ld;h:k;c:-7;:12;:-5;3:-a;:7;1:m:-n:2;n:1j;:-6;2:c;:4;1:-1t;1:8;:-8;2:2' + ':3n;2:f:-5u;f:v:1c;27:w:v:2;15:1g::2;1h:-e;:c:e:2;e:2m::2;2o:11:-1b;2d' + ':2a,136;26w:11:-5mq;12:6::6;mo:5:5m0;1on:4sm;:-1;:-9;:1:-2;1:1;:-7;:-o' + ';:-vzb;7:16:tj7;18:2:;8y:44:-2bl:2;45:5yn,mp;:-b,lk;:-2,lm;:-1,lm;:p,j' + 'i;:-5xb;2:5wx,37;1:2m:-5yk:2;2v:7:9;f:5:;f:7:;f:7:;f:5:;7:5fn,lv;1:2,l' + 'v,lc;1:2,lv,ld;1:2,lv,n6;2:6:-5ft:2;e:7:;n:7:3c,qh;7:7:8,qh;7:7:-o,qh;' + '7:7:8,qh;7:7:-1k,qh;7:7:8,qh;9:-6,qh;:5hc,qh;:6,qh;1:-3,n6;:1,n6,qh;:1' + ':-5j2;1:1:1u;1:5hd,qh;1:-6;3:-5h3,qh;:5ha,qh;:a,qh;1:-7,n6;:1,n6,qh;:3' + ':-5h6;3:5hb,qh;5:4,lk,lc;:1,lk,ld;2:3,n6;:1,lk,n6;:1:-5jq;1:1:2k;7:5h5' + ',lk,lc;:1,lk,ld;:5,lv;1:-2,n6;:1,lk,n6;:1:-5ju;1:1:2w;1:-2x;5:33,qh;:5' + 'h0,qh;:-4,qh;1:7,n6;:1,n6,qh;:1:-5gu;1:1:-2;1:5h1,qh;89:8a;3:o2;:-3d;6' + ':-6ea;19:f:c;y:f;mq:p:-p;1ft:1a:-m;2n:1b;1:8ag;:-5ch;:5c1;2:4:-8a0:2;5' + ':8bh;:-v;:y;:-1;1:3:-8bj:3;b:1:8cg;1:2q:-8cg:2;2y:2::2;6:nym::nym;nyn:' + '16::2;1p:q::2;4h:c::2;f:1o::2;1y:2::2;3:r9h;:8:-r9h:2;c:;1:wmh;2:2:-wm' + 'h:2;5:i::2;j:wn9;:b;:-4;:-a;:3;1:-1e;:o;:-l;:-xbp;:a:pr:2;d:;1:1d;:wlv' + ';:-5cb;q1:27:2oo;fpr:jii,2u;:1,2x;:1,30;:1,2u,2x;:1,2u,30;:-c,38;:1,38' + ';c:-z8,12u;:1,12d;:1,12j;:-9,12u;:b,12l;sp:p:-1cjn;ym:13:-8;4v:z:;1jj:' + '1e:-o;2e7:v:w;gwv:v:;o8v:x:-2' + # ===== End of Unicode Case Folding table ===== +) + + +def _check_native(tbl): + """ + Determine if Python's own native implementation + subsumes the supplied case folding table + """ + try: + for i in tbl: + stv = chr(i) + if stv.casefold() == stv: + return False + except AttributeError: + return False + return True + + +# Hoist version check 
out of function for performance +SPACE_RE = re.compile(r'[ \t\r\n]+') +if _check_native(XLAT): + def normalize_reference(string): + """ + Normalize reference label: collapse internal whitespace + to single space, remove leading/trailing whitespace, case fold. + """ + return SPACE_RE.sub(' ', string[1:-1].strip()).casefold() +elif sys.version_info >= (3,) or sys.maxunicode > 0xffff: + def normalize_reference(string): + """ + Normalize reference label: collapse internal whitespace + to single space, remove leading/trailing whitespace, case fold. + """ + return SPACE_RE.sub(' ', string[1:-1].strip()).translate(XLAT) +else: + def _get_smp_regex(): + xls = sorted(x - 0x10000 for x in XLAT if x >= 0x10000) + xls.append(-1) + fmt, (dsh, opn, pip, cse) = str('\\u%04x'), str('-[|]') + rga, srk, erk = [str(r'[ \t\r\n]+')], 0, -2 + for k in xls: + new_hir = (erk ^ k) >> 10 != 0 + if new_hir or erk + 1 != k: + if erk >= 0 and srk != erk: + if srk + 1 != erk: + rga.append(dsh) + rga.append(fmt % (0xdc00 + (erk & 0x3ff))) + if new_hir: + if erk >= 0: + rga.append(cse) + if k < 0: + break + rga.append(pip) + rga.append(fmt % (0xd800 + (k >> 10))) + rga.append(opn) + srk = k + rga.append(fmt % (0xdc00 + (srk & 0x3ff))) + erk = k + return re.compile(str().join(rga)) + + def _subst_handler(matchobj): + src = matchobj.group(0) + hiv = ord(src[0]) + if hiv < 0xd800: + return ' ' + return XLAT[0x10000 + ((hiv & 0x3ff) << 10) | (ord(src[1]) & 0x3ff)] + + SMP_RE = _get_smp_regex() + + def normalize_reference(string): + """ + Normalize reference label: collapse internal whitespace + to single space, remove leading/trailing whitespace, case fold. + """ + return SMP_RE.sub(_subst_handler, string[1:-1].strip()).translate(XLAT) diff --git a/libs/commonmark/render/__init__.py b/libs/commonmark/render/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/libs/commonmark/render/__init__.py diff --git a/libs/commonmark/render/html.py b/libs/commonmark/render/html.py new file mode 100644 index 000000000..b4ea345d5 --- /dev/null +++ b/libs/commonmark/render/html.py @@ -0,0 +1,228 @@ +from __future__ import unicode_literals + + +import re +from builtins import str +from commonmark.common import escape_xml +from commonmark.render.renderer import Renderer + + +reUnsafeProtocol = re.compile( + r'^javascript:|vbscript:|file:|data:', re.IGNORECASE) +reSafeDataProtocol = re.compile( + r'^data:image\/(?:png|gif|jpeg|webp)', re.IGNORECASE) + + +def potentially_unsafe(url): + return re.search(reUnsafeProtocol, url) and \ + (not re.search(reSafeDataProtocol, url)) + + +class HtmlRenderer(Renderer): + def __init__(self, options={}): + # by default, soft breaks are rendered as newlines in HTML + options['softbreak'] = options.get('softbreak') or '\n' + # set to "<br />" to make them hard breaks + # set to " " if you want to ignore line wrapping in source + + self.disable_tags = 0 + self.last_out = '\n' + self.options = options + + def escape(self, text): + return escape_xml(text) + + def tag(self, name, attrs=None, selfclosing=None): + """Helper function to produce an HTML tag.""" + if self.disable_tags > 0: + return + + self.buf += '<' + name + if attrs and len(attrs) > 0: + for attrib in attrs: + self.buf += ' ' + attrib[0] + '="' + attrib[1] + '"' + + if selfclosing: + self.buf += ' /' + + self.buf += '>' + self.last_out = '>' + + # Node methods # + + def text(self, node, entering=None): + self.out(node.literal) + + def softbreak(self, node=None, entering=None): + self.lit(self.options['softbreak']) + + def 
linebreak(self, node=None, entering=None): + self.tag('br', [], True) + self.cr() + + def link(self, node, entering): + attrs = self.attrs(node) + if entering: + if not (self.options.get('safe') and + potentially_unsafe(node.destination)): + attrs.append(['href', self.escape(node.destination)]) + + if node.title: + attrs.append(['title', self.escape(node.title)]) + + self.tag('a', attrs) + else: + self.tag('/a') + + def image(self, node, entering): + if entering: + if self.disable_tags == 0: + if self.options.get('safe') and \ + potentially_unsafe(node.destination): + self.lit('<img src="" alt="') + else: + self.lit('<img src="' + + self.escape(node.destination) + + '" alt="') + self.disable_tags += 1 + else: + self.disable_tags -= 1 + if self.disable_tags == 0: + if node.title: + self.lit('" title="' + self.escape(node.title)) + self.lit('" />') + + def emph(self, node, entering): + self.tag('em' if entering else '/em') + + def strong(self, node, entering): + self.tag('strong' if entering else '/strong') + + def paragraph(self, node, entering): + grandparent = node.parent.parent + attrs = self.attrs(node) + if grandparent is not None and grandparent.t == 'list': + if grandparent.list_data['tight']: + return + + if entering: + self.cr() + self.tag('p', attrs) + else: + self.tag('/p') + self.cr() + + def heading(self, node, entering): + tagname = 'h' + str(node.level) + attrs = self.attrs(node) + if entering: + self.cr() + self.tag(tagname, attrs) + else: + self.tag('/' + tagname) + self.cr() + + def code(self, node, entering): + self.tag('code') + self.out(node.literal) + self.tag('/code') + + def code_block(self, node, entering): + info_words = node.info.split() if node.info else [] + attrs = self.attrs(node) + if len(info_words) > 0 and len(info_words[0]) > 0: + attrs.append(['class', 'language-' + + self.escape(info_words[0])]) + + self.cr() + self.tag('pre') + self.tag('code', attrs) + self.out(node.literal) + self.tag('/code') + self.tag('/pre') + self.cr() + + def thematic_break(self, node, entering): + attrs = self.attrs(node) + self.cr() + self.tag('hr', attrs, True) + self.cr() + + def block_quote(self, node, entering): + attrs = self.attrs(node) + if entering: + self.cr() + self.tag('blockquote', attrs) + self.cr() + else: + self.cr() + self.tag('/blockquote') + self.cr() + + def list(self, node, entering): + tagname = 'ul' if node.list_data['type'] == 'bullet' else 'ol' + attrs = self.attrs(node) + if entering: + start = node.list_data['start'] + if start is not None and start != 1: + attrs.append(['start', str(start)]) + + self.cr() + self.tag(tagname, attrs) + self.cr() + else: + self.cr() + self.tag('/' + tagname) + self.cr() + + def item(self, node, entering): + attrs = self.attrs(node) + if entering: + self.tag('li', attrs) + else: + self.tag('/li') + self.cr() + + def html_inline(self, node, entering): + if self.options.get('safe'): + self.lit('<!-- raw HTML omitted -->') + else: + self.lit(node.literal) + + def html_block(self, node, entering): + self.cr() + if self.options.get('safe'): + self.lit('<!-- raw HTML omitted -->') + else: + self.lit(node.literal) + self.cr() + + def custom_inline(self, node, entering): + if entering and node.on_enter: + self.lit(node.on_enter) + elif (not entering) and node.on_exit: + self.lit(node.on_exit) + + def custom_block(self, node, entering): + self.cr() + if entering and node.on_enter: + self.lit(node.on_enter) + elif (not entering) and node.on_exit: + self.lit(node.on_exit) + self.cr() + + # Helper methods # + + def out(self, s): + 
self.lit(self.escape(s)) + + def attrs(self, node): + att = [] + if self.options.get('sourcepos'): + pos = node.sourcepos + if pos: + att.append(['data-sourcepos', str(pos[0][0]) + ':' + + str(pos[0][1]) + '-' + str(pos[1][0]) + ':' + + str(pos[1][1])]) + + return att diff --git a/libs/commonmark/render/renderer.py b/libs/commonmark/render/renderer.py new file mode 100644 index 000000000..058f86bb2 --- /dev/null +++ b/libs/commonmark/render/renderer.py @@ -0,0 +1,43 @@ +from __future__ import unicode_literals + + +class Renderer(object): + def render(self, ast): + """Walks the AST and calls member methods for each Node type. + + @param ast {Node} The root of the abstract syntax tree. + """ + walker = ast.walker() + + self.buf = '' + self.last_out = '\n' + + event = walker.nxt() + while event is not None: + type_ = event['node'].t + if hasattr(self, type_): + getattr(self, type_)(event['node'], event['entering']) + event = walker.nxt() + + return self.buf + + def lit(self, s): + """Concatenate a literal string to the buffer. + + @param str {String} The string to concatenate. + """ + self.buf += s + self.last_out = s + + def cr(self): + if self.last_out != '\n': + self.lit('\n') + + def out(self, s): + """Concatenate a string to the buffer possibly escaping the content. + + Concrete renderer implementations should override this method. + + @param str {String} The string to concatenate. + """ + self.lit(s) diff --git a/libs/commonmark/render/rst.py b/libs/commonmark/render/rst.py new file mode 100644 index 000000000..a18f7b233 --- /dev/null +++ b/libs/commonmark/render/rst.py @@ -0,0 +1,159 @@ +from __future__ import unicode_literals + + +from commonmark.render.renderer import Renderer + + +class ReStructuredTextRenderer(Renderer): + """ + Render reStructuredText from Markdown + + Example: + + .. code:: python + + import commonmark + + parser = commonmark.Parser() + ast = parser.parse('Hello `inline code` example') + + renderer = commonmark.ReStructuredTextRenderer() + rst = renderer.render(ast) + print(rst) # Hello ``inline code`` example + """ + def __init__(self, indent_char=' '): + self.indent_char = indent_char + self.indent_length = 0 + + def lit(self, s): + if s == '\n': + indent = '' # Avoid whitespace if we're just adding a newline + elif self.last_out != '\n': + indent = '' # Don't indent if we're in the middle of a line + else: + indent = self.indent_char * self.indent_length + + return super(ReStructuredTextRenderer, self).lit(indent + s) + + def cr(self): + self.lit('\n') + + def indent_lines(self, literal, indent_length=4): + indent = self.indent_char * indent_length + new_lines = [] + + for line in literal.splitlines(): + new_lines.append(indent + line) + + return '\n'.join(new_lines) + + # Nodes + + def document(self, node, entering): + pass + + def softbreak(self, node, entering): + self.cr() + + def linebreak(self, node, entering): + self.cr() + self.cr() + + def text(self, node, entering): + self.out(node.literal) + + def emph(self, node, entering): + self.out('*') + + def strong(self, node, entering): + self.out('**') + + def paragraph(self, node, entering): + if node.parent.t == 'item': + pass + else: + self.cr() + + def link(self, node, entering): + if entering: + self.out('`') + else: + self.out(' <%s>`_' % node.destination) + + def image(self, node, entering): + directive = '.. 
image:: ' + node.destination + + if entering: + self.out(directive) + self.cr() + self.indent_length += 4 + self.out(':alt: ') + else: + self.indent_length -= 4 + + def code(self, node, entering): + self.out('``') + self.out(node.literal) + self.out('``') + + def code_block(self, node, entering): + directive = '.. code::' + language_name = None + + info_words = node.info.split() if node.info else [] + if len(info_words) > 0 and len(info_words[0]) > 0: + language_name = info_words[0] + + if language_name: + directive += ' ' + language_name + + self.cr() + self.out(directive) + self.cr() + self.cr() + self.out(self.indent_lines(node.literal)) + self.cr() + + def list(self, node, entering): + if entering: + self.cr() + + def item(self, node, entering): + tagname = '*' if node.list_data['type'] == 'bullet' else '#.' + + if entering: + self.out(tagname + ' ') + else: + self.cr() + + def block_quote(self, node, entering): + if entering: + self.indent_length += 4 + else: + self.indent_length -= 4 + + def heading(self, node, entering): + heading_chars = [ + '#', + '*', + '=', + '-', + '^', + '"' + ] + + try: + heading_char = heading_chars[node.level-1] + except IndexError: + # Default to the last level if we're in too deep + heading_char = heading_chars[-1] + + heading_length = len(node.first_child.literal) + banner = heading_char * heading_length + + if entering: + self.cr() + else: + self.cr() + self.out(banner) + self.cr() diff --git a/libs/commonmark/tests/__init__.py b/libs/commonmark/tests/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/libs/commonmark/tests/__init__.py diff --git a/libs/commonmark/tests/rst_tests.py b/libs/commonmark/tests/rst_tests.py new file mode 100644 index 000000000..b8fa89009 --- /dev/null +++ b/libs/commonmark/tests/rst_tests.py @@ -0,0 +1,172 @@ +import unittest + +import commonmark + + +class TestCommonmark(unittest.TestCase): + def setUp(self): + self.parser = commonmark.Parser() + self.renderer = commonmark.ReStructuredTextRenderer() + + def render_rst(self, test_str): + ast = self.parser.parse(test_str) + rst = self.renderer.render(ast) + + return rst + + def assertEqualRender(self, src_markdown, expected_rst): + rendered_rst = self.render_rst(src_markdown) + self.assertEqual(rendered_rst, expected_rst) + + def test_strong(self): + src_markdown = 'Hello **Strong**' + expected_rst = '\nHello **Strong**\n' + self.assertEqualRender(src_markdown, expected_rst) + + def test_emphasis(self): + src_markdown = 'Hello *Emphasis*' + expected_rst = '\nHello *Emphasis*\n' + self.assertEqualRender(src_markdown, expected_rst) + + def test_paragraph(self): + src_markdown = 'Hello paragraph' + expected_rst = '\nHello paragraph\n' + self.assertEqualRender(src_markdown, expected_rst) + + def test_link(self): + src_markdown = '[Link](http://example.com)' + expected_rst = '\n`Link <http://example.com>`_\n' + self.assertEqualRender(src_markdown, expected_rst) + + def test_image(self): + src_markdown = '![Image](http://placekitten.com/100/100)' + expected_rst = """ +.. image:: http://placekitten.com/100/100 + :alt: Image +""" + self.assertEqualRender(src_markdown, expected_rst) + + def test_code(self): + src_markdown = 'Test `inline code` with backticks' + expected_rst = '\nTest ``inline code`` with backticks\n' + self.assertEqualRender(src_markdown, expected_rst) + + def test_code_block(self): + src_markdown = """ +```python +# code block +print '3 backticks or' +print 'indent 4 spaces' +``` +""" + expected_rst = """ +.. 
code:: python + + # code block + print '3 backticks or' + print 'indent 4 spaces' +""" + self.assertEqualRender(src_markdown, expected_rst) + + def test_unordered_list(self): + src_markdown = """ +This is a list: +* List item +* List item +* List item +""" + expected_rst = """ +This is a list: + +* List item +* List item +* List item +""" + self.assertEqualRender(src_markdown, expected_rst) + + def test_ordered_list(self): + src_markdown = """ +This is a ordered list: +1. One +2. Two +3. Three +""" + expected_rst = """ +This is a ordered list: + +#. One +#. Two +#. Three +""" + self.assertEqualRender(src_markdown, expected_rst) + + def test_block_quote(self): + src_markdown = """ +Before the blockquote: + +> The blockquote + +After the blockquote +""" + expected_rst = """ +Before the blockquote: + + The blockquote + +After the blockquote +""" + self.assertEqualRender(src_markdown, expected_rst) + + def test_heading(self): + src_markdown = ''' +# Heading 1 + +## Heading 2 + +### Heading 3 + +#### Heading 4 + +##### Heading 5 + +###### Heading 6 +''' + expected_rst = ''' +Heading 1 +######### + +Heading 2 +********* + +Heading 3 +========= + +Heading 4 +--------- + +Heading 5 +^^^^^^^^^ + +Heading 6 +""""""""" +''' + self.assertEqualRender(src_markdown, expected_rst) + + def test_multiple_paragraphs(self): + src_markdown = ''' +Start of first paragraph that +continues on a new line + +This is the second paragraph +''' + expected_rst = ''' +Start of first paragraph that +continues on a new line + +This is the second paragraph +''' + self.assertEqualRender(src_markdown, expected_rst) + + +if __name__ == '__main__': + unittest.main() diff --git a/libs/commonmark/tests/run_spec_tests.py b/libs/commonmark/tests/run_spec_tests.py new file mode 100644 index 000000000..623f562b6 --- /dev/null +++ b/libs/commonmark/tests/run_spec_tests.py @@ -0,0 +1,242 @@ +#!/usr/bin/env python +# coding: utf-8 +from __future__ import division, print_function, unicode_literals +import re +import timeit +import codecs +import argparse +import sys +from builtins import str +from commonmark.render.html import HtmlRenderer +from commonmark.main import Parser, dumpAST + + +class colors(object): + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + + +def trace_calls(frame, event, arg): + co = frame.f_code + func_name = co.co_name + if func_name == "write": + return + line_no = frame.f_lineno + filename = co.co_filename + if event == "call" and not re.match("__", func_name) and \ + re.search("CommonMark.py", filename) \ + and func_name != "dumpAST": + print("-> " + frame.f_back.f_code.co_name + + " at " + str(frame.f_back.f_lineno) + + " called " + func_name + " at " + str(line_no) + + " in " + filename) + return trace_calls + + +def main(): + parser = argparse.ArgumentParser( + description="script to run the CommonMark specification tests " + + "against the CommonMark.py parser") + parser.add_argument( + '-t', + help="Single test to run or comma separated list " + + "of tests (-t 10 or -t 10,11,12,13)") + parser.add_argument( + '-p', + action="store_true", + help="Print passed test information") + parser.add_argument( + '-f', + action="store_true", + help="Print failed tests (during -np...)") + parser.add_argument( + '-i', + action="store_true", + help="Interactive Markdown input mode") + parser.add_argument( + '-d', + action="store_true", + help="Debug, trace calls") + parser.add_argument( + '-np', + action="store_true", + help="Only print 
section header, tick, or cross") + parser.add_argument( + '-s', + action="store_true", + help="Print percent of tests passed by category") + args = parser.parse_args() + + if args.d: + sys.settrace(trace_calls) + + renderer = HtmlRenderer() + parser = Parser() + + f = codecs.open("spec.txt", encoding="utf-8") + datalist = [] + for line in f: + datalist.append(line) + data = "".join(datalist) + passed = 0 + failed = 0 + catStats = {} + examples = [] + example_number = 0 + current_section = "" + tabChar = '\u2192' + spaceChar = '\u2423' + nbspChar = '\u00A0' + + def showSpaces(t): + t = re.sub("\\t", tabChar, t) + t = re.sub(" ", spaceChar, t) + t = re.sub(nbspChar, spaceChar, t) + return t + + t = re.sub("\r\n", "\n", data) + + tests = re.sub( + re.compile("^<!-- END TESTS -->(.|[\n])*", flags=re.M), '', t) + testMatch = re.findall( + re.compile( + r'^`{32} example\n' + r'([\s\S]*?)^\.\n([\s\S]*?)' + r'^`{32}$' + r'|^#{1,6} *(.*)$', + re.M), + tests) + + for match in testMatch: + if not match[2] == "": + current_section = match[2] + else: + example_number += 1 + examples.append({ + 'markdown': match[0], + 'html': match[1], + 'section': current_section, + 'number': example_number}) + + current_section = "" + + startTime = timeit.default_timer() + + if args.i: + print( + colors.OKGREEN + + "(To end input of Markdown block enter 'end' on " + + "it's own line, to quit enter 'quit')" + + colors.ENDC) + while True: + s = "" + while True: + if sys.version_info >= (3, 0): + inp = input(colors.OKBLUE + 'Markdown: ' + colors.ENDC) + else: + inp = raw_input(colors.OKBLUE + 'Markdown: ' + colors.ENDC) # noqa + + if not inp == "end" and inp != "quit": + s += inp + "\n" + elif inp == "end": + s = s[:-1] + break + elif inp == "quit": + print(colors.HEADER+"bye!"+colors.ENDC) + exit(0) + ast = parser.parse(s) + html = renderer.render(ast) + print(colors.WARNING+"="*10+"AST====="+colors.ENDC) + dumpAST(ast) + print(colors.WARNING+"="*10+"HTML===="+colors.ENDC) + print(html) + + # some tests? 
+ if args.t: + tests = args.t.split(",") + choice_examples = [] + for t in tests: + if not t == "" and len(examples) > int(t): + choice_examples.append(examples[int(t)-1]) + examples = choice_examples + + # all tests + + for i, example in enumerate(examples): # [0,examples[0]] + if not example['section'] == "" and \ + not current_section == example['section']: + print('\n' + colors.HEADER + '[' + example['section'] + ']' + + colors.ENDC + ' ', end='') + current_section = example['section'] + catStats.update({current_section: [0, 0, 0]}) + + catStats[current_section][2] += 1 + if args.d: + print(colors.HEADER+"[Parsing]"+colors.ENDC) + ast = parser.parse(re.sub(tabChar, "\t", example['markdown'])) + if args.d: + print(colors.HEADER+"[Rendering]"+colors.ENDC) + actual = renderer.render(ast) + if re.sub('\t', tabChar, actual) == example['html']: + passed += 1 + catStats[current_section][0] += 1 + if not args.f: + print(colors.OKGREEN + '✓' + colors.ENDC, end='') + if args.d: + dumpAST(ast) + if args.p or args.d and not args.np: + print( + colors.OKBLUE + + "=== markdown ===============\n" + + colors.ENDC + showSpaces(example['markdown']) + + colors.OKBLUE + + "\n=== expected ===============\n" + + colors.ENDC + showSpaces(example['html']) + + colors.OKBLUE + + "\n=== got ====================\n" + + colors.ENDC + showSpaces(actual)) + else: + failed += 1 + catStats[current_section][1] += 1 + if args.t: + print("Test #" + str(args.t.split(",")[i]), end='') + else: + print("Test #" + str(i+1), end='') + print(' ' + colors.FAIL + "✗" + colors.ENDC) + if args.d: + dumpAST(ast) + if not args.np or args.f: + print( + colors.WARNING + + "=== markdown ===============\n" + + colors.ENDC + showSpaces(example['markdown']) + + colors.WARNING + + "\n=== expected ===============\n" + + colors.ENDC + showSpaces(example['html']) + + colors.WARNING + + "\n=== got ====================\n" + + colors.ENDC + showSpaces(actual)) + + print('\n' + str(passed) + ' tests passed, ' + str(failed) + ' failed') + + endTime = timeit.default_timer() + runTime = endTime - startTime + + if args.s: + for i in catStats.keys(): + per = catStats[i][0]/catStats[i][2] + print(colors.HEADER + "[" + i + "]" + colors.ENDC + + "\t" + str(per*100) + "% Passed") + + print("runtime: " + str(runTime) + "s") + + if (failed > 0): + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/libs/commonmark/tests/unit_tests.py b/libs/commonmark/tests/unit_tests.py new file mode 100644 index 000000000..e9efef9ba --- /dev/null +++ b/libs/commonmark/tests/unit_tests.py @@ -0,0 +1,157 @@ +from __future__ import unicode_literals + +import unittest + +try: + from hypothesis import given, example +except ImportError: + # Mock out hypothesis stuff for python 2.6 + def given(a): + def func(b): + return + return func + + example = given + +try: + from hypothesis.strategies import text +except ImportError: + def text(): + pass + + +import commonmark +from commonmark.blocks import Parser +from commonmark.render.html import HtmlRenderer +from commonmark.inlines import InlineParser +from commonmark.node import NodeWalker, Node + + +class TestCommonmark(unittest.TestCase): + def test_output(self): + s = commonmark.commonmark('*hello!*') + self.assertEqual(s, '<p><em>hello!</em></p>\n') + + def test_unicode(self): + s = commonmark.commonmark('<div>\u2020</div>\n') + self.assertEqual(s, '<div>\u2020</div>\n', + 'Unicode works in an HTML block.') + commonmark.commonmark('* unicode: \u2020') + commonmark.commonmark('# unicode: \u2020') + 
commonmark.commonmark('```\n# unicode: \u2020\n```') + + def test_null_string_bug(self): + s = commonmark.commonmark('> sometext\n>\n\n') + self.assertEqual( + s, + '<blockquote>\n<pre><code>sometext\n</code></pre>' + '\n</blockquote>\n') + + def test_normalize_contracts_text_nodes(self): + md = '_a' + ast = Parser().parse(md) + + def assert_text_literals(text_literals): + walker = ast.walker() + document, _ = walker.next() + self.assertEqual(document.t, 'document') + paragraph, _ = walker.next() + self.assertEqual(paragraph.t, 'paragraph') + for literal in text_literals: + text, _ = walker.next() + self.assertEqual(text.t, 'text') + self.assertEqual(text.literal, literal) + paragraph, _ = walker.next() + self.assertEqual(paragraph.t, 'paragraph') + + assert_text_literals(['_', 'a']) + ast.normalize() + # assert text nodes are contracted + assert_text_literals(['_a']) + ast.normalize() + # assert normalize() doesn't alter a normalized ast + assert_text_literals(['_a']) + + def test_dumpAST_orderedlist(self): + md = '1.' + ast = Parser().parse(md) + commonmark.dumpAST(ast) + + @given(text()) + def test_random_text(self, s): + commonmark.commonmark(s) + + def test_smart_dashes(self): + md = 'a - b -- c --- d ---- e ----- f' + EM = '\u2014' + EN = '\u2013' + expected_html = ( + '<p>' + + 'a - ' + + 'b ' + EN + ' ' + + 'c ' + EM + ' ' + + 'd ' + EN + EN + ' ' + + 'e ' + EM + EN + ' ' + + 'f</p>\n') + parser = commonmark.Parser(options=dict(smart=True)) + ast = parser.parse(md) + renderer = commonmark.HtmlRenderer() + html = renderer.render(ast) + self.assertEqual(html, expected_html) + + def test_regex_vulnerability_link_label(self): + i = 200 + while i <= 2000: + s = commonmark.commonmark('[' + ('\\' * i) + '\n') + self.assertEqual(s, '<p>' + '[' + ('\\' * (i // 2)) + '</p>\n', + '[\\\\... %d deep' % (i,)) + i *= 10 + + def test_regex_vulnerability_link_destination(self): + i = 200 + while i <= 2000: + s = commonmark.commonmark(('[](' * i) + '\n') + self.assertEqual(s, '<p>' + ('[](' * i) + '</p>\n', + '[]( %d deep' % (i,)) + i *= 10 + + +class TestHtmlRenderer(unittest.TestCase): + def test_init(self): + HtmlRenderer() + + +class TestInlineParser(unittest.TestCase): + def test_init(self): + InlineParser() + + +class TestNode(unittest.TestCase): + def test_doc_node(self): + Node('document', [[1, 1], [0, 0]]) + + +class TestNodeWalker(unittest.TestCase): + def test_node_walker(self): + node = Node('document', [[1, 1], [0, 0]]) + NodeWalker(node) + + def test_node_walker_iter(self): + node = Node('document', [[1, 1], [0, 0]]) + for subnode, entered in node.walker(): + pass + + +class TestParser(unittest.TestCase): + def setUp(self): + self.parser = Parser() + + @given(text()) + @example('') + @example('* unicode: \u2020') + def test_text(self, s): + self.parser.parse(s) + + +if __name__ == '__main__': + unittest.main() |
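Usage sketch (illustrative only, not part of the commit above): the vendored modules fit together as follows. commonmark.blocks.Parser turns Markdown into an AST of Node objects (node.py), Node.walker() iterates that tree as (node, entering) pairs, and the renderers in render/html.py and render/rst.py serialize it back out, with commonmark.main.commonmark() wrapping the common case. The minimal sketch below assumes the vendored libs/ directory is on sys.path so that "import commonmark" resolves to this package, as bazarr arranges for its other bundled libraries; the sample input string is invented for illustration.

    import commonmark

    # Build the AST (commonmark.blocks.Parser).
    parser = commonmark.Parser()
    ast = parser.parse('Hello *world*, see [the spec](https://spec.commonmark.org/).')

    # Walk the tree: NodeWalker yields (node, entering) pairs; container
    # nodes are visited twice (entering and leaving), leaf nodes once.
    for node, entering in ast.walker():
        if entering:
            print(node.t, repr(node.literal))

    # Render the same tree as HTML or reStructuredText.
    html = commonmark.HtmlRenderer().render(ast)
    rst = commonmark.ReStructuredTextRenderer().render(ast)

    # One-shot helper from commonmark.main; it also accepts
    # format='json', 'ast' (dumpJSON / dumpAST) or 'rst'.
    print(commonmark.commonmark('*hello!*'))   # '<p><em>hello!</em></p>\n'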