diff options
author | Louis Vézina <[email protected]> | 2019-09-13 15:14:31 -0400 |
---|---|---|
committer | Louis Vézina <[email protected]> | 2019-09-13 15:14:31 -0400 |
commit | c5fa0f56e41ee03fd7b02f6474cbb4fd1bdf08e6 (patch) | |
tree | ed41edcfd2c977a989216a69fb5887d70ebb1098 /libs/bs4 | |
parent | 645952c61aba3cccb5ca919be966a7ba02d853fa (diff) | |
download | bazarr-c5fa0f56e41ee03fd7b02f6474cbb4fd1bdf08e6.tar.gz bazarr-c5fa0f56e41ee03fd7b02f6474cbb4fd1bdf08e6.zip |
WIP
Diffstat (limited to 'libs/bs4')
-rw-r--r-- | libs/bs4/formatter.py | 99 |
1 files changed, 99 insertions, 0 deletions
diff --git a/libs/bs4/formatter.py b/libs/bs4/formatter.py new file mode 100644 index 000000000..7dbaa3850 --- /dev/null +++ b/libs/bs4/formatter.py @@ -0,0 +1,99 @@ +from bs4.dammit import EntitySubstitution + +class Formatter(EntitySubstitution): + """Describes a strategy to use when outputting a parse tree to a string. + + Some parts of this strategy come from the distinction between + HTML4, HTML5, and XML. Others are configurable by the user. + """ + # Registries of XML and HTML formatters. + XML_FORMATTERS = {} + HTML_FORMATTERS = {} + + HTML = 'html' + XML = 'xml' + + HTML_DEFAULTS = dict( + cdata_containing_tags=set(["script", "style"]), + ) + + def _default(self, language, value, kwarg): + if value is not None: + return value + if language == self.XML: + return set() + return self.HTML_DEFAULTS[kwarg] + + def __init__( + self, language=None, entity_substitution=None, + void_element_close_prefix='/', cdata_containing_tags=None, + ): + """ + + :param void_element_close_prefix: By default, represent void + elements as <tag/> rather than <tag> + """ + self.language = language + self.entity_substitution = entity_substitution + self.void_element_close_prefix = void_element_close_prefix + self.cdata_containing_tags = self._default( + language, cdata_containing_tags, 'cdata_containing_tags' + ) + + def substitute(self, ns): + """Process a string that needs to undergo entity substitution.""" + if not self.entity_substitution: + return ns + from .element import NavigableString + if (isinstance(ns, NavigableString) + and ns.parent is not None + and ns.parent.name in self.cdata_containing_tags): + # Do nothing. + return ns + # Substitute. + return self.entity_substitution(ns) + + def attribute_value(self, value): + """Process the value of an attribute.""" + return self.substitute(value) + + def attributes(self, tag): + """Reorder a tag's attributes however you want.""" + return sorted(tag.attrs.items()) + + +class HTMLFormatter(Formatter): + REGISTRY = {} + def __init__(self, *args, **kwargs): + return super(HTMLFormatter, self).__init__(self.HTML, *args, **kwargs) + + +class XMLFormatter(Formatter): + REGISTRY = {} + def __init__(self, *args, **kwargs): + return super(XMLFormatter, self).__init__(self.XML, *args, **kwargs) + + +# Set up aliases for the default formatters. +HTMLFormatter.REGISTRY['html'] = HTMLFormatter( + entity_substitution=EntitySubstitution.substitute_html +) +HTMLFormatter.REGISTRY["html5"] = HTMLFormatter( + entity_substitution=EntitySubstitution.substitute_html, + void_element_close_prefix = None +) +HTMLFormatter.REGISTRY["minimal"] = HTMLFormatter( + entity_substitution=EntitySubstitution.substitute_xml +) +HTMLFormatter.REGISTRY[None] = HTMLFormatter( + entity_substitution=None +) +XMLFormatter.REGISTRY["html"] = XMLFormatter( + entity_substitution=EntitySubstitution.substitute_html +) +XMLFormatter.REGISTRY["minimal"] = XMLFormatter( + entity_substitution=EntitySubstitution.substitute_xml +) +XMLFormatter.REGISTRY[None] = Formatter( + Formatter(Formatter.XML, entity_substitution=None) +) |