diff options
62 files changed, 962 insertions, 19046 deletions
@@ -2,6 +2,7 @@ from __future__ import absolute_import from __future__ import print_function +from six import PY3 import subprocess as sp import time import os @@ -43,7 +44,10 @@ def start_bazarr(): print("Bazarr starting...") try: for line in iter(ep.stdout.readline, ''): - sys.stdout.buffer.write(line) + if PY3: + sys.stdout.buffer.write(line) + else: + sys.stdout.write(line) except KeyboardInterrupt: pass diff --git a/libs/apprise/config/ConfigBase.py b/libs/apprise/config/ConfigBase.py index d5acc4af1..10d05a37a 100644 --- a/libs/apprise/config/ConfigBase.py +++ b/libs/apprise/config/ConfigBase.py @@ -26,7 +26,14 @@ import os import re import six -import yaml + +try: + import yaml +except ImportError: + if six.PY3: + import yaml3 as yaml + else: + import yaml2 as yaml from .. import plugins from ..AppriseAsset import AppriseAsset diff --git a/libs/bs4/__init__.py b/libs/bs4/__init__.py index 95ca229c1..7a80452f7 100644 --- a/libs/bs4/__init__.py +++ b/libs/bs4/__init__.py @@ -17,17 +17,18 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/ """ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + __author__ = "Leonard Richardson ([email protected])" -__version__ = "4.8.0" -__copyright__ = "Copyright (c) 2004-2019 Leonard Richardson" -# Use of this source code is governed by the MIT license. +__version__ = "4.6.0" +__copyright__ = "Copyright (c) 2004-2017 Leonard Richardson" __license__ = "MIT" __all__ = ['BeautifulSoup'] import os import re -import sys import traceback import warnings @@ -49,7 +50,7 @@ from .element import ( # The very first thing we do is give a useful error if someone is # running this code under Python 3 without converting it. -'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' +'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' class BeautifulSoup(Tag): """ @@ -73,7 +74,7 @@ class BeautifulSoup(Tag): like HTML's <br> tag), call handle_starttag and then handle_endtag. """ - ROOT_TAG_NAME = '[document]' + ROOT_TAG_NAME = u'[document]' # If the end-user gives no indication which tree builder they # want, look for one with these features. @@ -81,56 +82,16 @@ class BeautifulSoup(Tag): ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' - NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n" + NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP})\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n" def __init__(self, markup="", features=None, builder=None, parse_only=None, from_encoding=None, exclude_encodings=None, **kwargs): - """Constructor. - - :param markup: A string or a file-like object representing - markup to be parsed. - - :param features: Desirable features of the parser to be used. This - may be the name of a specific parser ("lxml", "lxml-xml", - "html.parser", or "html5lib") or it may be the type of markup - to be used ("html", "html5", "xml"). It's recommended that you - name a specific parser, so that Beautiful Soup gives you the - same results across platforms and virtual environments. - - :param builder: A TreeBuilder subclass to instantiate (or - instance to use) instead of looking one up based on - `features`. You only need to use this if you've implemented a - custom TreeBuilder. - - :param parse_only: A SoupStrainer. Only parts of the document - matching the SoupStrainer will be considered. This is useful - when parsing part of a document that would otherwise be too - large to fit into memory. - - :param from_encoding: A string indicating the encoding of the - document to be parsed. Pass this in if Beautiful Soup is - guessing wrongly about the document's encoding. - - :param exclude_encodings: A list of strings indicating - encodings known to be wrong. Pass this in if you don't know - the document's encoding but you know Beautiful Soup's guess is - wrong. - - :param kwargs: For backwards compatibility purposes, the - constructor accepts certain keyword arguments used in - Beautiful Soup 3. None of these arguments do anything in - Beautiful Soup 4; they will result in a warning and then be ignored. - - Apart from this, any keyword arguments passed into the BeautifulSoup - constructor are propagated to the TreeBuilder constructor. This - makes it possible to configure a TreeBuilder beyond saying - which one to use. - - """ + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser.""" if 'convertEntities' in kwargs: - del kwargs['convertEntities'] warnings.warn( "BS4 does not respect the convertEntities argument to the " "BeautifulSoup constructor. Entities are always converted " @@ -181,22 +142,18 @@ class BeautifulSoup(Tag): from_encoding = from_encoding or deprecated_argument( "fromEncoding", "from_encoding") - if from_encoding and isinstance(markup, str): + if from_encoding and isinstance(markup, unicode): warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.") from_encoding = None - # We need this information to track whether or not the builder - # was specified well enough that we can omit the 'you need to - # specify a parser' warning. - original_builder = builder - original_features = features - - if isinstance(builder, type): - # A builder class was passed in; it needs to be instantiated. - builder_class = builder - builder = None - elif builder is None: - if isinstance(features, str): + if len(kwargs) > 0: + arg = kwargs.keys().pop() + raise TypeError( + "__init__() got an unexpected keyword argument '%s'" % arg) + + if builder is None: + original_features = features + if isinstance(features, basestring): features = [features] if features is None or len(features) == 0: features = self.DEFAULT_BUILDER_FEATURES @@ -206,73 +163,41 @@ class BeautifulSoup(Tag): "Couldn't find a tree builder with the features you " "requested: %s. Do you need to install a parser library?" % ",".join(features)) - - # At this point either we have a TreeBuilder instance in - # builder, or we have a builder_class that we can instantiate - # with the remaining **kwargs. - if builder is None: - builder = builder_class(**kwargs) - if not original_builder and not ( - original_features == builder.NAME or - original_features in builder.ALTERNATE_NAMES - ): + builder = builder_class() + if not (original_features == builder.NAME or + original_features in builder.ALTERNATE_NAMES): if builder.is_xml: markup_type = "XML" else: markup_type = "HTML" - # This code adapted from warnings.py so that we get the same line - # of code as our warnings.warn() call gets, even if the answer is wrong - # (as it may be in a multithreading situation). - caller = None - try: - caller = sys._getframe(1) - except ValueError: - pass - if caller: - globals = caller.f_globals - line_number = caller.f_lineno - else: - globals = sys.__dict__ - line_number= 1 - filename = globals.get('__file__') - if filename: - fnl = filename.lower() - if fnl.endswith((".pyc", ".pyo")): - filename = filename[:-1] - if filename: - # If there is no filename at all, the user is most likely in a REPL, - # and the warning is not necessary. - values = dict( - filename=filename, - line_number=line_number, - parser=builder.NAME, - markup_type=markup_type - ) - warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % values, stacklevel=2) - else: - if kwargs: - warnings.warn("Keyword arguments to the BeautifulSoup constructor will be ignored. These would normally be passed into the TreeBuilder constructor, but a TreeBuilder instance was passed in as `builder`.") - + caller = traceback.extract_stack()[0] + filename = caller[0] + line_number = caller[1] + warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict( + filename=filename, + line_number=line_number, + parser=builder.NAME, + markup_type=markup_type)) + self.builder = builder self.is_xml = builder.is_xml self.known_xml = self.is_xml - self._namespaces = dict() - self.parse_only = parse_only + self.builder.soup = self - self.builder.initialize_soup(self) + self.parse_only = parse_only if hasattr(markup, 'read'): # It's a file-type object. markup = markup.read() elif len(markup) <= 256 and ( (isinstance(markup, bytes) and not b'<' in markup) - or (isinstance(markup, str) and not '<' in markup) + or (isinstance(markup, unicode) and not u'<' in markup) ): # Print out warnings for a couple beginner problems # involving passing non-markup to Beautiful Soup. # Beautiful Soup will still parse the input as markup, # just in case that's what the user really wants. - if (isinstance(markup, str) + if (isinstance(markup, unicode) and not os.path.supports_unicode_filenames): possible_filename = markup.encode("utf8") else: @@ -280,13 +205,13 @@ class BeautifulSoup(Tag): is_file = False try: is_file = os.path.exists(possible_filename) - except Exception as e: + except Exception, e: # This is almost certainly a problem involving # characters not valid in filenames on this # system. Just let it go. pass if is_file: - if isinstance(markup, str): + if isinstance(markup, unicode): markup = markup.encode("utf8") warnings.warn( '"%s" looks like a filename, not markup. You should' @@ -338,9 +263,9 @@ class BeautifulSoup(Tag): if isinstance(markup, bytes): space = b' ' cant_start_with = (b"http:", b"https:") - elif isinstance(markup, str): - space = ' ' - cant_start_with = ("http:", "https:") + elif isinstance(markup, unicode): + space = u' ' + cant_start_with = (u"http:", u"https:") else: return @@ -377,10 +302,9 @@ class BeautifulSoup(Tag): self.preserve_whitespace_tag_stack = [] self.pushTag(self) - def new_tag(self, name, namespace=None, nsprefix=None, attrs={}, **kwattrs): + def new_tag(self, name, namespace=None, nsprefix=None, **attrs): """Create a new tag associated with this soup.""" - kwattrs.update(attrs) - return Tag(None, self.builder, name, namespace, nsprefix, kwattrs) + return Tag(None, self.builder, name, namespace, nsprefix, attrs) def new_string(self, s, subclass=NavigableString): """Create a new NavigableString associated with this soup.""" @@ -403,7 +327,7 @@ class BeautifulSoup(Tag): def pushTag(self, tag): #print "Push", tag.name - if self.currentTag is not None: + if self.currentTag: self.currentTag.contents.append(tag) self.tagStack.append(tag) self.currentTag = self.tagStack[-1] @@ -412,7 +336,7 @@ class BeautifulSoup(Tag): def endData(self, containerClass=NavigableString): if self.current_data: - current_data = ''.join(self.current_data) + current_data = u''.join(self.current_data) # If whitespace is not preserved, and this string contains # nothing but ASCII spaces, replace it with a single space # or newline. @@ -442,71 +366,60 @@ class BeautifulSoup(Tag): def object_was_parsed(self, o, parent=None, most_recent_element=None): """Add an object to the parse tree.""" - if parent is None: - parent = self.currentTag - if most_recent_element is not None: - previous_element = most_recent_element - else: - previous_element = self._most_recent_element + parent = parent or self.currentTag + previous_element = most_recent_element or self._most_recent_element next_element = previous_sibling = next_sibling = None if isinstance(o, Tag): next_element = o.next_element next_sibling = o.next_sibling previous_sibling = o.previous_sibling - if previous_element is None: + if not previous_element: previous_element = o.previous_element - fix = parent.next_element is not None - o.setup(parent, previous_element, next_element, previous_sibling, next_sibling) self._most_recent_element = o parent.contents.append(o) - # Check if we are inserting into an already parsed node. - if fix: - self._linkage_fixer(parent) - - def _linkage_fixer(self, el): - """Make sure linkage of this fragment is sound.""" - - first = el.contents[0] - child = el.contents[-1] - descendant = child - - if child is first and el.parent is not None: - # Parent should be linked to first child - el.next_element = child - # We are no longer linked to whatever this element is - prev_el = child.previous_element - if prev_el is not None and prev_el is not el: - prev_el.next_element = None - # First child should be linked to the parent, and no previous siblings. - child.previous_element = el - child.previous_sibling = None - - # We have no sibling as we've been appended as the last. - child.next_sibling = None - - # This index is a tag, dig deeper for a "last descendant" - if isinstance(child, Tag) and child.contents: - descendant = child._last_descendant(False) - - # As the final step, link last descendant. It should be linked - # to the parent's next sibling (if found), else walk up the chain - # and find a parent with a sibling. It should have no next sibling. - descendant.next_element = None - descendant.next_sibling = None - target = el - while True: - if target is None: - break - elif target.next_sibling is not None: - descendant.next_element = target.next_sibling - target.next_sibling.previous_element = child - break - target = target.parent + if parent.next_sibling: + # This node is being inserted into an element that has + # already been parsed. Deal with any dangling references. + index = len(parent.contents)-1 + while index >= 0: + if parent.contents[index] is o: + break + index -= 1 + else: + raise ValueError( + "Error building tree: supposedly %r was inserted " + "into %r after the fact, but I don't see it!" % ( + o, parent + ) + ) + if index == 0: + previous_element = parent + previous_sibling = None + else: + previous_element = previous_sibling = parent.contents[index-1] + if index == len(parent.contents)-1: + next_element = parent.next_sibling + next_sibling = None + else: + next_element = next_sibling = parent.contents[index+1] + + o.previous_element = previous_element + if previous_element: + previous_element.next_element = o + o.next_element = next_element + if next_element: + next_element.previous_element = o + o.next_sibling = next_sibling + if next_sibling: + next_sibling.previous_sibling = o + o.previous_sibling = previous_sibling + if previous_sibling: + previous_sibling.next_sibling = o def _popToTag(self, name, nsprefix=None, inclusivePop=True): """Pops the tag stack up to and including the most recent @@ -552,7 +465,7 @@ class BeautifulSoup(Tag): self.currentTag, self._most_recent_element) if tag is None: return tag - if self._most_recent_element is not None: + if self._most_recent_element: self._most_recent_element.next_element = tag self._most_recent_element = tag self.pushTag(tag) @@ -577,9 +490,9 @@ class BeautifulSoup(Tag): encoding_part = '' if eventual_encoding != None: encoding_part = ' encoding="%s"' % eventual_encoding - prefix = '<?xml version="1.0"%s?>\n' % encoding_part + prefix = u'<?xml version="1.0"%s?>\n' % encoding_part else: - prefix = '' + prefix = u'' if not pretty_print: indent_level = None else: @@ -613,4 +526,4 @@ class FeatureNotFound(ValueError): if __name__ == '__main__': import sys soup = BeautifulSoup(sys.stdin) - print(soup.prettify()) + print soup.prettify() diff --git a/libs/bs4/builder/__init__.py b/libs/bs4/builder/__init__.py index cc497cf0b..fdb3362fc 100644 --- a/libs/bs4/builder/__init__.py +++ b/libs/bs4/builder/__init__.py @@ -1,5 +1,5 @@ -# Use of this source code is governed by the MIT license. -__license__ = "MIT" +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. from collections import defaultdict import itertools @@ -7,7 +7,8 @@ import sys from bs4.element import ( CharsetMetaAttributeValue, ContentMetaAttributeValue, - nonwhitespace_re + HTMLAwareEntitySubstitution, + whitespace_re ) __all__ = [ @@ -89,46 +90,18 @@ class TreeBuilder(object): is_xml = False picklable = False + preserve_whitespace_tags = set() empty_element_tags = None # A tag will be considered an empty-element # tag when and only when it has no contents. - + # A value for these tag/attribute combinations is a space- or # comma-separated list of CDATA, rather than a single CDATA. - DEFAULT_CDATA_LIST_ATTRIBUTES = {} - - DEFAULT_PRESERVE_WHITESPACE_TAGS = set() - - USE_DEFAULT = object() - - def __init__(self, multi_valued_attributes=USE_DEFAULT, preserve_whitespace_tags=USE_DEFAULT): - """Constructor. - - :param multi_valued_attributes: If this is set to None, the - TreeBuilder will not turn any values for attributes like - 'class' into lists. Setting this do a dictionary will - customize this behavior; look at DEFAULT_CDATA_LIST_ATTRIBUTES - for an example. - - Internally, these are called "CDATA list attributes", but that - probably doesn't make sense to an end-user, so the argument name - is `multi_valued_attributes`. - - :param preserve_whitespace_tags: - """ + cdata_list_attributes = {} + + + def __init__(self): self.soup = None - if multi_valued_attributes is self.USE_DEFAULT: - multi_valued_attributes = self.DEFAULT_CDATA_LIST_ATTRIBUTES - self.cdata_list_attributes = multi_valued_attributes - if preserve_whitespace_tags is self.USE_DEFAULT: - preserve_whitespace_tags = self.DEFAULT_PRESERVE_WHITESPACE_TAGS - self.preserve_whitespace_tags = preserve_whitespace_tags - - def initialize_soup(self, soup): - """The BeautifulSoup object has been initialized and is now - being associated with the TreeBuilder. - """ - self.soup = soup - + def reset(self): pass @@ -152,7 +125,7 @@ class TreeBuilder(object): if self.empty_element_tags is None: return True return tag_name in self.empty_element_tags - + def feed(self, markup): raise NotImplementedError() @@ -187,14 +160,14 @@ class TreeBuilder(object): universal = self.cdata_list_attributes.get('*', []) tag_specific = self.cdata_list_attributes.get( tag_name.lower(), None) - for attr in list(attrs.keys()): + for attr in attrs.keys(): if attr in universal or (tag_specific and attr in tag_specific): # We have a "class"-type attribute whose string # value is a whitespace-separated list of # values. Split it into a list. value = attrs[attr] - if isinstance(value, str): - values = nonwhitespace_re.findall(value) + if isinstance(value, basestring): + values = whitespace_re.split(value) else: # html5lib sometimes calls setAttributes twice # for the same tag when rearranging the parse @@ -258,20 +231,15 @@ class HTMLTreeBuilder(TreeBuilder): Such as which tags are empty-element tags. """ + preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags empty_element_tags = set([ # These are from HTML5. 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr', - - # These are from earlier versions of HTML and are removed in HTML5. - 'basefont', 'bgsound', 'command', 'frame', 'image', 'isindex', 'nextid', 'spacer' + + # These are from HTML4, removed in HTML5. + 'spacer', 'frame' ]) - # The HTML standard defines these as block-level elements. Beautiful - # Soup does not treat these elements differently from other elements, - # but it may do so eventually, and this information is available if - # you need to use it. - block_elements = set(["address", "article", "aside", "blockquote", "canvas", "dd", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "li", "main", "nav", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]) - # The HTML standard defines these attributes as containing a # space-separated list of values, not a single value. That is, # class="foo bar" means that the 'class' attribute has two values, @@ -279,7 +247,7 @@ class HTMLTreeBuilder(TreeBuilder): # encounter one of these attributes, we will parse its value into # a list of values if possible. Upon output, the list will be # converted back into a string. - DEFAULT_CDATA_LIST_ATTRIBUTES = { + cdata_list_attributes = { "*" : ['class', 'accesskey', 'dropzone'], "a" : ['rel', 'rev'], "link" : ['rel', 'rev'], @@ -296,8 +264,6 @@ class HTMLTreeBuilder(TreeBuilder): "output" : ["for"], } - DEFAULT_PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) - def set_up_substitutions(self, tag): # We are only interested in <meta> tags if tag.name != 'meta': diff --git a/libs/bs4/builder/_html5lib.py b/libs/bs4/builder/_html5lib.py index 090bb61a8..5f5489358 100644 --- a/libs/bs4/builder/_html5lib.py +++ b/libs/bs4/builder/_html5lib.py @@ -1,5 +1,5 @@ -# Use of this source code is governed by the MIT license. -__license__ = "MIT" +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. __all__ = [ 'HTML5TreeBuilder', @@ -15,7 +15,7 @@ from bs4.builder import ( ) from bs4.element import ( NamespacedAttribute, - nonwhitespace_re, + whitespace_re, ) import html5lib from html5lib.constants import ( @@ -33,7 +33,7 @@ try: # Pre-0.99999999 from html5lib.treebuilders import _base as treebuilder_base new_html5lib = False -except ImportError as e: +except ImportError, e: # 0.99999999 and up from html5lib.treebuilders import base as treebuilder_base new_html5lib = True @@ -64,7 +64,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder): parser = html5lib.HTMLParser(tree=self.create_treebuilder) extra_kwargs = dict() - if not isinstance(markup, str): + if not isinstance(markup, unicode): if new_html5lib: extra_kwargs['override_encoding'] = self.user_specified_encoding else: @@ -72,13 +72,13 @@ class HTML5TreeBuilder(HTMLTreeBuilder): doc = parser.parse(markup, **extra_kwargs) # Set the character encoding detected by the tokenizer. - if isinstance(markup, str): + if isinstance(markup, unicode): # We need to special-case this because html5lib sets # charEncoding to UTF-8 if it gets Unicode input. doc.original_encoding = None else: original_encoding = parser.tokenizer.stream.charEncoding[0] - if not isinstance(original_encoding, str): + if not isinstance(original_encoding, basestring): # In 0.99999999 and up, the encoding is an html5lib # Encoding object. We want to use a string for compatibility # with other tree builders. @@ -92,7 +92,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder): def test_fragment_to_document(self, fragment): """See `TreeBuilder`.""" - return '<html><head></head><body>%s</body></html>' % fragment + return u'<html><head></head><body>%s</body></html>' % fragment class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder): @@ -174,7 +174,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder): rv.append("|%s<%s>" % (' ' * indent, name)) if element.attrs: attributes = [] - for name, value in list(element.attrs.items()): + for name, value in element.attrs.items(): if isinstance(name, NamespacedAttribute): name = "%s %s" % (prefixes[name.namespace], name.name) if isinstance(value, list): @@ -199,14 +199,14 @@ class AttrList(object): def __setitem__(self, name, value): # If this attribute is a multi-valued attribute for this element, # turn its value into a list. - list_attr = self.element.cdata_list_attributes + list_attr = HTML5TreeBuilder.cdata_list_attributes if (name in list_attr['*'] or (self.element.name in list_attr and name in list_attr[self.element.name])): # A node that is being cloned may have already undergone # this procedure. if not isinstance(value, list): - value = nonwhitespace_re.findall(value) + value = whitespace_re.split(value) self.element[name] = value def items(self): return list(self.attrs.items()) @@ -229,7 +229,7 @@ class Element(treebuilder_base.Node): def appendChild(self, node): string_child = child = None - if isinstance(node, str): + if isinstance(node, basestring): # Some other piece of code decided to pass in a string # instead of creating a TextElement object to contain the # string. @@ -246,10 +246,10 @@ class Element(treebuilder_base.Node): child = node.element node.parent = self - if not isinstance(child, str) and child.parent is not None: + if not isinstance(child, basestring) and child.parent is not None: node.element.extract() - if (string_child is not None and self.element.contents + if (string_child and self.element.contents and self.element.contents[-1].__class__ == NavigableString): # We are appending a string onto another string. # TODO This has O(n^2) performance, for input like @@ -259,7 +259,7 @@ class Element(treebuilder_base.Node): old_element.replace_with(new_element) self.soup._most_recent_element = new_element else: - if isinstance(node, str): + if isinstance(node, basestring): # Create a brand new NavigableString from this string. child = self.soup.new_string(node) @@ -299,7 +299,7 @@ class Element(treebuilder_base.Node): self.soup.builder._replace_cdata_list_attribute_values( self.name, attributes) - for name, value in list(attributes.items()): + for name, value in attributes.items(): self.element[name] = value # The attributes may contain variables that need substitution. @@ -360,16 +360,16 @@ class Element(treebuilder_base.Node): # Set the first child's previous_element and previous_sibling # to elements within the new parent first_child = to_append[0] - if new_parents_last_descendant is not None: + if new_parents_last_descendant: first_child.previous_element = new_parents_last_descendant else: first_child.previous_element = new_parent_element first_child.previous_sibling = new_parents_last_child - if new_parents_last_descendant is not None: + if new_parents_last_descendant: new_parents_last_descendant.next_element = first_child else: new_parent_element.next_element = first_child - if new_parents_last_child is not None: + if new_parents_last_child: new_parents_last_child.next_sibling = first_child # Find the very last element being moved. It is now the @@ -379,7 +379,7 @@ class Element(treebuilder_base.Node): last_childs_last_descendant = to_append[-1]._last_descendant(False, True) last_childs_last_descendant.next_element = new_parents_last_descendant_next_element - if new_parents_last_descendant_next_element is not None: + if new_parents_last_descendant_next_element: # TODO: This code has no test coverage and I'm not sure # how to get html5lib to go through this path, but it's # just the other side of the previous line. diff --git a/libs/bs4/builder/_htmlparser.py b/libs/bs4/builder/_htmlparser.py index ea549c356..67890b3a3 100644 --- a/libs/bs4/builder/_htmlparser.py +++ b/libs/bs4/builder/_htmlparser.py @@ -1,18 +1,17 @@ -# encoding: utf-8 """Use the HTMLParser library to parse HTML files that aren't too bad.""" -# Use of this source code is governed by the MIT license. -__license__ = "MIT" +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. __all__ = [ 'HTMLParserTreeBuilder', ] -from html.parser import HTMLParser +from HTMLParser import HTMLParser try: - from html.parser import HTMLParseError -except ImportError as e: + from HTMLParser import HTMLParseError +except ImportError, e: # HTMLParseError is removed in Python 3.5. Since it can never be # thrown in 3.5, we can just define our own class as a placeholder. class HTMLParseError(Exception): @@ -65,18 +64,7 @@ class BeautifulSoupHTMLParser(HTMLParser): # order. It's a list of closing tags we've already handled and # will ignore, assuming they ever show up. self.already_closed_empty_element = [] - - def error(self, msg): - """In Python 3, HTMLParser subclasses must implement error(), although this - requirement doesn't appear to be documented. - - In Python 2, HTMLParser implements error() as raising an exception. - - In any event, this method is called only on very strange markup and our best strategy - is to pretend it didn't happen and keep going. - """ - warnings.warn(msg) - + def handle_startendtag(self, name, attrs): # This is only called when the markup looks like # <tag/>. @@ -141,26 +129,11 @@ class BeautifulSoupHTMLParser(HTMLParser): else: real_name = int(name) - data = None - if real_name < 256: - # HTML numeric entities are supposed to reference Unicode - # code points, but sometimes they reference code points in - # some other encoding (ahem, Windows-1252). E.g. “ - # instead of É for LEFT DOUBLE QUOTATION MARK. This - # code tries to detect this situation and compensate. - for encoding in (self.soup.original_encoding, 'windows-1252'): - if not encoding: - continue - try: - data = bytearray([real_name]).decode(encoding) - except UnicodeDecodeError as e: - pass - if not data: - try: - data = chr(real_name) - except (ValueError, OverflowError) as e: - pass - data = data or "\N{REPLACEMENT CHARACTER}" + try: + data = unichr(real_name) + except (ValueError, OverflowError), e: + data = u"\N{REPLACEMENT CHARACTER}" + self.handle_data(data) def handle_entityref(self, name): @@ -168,12 +141,7 @@ class BeautifulSoupHTMLParser(HTMLParser): if character is not None: data = character else: - # If this were XML, it would be ambiguous whether "&foo" - # was an character entity reference with a missing - # semicolon or the literal string "&foo". Since this is - # HTML, we have a complete list of all character entity references, - # and this one wasn't found, so assume it's the literal string "&foo". - data = "&%s" % name + data = "&%s;" % name self.handle_data(data) def handle_comment(self, data): @@ -214,15 +182,12 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder): NAME = HTMLPARSER features = [NAME, HTML, STRICT] - def __init__(self, parser_args=None, parser_kwargs=None, **kwargs): - super(HTMLParserTreeBuilder, self).__init__(**kwargs) - parser_args = parser_args or [] - parser_kwargs = parser_kwargs or {} + def __init__(self, *args, **kwargs): if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED: - parser_kwargs['strict'] = False + kwargs['strict'] = False if CONSTRUCTOR_TAKES_CONVERT_CHARREFS: - parser_kwargs['convert_charrefs'] = False - self.parser_args = (parser_args, parser_kwargs) + kwargs['convert_charrefs'] = False + self.parser_args = (args, kwargs) def prepare_markup(self, markup, user_specified_encoding=None, document_declared_encoding=None, exclude_encodings=None): @@ -231,7 +196,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder): declared within markup, whether any characters had to be replaced with REPLACEMENT CHARACTER). """ - if isinstance(markup, str): + if isinstance(markup, unicode): yield (markup, None, None, False) return @@ -248,8 +213,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder): parser.soup = self.soup try: parser.feed(markup) - parser.close() - except HTMLParseError as e: + except HTMLParseError, e: warnings.warn(RuntimeWarning( "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) raise e diff --git a/libs/bs4/builder/_lxml.py b/libs/bs4/builder/_lxml.py index a490e2301..d2ca2872d 100644 --- a/libs/bs4/builder/_lxml.py +++ b/libs/bs4/builder/_lxml.py @@ -1,18 +1,13 @@ -# Use of this source code is governed by the MIT license. -__license__ = "MIT" - +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. __all__ = [ 'LXMLTreeBuilderForXML', 'LXMLTreeBuilder', ] -try: - from collections.abc import Callable # Python 3.6 -except ImportError as e: - from collections import Callable - from io import BytesIO -from io import StringIO +from StringIO import StringIO +import collections from lxml import etree from bs4.element import ( Comment, @@ -33,10 +28,6 @@ from bs4.dammit import EncodingDetector LXML = 'lxml' -def _invert(d): - "Invert a dictionary." - return dict((v,k) for k, v in list(d.items())) - class LXMLTreeBuilderForXML(TreeBuilder): DEFAULT_PARSER_CLASS = etree.XMLParser @@ -53,29 +44,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): # This namespace mapping is specified in the XML Namespace # standard. - DEFAULT_NSMAPS = dict(xml='http://www.w3.org/XML/1998/namespace') - - DEFAULT_NSMAPS_INVERTED = _invert(DEFAULT_NSMAPS) - - def initialize_soup(self, soup): - """Let the BeautifulSoup object know about the standard namespace - mapping. - """ - super(LXMLTreeBuilderForXML, self).initialize_soup(soup) - self._register_namespaces(self.DEFAULT_NSMAPS) - - def _register_namespaces(self, mapping): - """Let the BeautifulSoup object know about namespaces encountered - while parsing the document. - - This might be useful later on when creating CSS selectors. - """ - for key, value in list(mapping.items()): - if key and key not in self.soup._namespaces: - # Let the BeautifulSoup object know about a new namespace. - # If there are multiple namespaces defined with the same - # prefix, the first one in the document takes precedence. - self.soup._namespaces[key] = value + DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"} def default_parser(self, encoding): # This can either return a parser object or a class, which @@ -89,12 +58,12 @@ class LXMLTreeBuilderForXML(TreeBuilder): # Use the default parser. parser = self.default_parser(encoding) - if isinstance(parser, Callable): + if isinstance(parser, collections.Callable): # Instantiate the parser with default arguments parser = parser(target=self, strip_cdata=False, encoding=encoding) return parser - def __init__(self, parser=None, empty_element_tags=None, **kwargs): + def __init__(self, parser=None, empty_element_tags=None): # TODO: Issue a warning if parser is present but not a # callable, since that means there's no way to create new # parsers for different encodings. @@ -102,9 +71,8 @@ class LXMLTreeBuilderForXML(TreeBuilder): if empty_element_tags is not None: self.empty_element_tags = set(empty_element_tags) self.soup = None - self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED] - super(LXMLTreeBuilderForXML, self).__init__(**kwargs) - + self.nsmaps = [self.DEFAULT_NSMAPS] + def _getNsTag(self, tag): # Split the namespace URL out of a fully-qualified lxml tag # name. Copied from lxml's src/lxml/sax.py. @@ -133,12 +101,12 @@ class LXMLTreeBuilderForXML(TreeBuilder): else: self.processing_instruction_class = XMLProcessingInstruction - if isinstance(markup, str): + if isinstance(markup, unicode): # We were given Unicode. Maybe lxml can parse Unicode on # this system? yield markup, None, document_declared_encoding, False - if isinstance(markup, str): + if isinstance(markup, unicode): # No, apparently not. Convert the Unicode to UTF-8 and # tell lxml to parse it as UTF-8. yield (markup.encode("utf8"), "utf8", @@ -153,7 +121,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): def feed(self, markup): if isinstance(markup, bytes): markup = BytesIO(markup) - elif isinstance(markup, str): + elif isinstance(markup, unicode): markup = StringIO(markup) # Call feed() at least once, even if the markup is empty, @@ -168,36 +136,30 @@ class LXMLTreeBuilderForXML(TreeBuilder): if len(data) != 0: self.parser.feed(data) self.parser.close() - except (UnicodeDecodeError, LookupError, etree.ParserError) as e: + except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e)) def close(self): - self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED] + self.nsmaps = [self.DEFAULT_NSMAPS] def start(self, name, attrs, nsmap={}): # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy. attrs = dict(attrs) nsprefix = None # Invert each namespace map as it comes in. - if len(nsmap) == 0 and len(self.nsmaps) > 1: - # There are no new namespaces for this tag, but - # non-default namespaces are in play, so we need a - # separate tag stack to know when they end. - self.nsmaps.append(None) + if len(self.nsmaps) > 1: + # There are no new namespaces for this tag, but + # non-default namespaces are in play, so we need a + # separate tag stack to know when they end. + self.nsmaps.append(None) elif len(nsmap) > 0: # A new namespace mapping has come into play. - - # First, Let the BeautifulSoup object know about it. - self._register_namespaces(nsmap) - - # Then, add it to our running list of inverted namespace - # mappings. - self.nsmaps.append(_invert(nsmap)) - + inverted_nsmap = dict((value, key) for key, value in nsmap.items()) + self.nsmaps.append(inverted_nsmap) # Also treat the namespace mapping as a set of attributes on the # tag, so we can recreate it later. attrs = attrs.copy() - for prefix, namespace in list(nsmap.items()): + for prefix, namespace in nsmap.items(): attribute = NamespacedAttribute( "xmlns", prefix, "http://www.w3.org/2000/xmlns/") attrs[attribute] = namespace @@ -206,7 +168,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): # from lxml with namespaces attached to their names, and # turn then into NamespacedAttribute objects. new_attrs = {} - for attr, value in list(attrs.items()): + for attr, value in attrs.items(): namespace, attr = self._getNsTag(attr) if namespace is None: new_attrs[attr] = value @@ -266,7 +228,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): def test_fragment_to_document(self, fragment): """See `TreeBuilder`.""" - return '<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment + return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): @@ -287,10 +249,10 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): self.parser = self.parser_for(encoding) self.parser.feed(markup) self.parser.close() - except (UnicodeDecodeError, LookupError, etree.ParserError) as e: + except (UnicodeDecodeError, LookupError, etree.ParserError), e: raise ParserRejectedMarkup(str(e)) def test_fragment_to_document(self, fragment): """See `TreeBuilder`.""" - return '<html><body>%s</body></html>' % fragment + return u'<html><body>%s</body></html>' % fragment diff --git a/libs/bs4/dammit.py b/libs/bs4/dammit.py index c7ac4d431..7965565f5 100644 --- a/libs/bs4/dammit.py +++ b/libs/bs4/dammit.py @@ -6,11 +6,12 @@ necessary. It is heavily based on code from Mark Pilgrim's Universal Feed Parser. It works best on XML and HTML, but it does not rewrite the XML or HTML to reflect a new encoding; that's the tree builder's job. """ -# Use of this source code is governed by the MIT license. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. __license__ = "MIT" import codecs -from html.entities import codepoint2name +from htmlentitydefs import codepoint2name import re import logging import string @@ -45,9 +46,9 @@ except ImportError: pass xml_encoding_re = re.compile( - '^<\\?.*encoding=[\'"](.*?)[\'"].*\\?>'.encode(), re.I) + '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I) html_meta_re = re.compile( - '<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I) + '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I) class EntitySubstitution(object): @@ -57,24 +58,15 @@ class EntitySubstitution(object): lookup = {} reverse_lookup = {} characters_for_re = [] - - # &apos is an XHTML entity and an HTML 5, but not an HTML 4 - # entity. We don't want to use it, but we want to recognize it on the way in. - # - # TODO: Ideally we would be able to recognize all HTML 5 named - # entities, but that's a little tricky. - extra = [(39, 'apos')] - for codepoint, name in list(codepoint2name.items()) + extra: - character = chr(codepoint) - if codepoint not in (34, 39): + for codepoint, name in list(codepoint2name.items()): + character = unichr(codepoint) + if codepoint != 34: # There's no point in turning the quotation mark into - # " or the single quote into ', unless it - # happens within an attribute value, which is handled - # elsewhere. + # ", unless it happens within an attribute value, which + # is handled elsewhere. characters_for_re.append(character) lookup[character] = name - # But we do want to recognize those entities on the way in and - # convert them to Unicode characters. + # But we do want to turn " into the quotation mark. reverse_lookup[name] = character re_definition = "[%s]" % "".join(characters_for_re) return lookup, reverse_lookup, re.compile(re_definition) @@ -90,7 +82,7 @@ class EntitySubstitution(object): } BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" - "&(?!#\\d+;|#x[0-9a-fA-F]+;|\\w+;)" + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" ")") AMPERSAND_OR_BRACKET = re.compile("([<>&])") @@ -282,7 +274,7 @@ class EncodingDetector: def strip_byte_order_mark(cls, data): """If a byte-order mark is present, strip it and return the encoding it implies.""" encoding = None - if isinstance(data, str): + if isinstance(data, unicode): # Unicode data cannot have a byte-order mark. return data, encoding if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \ @@ -360,9 +352,9 @@ class UnicodeDammit: markup, override_encodings, is_html, exclude_encodings) # Short-circuit if the data is in Unicode to begin with. - if isinstance(markup, str) or markup == '': + if isinstance(markup, unicode) or markup == '': self.markup = markup - self.unicode_markup = str(markup) + self.unicode_markup = unicode(markup) self.original_encoding = None return @@ -446,7 +438,7 @@ class UnicodeDammit: def _to_unicode(self, data, encoding, errors="strict"): '''Given a string and its encoding, decodes the string into Unicode. %encoding is a string recognized by encodings.aliases''' - return str(data, encoding, errors) + return unicode(data, encoding, errors) @property def declared_html_encoding(self): diff --git a/libs/bs4/diagnose.py b/libs/bs4/diagnose.py index b5f6e6c8b..8768332f5 100644 --- a/libs/bs4/diagnose.py +++ b/libs/bs4/diagnose.py @@ -1,11 +1,12 @@ """Diagnostic functions, mainly for use when doing tech support.""" -# Use of this source code is governed by the MIT license. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. __license__ = "MIT" import cProfile -from io import StringIO -from html.parser import HTMLParser +from StringIO import StringIO +from HTMLParser import HTMLParser import bs4 from bs4 import BeautifulSoup, __version__ from bs4.builder import builder_registry @@ -21,8 +22,8 @@ import cProfile def diagnose(data): """Diagnostic suite for isolating common problems.""" - print("Diagnostic running on Beautiful Soup %s" % __version__) - print("Python version %s" % sys.version) + print "Diagnostic running on Beautiful Soup %s" % __version__ + print "Python version %s" % sys.version basic_parsers = ["html.parser", "html5lib", "lxml"] for name in basic_parsers: @@ -31,16 +32,16 @@ def diagnose(data): break else: basic_parsers.remove(name) - print(( + print ( "I noticed that %s is not installed. Installing it may help." % - name)) + name) if 'lxml' in basic_parsers: - basic_parsers.append("lxml-xml") + basic_parsers.append(["lxml", "xml"]) try: from lxml import etree - print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))) - except ImportError as e: + print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)) + except ImportError, e: print ( "lxml is not installed or couldn't be imported.") @@ -48,43 +49,37 @@ def diagnose(data): if 'html5lib' in basic_parsers: try: import html5lib - print("Found html5lib version %s" % html5lib.__version__) - except ImportError as e: + print "Found html5lib version %s" % html5lib.__version__ + except ImportError, e: print ( "html5lib is not installed or couldn't be imported.") if hasattr(data, 'read'): data = data.read() + elif os.path.exists(data): + print '"%s" looks like a filename. Reading data from the file.' % data + with open(data) as fp: + data = fp.read() elif data.startswith("http:") or data.startswith("https:"): - print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data) - print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.") + print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data + print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup." return - else: - try: - if os.path.exists(data): - print('"%s" looks like a filename. Reading data from the file.' % data) - with open(data) as fp: - data = fp.read() - except ValueError: - # This can happen on some platforms when the 'filename' is - # too long. Assume it's data and not a filename. - pass - print() + print for parser in basic_parsers: - print("Trying to parse your markup with %s" % parser) + print "Trying to parse your markup with %s" % parser success = False try: - soup = BeautifulSoup(data, features=parser) + soup = BeautifulSoup(data, parser) success = True - except Exception as e: - print("%s could not parse the markup." % parser) + except Exception, e: + print "%s could not parse the markup." % parser traceback.print_exc() if success: - print("Here's what %s did with the markup:" % parser) - print(soup.prettify()) + print "Here's what %s did with the markup:" % parser + print soup.prettify() - print("-" * 80) + print "-" * 80 def lxml_trace(data, html=True, **kwargs): """Print out the lxml events that occur during parsing. @@ -94,7 +89,7 @@ def lxml_trace(data, html=True, **kwargs): """ from lxml import etree for event, element in etree.iterparse(StringIO(data), html=html, **kwargs): - print(("%s, %4s, %s" % (event, element.tag, element.text))) + print("%s, %4s, %s" % (event, element.tag, element.text)) class AnnouncingParser(HTMLParser): """Announces HTMLParser parse events, without doing anything else.""" @@ -154,7 +149,7 @@ def rword(length=5): def rsentence(length=4): "Generate a random sentence-like string." - return " ".join(rword(random.randint(4,9)) for i in list(range(length))) + return " ".join(rword(random.randint(4,9)) for i in range(length)) def rdoc(num_elements=1000): """Randomly generate an invalid HTML document.""" @@ -176,9 +171,9 @@ def rdoc(num_elements=1000): def benchmark_parsers(num_elements=100000): """Very basic head-to-head performance benchmark.""" - print("Comparative parser benchmark on Beautiful Soup %s" % __version__) + print "Comparative parser benchmark on Beautiful Soup %s" % __version__ data = rdoc(num_elements) - print("Generated a large invalid HTML document (%d bytes)." % len(data)) + print "Generated a large invalid HTML document (%d bytes)." % len(data) for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]: success = False @@ -187,24 +182,24 @@ def benchmark_parsers(num_elements=100000): soup = BeautifulSoup(data, parser) b = time.time() success = True - except Exception as e: - print("%s could not parse the markup." % parser) + except Exception, e: + print "%s could not parse the markup." % parser traceback.print_exc() if success: - print("BS4+%s parsed the markup in %.2fs." % (parser, b-a)) + print "BS4+%s parsed the markup in %.2fs." % (parser, b-a) from lxml import etree a = time.time() etree.HTML(data) b = time.time() - print("Raw lxml parsed the markup in %.2fs." % (b-a)) + print "Raw lxml parsed the markup in %.2fs." % (b-a) import html5lib parser = html5lib.HTMLParser() a = time.time() parser.parse(data) b = time.time() - print("Raw html5lib parsed the markup in %.2fs." % (b-a)) + print "Raw html5lib parsed the markup in %.2fs." % (b-a) def profile(num_elements=100000, parser="lxml"): diff --git a/libs/bs4/element.py b/libs/bs4/element.py index f16b1663e..9ef75f814 100644 --- a/libs/bs4/element.py +++ b/libs/bs4/element.py @@ -1,35 +1,18 @@ -# Use of this source code is governed by the MIT license. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. __license__ = "MIT" -try: - from collections.abc import Callable # Python 3.6 -except ImportError as e: - from collections import Callable +import collections import re +import shlex import sys import warnings -try: - import soupsieve -except ImportError as e: - soupsieve = None - warnings.warn( - 'The soupsieve package is not installed. CSS selectors cannot be used.' - ) - -from bs4.formatter import ( - Formatter, - HTMLFormatter, - XMLFormatter, -) +from bs4.dammit import EntitySubstitution DEFAULT_OUTPUT_ENCODING = "utf-8" PY3K = (sys.version_info[0] > 2) -nonwhitespace_re = re.compile(r"\S+") - -# NOTE: This isn't used as of 4.7.0. I'm leaving it for a little bit on -# the off chance someone imported it for their own use. -whitespace_re = re.compile(r"\s+") +whitespace_re = re.compile("\s+") def _alias(attr): """Alias one attribute name to another for backward compatibility""" @@ -43,22 +26,22 @@ def _alias(attr): return alias -class NamespacedAttribute(str): +class NamespacedAttribute(unicode): def __new__(cls, prefix, name, namespace=None): if name is None: - obj = str.__new__(cls, prefix) + obj = unicode.__new__(cls, prefix) elif prefix is None: # Not really namespaced. - obj = str.__new__(cls, name) + obj = unicode.__new__(cls, name) else: - obj = str.__new__(cls, prefix + ":" + name) + obj = unicode.__new__(cls, prefix + ":" + name) obj.prefix = prefix obj.name = name obj.namespace = namespace return obj -class AttributeValueWithCharsetSubstitution(str): +class AttributeValueWithCharsetSubstitution(unicode): """A stand-in object for a character encoding specified in HTML.""" class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution): @@ -69,7 +52,7 @@ class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution): """ def __new__(cls, original_value): - obj = str.__new__(cls, original_value) + obj = unicode.__new__(cls, original_value) obj.original_value = original_value return obj @@ -86,15 +69,15 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): The value of the 'content' attribute will be one of these objects. """ - CHARSET_RE = re.compile(r"((^|;)\s*charset=)([^;]*)", re.M) + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) def __new__(cls, original_value): match = cls.CHARSET_RE.search(original_value) if match is None: # No substitution necessary. - return str.__new__(str, original_value) + return unicode.__new__(unicode, original_value) - obj = str.__new__(cls, original_value) + obj = unicode.__new__(cls, original_value) obj.original_value = original_value return obj @@ -103,71 +86,94 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): return match.group(1) + encoding return self.CHARSET_RE.sub(rewrite, self.original_value) - -class PageElement(object): - """Contains the navigational information for some part of the page - (either a tag or a piece of text)""" - - def setup(self, parent=None, previous_element=None, next_element=None, - previous_sibling=None, next_sibling=None): - """Sets up the initial relations between this element and - other elements.""" - self.parent = parent +class HTMLAwareEntitySubstitution(EntitySubstitution): - self.previous_element = previous_element - if previous_element is not None: - self.previous_element.next_element = self + """Entity substitution rules that are aware of some HTML quirks. - self.next_element = next_element - if self.next_element is not None: - self.next_element.previous_element = self + Specifically, the contents of <script> and <style> tags should not + undergo entity substitution. - self.next_sibling = next_sibling - if self.next_sibling is not None: - self.next_sibling.previous_sibling = self + Incoming NavigableString objects are checked to see if they're the + direct children of a <script> or <style> tag. + """ - if (previous_sibling is None - and self.parent is not None and self.parent.contents): - previous_sibling = self.parent.contents[-1] + cdata_containing_tags = set(["script", "style"]) - self.previous_sibling = previous_sibling - if previous_sibling is not None: - self.previous_sibling.next_sibling = self + preformatted_tags = set(["pre"]) - def format_string(self, s, formatter): - """Format the given string using the given formatter.""" - if formatter is None: - return s - if not isinstance(formatter, Formatter): - formatter = self.formatter_for_name(formatter) - output = formatter.substitute(s) - return output + preserve_whitespace_tags = set(['pre', 'textarea']) - def formatter_for_name(self, formatter): - """Look up or create a Formatter for the given identifier, - if necessary. + @classmethod + def _substitute_if_appropriate(cls, ns, f): + if (isinstance(ns, NavigableString) + and ns.parent is not None + and ns.parent.name in cls.cdata_containing_tags): + # Do nothing. + return ns + # Substitute. + return f(ns) - :param formatter: Can be a Formatter object (used as-is), a - function (used as the entity substitution hook for an - XMLFormatter or HTMLFormatter), or a string (used to look up - an XMLFormatter or HTMLFormatter in the appropriate registry. - """ - if isinstance(formatter, Formatter): - return formatter - if self._is_xml: - c = XMLFormatter + @classmethod + def substitute_html(cls, ns): + return cls._substitute_if_appropriate( + ns, EntitySubstitution.substitute_html) + + @classmethod + def substitute_xml(cls, ns): + return cls._substitute_if_appropriate( + ns, EntitySubstitution.substitute_xml) + +class PageElement(object): + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + # There are five possible values for the "formatter" argument passed in + # to methods like encode() and prettify(): + # + # "html" - All Unicode characters with corresponding HTML entities + # are converted to those entities on output. + # "minimal" - Bare ampersands and angle brackets are converted to + # XML entities: & < > + # None - The null formatter. Unicode characters are never + # converted to entities. This is not recommended, but it's + # faster than "minimal". + # A function - This function will be called on every string that + # needs to undergo entity substitution. + # + + # In an HTML document, the default "html" and "minimal" functions + # will leave the contents of <script> and <style> tags alone. For + # an XML document, all tags will be given the same treatment. + + HTML_FORMATTERS = { + "html" : HTMLAwareEntitySubstitution.substitute_html, + "minimal" : HTMLAwareEntitySubstitution.substitute_xml, + None : None + } + + XML_FORMATTERS = { + "html" : EntitySubstitution.substitute_html, + "minimal" : EntitySubstitution.substitute_xml, + None : None + } + + def format_string(self, s, formatter='minimal'): + """Format the given string using the given formatter.""" + if not callable(formatter): + formatter = self._formatter_for_name(formatter) + if formatter is None: + output = s else: - c = HTMLFormatter - if callable(formatter): - return c(entity_substitution=formatter) - return c.REGISTRY[formatter] + output = formatter(s) + return output @property def _is_xml(self): """Is this element part of an XML tree or an HTML tree? - This is used in formatter_for_name, when deciding whether an - XMLFormatter or HTMLFormatter is more appropriate. It can be + This is used when mapping a formatter name ("minimal") to an + appropriate function (one that performs entity-substitution on + the contents of <script> and <style> tags, or not). It can be inefficient, but it should be called very rarely. """ if self.known_xml is not None: @@ -185,13 +191,48 @@ class PageElement(object): return getattr(self, 'is_xml', False) return self.parent._is_xml + def _formatter_for_name(self, name): + "Look up a formatter function based on its name and the tree." + if self._is_xml: + return self.XML_FORMATTERS.get( + name, EntitySubstitution.substitute_xml) + else: + return self.HTML_FORMATTERS.get( + name, HTMLAwareEntitySubstitution.substitute_xml) + + def setup(self, parent=None, previous_element=None, next_element=None, + previous_sibling=None, next_sibling=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + + self.previous_element = previous_element + if previous_element is not None: + self.previous_element.next_element = self + + self.next_element = next_element + if self.next_element: + self.next_element.previous_element = self + + self.next_sibling = next_sibling + if self.next_sibling: + self.next_sibling.previous_sibling = self + + if (not previous_sibling + and self.parent is not None and self.parent.contents): + previous_sibling = self.parent.contents[-1] + + self.previous_sibling = previous_sibling + if previous_sibling: + self.previous_sibling.next_sibling = self + nextSibling = _alias("next_sibling") # BS3 previousSibling = _alias("previous_sibling") # BS3 def replace_with(self, replace_with): - if self.parent is None: + if not self.parent: raise ValueError( - "Cannot replace one element with another when the " + "Cannot replace one element with another when the" "element to be replaced is not part of a tree.") if replace_with is self: return @@ -206,7 +247,7 @@ class PageElement(object): def unwrap(self): my_parent = self.parent - if self.parent is None: + if not self.parent: raise ValueError( "Cannot replace an element with its contents when that" "element is not part of a tree.") @@ -254,7 +295,7 @@ class PageElement(object): def _last_descendant(self, is_initialized=True, accept_self=True): "Finds the last element beneath this object to be parsed." - if is_initialized and self.next_sibling is not None: + if is_initialized and self.next_sibling: last_child = self.next_sibling.previous_element else: last_child = self @@ -271,18 +312,10 @@ class PageElement(object): raise ValueError("Cannot insert None into a tag.") if new_child is self: raise ValueError("Cannot insert a tag into itself.") - if (isinstance(new_child, str) + if (isinstance(new_child, basestring) and not isinstance(new_child, NavigableString)): new_child = NavigableString(new_child) - from bs4 import BeautifulSoup - if isinstance(new_child, BeautifulSoup): - # We don't want to end up with a situation where one BeautifulSoup - # object contains another. Insert the children one at a time. - for subchild in list(new_child.contents): - self.insert(position, subchild) - position += 1 - return position = min(position, len(self.contents)) if hasattr(new_child, 'parent') and new_child.parent is not None: # We're 'inserting' an element that's already one @@ -344,54 +377,43 @@ class PageElement(object): """Appends the given tag to the contents of this tag.""" self.insert(len(self.contents), tag) - def extend(self, tags): - """Appends the given tags to the contents of this tag.""" - for tag in tags: - self.append(tag) + def insert_before(self, predecessor): + """Makes the given element the immediate predecessor of this one. - def insert_before(self, *args): - """Makes the given element(s) the immediate predecessor of this one. - - The elements will have the same parent, and the given elements + The two elements will have the same parent, and the given element will be immediately before this one. """ + if self is predecessor: + raise ValueError("Can't insert an element before itself.") parent = self.parent if parent is None: raise ValueError( "Element has no parent, so 'before' has no meaning.") - if any(x is self for x in args): - raise ValueError("Can't insert an element before itself.") - for predecessor in args: - # Extract first so that the index won't be screwed up if they - # are siblings. - if isinstance(predecessor, PageElement): - predecessor.extract() - index = parent.index(self) - parent.insert(index, predecessor) - - def insert_after(self, *args): - """Makes the given element(s) the immediate successor of this one. - - The elements will have the same parent, and the given elements + # Extract first so that the index won't be screwed up if they + # are siblings. + if isinstance(predecessor, PageElement): + predecessor.extract() + index = parent.index(self) + parent.insert(index, predecessor) + + def insert_after(self, successor): + """Makes the given element the immediate successor of this one. + + The two elements will have the same parent, and the given element will be immediately after this one. """ - # Do all error checking before modifying the tree. + if self is successor: + raise ValueError("Can't insert an element after itself.") parent = self.parent if parent is None: raise ValueError( "Element has no parent, so 'after' has no meaning.") - if any(x is self for x in args): - raise ValueError("Can't insert an element after itself.") - - offset = 0 - for successor in args: - # Extract first so that the index won't be screwed up if they - # are siblings. - if isinstance(successor, PageElement): - successor.extract() - index = parent.index(self) - parent.insert(index+1+offset, successor) - offset += 1 + # Extract first so that the index won't be screwed up if they + # are siblings. + if isinstance(successor, PageElement): + successor.extract() + index = parent.index(self) + parent.insert(index+1, successor) def find_next(self, name=None, attrs={}, text=None, **kwargs): """Returns the first item that matches the given criteria and @@ -511,24 +533,17 @@ class PageElement(object): result = (element for element in generator if isinstance(element, Tag)) return ResultSet(strainer, result) - elif isinstance(name, str): + elif isinstance(name, basestring): # Optimization to find all tags with a given name. if name.count(':') == 1: - # This is a name with a prefix. If this is a namespace-aware document, - # we need to match the local name against tag.name. If not, - # we need to match the fully-qualified name against tag.name. - prefix, local_name = name.split(':', 1) + # This is a name with a prefix. + prefix, name = name.split(':', 1) else: prefix = None - local_name = name result = (element for element in generator if isinstance(element, Tag) - and ( - element.name == name - ) or ( - element.name == local_name - and (prefix is None or element.prefix == prefix) - ) + and element.name == name + and (prefix is None or element.prefix == prefix) ) return ResultSet(strainer, result) results = ResultSet(strainer) @@ -582,6 +597,82 @@ class PageElement(object): yield i i = i.parent + # Methods for supporting CSS selectors. + + tag_name_re = re.compile('^[a-zA-Z0-9][-.a-zA-Z0-9:_]*$') + + # /^([a-zA-Z0-9][-.a-zA-Z0-9:_]*)\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/ + # \---------------------------/ \---/\-------------/ \-------/ + # | | | | + # | | | The value + # | | ~,|,^,$,* or = + # | Attribute + # Tag + attribselect_re = re.compile( + r'^(?P<tag>[a-zA-Z0-9][-.a-zA-Z0-9:_]*)?\[(?P<attribute>[\w-]+)(?P<operator>[=~\|\^\$\*]?)' + + r'=?"?(?P<value>[^\]"]*)"?\]$' + ) + + def _attr_value_as_string(self, value, default=None): + """Force an attribute value into a string representation. + + A multi-valued attribute will be converted into a + space-separated stirng. + """ + value = self.get(value, default) + if isinstance(value, list) or isinstance(value, tuple): + value =" ".join(value) + return value + + def _tag_name_matches_and(self, function, tag_name): + if not tag_name: + return function + else: + def _match(tag): + return tag.name == tag_name and function(tag) + return _match + + def _attribute_checker(self, operator, attribute, value=''): + """Create a function that performs a CSS selector operation. + + Takes an operator, attribute and optional value. Returns a + function that will return True for elements that match that + combination. + """ + if operator == '=': + # string representation of `attribute` is equal to `value` + return lambda el: el._attr_value_as_string(attribute) == value + elif operator == '~': + # space-separated list representation of `attribute` + # contains `value` + def _includes_value(element): + attribute_value = element.get(attribute, []) + if not isinstance(attribute_value, list): + attribute_value = attribute_value.split() + return value in attribute_value + return _includes_value + elif operator == '^': + # string representation of `attribute` starts with `value` + return lambda el: el._attr_value_as_string( + attribute, '').startswith(value) + elif operator == '$': + # string representation of `attribute` ends with `value` + return lambda el: el._attr_value_as_string( + attribute, '').endswith(value) + elif operator == '*': + # string representation of `attribute` contains `value` + return lambda el: value in el._attr_value_as_string(attribute, '') + elif operator == '|': + # string representation of `attribute` is either exactly + # `value` or starts with `value` and then a dash. + def _is_or_starts_with_dash(element): + attribute_value = element._attr_value_as_string(attribute, '') + return (attribute_value == value or attribute_value.startswith( + value + '-')) + return _is_or_starts_with_dash + else: + return lambda el: el.has_attr(attribute) + # Old non-property versions of the generators, for backwards # compatibility with BS3. def nextGenerator(self): @@ -600,7 +691,7 @@ class PageElement(object): return self.parents -class NavigableString(str, PageElement): +class NavigableString(unicode, PageElement): PREFIX = '' SUFFIX = '' @@ -618,10 +709,10 @@ class NavigableString(str, PageElement): passed in to the superclass's __new__ or the superclass won't know how to handle non-ASCII characters. """ - if isinstance(value, str): - u = str.__new__(cls, value) + if isinstance(value, unicode): + u = unicode.__new__(cls, value) else: - u = str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + u = unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) u.setup() return u @@ -632,7 +723,7 @@ class NavigableString(str, PageElement): return type(self)(self) def __getnewargs__(self): - return (str(self),) + return (unicode(self),) def __getattr__(self, attr): """text.string gives you text. This is for backwards @@ -646,7 +737,6 @@ class NavigableString(str, PageElement): self.__class__.__name__, attr)) def output_ready(self, formatter="minimal"): - """Run the string through the provided formatter.""" output = self.format_string(self, formatter) return self.PREFIX + output + self.SUFFIX @@ -665,39 +755,37 @@ class PreformattedString(NavigableString): but the return value will be ignored. """ - def output_ready(self, formatter=None): - """CData strings are passed into the formatter, purely - for any side effects. The return value is ignored. - """ - if formatter is not None: - ignore = self.format_string(self, formatter) + def output_ready(self, formatter="minimal"): + """CData strings are passed into the formatter. + But the return value is ignored.""" + self.format_string(self, formatter) return self.PREFIX + self + self.SUFFIX class CData(PreformattedString): - PREFIX = '<![CDATA[' - SUFFIX = ']]>' + PREFIX = u'<![CDATA[' + SUFFIX = u']]>' class ProcessingInstruction(PreformattedString): """A SGML processing instruction.""" - PREFIX = '<?' - SUFFIX = '>' + PREFIX = u'<?' + SUFFIX = u'>' class XMLProcessingInstruction(ProcessingInstruction): """An XML processing instruction.""" - PREFIX = '<?' - SUFFIX = '?>' + PREFIX = u'<?' + SUFFIX = u'?>' class Comment(PreformattedString): - PREFIX = '<!--' - SUFFIX = '-->' + PREFIX = u'<!--' + SUFFIX = u'-->' class Declaration(PreformattedString): - PREFIX = '<?' - SUFFIX = '?>' + PREFIX = u'<?' + SUFFIX = u'?>' class Doctype(PreformattedString): @@ -714,8 +802,8 @@ class Doctype(PreformattedString): return Doctype(value) - PREFIX = '<!DOCTYPE ' - SUFFIX = '>\n' + PREFIX = u'<!DOCTYPE ' + SUFFIX = u'>\n' class Tag(PageElement): @@ -738,6 +826,14 @@ class Tag(PageElement): self.name = name self.namespace = namespace self.prefix = prefix + if builder is not None: + preserve_whitespace_tags = builder.preserve_whitespace_tags + else: + if is_xml: + preserve_whitespace_tags = [] + else: + preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags + self.preserve_whitespace_tags = preserve_whitespace_tags if attrs is None: attrs = {} elif attrs: @@ -760,32 +856,13 @@ class Tag(PageElement): self.setup(parent, previous) self.hidden = False - if builder is None: - # In the absence of a TreeBuilder, assume this tag is nothing - # special. - self.can_be_empty_element = False - self.cdata_list_attributes = None - else: - # Set up any substitutions for this tag, such as the charset in a META tag. + # Set up any substitutions, such as the charset in a META tag. + if builder is not None: builder.set_up_substitutions(self) - - # Ask the TreeBuilder whether this tag might be an empty-element tag. self.can_be_empty_element = builder.can_be_empty_element(name) + else: + self.can_be_empty_element = False - # Keep track of the list of attributes of this tag that - # might need to be treated as a list. - # - # For performance reasons, we store the whole data structure - # rather than asking the question of every tag. Asking would - # require building a new data structure every time, and - # (unlike can_be_empty_element), we almost never need - # to check this. - self.cdata_list_attributes = builder.cdata_list_attributes - - # Keep track of the names that might cause this tag to be treated as a - # whitespace-preserved tag. - self.preserve_whitespace_tags = builder.preserve_whitespace_tags - parserClass = _alias("parser_class") # BS3 def __copy__(self): @@ -865,7 +942,7 @@ class Tag(PageElement): for string in self._all_strings(True): yield string - def get_text(self, separator="", strip=False, + def get_text(self, separator=u"", strip=False, types=(NavigableString, CData)): """ Get all child strings, concatenated using the given separator. @@ -899,43 +976,6 @@ class Tag(PageElement): for element in self.contents[:]: element.extract() - def smooth(self): - """Smooth out this element's children by consolidating consecutive strings. - - This makes pretty-printed output look more natural following a - lot of operations that modified the tree. - """ - # Mark the first position of every pair of children that need - # to be consolidated. Do this rather than making a copy of - # self.contents, since in most cases very few strings will be - # affected. - marked = [] - for i, a in enumerate(self.contents): - if isinstance(a, Tag): - # Recursively smooth children. - a.smooth() - if i == len(self.contents)-1: - # This is the last item in .contents, and it's not a - # tag. There's no chance it needs any work. - continue - b = self.contents[i+1] - if (isinstance(a, NavigableString) - and isinstance(b, NavigableString) - and not isinstance(a, PreformattedString) - and not isinstance(b, PreformattedString) - ): - marked.append(i) - - # Go over the marked positions in reverse order, so that - # removing items from .contents won't affect the remaining - # positions. - for i in reversed(marked): - a = self.contents[i] - b = self.contents[i+1] - b.extract() - n = NavigableString(a+b) - a.replace_with(n) - def index(self, element): """ Find the index of a child by identity, not value. Avoids issues with @@ -981,7 +1021,7 @@ class Tag(PageElement): def __contains__(self, x): return x in self.contents - def __bool__(self): + def __nonzero__(self): "A tag is non-None even if it has no contents." return True @@ -1006,10 +1046,8 @@ class Tag(PageElement): # BS3: soup.aTag -> "soup.find("a") tag_name = tag[:-3] warnings.warn( - '.%(name)sTag is deprecated, use .find("%(name)s") instead. If you really were looking for a tag called %(name)sTag, use .find("%(name)sTag")' % dict( - name=tag_name - ) - ) + '.%sTag is deprecated, use .find("%s") instead.' % ( + tag_name, tag_name)) return self.find(tag_name) # We special case contents to avoid recursion. elif not tag.startswith("__") and not tag == "contents": @@ -1070,6 +1108,14 @@ class Tag(PageElement): u = self.decode(indent_level, encoding, formatter) return u.encode(encoding, errors) + def _should_pretty_print(self, indent_level): + """Should this tag be pretty-printed?""" + + return ( + indent_level is not None + and self.name not in self.preserve_whitespace_tags + ) + def decode(self, indent_level=None, eventual_encoding=DEFAULT_OUTPUT_ENCODING, formatter="minimal"): @@ -1083,32 +1129,31 @@ class Tag(PageElement): encoding. """ - # First off, turn a non-Formatter `formatter` into a Formatter - # object. This will stop the lookup from happening over and - # over again. - if not isinstance(formatter, Formatter): - formatter = self.formatter_for_name(formatter) - attributes = formatter.attributes(self) + # First off, turn a string formatter into a function. This + # will stop the lookup from happening over and over again. + if not callable(formatter): + formatter = self._formatter_for_name(formatter) + attrs = [] - for key, val in attributes: - if val is None: - decoded = key - else: - if isinstance(val, list) or isinstance(val, tuple): - val = ' '.join(val) - elif not isinstance(val, str): - val = str(val) - elif ( + if self.attrs: + for key, val in sorted(self.attrs.items()): + if val is None: + decoded = key + else: + if isinstance(val, list) or isinstance(val, tuple): + val = ' '.join(val) + elif not isinstance(val, basestring): + val = unicode(val) + elif ( isinstance(val, AttributeValueWithCharsetSubstitution) - and eventual_encoding is not None - ): - val = val.encode(eventual_encoding) - - text = formatter.attribute_value(val) - decoded = ( - str(key) + '=' - + formatter.quoted_attribute_value(text)) - attrs.append(decoded) + and eventual_encoding is not None): + val = val.encode(eventual_encoding) + + text = self.format_string(val, formatter) + decoded = ( + unicode(key) + '=' + + EntitySubstitution.quoted_attribute_value(text)) + attrs.append(decoded) close = '' closeTag = '' @@ -1117,7 +1162,7 @@ class Tag(PageElement): prefix = self.prefix + ":" if self.is_empty_element: - close = formatter.void_element_close_prefix or '' + close = '/' else: closeTag = '</%s%s>' % (prefix, self.name) @@ -1132,8 +1177,7 @@ class Tag(PageElement): else: indent_contents = None contents = self.decode_contents( - indent_contents, eventual_encoding, formatter - ) + indent_contents, eventual_encoding, formatter) if self.hidden: # This is the 'document root' object. @@ -1165,13 +1209,6 @@ class Tag(PageElement): s = ''.join(s) return s - def _should_pretty_print(self, indent_level): - """Should this tag be pretty-printed?""" - return ( - indent_level is not None - and self.name not in self.preserve_whitespace_tags - ) - def prettify(self, encoding=None, formatter="minimal"): if encoding is None: return self.decode(True, formatter=formatter) @@ -1187,19 +1224,19 @@ class Tag(PageElement): indented this many spaces. :param eventual_encoding: The tag is destined to be - encoded into this encoding. decode_contents() is _not_ + encoded into this encoding. This method is _not_ responsible for performing that encoding. This information is passed in so that it can be substituted in if the document contains a <META> tag that mentions the document's encoding. - :param formatter: A Formatter object, or a string naming one of - the standard Formatters. + :param formatter: The output formatter responsible for converting + entities to Unicode characters. """ - # First off, turn a string formatter into a Formatter object. This + # First off, turn a string formatter into a function. This # will stop the lookup from happening over and over again. - if not isinstance(formatter, Formatter): - formatter = self.formatter_for_name(formatter) + if not callable(formatter): + formatter = self._formatter_for_name(formatter) pretty_print = (indent_level is not None) s = [] @@ -1210,19 +1247,16 @@ class Tag(PageElement): elif isinstance(c, Tag): s.append(c.decode(indent_level, eventual_encoding, formatter)) - preserve_whitespace = ( - self.preserve_whitespace_tags and self.name in self.preserve_whitespace_tags - ) - if text and indent_level and not preserve_whitespace: + if text and indent_level and not self.name == 'pre': text = text.strip() if text: - if pretty_print and not preserve_whitespace: + if pretty_print and not self.name == 'pre': s.append(" " * (indent_level - 1)) s.append(text) - if pretty_print and not preserve_whitespace: + if pretty_print and not self.name == 'pre': s.append("\n") return ''.join(s) - + def encode_contents( self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING, formatter="minimal"): @@ -1297,41 +1331,236 @@ class Tag(PageElement): current = current.next_element # CSS selector code - def select_one(self, selector, namespaces=None, **kwargs): + + _selector_combinators = ['>', '+', '~'] + _select_debug = False + quoted_colon = re.compile('"[^"]*:[^"]*"') + def select_one(self, selector): """Perform a CSS selection operation on the current element.""" - value = self.select(selector, namespaces, 1, **kwargs) + value = self.select(selector, limit=1) if value: return value[0] return None - def select(self, selector, namespaces=None, limit=None, **kwargs): - """Perform a CSS selection operation on the current element. + def select(self, selector, _candidate_generator=None, limit=None): + """Perform a CSS selection operation on the current element.""" - This uses the SoupSieve library. + # Handle grouping selectors if ',' exists, ie: p,a + if ',' in selector: + context = [] + for partial_selector in selector.split(','): + partial_selector = partial_selector.strip() + if partial_selector == '': + raise ValueError('Invalid group selection syntax: %s' % selector) + candidates = self.select(partial_selector, limit=limit) + for candidate in candidates: + if candidate not in context: + context.append(candidate) + + if limit and len(context) >= limit: + break + return context + tokens = shlex.split(selector) + current_context = [self] - :param selector: A string containing a CSS selector. + if tokens[-1] in self._selector_combinators: + raise ValueError( + 'Final combinator "%s" is missing an argument.' % tokens[-1]) - :param namespaces: A dictionary mapping namespace prefixes - used in the CSS selector to namespace URIs. By default, - Beautiful Soup will use the prefixes it encountered while - parsing the document. + if self._select_debug: + print 'Running CSS selector "%s"' % selector - :param limit: After finding this number of results, stop looking. + for index, token in enumerate(tokens): + new_context = [] + new_context_ids = set([]) - :param kwargs: Any extra arguments you'd like to pass in to - soupsieve.select(). - """ - if namespaces is None: - namespaces = self._namespaces - - if limit is None: - limit = 0 - if soupsieve is None: - raise NotImplementedError( - "Cannot execute CSS selectors because the soupsieve package is not installed." - ) - - return soupsieve.select(selector, self, namespaces, limit, **kwargs) + if tokens[index-1] in self._selector_combinators: + # This token was consumed by the previous combinator. Skip it. + if self._select_debug: + print ' Token was consumed by the previous combinator.' + continue + + if self._select_debug: + print ' Considering token "%s"' % token + recursive_candidate_generator = None + tag_name = None + + # Each operation corresponds to a checker function, a rule + # for determining whether a candidate matches the + # selector. Candidates are generated by the active + # iterator. + checker = None + + m = self.attribselect_re.match(token) + if m is not None: + # Attribute selector + tag_name, attribute, operator, value = m.groups() + checker = self._attribute_checker(operator, attribute, value) + + elif '#' in token: + # ID selector + tag_name, tag_id = token.split('#', 1) + def id_matches(tag): + return tag.get('id', None) == tag_id + checker = id_matches + + elif '.' in token: + # Class selector + tag_name, klass = token.split('.', 1) + classes = set(klass.split('.')) + def classes_match(candidate): + return classes.issubset(candidate.get('class', [])) + checker = classes_match + + elif ':' in token and not self.quoted_colon.search(token): + # Pseudo-class + tag_name, pseudo = token.split(':', 1) + if tag_name == '': + raise ValueError( + "A pseudo-class must be prefixed with a tag name.") + pseudo_attributes = re.match('([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo) + found = [] + if pseudo_attributes is None: + pseudo_type = pseudo + pseudo_value = None + else: + pseudo_type, pseudo_value = pseudo_attributes.groups() + if pseudo_type == 'nth-of-type': + try: + pseudo_value = int(pseudo_value) + except: + raise NotImplementedError( + 'Only numeric values are currently supported for the nth-of-type pseudo-class.') + if pseudo_value < 1: + raise ValueError( + 'nth-of-type pseudo-class value must be at least 1.') + class Counter(object): + def __init__(self, destination): + self.count = 0 + self.destination = destination + + def nth_child_of_type(self, tag): + self.count += 1 + if self.count == self.destination: + return True + else: + return False + checker = Counter(pseudo_value).nth_child_of_type + else: + raise NotImplementedError( + 'Only the following pseudo-classes are implemented: nth-of-type.') + + elif token == '*': + # Star selector -- matches everything + pass + elif token == '>': + # Run the next token as a CSS selector against the + # direct children of each tag in the current context. + recursive_candidate_generator = lambda tag: tag.children + elif token == '~': + # Run the next token as a CSS selector against the + # siblings of each tag in the current context. + recursive_candidate_generator = lambda tag: tag.next_siblings + elif token == '+': + # For each tag in the current context, run the next + # token as a CSS selector against the tag's next + # sibling that's a tag. + def next_tag_sibling(tag): + yield tag.find_next_sibling(True) + recursive_candidate_generator = next_tag_sibling + + elif self.tag_name_re.match(token): + # Just a tag name. + tag_name = token + else: + raise ValueError( + 'Unsupported or invalid CSS selector: "%s"' % token) + if recursive_candidate_generator: + # This happens when the selector looks like "> foo". + # + # The generator calls select() recursively on every + # member of the current context, passing in a different + # candidate generator and a different selector. + # + # In the case of "> foo", the candidate generator is + # one that yields a tag's direct children (">"), and + # the selector is "foo". + next_token = tokens[index+1] + def recursive_select(tag): + if self._select_debug: + print ' Calling select("%s") recursively on %s %s' % (next_token, tag.name, tag.attrs) + print '-' * 40 + for i in tag.select(next_token, recursive_candidate_generator): + if self._select_debug: + print '(Recursive select picked up candidate %s %s)' % (i.name, i.attrs) + yield i + if self._select_debug: + print '-' * 40 + _use_candidate_generator = recursive_select + elif _candidate_generator is None: + # By default, a tag's candidates are all of its + # children. If tag_name is defined, only yield tags + # with that name. + if self._select_debug: + if tag_name: + check = "[any]" + else: + check = tag_name + print ' Default candidate generator, tag name="%s"' % check + if self._select_debug: + # This is redundant with later code, but it stops + # a bunch of bogus tags from cluttering up the + # debug log. + def default_candidate_generator(tag): + for child in tag.descendants: + if not isinstance(child, Tag): + continue + if tag_name and not child.name == tag_name: + continue + yield child + _use_candidate_generator = default_candidate_generator + else: + _use_candidate_generator = lambda tag: tag.descendants + else: + _use_candidate_generator = _candidate_generator + + count = 0 + for tag in current_context: + if self._select_debug: + print " Running candidate generator on %s %s" % ( + tag.name, repr(tag.attrs)) + for candidate in _use_candidate_generator(tag): + if not isinstance(candidate, Tag): + continue + if tag_name and candidate.name != tag_name: + continue + if checker is not None: + try: + result = checker(candidate) + except StopIteration: + # The checker has decided we should no longer + # run the generator. + break + if checker is None or result: + if self._select_debug: + print " SUCCESS %s %s" % (candidate.name, repr(candidate.attrs)) + if id(candidate) not in new_context_ids: + # If a tag matches a selector more than once, + # don't include it in the context more than once. + new_context.append(candidate) + new_context_ids.add(id(candidate)) + elif self._select_debug: + print " FAILURE %s %s" % (candidate.name, repr(candidate.attrs)) + + current_context = new_context + if limit and len(current_context) >= limit: + current_context = current_context[:limit] + + if self._select_debug: + print "Final verdict:" + for i in current_context: + print " %s %s" % (i.name, i.attrs) + return current_context # Old names for backwards compatibility def childGenerator(self): @@ -1374,7 +1603,7 @@ class SoupStrainer(object): else: attrs = kwargs normalized_attrs = {} - for key, value in list(attrs.items()): + for key, value in attrs.items(): normalized_attrs[key] = self._normalize_search_value(value) self.attrs = normalized_attrs @@ -1383,7 +1612,7 @@ class SoupStrainer(object): def _normalize_search_value(self, value): # Leave it alone if it's a Unicode string, a callable, a # regular expression, a boolean, or None. - if (isinstance(value, str) or isinstance(value, Callable) or hasattr(value, 'match') + if (isinstance(value, unicode) or callable(value) or hasattr(value, 'match') or isinstance(value, bool) or value is None): return value @@ -1396,7 +1625,7 @@ class SoupStrainer(object): new_value = [] for v in value: if (hasattr(v, '__iter__') and not isinstance(v, bytes) - and not isinstance(v, str)): + and not isinstance(v, unicode)): # This is almost certainly the user's mistake. In the # interests of avoiding infinite loops, we'll let # it through as-is rather than doing a recursive call. @@ -1408,7 +1637,7 @@ class SoupStrainer(object): # Otherwise, convert it into a Unicode string. # The unicode(str()) thing is so this will do the same thing on Python 2 # and Python 3. - return str(str(value)) + return unicode(str(value)) def __str__(self): if self.text: @@ -1423,7 +1652,7 @@ class SoupStrainer(object): markup = markup_name markup_attrs = markup call_function_with_tag_data = ( - isinstance(self.name, Callable) + isinstance(self.name, collections.Callable) and not isinstance(markup_name, Tag)) if ((not self.name) @@ -1462,7 +1691,7 @@ class SoupStrainer(object): found = None # If given a list of items, scan it for a text element that # matches. - if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, str)): + if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)): for element in markup: if isinstance(element, NavigableString) \ and self.search(element): @@ -1475,7 +1704,7 @@ class SoupStrainer(object): found = self.search_tag(markup) # If it's text, make sure the text matches. elif isinstance(markup, NavigableString) or \ - isinstance(markup, str): + isinstance(markup, basestring): if not self.name and not self.attrs and self._matches(markup, self.text): found = markup else: @@ -1503,7 +1732,7 @@ class SoupStrainer(object): # True matches any non-None value. return markup is not None - if isinstance(match_against, Callable): + if isinstance(match_against, collections.Callable): return match_against(markup) # Custom callables take the tag as an argument, but all @@ -1520,7 +1749,7 @@ class SoupStrainer(object): return not match_against if (hasattr(match_against, '__iter__') - and not isinstance(match_against, str)): + and not isinstance(match_against, basestring)): # We're asked to match against an iterable of items. # The markup must be match at least one item in the # iterable. We'll try each one in turn. @@ -1547,7 +1776,7 @@ class SoupStrainer(object): # the tag's name and once against its prefixed name. match = False - if not match and isinstance(match_against, str): + if not match and isinstance(match_against, unicode): # Exact string match match = markup == match_against diff --git a/libs/bs4/formatter.py b/libs/bs4/formatter.py deleted file mode 100644 index 7dbaa3850..000000000 --- a/libs/bs4/formatter.py +++ /dev/null @@ -1,99 +0,0 @@ -from bs4.dammit import EntitySubstitution - -class Formatter(EntitySubstitution): - """Describes a strategy to use when outputting a parse tree to a string. - - Some parts of this strategy come from the distinction between - HTML4, HTML5, and XML. Others are configurable by the user. - """ - # Registries of XML and HTML formatters. - XML_FORMATTERS = {} - HTML_FORMATTERS = {} - - HTML = 'html' - XML = 'xml' - - HTML_DEFAULTS = dict( - cdata_containing_tags=set(["script", "style"]), - ) - - def _default(self, language, value, kwarg): - if value is not None: - return value - if language == self.XML: - return set() - return self.HTML_DEFAULTS[kwarg] - - def __init__( - self, language=None, entity_substitution=None, - void_element_close_prefix='/', cdata_containing_tags=None, - ): - """ - - :param void_element_close_prefix: By default, represent void - elements as <tag/> rather than <tag> - """ - self.language = language - self.entity_substitution = entity_substitution - self.void_element_close_prefix = void_element_close_prefix - self.cdata_containing_tags = self._default( - language, cdata_containing_tags, 'cdata_containing_tags' - ) - - def substitute(self, ns): - """Process a string that needs to undergo entity substitution.""" - if not self.entity_substitution: - return ns - from .element import NavigableString - if (isinstance(ns, NavigableString) - and ns.parent is not None - and ns.parent.name in self.cdata_containing_tags): - # Do nothing. - return ns - # Substitute. - return self.entity_substitution(ns) - - def attribute_value(self, value): - """Process the value of an attribute.""" - return self.substitute(value) - - def attributes(self, tag): - """Reorder a tag's attributes however you want.""" - return sorted(tag.attrs.items()) - - -class HTMLFormatter(Formatter): - REGISTRY = {} - def __init__(self, *args, **kwargs): - return super(HTMLFormatter, self).__init__(self.HTML, *args, **kwargs) - - -class XMLFormatter(Formatter): - REGISTRY = {} - def __init__(self, *args, **kwargs): - return super(XMLFormatter, self).__init__(self.XML, *args, **kwargs) - - -# Set up aliases for the default formatters. -HTMLFormatter.REGISTRY['html'] = HTMLFormatter( - entity_substitution=EntitySubstitution.substitute_html -) -HTMLFormatter.REGISTRY["html5"] = HTMLFormatter( - entity_substitution=EntitySubstitution.substitute_html, - void_element_close_prefix = None -) -HTMLFormatter.REGISTRY["minimal"] = HTMLFormatter( - entity_substitution=EntitySubstitution.substitute_xml -) -HTMLFormatter.REGISTRY[None] = HTMLFormatter( - entity_substitution=None -) -XMLFormatter.REGISTRY["html"] = XMLFormatter( - entity_substitution=EntitySubstitution.substitute_html -) -XMLFormatter.REGISTRY["minimal"] = XMLFormatter( - entity_substitution=EntitySubstitution.substitute_xml -) -XMLFormatter.REGISTRY[None] = Formatter( - Formatter(Formatter.XML, entity_substitution=None) -) diff --git a/libs/bs4/testing.py b/libs/bs4/testing.py index cc9966601..6ba2506c4 100644 --- a/libs/bs4/testing.py +++ b/libs/bs4/testing.py @@ -1,7 +1,7 @@ -# encoding: utf-8 """Helper classes for tests.""" -# Use of this source code is governed by the MIT license. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. __license__ = "MIT" import pickle @@ -16,66 +16,29 @@ from bs4.element import ( ContentMetaAttributeValue, Doctype, SoupStrainer, - Tag ) from bs4.builder import HTMLParserTreeBuilder default_builder = HTMLParserTreeBuilder -BAD_DOCUMENT = """A bare string -<!DOCTYPE xsl:stylesheet SYSTEM "htmlent.dtd"> -<!DOCTYPE xsl:stylesheet PUBLIC "htmlent.dtd"> -<div><![CDATA[A CDATA section where it doesn't belong]]></div> -<div><svg><![CDATA[HTML5 does allow CDATA sections in SVG]]></svg></div> -<div>A <meta> tag</div> -<div>A <br> tag that supposedly has contents.</br></div> -<div>AT&T</div> -<div><textarea>Within a textarea, markup like <b> tags and <&<& should be treated as literal</textarea></div> -<div><script>if (i < 2) { alert("<b>Markup within script tags should be treated as literal.</b>"); }</script></div> -<div>This numeric entity is missing the final semicolon: <x t="piñata"></div> -<div><a href="http://example.com/</a> that attribute value never got closed</div> -<div><a href="foo</a>, </a><a href="bar">that attribute value was closed by the subsequent tag</a></div> -<! This document starts with a bogus declaration ><div>a</div> -<div>This document contains <!an incomplete declaration <div>(do you see it?)</div> -<div>This document ends with <!an incomplete declaration -<div><a style={height:21px;}>That attribute value was bogus</a></div> -<! DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">The doctype is invalid because it contains extra whitespace -<div><table><td nowrap>That boolean attribute had no value</td></table></div> -<div>Here's a nonexistent entity: &#foo; (do you see it?)</div> -<div>This document ends before the entity finishes: > -<div><p>Paragraphs shouldn't contain block display elements, but this one does: <dl><dt>you see?</dt></p> -<b b="20" a="1" b="10" a="2" a="3" a="4">Multiple values for the same attribute.</b> -<div><table><tr><td>Here's a table</td></tr></table></div> -<div><table id="1"><tr><td>Here's a nested table:<table id="2"><tr><td>foo</td></tr></table></td></div> -<div>This tag contains nothing but whitespace: <b> </b></div> -<div><blockquote><p><b>This p tag is cut off by</blockquote></p>the end of the blockquote tag</div> -<div><table><div>This table contains bare markup</div></table></div> -<div><div id="1">\n <a href="link1">This link is never closed.\n</div>\n<div id="2">\n <div id="3">\n <a href="link2">This link is closed.</a>\n </div>\n</div></div> -<div>This document contains a <!DOCTYPE surprise>surprise doctype</div> -<div><a><B><Cd><EFG>Mixed case tags are folded to lowercase</efg></CD></b></A></div> -<div><our\u2603>Tag name contains Unicode characters</our\u2603></div> -<div><a \u2603="snowman">Attribute name contains Unicode characters</a></div> -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> -""" - class SoupTest(unittest.TestCase): @property def default_builder(self): - return default_builder + return default_builder() def soup(self, markup, **kwargs): """Build a Beautiful Soup object from markup.""" builder = kwargs.pop('builder', self.default_builder) return BeautifulSoup(markup, builder=builder, **kwargs) - def document_for(self, markup, **kwargs): + def document_for(self, markup): """Turn an HTML fragment into a document. The details depend on the builder. """ - return self.default_builder(**kwargs).test_fragment_to_document(markup) + return self.default_builder.test_fragment_to_document(markup) def assertSoupEquals(self, to_parse, compare_parsed_to=None): builder = self.default_builder @@ -96,121 +59,6 @@ class SoupTest(unittest.TestCase): self.assertEqual(earlier, e.previous_element) earlier = e - def linkage_validator(self, el, _recursive_call=False): - """Ensure proper linkage throughout the document.""" - descendant = None - # Document element should have no previous element or previous sibling. - # It also shouldn't have a next sibling. - if el.parent is None: - assert el.previous_element is None,\ - "Bad previous_element\nNODE: {}\nPREV: {}\nEXPECTED: {}".format( - el, el.previous_element, None - ) - assert el.previous_sibling is None,\ - "Bad previous_sibling\nNODE: {}\nPREV: {}\nEXPECTED: {}".format( - el, el.previous_sibling, None - ) - assert el.next_sibling is None,\ - "Bad next_sibling\nNODE: {}\nNEXT: {}\nEXPECTED: {}".format( - el, el.next_sibling, None - ) - - idx = 0 - child = None - last_child = None - last_idx = len(el.contents) - 1 - for child in el.contents: - descendant = None - - # Parent should link next element to their first child - # That child should have no previous sibling - if idx == 0: - if el.parent is not None: - assert el.next_element is child,\ - "Bad next_element\nNODE: {}\nNEXT: {}\nEXPECTED: {}".format( - el, el.next_element, child - ) - assert child.previous_element is el,\ - "Bad previous_element\nNODE: {}\nPREV: {}\nEXPECTED: {}".format( - child, child.previous_element, el - ) - assert child.previous_sibling is None,\ - "Bad previous_sibling\nNODE: {}\nPREV {}\nEXPECTED: {}".format( - child, child.previous_sibling, None - ) - - # If not the first child, previous index should link as sibling to this index - # Previous element should match the last index or the last bubbled up descendant - else: - assert child.previous_sibling is el.contents[idx - 1],\ - "Bad previous_sibling\nNODE: {}\nPREV {}\nEXPECTED {}".format( - child, child.previous_sibling, el.contents[idx - 1] - ) - assert el.contents[idx - 1].next_sibling is child,\ - "Bad next_sibling\nNODE: {}\nNEXT {}\nEXPECTED {}".format( - el.contents[idx - 1], el.contents[idx - 1].next_sibling, child - ) - - if last_child is not None: - assert child.previous_element is last_child,\ - "Bad previous_element\nNODE: {}\nPREV {}\nEXPECTED {}\nCONTENTS {}".format( - child, child.previous_element, last_child, child.parent.contents - ) - assert last_child.next_element is child,\ - "Bad next_element\nNODE: {}\nNEXT {}\nEXPECTED {}".format( - last_child, last_child.next_element, child - ) - - if isinstance(child, Tag) and child.contents: - descendant = self.linkage_validator(child, True) - # A bubbled up descendant should have no next siblings - assert descendant.next_sibling is None,\ - "Bad next_sibling\nNODE: {}\nNEXT {}\nEXPECTED {}".format( - descendant, descendant.next_sibling, None - ) - - # Mark last child as either the bubbled up descendant or the current child - if descendant is not None: - last_child = descendant - else: - last_child = child - - # If last child, there are non next siblings - if idx == last_idx: - assert child.next_sibling is None,\ - "Bad next_sibling\nNODE: {}\nNEXT {}\nEXPECTED {}".format( - child, child.next_sibling, None - ) - idx += 1 - - child = descendant if descendant is not None else child - if child is None: - child = el - - if not _recursive_call and child is not None: - target = el - while True: - if target is None: - assert child.next_element is None, \ - "Bad next_element\nNODE: {}\nNEXT {}\nEXPECTED {}".format( - child, child.next_element, None - ) - break - elif target.next_sibling is not None: - assert child.next_element is target.next_sibling, \ - "Bad next_element\nNODE: {}\nNEXT {}\nEXPECTED {}".format( - child, child.next_element, target.next_sibling - ) - break - target = target.parent - - # We are done, so nothing to return - return None - else: - # Return the child to the recursive caller - return child - - class HTMLTreeBuilderSmokeTest(object): """A basic test of a treebuilder's competence. @@ -232,7 +80,7 @@ class HTMLTreeBuilderSmokeTest(object): soup = self.soup("") new_tag = soup.new_tag(name) self.assertEqual(True, new_tag.is_empty_element) - + def test_pickle_and_unpickle_identity(self): # Pickling a tree, then unpickling it, yields a tree identical # to the original. @@ -302,20 +150,12 @@ class HTMLTreeBuilderSmokeTest(object): soup.encode("utf-8").replace(b"\n", b""), markup.replace(b"\n", b"")) - def test_namespaced_html(self): - """When a namespaced XML document is parsed as HTML it should - be treated as HTML with weird tag names. - """ - markup = b"""<ns1:foo>content</ns1:foo><ns1:foo/><ns2:foo/>""" - soup = self.soup(markup) - self.assertEqual(2, len(soup.find_all("ns1:foo"))) - def test_processing_instruction(self): # We test both Unicode and bytestring to verify that # process_markup correctly sets processing_instruction_class # even when the markup is already Unicode and there is no # need to process anything. - markup = """<?PITarget PIContent?>""" + markup = u"""<?PITarget PIContent?>""" soup = self.soup(markup) self.assertEqual(markup, soup.decode()) @@ -452,18 +292,6 @@ Hello, world! "<tbody><tr><td>Bar</td></tr></tbody>" "<tfoot><tr><td>Baz</td></tr></tfoot></table>") - def test_multivalued_attribute_with_whitespace(self): - # Whitespace separating the values of a multi-valued attribute - # should be ignored. - - markup = '<div class=" foo bar "></a>' - soup = self.soup(markup) - self.assertEqual(['foo', 'bar'], soup.div['class']) - - # If you search by the literal name of the class it's like the whitespace - # wasn't there. - self.assertEqual(soup.div, soup.find('div', class_="foo bar")) - def test_deeply_nested_multivalued_attribute(self): # html5lib can set the attributes of the same tag many times # as it rearranges the tree. This has caused problems with @@ -483,41 +311,15 @@ Hello, world! def test_angle_brackets_in_attribute_values_are_escaped(self): self.assertSoupEquals('<a b="<a>"></a>', '<a b="<a>"></a>') - def test_strings_resembling_character_entity_references(self): - # "&T" and "&p" look like incomplete character entities, but they are - # not. - self.assertSoupEquals( - "<p>• AT&T is in the s&p 500</p>", - "<p>\u2022 AT&T is in the s&p 500</p>" - ) - - def test_apos_entity(self): - self.assertSoupEquals( - "<p>Bob's Bar</p>", - "<p>Bob's Bar</p>", - ) - - def test_entities_in_foreign_document_encoding(self): - # “ and ” are invalid numeric entities referencing - # Windows-1252 characters. - references a character common - # to Windows-1252 and Unicode, and ☃ references a - # character only found in Unicode. - # - # All of these entities should be converted to Unicode - # characters. - markup = "<p>“Hello” -☃</p>" - soup = self.soup(markup) - self.assertEqual("“Hello” -☃", soup.p.string) - def test_entities_in_attributes_converted_to_unicode(self): - expect = '<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>' + expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>' self.assertSoupEquals('<p id="piñata"></p>', expect) self.assertSoupEquals('<p id="piñata"></p>', expect) self.assertSoupEquals('<p id="piñata"></p>', expect) self.assertSoupEquals('<p id="piñata"></p>', expect) def test_entities_in_text_converted_to_unicode(self): - expect = '<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>' + expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>' self.assertSoupEquals("<p>piñata</p>", expect) self.assertSoupEquals("<p>piñata</p>", expect) self.assertSoupEquals("<p>piñata</p>", expect) @@ -528,11 +330,11 @@ Hello, world! '<p>I said "good day!"</p>') def test_out_of_range_entity(self): - expect = "\N{REPLACEMENT CHARACTER}" + expect = u"\N{REPLACEMENT CHARACTER}" self.assertSoupEquals("�", expect) self.assertSoupEquals("�", expect) self.assertSoupEquals("�", expect) - + def test_multipart_strings(self): "Mostly to prevent a recurrence of a bug in the html5lib treebuilder." soup = self.soup("<html><h2>\nfoo</h2><p></p></html>") @@ -606,9 +408,9 @@ Hello, world! # A seemingly innocuous document... but it's in Unicode! And # it contains characters that can't be represented in the # encoding found in the declaration! The horror! - markup = '<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>' + markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>' soup = self.soup(markup) - self.assertEqual('Sacr\xe9 bleu!', soup.body.string) + self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string) def test_soupstrainer(self): """Parsers should be able to work with SoupStrainers.""" @@ -648,7 +450,7 @@ Hello, world! # Both XML and HTML entities are converted to Unicode characters # during parsing. text = "<p><<sacré bleu!>></p>" - expected = "<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>" + expected = u"<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>" self.assertSoupEquals(text, expected) def test_smart_quotes_converted_on_the_way_in(self): @@ -658,15 +460,15 @@ Hello, world! soup = self.soup(quote) self.assertEqual( soup.p.string, - "\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}") + u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}") def test_non_breaking_spaces_converted_on_the_way_in(self): soup = self.soup("<a> </a>") - self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2) + self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2) def test_entities_converted_on_the_way_out(self): text = "<p><<sacré bleu!>></p>" - expected = "<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>".encode("utf-8") + expected = u"<p><<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></p>".encode("utf-8") soup = self.soup(text) self.assertEqual(soup.p.encode("utf-8"), expected) @@ -675,7 +477,7 @@ Hello, world! # easy-to-understand document. # Here it is in Unicode. Note that it claims to be in ISO-Latin-1. - unicode_html = '<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>' + unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>' # That's because we're going to encode it into ISO-Latin-1, and use # that to test. @@ -784,13 +586,6 @@ Hello, world! data.a['foo'] = 'bar' self.assertEqual('<a foo="bar">text</a>', data.a.decode()) - def test_worst_case(self): - """Test the worst case (currently) for linking issues.""" - - soup = self.soup(BAD_DOCUMENT) - self.linkage_validator(soup) - - class XMLTreeBuilderSmokeTest(object): def test_pickle_and_unpickle_identity(self): @@ -829,17 +624,6 @@ class XMLTreeBuilderSmokeTest(object): self.assertEqual( soup.encode("utf-8"), markup) - def test_nested_namespaces(self): - doc = b"""<?xml version="1.0" encoding="utf-8"?> -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> -<parent xmlns="http://ns1/"> -<child xmlns="http://ns2/" xmlns:ns3="http://ns3/"> -<grandchild ns3:attr="value" xmlns="http://ns4/"/> -</child> -</parent>""" - soup = self.soup(doc) - self.assertEqual(doc, soup.encode()) - def test_formatter_processes_script_tag_for_xml_documents(self): doc = """ <script type="text/javascript"> @@ -853,15 +637,15 @@ class XMLTreeBuilderSmokeTest(object): self.assertTrue(b"< < hey > >" in encoded) def test_can_parse_unicode_document(self): - markup = '<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>' + markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>' soup = self.soup(markup) - self.assertEqual('Sacr\xe9 bleu!', soup.root.string) + self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string) def test_popping_namespaced_tag(self): markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>' soup = self.soup(markup) self.assertEqual( - str(soup.rss), markup) + unicode(soup.rss), markup) def test_docstring_includes_correct_encoding(self): soup = self.soup("<root/>") @@ -892,17 +676,17 @@ class XMLTreeBuilderSmokeTest(object): def test_closing_namespaced_tag(self): markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>' soup = self.soup(markup) - self.assertEqual(str(soup.p), markup) + self.assertEqual(unicode(soup.p), markup) def test_namespaced_attributes(self): markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>' soup = self.soup(markup) - self.assertEqual(str(soup.foo), markup) + self.assertEqual(unicode(soup.foo), markup) def test_namespaced_attributes_xml_namespace(self): markup = '<foo xml:lang="fr">bar</foo>' soup = self.soup(markup) - self.assertEqual(str(soup.foo), markup) + self.assertEqual(unicode(soup.foo), markup) def test_find_by_prefixed_name(self): doc = """<?xml version="1.0" encoding="utf-8"?> @@ -937,12 +721,6 @@ class XMLTreeBuilderSmokeTest(object): # The two tags have the same namespace prefix. self.assertEqual(tag.prefix, duplicate.prefix) - def test_worst_case(self): - """Test the worst case (currently) for linking issues.""" - - soup = self.soup(BAD_DOCUMENT) - self.linkage_validator(soup) - class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): """Smoke test for a tree builder that supports HTML5.""" diff --git a/libs/bs4/tests/test_html5lib.py b/libs/bs4/tests/test_html5lib.py index 96529b0b3..0f89d6244 100644 --- a/libs/bs4/tests/test_html5lib.py +++ b/libs/bs4/tests/test_html5lib.py @@ -5,7 +5,7 @@ import warnings try: from bs4.builder import HTML5TreeBuilder HTML5LIB_PRESENT = True -except ImportError as e: +except ImportError, e: HTML5LIB_PRESENT = False from bs4.element import SoupStrainer from bs4.testing import ( @@ -22,7 +22,7 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): @property def default_builder(self): - return HTML5TreeBuilder + return HTML5TreeBuilder() def test_soupstrainer(self): # The html5lib tree builder does not support SoupStrainers. @@ -74,14 +74,14 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): def test_reparented_markup(self): markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>' soup = self.soup(markup) - self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode()) + self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode()) self.assertEqual(2, len(soup.find_all('p'))) def test_reparented_markup_ends_with_whitespace(self): markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n' soup = self.soup(markup) - self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode()) + self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode()) self.assertEqual(2, len(soup.find_all('p'))) def test_reparented_markup_containing_identical_whitespace_nodes(self): @@ -127,44 +127,4 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): def test_foster_parenting(self): markup = b"""<table><td></tbody>A""" soup = self.soup(markup) - self.assertEqual("<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode()) - - def test_extraction(self): - """ - Test that extraction does not destroy the tree. - - https://bugs.launchpad.net/beautifulsoup/+bug/1782928 - """ - - markup = """ -<html><head></head> -<style> -</style><script></script><body><p>hello</p></body></html> -""" - soup = self.soup(markup) - [s.extract() for s in soup('script')] - [s.extract() for s in soup('style')] - - self.assertEqual(len(soup.find_all("p")), 1) - - def test_empty_comment(self): - """ - Test that empty comment does not break structure. - - https://bugs.launchpad.net/beautifulsoup/+bug/1806598 - """ - - markup = """ -<html> -<body> -<form> -<!----><input type="text"> -</form> -</body> -</html> -""" - soup = self.soup(markup) - inputs = [] - for form in soup.find_all('form'): - inputs.extend(form.find_all('input')) - self.assertEqual(len(inputs), 1) + self.assertEqual(u"<body>A<table><tbody><tr><td></td></tr></tbody></table></body>", soup.body.decode()) diff --git a/libs/bs4/tests/test_htmlparser.py b/libs/bs4/tests/test_htmlparser.py index 790489aa1..d5cf0253f 100644 --- a/libs/bs4/tests/test_htmlparser.py +++ b/libs/bs4/tests/test_htmlparser.py @@ -5,11 +5,12 @@ from pdb import set_trace import pickle from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest from bs4.builder import HTMLParserTreeBuilder -from bs4.builder._htmlparser import BeautifulSoupHTMLParser class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): - default_builder = HTMLParserTreeBuilder + @property + def default_builder(self): + return HTMLParserTreeBuilder() def test_namespaced_system_doctype(self): # html.parser can't handle namespaced doctypes, so skip this one. @@ -31,17 +32,3 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): def test_redundant_empty_element_closing_tags(self): self.assertSoupEquals('<br></br><br></br><br></br>', "<br/><br/><br/>") self.assertSoupEquals('</br></br></br>', "") - - def test_empty_element(self): - # This verifies that any buffered data present when the parser - # finishes working is handled. - self.assertSoupEquals("foo &# bar", "foo &# bar") - - -class TestHTMLParserSubclass(SoupTest): - def test_error(self): - """Verify that our HTMLParser subclass implements error() in a way - that doesn't cause a crash. - """ - parser = BeautifulSoupHTMLParser() - parser.error("don't crash") diff --git a/libs/bs4/tests/test_lxml.py b/libs/bs4/tests/test_lxml.py index 29da71149..a05870b91 100644 --- a/libs/bs4/tests/test_lxml.py +++ b/libs/bs4/tests/test_lxml.py @@ -7,7 +7,7 @@ try: import lxml.etree LXML_PRESENT = True LXML_VERSION = lxml.etree.LXML_VERSION -except ImportError as e: +except ImportError, e: LXML_PRESENT = False LXML_VERSION = (0,) @@ -36,7 +36,7 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): @property def default_builder(self): - return LXMLTreeBuilder + return LXMLTreeBuilder() def test_out_of_range_entity(self): self.assertSoupEquals( @@ -46,12 +46,6 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): self.assertSoupEquals( "<p>foo�bar</p>", "<p>foobar</p>") - def test_entities_in_foreign_document_encoding(self): - # We can't implement this case correctly because by the time we - # hear about markup like "“", it's been (incorrectly) converted into - # a string like u'\x93' - pass - # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this # test if an old version of lxml is installed. @@ -68,7 +62,7 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): # if one is installed. with warnings.catch_warnings(record=True) as w: soup = BeautifulStoneSoup("<b />") - self.assertEqual("<b/>", str(soup.b)) + self.assertEqual(u"<b/>", unicode(soup.b)) self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message)) @skipIf( @@ -79,22 +73,4 @@ class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest): @property def default_builder(self): - return LXMLTreeBuilderForXML - - def test_namespace_indexing(self): - # We should not track un-prefixed namespaces as we can only hold one - # and it will be recognized as the default namespace by soupsieve, - # which may be confusing in some situations. When no namespace is provided - # for a selector, the default namespace (if defined) is assumed. - - soup = self.soup( - '<?xml version="1.1"?>\n' - '<root>' - '<tag xmlns="http://unprefixed-namespace.com">content</tag>' - '<prefix:tag xmlns:prefix="http://prefixed-namespace.com">content</tag>' - '</root>' - ) - self.assertEqual( - soup._namespaces, - {'xml': 'http://www.w3.org/XML/1998/namespace', 'prefix': 'http://prefixed-namespace.com'} - ) + return LXMLTreeBuilderForXML() diff --git a/libs/bs4/tests/test_soup.py b/libs/bs4/tests/test_soup.py index 1eda9484b..f3e69edf3 100644 --- a/libs/bs4/tests/test_soup.py +++ b/libs/bs4/tests/test_soup.py @@ -24,7 +24,6 @@ from bs4.dammit import ( EncodingDetector, ) from bs4.testing import ( - default_builder, SoupTest, skipIf, ) @@ -33,7 +32,7 @@ import warnings try: from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML LXML_PRESENT = True -except ImportError as e: +except ImportError, e: LXML_PRESENT = False PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2)) @@ -41,86 +40,21 @@ PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2)) class TestConstructor(SoupTest): def test_short_unicode_input(self): - data = "<h1>éé</h1>" + data = u"<h1>éé</h1>" soup = self.soup(data) - self.assertEqual("éé", soup.h1.string) + self.assertEqual(u"éé", soup.h1.string) def test_embedded_null(self): - data = "<h1>foo\0bar</h1>" + data = u"<h1>foo\0bar</h1>" soup = self.soup(data) - self.assertEqual("foo\0bar", soup.h1.string) + self.assertEqual(u"foo\0bar", soup.h1.string) def test_exclude_encodings(self): - utf8_data = "Räksmörgås".encode("utf-8") + utf8_data = u"Räksmörgås".encode("utf-8") soup = self.soup(utf8_data, exclude_encodings=["utf-8"]) self.assertEqual("windows-1252", soup.original_encoding) - def test_custom_builder_class(self): - # Verify that you can pass in a custom Builder class and - # it'll be instantiated with the appropriate keyword arguments. - class Mock(object): - def __init__(self, **kwargs): - self.called_with = kwargs - self.is_xml = True - def initialize_soup(self, soup): - pass - def prepare_markup(self, *args, **kwargs): - return '' - - kwargs = dict( - var="value", - # This is a deprecated BS3-era keyword argument, which - # will be stripped out. - convertEntities=True, - ) - with warnings.catch_warnings(record=True): - soup = BeautifulSoup('', builder=Mock, **kwargs) - assert isinstance(soup.builder, Mock) - self.assertEqual(dict(var="value"), soup.builder.called_with) - - # You can also instantiate the TreeBuilder yourself. In this - # case, that specific object is used and any keyword arguments - # to the BeautifulSoup constructor are ignored. - builder = Mock(**kwargs) - with warnings.catch_warnings(record=True) as w: - soup = BeautifulSoup( - '', builder=builder, ignored_value=True, - ) - msg = str(w[0].message) - assert msg.startswith("Keyword arguments to the BeautifulSoup constructor will be ignored.") - self.assertEqual(builder, soup.builder) - self.assertEqual(kwargs, builder.called_with) - - def test_cdata_list_attributes(self): - # Most attribute values are represented as scalars, but the - # HTML standard says that some attributes, like 'class' have - # space-separated lists as values. - markup = '<a id=" an id " class=" a class "></a>' - soup = self.soup(markup) - - # Note that the spaces are stripped for 'class' but not for 'id'. - a = soup.a - self.assertEqual(" an id ", a['id']) - self.assertEqual(["a", "class"], a['class']) - - # TreeBuilder takes an argument called 'mutli_valued_attributes' which lets - # you customize or disable this. As always, you can customize the TreeBuilder - # by passing in a keyword argument to the BeautifulSoup constructor. - soup = self.soup(markup, builder=default_builder, multi_valued_attributes=None) - self.assertEqual(" a class ", soup.a['class']) - - # Here are two ways of saying that `id` is a multi-valued - # attribute in this context, but 'class' is not. - for switcheroo in ({'*': 'id'}, {'a': 'id'}): - with warnings.catch_warnings(record=True) as w: - # This will create a warning about not explicitly - # specifying a parser, but we'll ignore it. - soup = self.soup(markup, builder=None, multi_valued_attributes=switcheroo) - a = soup.a - self.assertEqual(["an", "id"], a['id']) - self.assertEqual(" a class ", a['class']) - - + class TestWarnings(SoupTest): def _no_parser_specified(self, s, is_there=True): @@ -195,7 +129,7 @@ class TestWarnings(SoupTest): with warnings.catch_warnings(record=True) as warning_list: # note - this url must differ from the bytes one otherwise # python's warnings system swallows the second warning - soup = self.soup("http://www.crummyunicode.com/") + soup = self.soup(u"http://www.crummyunicode.com/") self.assertTrue(any("looks like a URL" in str(w.message) for w in warning_list)) @@ -207,7 +141,7 @@ class TestWarnings(SoupTest): def test_url_warning_with_unicode_and_space(self): with warnings.catch_warnings(record=True) as warning_list: - soup = self.soup("http://www.crummyuncode.com/ is great") + soup = self.soup(u"http://www.crummyuncode.com/ is great") self.assertFalse(any("looks like a URL" in str(w.message) for w in warning_list)) @@ -229,9 +163,9 @@ class TestEntitySubstitution(unittest.TestCase): def test_simple_html_substitution(self): # Unicode characters corresponding to named HTML entites # are substituted, and no others. - s = "foo\u2200\N{SNOWMAN}\u00f5bar" + s = u"foo\u2200\N{SNOWMAN}\u00f5bar" self.assertEqual(self.sub.substitute_html(s), - "foo∀\N{SNOWMAN}õbar") + u"foo∀\N{SNOWMAN}õbar") def test_smart_quote_substitution(self): # MS smart quotes are a common source of frustration, so we @@ -283,7 +217,7 @@ class TestEntitySubstitution(unittest.TestCase): self.assertEqual( self.sub.substitute_xml_containing_entities("ÁT&T"), "ÁT&T") - + def test_quotes_not_html_substituted(self): """There's no need to do this except inside attribute values.""" text = 'Bob\'s "bar"' @@ -296,7 +230,7 @@ class TestEncodingConversion(SoupTest): def setUp(self): super(TestEncodingConversion, self).setUp() - self.unicode_data = '<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>' + self.unicode_data = u'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>' self.utf8_data = self.unicode_data.encode("utf-8") # Just so you know what it looks like. self.assertEqual( @@ -316,7 +250,7 @@ class TestEncodingConversion(SoupTest): ascii = b"<foo>a</foo>" soup_from_ascii = self.soup(ascii) unicode_output = soup_from_ascii.decode() - self.assertTrue(isinstance(unicode_output, str)) + self.assertTrue(isinstance(unicode_output, unicode)) self.assertEqual(unicode_output, self.document_for(ascii.decode())) self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8") finally: @@ -328,7 +262,7 @@ class TestEncodingConversion(SoupTest): # is not set. soup_from_unicode = self.soup(self.unicode_data) self.assertEqual(soup_from_unicode.decode(), self.unicode_data) - self.assertEqual(soup_from_unicode.foo.string, 'Sacr\xe9 bleu!') + self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!') self.assertEqual(soup_from_unicode.original_encoding, None) def test_utf8_in_unicode_out(self): @@ -336,7 +270,7 @@ class TestEncodingConversion(SoupTest): # attribute is set. soup_from_utf8 = self.soup(self.utf8_data) self.assertEqual(soup_from_utf8.decode(), self.unicode_data) - self.assertEqual(soup_from_utf8.foo.string, 'Sacr\xe9 bleu!') + self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!') def test_utf8_out(self): # The internal data structures can be encoded as UTF-8. @@ -347,14 +281,14 @@ class TestEncodingConversion(SoupTest): PYTHON_3_PRE_3_2, "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.") def test_attribute_name_containing_unicode_characters(self): - markup = '<div><a \N{SNOWMAN}="snowman"></a></div>' + markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>' self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8")) class TestUnicodeDammit(unittest.TestCase): """Standalone tests of UnicodeDammit.""" def test_unicode_input(self): - markup = "I'm already Unicode! \N{SNOWMAN}" + markup = u"I'm already Unicode! \N{SNOWMAN}" dammit = UnicodeDammit(markup) self.assertEqual(dammit.unicode_markup, markup) @@ -362,7 +296,7 @@ class TestUnicodeDammit(unittest.TestCase): markup = b"<foo>\x91\x92\x93\x94</foo>" dammit = UnicodeDammit(markup) self.assertEqual( - dammit.unicode_markup, "<foo>\u2018\u2019\u201c\u201d</foo>") + dammit.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>") def test_smart_quotes_to_xml_entities(self): markup = b"<foo>\x91\x92\x93\x94</foo>" @@ -386,14 +320,14 @@ class TestUnicodeDammit(unittest.TestCase): utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83" dammit = UnicodeDammit(utf8) self.assertEqual(dammit.original_encoding.lower(), 'utf-8') - self.assertEqual(dammit.unicode_markup, 'Sacr\xe9 bleu! \N{SNOWMAN}') + self.assertEqual(dammit.unicode_markup, u'Sacr\xe9 bleu! \N{SNOWMAN}') def test_convert_hebrew(self): hebrew = b"\xed\xe5\xec\xf9" dammit = UnicodeDammit(hebrew, ["iso-8859-8"]) self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8') - self.assertEqual(dammit.unicode_markup, '\u05dd\u05d5\u05dc\u05e9') + self.assertEqual(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9') def test_dont_see_smart_quotes_where_there_are_none(self): utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch" @@ -402,19 +336,19 @@ class TestUnicodeDammit(unittest.TestCase): self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8) def test_ignore_inappropriate_codecs(self): - utf8_data = "Räksmörgås".encode("utf-8") + utf8_data = u"Räksmörgås".encode("utf-8") dammit = UnicodeDammit(utf8_data, ["iso-8859-8"]) self.assertEqual(dammit.original_encoding.lower(), 'utf-8') def test_ignore_invalid_codecs(self): - utf8_data = "Räksmörgås".encode("utf-8") + utf8_data = u"Räksmörgås".encode("utf-8") for bad_encoding in ['.utf8', '...', 'utF---16.!']: dammit = UnicodeDammit(utf8_data, [bad_encoding]) self.assertEqual(dammit.original_encoding.lower(), 'utf-8') def test_exclude_encodings(self): # This is UTF-8. - utf8_data = "Räksmörgås".encode("utf-8") + utf8_data = u"Räksmörgås".encode("utf-8") # But if we exclude UTF-8 from consideration, the guess is # Windows-1252. @@ -430,7 +364,7 @@ class TestUnicodeDammit(unittest.TestCase): detected = EncodingDetector( b'<?xml version="1.0" encoding="UTF-\xdb" ?>') encodings = list(detected.encodings) - assert 'utf-\N{REPLACEMENT CHARACTER}' in encodings + assert u'utf-\N{REPLACEMENT CHARACTER}' in encodings def test_detect_html5_style_meta_tag(self): @@ -470,7 +404,7 @@ class TestUnicodeDammit(unittest.TestCase): bs4.dammit.chardet_dammit = noop dammit = UnicodeDammit(doc) self.assertEqual(True, dammit.contains_replacement_characters) - self.assertTrue("\ufffd" in dammit.unicode_markup) + self.assertTrue(u"\ufffd" in dammit.unicode_markup) soup = BeautifulSoup(doc, "html.parser") self.assertTrue(soup.contains_replacement_characters) @@ -482,17 +416,17 @@ class TestUnicodeDammit(unittest.TestCase): # A document written in UTF-16LE will have its byte order marker stripped. data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00' dammit = UnicodeDammit(data) - self.assertEqual("<a>áé</a>", dammit.unicode_markup) + self.assertEqual(u"<a>áé</a>", dammit.unicode_markup) self.assertEqual("utf-16le", dammit.original_encoding) def test_detwingle(self): # Here's a UTF8 document. - utf8 = ("\N{SNOWMAN}" * 3).encode("utf8") + utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8") # Here's a Windows-1252 document. windows_1252 = ( - "\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!" - "\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252") + u"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!" + u"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252") # Through some unholy alchemy, they've been stuck together. doc = utf8 + windows_1252 + utf8 @@ -507,7 +441,7 @@ class TestUnicodeDammit(unittest.TestCase): fixed = UnicodeDammit.detwingle(doc) self.assertEqual( - "☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8")) + u"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8")) def test_detwingle_ignores_multibyte_characters(self): # Each of these characters has a UTF-8 representation ending @@ -515,9 +449,9 @@ class TestUnicodeDammit(unittest.TestCase): # Windows-1252. But our code knows to skip over multibyte # UTF-8 characters, so they'll survive the process unscathed. for tricky_unicode_char in ( - "\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93' - "\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93' - "\xf0\x90\x90\x93", # This is a CJK character, not sure which one. + u"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93' + u"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93' + u"\xf0\x90\x90\x93", # This is a CJK character, not sure which one. ): input = tricky_unicode_char.encode("utf8") self.assertTrue(input.endswith(b'\x93')) diff --git a/libs/bs4/tests/test_tree.py b/libs/bs4/tests/test_tree.py index 3b4beeb8f..c0e7c4080 100644 --- a/libs/bs4/tests/test_tree.py +++ b/libs/bs4/tests/test_tree.py @@ -1,3 +1,4 @@ + # -*- coding: utf-8 -*- """Tests for Beautiful Soup's tree traversal methods. @@ -25,7 +26,6 @@ from bs4.element import ( Comment, Declaration, Doctype, - Formatter, NavigableString, SoupStrainer, Tag, @@ -71,13 +71,13 @@ class TestFind(TreeTest): self.assertEqual(soup.find("b").string, "2") def test_unicode_text_find(self): - soup = self.soup('<h1>Räksmörgås</h1>') - self.assertEqual(soup.find(string='Räksmörgås'), 'Räksmörgås') + soup = self.soup(u'<h1>Räksmörgås</h1>') + self.assertEqual(soup.find(string=u'Räksmörgås'), u'Räksmörgås') def test_unicode_attribute_find(self): - soup = self.soup('<h1 id="Räksmörgås">here it is</h1>') + soup = self.soup(u'<h1 id="Räksmörgås">here it is</h1>') str(soup) - self.assertEqual("here it is", soup.find(id='Räksmörgås').text) + self.assertEqual("here it is", soup.find(id=u'Räksmörgås').text) def test_find_everything(self): @@ -97,17 +97,17 @@ class TestFindAll(TreeTest): """You can search the tree for text nodes.""" soup = self.soup("<html>Foo<b>bar</b>\xbb</html>") # Exact match. - self.assertEqual(soup.find_all(string="bar"), ["bar"]) - self.assertEqual(soup.find_all(text="bar"), ["bar"]) + self.assertEqual(soup.find_all(string="bar"), [u"bar"]) + self.assertEqual(soup.find_all(text="bar"), [u"bar"]) # Match any of a number of strings. self.assertEqual( - soup.find_all(text=["Foo", "bar"]), ["Foo", "bar"]) + soup.find_all(text=["Foo", "bar"]), [u"Foo", u"bar"]) # Match a regular expression. self.assertEqual(soup.find_all(text=re.compile('.*')), - ["Foo", "bar", '\xbb']) + [u"Foo", u"bar", u'\xbb']) # Match anything. self.assertEqual(soup.find_all(text=True), - ["Foo", "bar", '\xbb']) + [u"Foo", u"bar", u'\xbb']) def test_find_all_limit(self): """You can limit the number of items returned by find_all.""" @@ -250,8 +250,8 @@ class TestFindAllByAttribute(TreeTest): ["Matching a.", "Matching b."]) def test_find_all_by_utf8_attribute_value(self): - peace = "םולש".encode("utf8") - data = '<a title="םולש"></a>'.encode("utf8") + peace = u"םולש".encode("utf8") + data = u'<a title="םולש"></a>'.encode("utf8") soup = self.soup(data) self.assertEqual([soup.a], soup.find_all(title=peace)) self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8"))) @@ -417,48 +417,6 @@ class TestFindAllByAttribute(TreeTest): self.assertEqual([], soup.find_all(id=1, text="bar")) -class TestSmooth(TreeTest): - """Test Tag.smooth.""" - - def test_smooth(self): - soup = self.soup("<div>a</div>") - div = soup.div - div.append("b") - div.append("c") - div.append(Comment("Comment 1")) - div.append(Comment("Comment 2")) - div.append("d") - builder = self.default_builder() - span = Tag(soup, builder, 'span') - span.append('1') - span.append('2') - div.append(span) - - # At this point the tree has a bunch of adjacent - # NavigableStrings. This is normal, but it has no meaning in - # terms of HTML, so we may want to smooth things out for - # output. - - # Since the <span> tag has two children, its .string is None. - self.assertEqual(None, div.span.string) - - self.assertEqual(7, len(div.contents)) - div.smooth() - self.assertEqual(5, len(div.contents)) - - # The three strings at the beginning of div.contents have been - # merged into on string. - # - self.assertEqual('abc', div.contents[0]) - - # The call is recursive -- the <span> tag was also smoothed. - self.assertEqual('12', div.span.string) - - # The two comments have _not_ been merged, even though - # comments are strings. Merging comments would change the - # meaning of the HTML. - self.assertEqual('Comment 1', div.contents[1]) - self.assertEqual('Comment 2', div.contents[2]) class TestIndex(TreeTest): @@ -647,7 +605,7 @@ class SiblingTest(TreeTest): </html>''' # All that whitespace looks good but makes the tests more # difficult. Get rid of it. - markup = re.compile(r"\n\s*").sub("", markup) + markup = re.compile("\n\s*").sub("", markup) self.tree = self.soup(markup) @@ -745,12 +703,12 @@ class TestTagCreation(SoupTest): """Test the ability to create new tags.""" def test_new_tag(self): soup = self.soup("") - new_tag = soup.new_tag("foo", bar="baz", attrs={"name": "a name"}) + new_tag = soup.new_tag("foo", bar="baz") self.assertTrue(isinstance(new_tag, Tag)) self.assertEqual("foo", new_tag.name) - self.assertEqual(dict(bar="baz", name="a name"), new_tag.attrs) + self.assertEqual(dict(bar="baz"), new_tag.attrs) self.assertEqual(None, new_tag.parent) - + def test_tag_inherits_self_closing_rules_from_builder(self): if XML_BUILDER_PRESENT: xml_soup = BeautifulSoup("", "lxml-xml") @@ -863,26 +821,6 @@ class TestTreeModification(SoupTest): soup = self.soup(text) self.assertRaises(ValueError, soup.a.insert, 0, soup.a) - def test_insert_beautifulsoup_object_inserts_children(self): - """Inserting one BeautifulSoup object into another actually inserts all - of its children -- you'll never combine BeautifulSoup objects. - """ - soup = self.soup("<p>And now, a word:</p><p>And we're back.</p>") - - text = "<p>p2</p><p>p3</p>" - to_insert = self.soup(text) - soup.insert(1, to_insert) - - for i in soup.descendants: - assert not isinstance(i, BeautifulSoup) - - p1, p2, p3, p4 = list(soup.children) - self.assertEqual("And now, a word:", p1.string) - self.assertEqual("p2", p2.string) - self.assertEqual("p3", p3.string) - self.assertEqual("And we're back.", p4.string) - - def test_replace_with_maintains_next_element_throughout(self): soup = self.soup('<p><a>one</a><b>three</b></p>') a = soup.a @@ -939,7 +877,7 @@ class TestTreeModification(SoupTest): self.assertEqual(soup.a.contents[0].next_element, "bar") def test_insert_tag(self): - builder = self.default_builder() + builder = self.default_builder soup = self.soup( "<a><b>Find</b><c>lady!</c><d></d></a>", builder=builder) magic_tag = Tag(soup, builder, 'magictag') @@ -974,13 +912,6 @@ class TestTreeModification(SoupTest): soup.a.append(soup.b) self.assertEqual(data, soup.decode()) - def test_extend(self): - data = "<a><b><c><d><e><f><g></g></f></e></d></c></b></a>" - soup = self.soup(data) - l = [soup.g, soup.f, soup.e, soup.d, soup.c, soup.b] - soup.a.extend(l) - self.assertEqual("<a><g></g><f></f><e></e><d></d><c></c><b></b></a>", soup.decode()) - def test_move_tag_to_beginning_of_parent(self): data = "<a><b></b><c></c><d></d></a>" soup = self.soup(data) @@ -1007,29 +938,6 @@ class TestTreeModification(SoupTest): self.assertEqual( soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ")) - # Can't insert an element before itself. - b = soup.b - self.assertRaises(ValueError, b.insert_before, b) - - # Can't insert before if an element has no parent. - b.extract() - self.assertRaises(ValueError, b.insert_before, "nope") - - # Can insert an identical element - soup = self.soup("<a>") - soup.a.insert_before(soup.new_tag("a")) - - def test_insert_multiple_before(self): - soup = self.soup("<a>foo</a><b>bar</b>") - soup.b.insert_before("BAZ", " ", "QUUX") - soup.a.insert_before("QUUX", " ", "BAZ") - self.assertEqual( - soup.decode(), self.document_for("QUUX BAZ<a>foo</a>BAZ QUUX<b>bar</b>")) - - soup.a.insert_before(soup.b, "FOO") - self.assertEqual( - soup.decode(), self.document_for("QUUX BAZ<b>bar</b>FOO<a>foo</a>BAZ QUUX")) - def test_insert_after(self): soup = self.soup("<a>foo</a><b>bar</b>") soup.b.insert_after("BAZ") @@ -1040,28 +948,6 @@ class TestTreeModification(SoupTest): self.assertEqual( soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ")) - # Can't insert an element after itself. - b = soup.b - self.assertRaises(ValueError, b.insert_after, b) - - # Can't insert after if an element has no parent. - b.extract() - self.assertRaises(ValueError, b.insert_after, "nope") - - # Can insert an identical element - soup = self.soup("<a>") - soup.a.insert_before(soup.new_tag("a")) - - def test_insert_multiple_after(self): - soup = self.soup("<a>foo</a><b>bar</b>") - soup.b.insert_after("BAZ", " ", "QUUX") - soup.a.insert_after("QUUX", " ", "BAZ") - self.assertEqual( - soup.decode(), self.document_for("<a>foo</a>QUUX BAZ<b>bar</b>BAZ QUUX")) - soup.b.insert_after(soup.a, "FOO ") - self.assertEqual( - soup.decode(), self.document_for("QUUX BAZ<b>bar</b><a>foo</a>FOO BAZ QUUX")) - def test_insert_after_raises_exception_if_after_has_no_meaning(self): soup = self.soup("") tag = soup.new_tag("a") @@ -1225,7 +1111,7 @@ class TestTreeModification(SoupTest): <script>baz</script> </html>""") [soup.script.extract() for i in soup.find_all("script")] - self.assertEqual("<body>\n\n<a></a>\n</body>", str(soup.body)) + self.assertEqual("<body>\n\n<a></a>\n</body>", unicode(soup.body)) def test_extract_works_when_element_is_surrounded_by_identical_strings(self): @@ -1300,7 +1186,7 @@ class TestElementObjects(SoupTest): tag = soup.bTag self.assertEqual(soup.b, tag) self.assertEqual( - '.bTag is deprecated, use .find("b") instead. If you really were looking for a tag called bTag, use .find("bTag")', + '.bTag is deprecated, use .find("b") instead.', str(w[0].message)) def test_has_attr(self): @@ -1463,19 +1349,19 @@ class TestPersistence(SoupTest): soup = BeautifulSoup(b'<p> </p>', 'html.parser') encoding = soup.original_encoding copy = soup.__copy__() - self.assertEqual("<p> </p>", str(copy)) + self.assertEqual(u"<p> </p>", unicode(copy)) self.assertEqual(encoding, copy.original_encoding) def test_unicode_pickle(self): # A tree containing Unicode characters can be pickled. - html = "<b>\N{SNOWMAN}</b>" + html = u"<b>\N{SNOWMAN}</b>" soup = self.soup(html) dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL) loaded = pickle.loads(dumped) self.assertEqual(loaded.decode(), soup.decode()) def test_copy_navigablestring_is_not_attached_to_tree(self): - html = "<b>Foo<a></a></b><b>Bar</b>" + html = u"<b>Foo<a></a></b><b>Bar</b>" soup = self.soup(html) s1 = soup.find(string="Foo") s2 = copy.copy(s1) @@ -1487,7 +1373,7 @@ class TestPersistence(SoupTest): self.assertEqual(None, s2.previous_element) def test_copy_navigablestring_subclass_has_same_type(self): - html = "<b><!--Foo--></b>" + html = u"<b><!--Foo--></b>" soup = self.soup(html) s1 = soup.string s2 = copy.copy(s1) @@ -1495,19 +1381,19 @@ class TestPersistence(SoupTest): self.assertTrue(isinstance(s2, Comment)) def test_copy_entire_soup(self): - html = "<div><b>Foo<a></a></b><b>Bar</b></div>end" + html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end" soup = self.soup(html) soup_copy = copy.copy(soup) self.assertEqual(soup, soup_copy) def test_copy_tag_copies_contents(self): - html = "<div><b>Foo<a></a></b><b>Bar</b></div>end" + html = u"<div><b>Foo<a></a></b><b>Bar</b></div>end" soup = self.soup(html) div = soup.div div_copy = copy.copy(div) # The two tags look the same, and evaluate to equal. - self.assertEqual(str(div), str(div_copy)) + self.assertEqual(unicode(div), unicode(div_copy)) self.assertEqual(div, div_copy) # But they're not the same object. @@ -1523,75 +1409,67 @@ class TestPersistence(SoupTest): class TestSubstitutions(SoupTest): def test_default_formatter_is_minimal(self): - markup = "<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>" + markup = u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>" soup = self.soup(markup) decoded = soup.decode(formatter="minimal") # The < is converted back into < but the e-with-acute is left alone. self.assertEqual( decoded, self.document_for( - "<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>")) + u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>")) def test_formatter_html(self): - markup = "<br><b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>" + markup = u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>" soup = self.soup(markup) decoded = soup.decode(formatter="html") self.assertEqual( decoded, - self.document_for("<br/><b><<Sacré bleu!>></b>")) + self.document_for("<b><<Sacré bleu!>></b>")) - def test_formatter_html5(self): - markup = "<br><b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>" - soup = self.soup(markup) - decoded = soup.decode(formatter="html5") - self.assertEqual( - decoded, - self.document_for("<br><b><<Sacré bleu!>></b>")) - def test_formatter_minimal(self): - markup = "<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>" + markup = u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>" soup = self.soup(markup) decoded = soup.decode(formatter="minimal") # The < is converted back into < but the e-with-acute is left alone. self.assertEqual( decoded, self.document_for( - "<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>")) + u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>")) def test_formatter_null(self): - markup = "<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>" + markup = u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>" soup = self.soup(markup) decoded = soup.decode(formatter=None) # Neither the angle brackets nor the e-with-acute are converted. # This is not valid HTML, but it's what the user wanted. self.assertEqual(decoded, - self.document_for("<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>")) + self.document_for(u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>")) def test_formatter_custom(self): - markup = "<b><foo></b><b>bar</b><br/>" + markup = u"<b><foo></b><b>bar</b>" soup = self.soup(markup) decoded = soup.decode(formatter = lambda x: x.upper()) # Instead of normal entity conversion code, the custom # callable is called on every string. self.assertEqual( decoded, - self.document_for("<b><FOO></b><b>BAR</b><br/>")) + self.document_for(u"<b><FOO></b><b>BAR</b>")) def test_formatter_is_run_on_attribute_values(self): - markup = '<a href="http://a.com?a=b&c=é">e</a>' + markup = u'<a href="http://a.com?a=b&c=é">e</a>' soup = self.soup(markup) a = soup.a - expect_minimal = '<a href="http://a.com?a=b&c=é">e</a>' + expect_minimal = u'<a href="http://a.com?a=b&c=é">e</a>' self.assertEqual(expect_minimal, a.decode()) self.assertEqual(expect_minimal, a.decode(formatter="minimal")) - expect_html = '<a href="http://a.com?a=b&c=é">e</a>' + expect_html = u'<a href="http://a.com?a=b&c=é">e</a>' self.assertEqual(expect_html, a.decode(formatter="html")) self.assertEqual(markup, a.decode(formatter=None)) - expect_upper = '<a href="HTTP://A.COM?A=B&C=É">E</a>' + expect_upper = u'<a href="HTTP://A.COM?A=B&C=É">E</a>' self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper())) def test_formatter_skips_script_tag_for_html_documents(self): @@ -1613,28 +1491,28 @@ class TestSubstitutions(SoupTest): self.assertTrue(b"< < hey > >" in encoded) def test_prettify_leaves_preformatted_text_alone(self): - soup = self.soup("<div> foo <pre> \tbar\n \n </pre> baz <textarea> eee\nfff\t</textarea></div>") + soup = self.soup("<div> foo <pre> \tbar\n \n </pre> baz ") # Everything outside the <pre> tag is reformatted, but everything # inside is left alone. self.assertEqual( - '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>', + u'<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n</div>', soup.div.prettify()) - def test_prettify_accepts_formatter_function(self): + def test_prettify_accepts_formatter(self): soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser') pretty = soup.prettify(formatter = lambda x: x.upper()) self.assertTrue("FOO" in pretty) def test_prettify_outputs_unicode_by_default(self): soup = self.soup("<a></a>") - self.assertEqual(str, type(soup.prettify())) + self.assertEqual(unicode, type(soup.prettify())) def test_prettify_can_encode_data(self): soup = self.soup("<a></a>") self.assertEqual(bytes, type(soup.prettify("utf-8"))) def test_html_entity_substitution_off_by_default(self): - markup = "<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>" + markup = u"<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>" soup = self.soup(markup) encoded = soup.b.encode("utf-8") self.assertEqual(encoded, markup.encode('utf-8')) @@ -1678,77 +1556,54 @@ class TestEncoding(SoupTest): """Test the ability to encode objects into strings.""" def test_unicode_string_can_be_encoded(self): - html = "<b>\N{SNOWMAN}</b>" + html = u"<b>\N{SNOWMAN}</b>" soup = self.soup(html) self.assertEqual(soup.b.string.encode("utf-8"), - "\N{SNOWMAN}".encode("utf-8")) + u"\N{SNOWMAN}".encode("utf-8")) def test_tag_containing_unicode_string_can_be_encoded(self): - html = "<b>\N{SNOWMAN}</b>" + html = u"<b>\N{SNOWMAN}</b>" soup = self.soup(html) self.assertEqual( soup.b.encode("utf-8"), html.encode("utf-8")) def test_encoding_substitutes_unrecognized_characters_by_default(self): - html = "<b>\N{SNOWMAN}</b>" + html = u"<b>\N{SNOWMAN}</b>" soup = self.soup(html) self.assertEqual(soup.b.encode("ascii"), b"<b>☃</b>") def test_encoding_can_be_made_strict(self): - html = "<b>\N{SNOWMAN}</b>" + html = u"<b>\N{SNOWMAN}</b>" soup = self.soup(html) self.assertRaises( UnicodeEncodeError, soup.encode, "ascii", errors="strict") def test_decode_contents(self): - html = "<b>\N{SNOWMAN}</b>" + html = u"<b>\N{SNOWMAN}</b>" soup = self.soup(html) - self.assertEqual("\N{SNOWMAN}", soup.b.decode_contents()) + self.assertEqual(u"\N{SNOWMAN}", soup.b.decode_contents()) def test_encode_contents(self): - html = "<b>\N{SNOWMAN}</b>" + html = u"<b>\N{SNOWMAN}</b>" soup = self.soup(html) self.assertEqual( - "\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents( + u"\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents( encoding="utf8")) def test_deprecated_renderContents(self): - html = "<b>\N{SNOWMAN}</b>" + html = u"<b>\N{SNOWMAN}</b>" soup = self.soup(html) self.assertEqual( - "\N{SNOWMAN}".encode("utf8"), soup.b.renderContents()) + u"\N{SNOWMAN}".encode("utf8"), soup.b.renderContents()) def test_repr(self): - html = "<b>\N{SNOWMAN}</b>" + html = u"<b>\N{SNOWMAN}</b>" soup = self.soup(html) if PY3K: self.assertEqual(html, repr(soup)) else: self.assertEqual(b'<b>\\u2603</b>', repr(soup)) -class TestFormatter(SoupTest): - - def test_sort_attributes(self): - # Test the ability to override Formatter.attributes() to, - # e.g., disable the normal sorting of attributes. - class UnsortedFormatter(Formatter): - def attributes(self, tag): - self.called_with = tag - for k, v in sorted(tag.attrs.items()): - if k == 'ignore': - continue - yield k,v - - soup = self.soup('<p cval="1" aval="2" ignore="ignored"></p>') - formatter = UnsortedFormatter() - decoded = soup.decode(formatter=formatter) - - # attributes() was called on the <p> tag. It filtered out one - # attribute and sorted the other two. - self.assertEqual(formatter.called_with, soup.p) - self.assertEqual('<p aval="2" cval="1"></p>', decoded) - - class TestNavigableStringSubclasses(SoupTest): def test_cdata(self): @@ -1865,7 +1720,7 @@ class TestSoupSelector(TreeTest): els = self.soup.select('title') self.assertEqual(len(els), 1) self.assertEqual(els[0].name, 'title') - self.assertEqual(els[0].contents, ['The title']) + self.assertEqual(els[0].contents, [u'The title']) def test_one_tag_many(self): els = self.soup.select('div') @@ -1900,7 +1755,7 @@ class TestSoupSelector(TreeTest): self.assertEqual(len(self.soup.select('del')), 0) def test_invalid_tag(self): - self.assertRaises(SyntaxError, self.soup.select, 'tag%t') + self.assertRaises(ValueError, self.soup.select, 'tag%t') def test_select_dashed_tag_ids(self): self.assertSelects('custom-dashed-tag', ['dash1', 'dash2']) @@ -1911,7 +1766,7 @@ class TestSoupSelector(TreeTest): self.assertEqual(dashed[0]['id'], 'dash2') def test_dashed_tag_text(self): - self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, 'Hello there.') + self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, u'Hello there.') def test_select_dashed_matches_find_all(self): self.assertEqual(self.soup.select('custom-dashed-tag'), self.soup.find_all('custom-dashed-tag')) @@ -2091,31 +1946,32 @@ class TestSoupSelector(TreeTest): NotImplementedError, self.soup.select, "a:no-such-pseudoclass") self.assertRaises( - SyntaxError, self.soup.select, "a:nth-of-type(a)") + NotImplementedError, self.soup.select, "a:nth-of-type(a)") + def test_nth_of_type(self): # Try to select first paragraph els = self.soup.select('div#inner p:nth-of-type(1)') self.assertEqual(len(els), 1) - self.assertEqual(els[0].string, 'Some text') + self.assertEqual(els[0].string, u'Some text') # Try to select third paragraph els = self.soup.select('div#inner p:nth-of-type(3)') self.assertEqual(len(els), 1) - self.assertEqual(els[0].string, 'Another') + self.assertEqual(els[0].string, u'Another') # Try to select (non-existent!) fourth paragraph els = self.soup.select('div#inner p:nth-of-type(4)') self.assertEqual(len(els), 0) - # Zero will select no tags. - els = self.soup.select('div p:nth-of-type(0)') - self.assertEqual(len(els), 0) + # Pass in an invalid value. + self.assertRaises( + ValueError, self.soup.select, 'div p:nth-of-type(0)') def test_nth_of_type_direct_descendant(self): els = self.soup.select('div#inner > p:nth-of-type(1)') self.assertEqual(len(els), 1) - self.assertEqual(els[0].string, 'Some text') + self.assertEqual(els[0].string, u'Some text') def test_id_child_selector_nth_of_type(self): self.assertSelects('#inner > p:nth-of-type(2)', ['p1']) @@ -2147,7 +2003,7 @@ class TestSoupSelector(TreeTest): self.assertEqual([], self.soup.select('#inner ~ h2')) def test_dangling_combinator(self): - self.assertRaises(SyntaxError, self.soup.select, 'h1 >') + self.assertRaises(ValueError, self.soup.select, 'h1 >') def test_sibling_combinator_wont_select_same_tag_twice(self): self.assertSelects('p[lang] ~ p', ['lang-en-gb', 'lang-en-us', 'lang-fr']) @@ -2178,8 +2034,8 @@ class TestSoupSelector(TreeTest): self.assertSelects('div x,y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac']) def test_invalid_multiple_select(self): - self.assertRaises(SyntaxError, self.soup.select, ',x, y') - self.assertRaises(SyntaxError, self.soup.select, 'x,,y') + self.assertRaises(ValueError, self.soup.select, ',x, y') + self.assertRaises(ValueError, self.soup.select, 'x,,y') def test_multiple_select_attrs(self): self.assertSelects('p[lang=en], p[lang=en-gb]', ['lang-en', 'lang-en-gb']) @@ -2190,16 +2046,5 @@ class TestSoupSelector(TreeTest): def test_multiple_select_nested(self): self.assertSelects('body > div > x, y > z', ['xid', 'zidb']) - def test_select_duplicate_elements(self): - # When markup contains duplicate elements, a multiple select - # will find all of them. - markup = '<div class="c1"/><div class="c2"/><div class="c1"/>' - soup = BeautifulSoup(markup, 'html.parser') - selected = soup.select(".c1, .c2") - self.assertEqual(3, len(selected)) - - # Verify that find_all finds the same elements, though because - # of an implementation detail it finds them in a different - # order. - for element in soup.find_all(class_=['c1', 'c2']): - assert element in selected + + diff --git a/libs/enum2.7/LICENSE b/libs/enum2.7/LICENSE deleted file mode 100644 index 9003b8850..000000000 --- a/libs/enum2.7/LICENSE +++ /dev/null @@ -1,32 +0,0 @@ -Copyright (c) 2013, Ethan Furman. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - - Redistributions of source code must retain the above - copyright notice, this list of conditions and the - following disclaimer. - - Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - - Neither the name Ethan Furman nor the names of any - contributors may be used to endorse or promote products - derived from this software without specific prior written - permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. diff --git a/libs/enum2.7/README b/libs/enum2.7/README deleted file mode 100644 index aa2333d8d..000000000 --- a/libs/enum2.7/README +++ /dev/null @@ -1,3 +0,0 @@ -enum34 is the new Python stdlib enum module available in Python 3.4 -backported for previous versions of Python from 2.4 to 3.3. -tested on 2.6, 2.7, and 3.3+ diff --git a/libs/enum2.7/__init__.py b/libs/enum2.7/__init__.py deleted file mode 100644 index d6ffb3a40..000000000 --- a/libs/enum2.7/__init__.py +++ /dev/null @@ -1,837 +0,0 @@ -"""Python Enumerations""" - -import sys as _sys - -__all__ = ['Enum', 'IntEnum', 'unique'] - -version = 1, 1, 6 - -pyver = float('%s.%s' % _sys.version_info[:2]) - -try: - any -except NameError: - def any(iterable): - for element in iterable: - if element: - return True - return False - -try: - from collections import OrderedDict -except ImportError: - OrderedDict = None - -try: - basestring -except NameError: - # In Python 2 basestring is the ancestor of both str and unicode - # in Python 3 it's just str, but was missing in 3.1 - basestring = str - -try: - unicode -except NameError: - # In Python 3 unicode no longer exists (it's just str) - unicode = str - -class _RouteClassAttributeToGetattr(object): - """Route attribute access on a class to __getattr__. - - This is a descriptor, used to define attributes that act differently when - accessed through an instance and through a class. Instance access remains - normal, but access to an attribute through a class will be routed to the - class's __getattr__ method; this is done by raising AttributeError. - - """ - def __init__(self, fget=None): - self.fget = fget - - def __get__(self, instance, ownerclass=None): - if instance is None: - raise AttributeError() - return self.fget(instance) - - def __set__(self, instance, value): - raise AttributeError("can't set attribute") - - def __delete__(self, instance): - raise AttributeError("can't delete attribute") - - -def _is_descriptor(obj): - """Returns True if obj is a descriptor, False otherwise.""" - return ( - hasattr(obj, '__get__') or - hasattr(obj, '__set__') or - hasattr(obj, '__delete__')) - - -def _is_dunder(name): - """Returns True if a __dunder__ name, False otherwise.""" - return (name[:2] == name[-2:] == '__' and - name[2:3] != '_' and - name[-3:-2] != '_' and - len(name) > 4) - - -def _is_sunder(name): - """Returns True if a _sunder_ name, False otherwise.""" - return (name[0] == name[-1] == '_' and - name[1:2] != '_' and - name[-2:-1] != '_' and - len(name) > 2) - - -def _make_class_unpicklable(cls): - """Make the given class un-picklable.""" - def _break_on_call_reduce(self, protocol=None): - raise TypeError('%r cannot be pickled' % self) - cls.__reduce_ex__ = _break_on_call_reduce - cls.__module__ = '<unknown>' - - -class _EnumDict(dict): - """Track enum member order and ensure member names are not reused. - - EnumMeta will use the names found in self._member_names as the - enumeration member names. - - """ - def __init__(self): - super(_EnumDict, self).__init__() - self._member_names = [] - - def __setitem__(self, key, value): - """Changes anything not dundered or not a descriptor. - - If a descriptor is added with the same name as an enum member, the name - is removed from _member_names (this may leave a hole in the numerical - sequence of values). - - If an enum member name is used twice, an error is raised; duplicate - values are not checked for. - - Single underscore (sunder) names are reserved. - - Note: in 3.x __order__ is simply discarded as a not necessary piece - leftover from 2.x - - """ - if pyver >= 3.0 and key in ('_order_', '__order__'): - return - elif key == '__order__': - key = '_order_' - if _is_sunder(key): - if key != '_order_': - raise ValueError('_names_ are reserved for future Enum use') - elif _is_dunder(key): - pass - elif key in self._member_names: - # descriptor overwriting an enum? - raise TypeError('Attempted to reuse key: %r' % key) - elif not _is_descriptor(value): - if key in self: - # enum overwriting a descriptor? - raise TypeError('Key already defined as: %r' % self[key]) - self._member_names.append(key) - super(_EnumDict, self).__setitem__(key, value) - - -# Dummy value for Enum as EnumMeta explicity checks for it, but of course until -# EnumMeta finishes running the first time the Enum class doesn't exist. This -# is also why there are checks in EnumMeta like `if Enum is not None` -Enum = None - - -class EnumMeta(type): - """Metaclass for Enum""" - @classmethod - def __prepare__(metacls, cls, bases): - return _EnumDict() - - def __new__(metacls, cls, bases, classdict): - # an Enum class is final once enumeration items have been defined; it - # cannot be mixed with other types (int, float, etc.) if it has an - # inherited __new__ unless a new __new__ is defined (or the resulting - # class will fail). - if type(classdict) is dict: - original_dict = classdict - classdict = _EnumDict() - for k, v in original_dict.items(): - classdict[k] = v - - member_type, first_enum = metacls._get_mixins_(bases) - __new__, save_new, use_args = metacls._find_new_(classdict, member_type, - first_enum) - # save enum items into separate mapping so they don't get baked into - # the new class - members = dict((k, classdict[k]) for k in classdict._member_names) - for name in classdict._member_names: - del classdict[name] - - # py2 support for definition order - _order_ = classdict.get('_order_') - if _order_ is None: - if pyver < 3.0: - try: - _order_ = [name for (name, value) in sorted(members.items(), key=lambda item: item[1])] - except TypeError: - _order_ = [name for name in sorted(members.keys())] - else: - _order_ = classdict._member_names - else: - del classdict['_order_'] - if pyver < 3.0: - _order_ = _order_.replace(',', ' ').split() - aliases = [name for name in members if name not in _order_] - _order_ += aliases - - # check for illegal enum names (any others?) - invalid_names = set(members) & set(['mro']) - if invalid_names: - raise ValueError('Invalid enum member name(s): %s' % ( - ', '.join(invalid_names), )) - - # save attributes from super classes so we know if we can take - # the shortcut of storing members in the class dict - base_attributes = set([a for b in bases for a in b.__dict__]) - # create our new Enum type - enum_class = super(EnumMeta, metacls).__new__(metacls, cls, bases, classdict) - enum_class._member_names_ = [] # names in random order - if OrderedDict is not None: - enum_class._member_map_ = OrderedDict() - else: - enum_class._member_map_ = {} # name->value map - enum_class._member_type_ = member_type - - # Reverse value->name map for hashable values. - enum_class._value2member_map_ = {} - - # instantiate them, checking for duplicates as we go - # we instantiate first instead of checking for duplicates first in case - # a custom __new__ is doing something funky with the values -- such as - # auto-numbering ;) - if __new__ is None: - __new__ = enum_class.__new__ - for member_name in _order_: - value = members[member_name] - if not isinstance(value, tuple): - args = (value, ) - else: - args = value - if member_type is tuple: # special case for tuple enums - args = (args, ) # wrap it one more time - if not use_args or not args: - enum_member = __new__(enum_class) - if not hasattr(enum_member, '_value_'): - enum_member._value_ = value - else: - enum_member = __new__(enum_class, *args) - if not hasattr(enum_member, '_value_'): - enum_member._value_ = member_type(*args) - value = enum_member._value_ - enum_member._name_ = member_name - enum_member.__objclass__ = enum_class - enum_member.__init__(*args) - # If another member with the same value was already defined, the - # new member becomes an alias to the existing one. - for name, canonical_member in enum_class._member_map_.items(): - if canonical_member.value == enum_member._value_: - enum_member = canonical_member - break - else: - # Aliases don't appear in member names (only in __members__). - enum_class._member_names_.append(member_name) - # performance boost for any member that would not shadow - # a DynamicClassAttribute (aka _RouteClassAttributeToGetattr) - if member_name not in base_attributes: - setattr(enum_class, member_name, enum_member) - # now add to _member_map_ - enum_class._member_map_[member_name] = enum_member - try: - # This may fail if value is not hashable. We can't add the value - # to the map, and by-value lookups for this value will be - # linear. - enum_class._value2member_map_[value] = enum_member - except TypeError: - pass - - - # If a custom type is mixed into the Enum, and it does not know how - # to pickle itself, pickle.dumps will succeed but pickle.loads will - # fail. Rather than have the error show up later and possibly far - # from the source, sabotage the pickle protocol for this class so - # that pickle.dumps also fails. - # - # However, if the new class implements its own __reduce_ex__, do not - # sabotage -- it's on them to make sure it works correctly. We use - # __reduce_ex__ instead of any of the others as it is preferred by - # pickle over __reduce__, and it handles all pickle protocols. - unpicklable = False - if '__reduce_ex__' not in classdict: - if member_type is not object: - methods = ('__getnewargs_ex__', '__getnewargs__', - '__reduce_ex__', '__reduce__') - if not any(m in member_type.__dict__ for m in methods): - _make_class_unpicklable(enum_class) - unpicklable = True - - - # double check that repr and friends are not the mixin's or various - # things break (such as pickle) - for name in ('__repr__', '__str__', '__format__', '__reduce_ex__'): - class_method = getattr(enum_class, name) - obj_method = getattr(member_type, name, None) - enum_method = getattr(first_enum, name, None) - if name not in classdict and class_method is not enum_method: - if name == '__reduce_ex__' and unpicklable: - continue - setattr(enum_class, name, enum_method) - - # method resolution and int's are not playing nice - # Python's less than 2.6 use __cmp__ - - if pyver < 2.6: - - if issubclass(enum_class, int): - setattr(enum_class, '__cmp__', getattr(int, '__cmp__')) - - elif pyver < 3.0: - - if issubclass(enum_class, int): - for method in ( - '__le__', - '__lt__', - '__gt__', - '__ge__', - '__eq__', - '__ne__', - '__hash__', - ): - setattr(enum_class, method, getattr(int, method)) - - # replace any other __new__ with our own (as long as Enum is not None, - # anyway) -- again, this is to support pickle - if Enum is not None: - # if the user defined their own __new__, save it before it gets - # clobbered in case they subclass later - if save_new: - setattr(enum_class, '__member_new__', enum_class.__dict__['__new__']) - setattr(enum_class, '__new__', Enum.__dict__['__new__']) - return enum_class - - def __bool__(cls): - """ - classes/types should always be True. - """ - return True - - def __call__(cls, value, names=None, module=None, type=None, start=1): - """Either returns an existing member, or creates a new enum class. - - This method is used both when an enum class is given a value to match - to an enumeration member (i.e. Color(3)) and for the functional API - (i.e. Color = Enum('Color', names='red green blue')). - - When used for the functional API: `module`, if set, will be stored in - the new class' __module__ attribute; `type`, if set, will be mixed in - as the first base class. - - Note: if `module` is not set this routine will attempt to discover the - calling module by walking the frame stack; if this is unsuccessful - the resulting class will not be pickleable. - - """ - if names is None: # simple value lookup - return cls.__new__(cls, value) - # otherwise, functional API: we're creating a new Enum type - return cls._create_(value, names, module=module, type=type, start=start) - - def __contains__(cls, member): - return isinstance(member, cls) and member.name in cls._member_map_ - - def __delattr__(cls, attr): - # nicer error message when someone tries to delete an attribute - # (see issue19025). - if attr in cls._member_map_: - raise AttributeError( - "%s: cannot delete Enum member." % cls.__name__) - super(EnumMeta, cls).__delattr__(attr) - - def __dir__(self): - return (['__class__', '__doc__', '__members__', '__module__'] + - self._member_names_) - - @property - def __members__(cls): - """Returns a mapping of member name->value. - - This mapping lists all enum members, including aliases. Note that this - is a copy of the internal mapping. - - """ - return cls._member_map_.copy() - - def __getattr__(cls, name): - """Return the enum member matching `name` - - We use __getattr__ instead of descriptors or inserting into the enum - class' __dict__ in order to support `name` and `value` being both - properties for enum members (which live in the class' __dict__) and - enum members themselves. - - """ - if _is_dunder(name): - raise AttributeError(name) - try: - return cls._member_map_[name] - except KeyError: - raise AttributeError(name) - - def __getitem__(cls, name): - return cls._member_map_[name] - - def __iter__(cls): - return (cls._member_map_[name] for name in cls._member_names_) - - def __reversed__(cls): - return (cls._member_map_[name] for name in reversed(cls._member_names_)) - - def __len__(cls): - return len(cls._member_names_) - - __nonzero__ = __bool__ - - def __repr__(cls): - return "<enum %r>" % cls.__name__ - - def __setattr__(cls, name, value): - """Block attempts to reassign Enum members. - - A simple assignment to the class namespace only changes one of the - several possible ways to get an Enum member from the Enum class, - resulting in an inconsistent Enumeration. - - """ - member_map = cls.__dict__.get('_member_map_', {}) - if name in member_map: - raise AttributeError('Cannot reassign members.') - super(EnumMeta, cls).__setattr__(name, value) - - def _create_(cls, class_name, names=None, module=None, type=None, start=1): - """Convenience method to create a new Enum class. - - `names` can be: - - * A string containing member names, separated either with spaces or - commas. Values are auto-numbered from 1. - * An iterable of member names. Values are auto-numbered from 1. - * An iterable of (member name, value) pairs. - * A mapping of member name -> value. - - """ - if pyver < 3.0: - # if class_name is unicode, attempt a conversion to ASCII - if isinstance(class_name, unicode): - try: - class_name = class_name.encode('ascii') - except UnicodeEncodeError: - raise TypeError('%r is not representable in ASCII' % class_name) - metacls = cls.__class__ - if type is None: - bases = (cls, ) - else: - bases = (type, cls) - classdict = metacls.__prepare__(class_name, bases) - _order_ = [] - - # special processing needed for names? - if isinstance(names, basestring): - names = names.replace(',', ' ').split() - if isinstance(names, (tuple, list)) and isinstance(names[0], basestring): - names = [(e, i+start) for (i, e) in enumerate(names)] - - # Here, names is either an iterable of (name, value) or a mapping. - item = None # in case names is empty - for item in names: - if isinstance(item, basestring): - member_name, member_value = item, names[item] - else: - member_name, member_value = item - classdict[member_name] = member_value - _order_.append(member_name) - # only set _order_ in classdict if name/value was not from a mapping - if not isinstance(item, basestring): - classdict['_order_'] = ' '.join(_order_) - enum_class = metacls.__new__(metacls, class_name, bases, classdict) - - # TODO: replace the frame hack if a blessed way to know the calling - # module is ever developed - if module is None: - try: - module = _sys._getframe(2).f_globals['__name__'] - except (AttributeError, ValueError): - pass - if module is None: - _make_class_unpicklable(enum_class) - else: - enum_class.__module__ = module - - return enum_class - - @staticmethod - def _get_mixins_(bases): - """Returns the type for creating enum members, and the first inherited - enum class. - - bases: the tuple of bases that was given to __new__ - - """ - if not bases or Enum is None: - return object, Enum - - - # double check that we are not subclassing a class with existing - # enumeration members; while we're at it, see if any other data - # type has been mixed in so we can use the correct __new__ - member_type = first_enum = None - for base in bases: - if (base is not Enum and - issubclass(base, Enum) and - base._member_names_): - raise TypeError("Cannot extend enumerations") - # base is now the last base in bases - if not issubclass(base, Enum): - raise TypeError("new enumerations must be created as " - "`ClassName([mixin_type,] enum_type)`") - - # get correct mix-in type (either mix-in type of Enum subclass, or - # first base if last base is Enum) - if not issubclass(bases[0], Enum): - member_type = bases[0] # first data type - first_enum = bases[-1] # enum type - else: - for base in bases[0].__mro__: - # most common: (IntEnum, int, Enum, object) - # possible: (<Enum 'AutoIntEnum'>, <Enum 'IntEnum'>, - # <class 'int'>, <Enum 'Enum'>, - # <class 'object'>) - if issubclass(base, Enum): - if first_enum is None: - first_enum = base - else: - if member_type is None: - member_type = base - - return member_type, first_enum - - if pyver < 3.0: - @staticmethod - def _find_new_(classdict, member_type, first_enum): - """Returns the __new__ to be used for creating the enum members. - - classdict: the class dictionary given to __new__ - member_type: the data type whose __new__ will be used by default - first_enum: enumeration to check for an overriding __new__ - - """ - # now find the correct __new__, checking to see of one was defined - # by the user; also check earlier enum classes in case a __new__ was - # saved as __member_new__ - __new__ = classdict.get('__new__', None) - if __new__: - return None, True, True # __new__, save_new, use_args - - N__new__ = getattr(None, '__new__') - O__new__ = getattr(object, '__new__') - if Enum is None: - E__new__ = N__new__ - else: - E__new__ = Enum.__dict__['__new__'] - # check all possibles for __member_new__ before falling back to - # __new__ - for method in ('__member_new__', '__new__'): - for possible in (member_type, first_enum): - try: - target = possible.__dict__[method] - except (AttributeError, KeyError): - target = getattr(possible, method, None) - if target not in [ - None, - N__new__, - O__new__, - E__new__, - ]: - if method == '__member_new__': - classdict['__new__'] = target - return None, False, True - if isinstance(target, staticmethod): - target = target.__get__(member_type) - __new__ = target - break - if __new__ is not None: - break - else: - __new__ = object.__new__ - - # if a non-object.__new__ is used then whatever value/tuple was - # assigned to the enum member name will be passed to __new__ and to the - # new enum member's __init__ - if __new__ is object.__new__: - use_args = False - else: - use_args = True - - return __new__, False, use_args - else: - @staticmethod - def _find_new_(classdict, member_type, first_enum): - """Returns the __new__ to be used for creating the enum members. - - classdict: the class dictionary given to __new__ - member_type: the data type whose __new__ will be used by default - first_enum: enumeration to check for an overriding __new__ - - """ - # now find the correct __new__, checking to see of one was defined - # by the user; also check earlier enum classes in case a __new__ was - # saved as __member_new__ - __new__ = classdict.get('__new__', None) - - # should __new__ be saved as __member_new__ later? - save_new = __new__ is not None - - if __new__ is None: - # check all possibles for __member_new__ before falling back to - # __new__ - for method in ('__member_new__', '__new__'): - for possible in (member_type, first_enum): - target = getattr(possible, method, None) - if target not in ( - None, - None.__new__, - object.__new__, - Enum.__new__, - ): - __new__ = target - break - if __new__ is not None: - break - else: - __new__ = object.__new__ - - # if a non-object.__new__ is used then whatever value/tuple was - # assigned to the enum member name will be passed to __new__ and to the - # new enum member's __init__ - if __new__ is object.__new__: - use_args = False - else: - use_args = True - - return __new__, save_new, use_args - - -######################################################## -# In order to support Python 2 and 3 with a single -# codebase we have to create the Enum methods separately -# and then use the `type(name, bases, dict)` method to -# create the class. -######################################################## -temp_enum_dict = {} -temp_enum_dict['__doc__'] = "Generic enumeration.\n\n Derive from this class to define new enumerations.\n\n" - -def __new__(cls, value): - # all enum instances are actually created during class construction - # without calling this method; this method is called by the metaclass' - # __call__ (i.e. Color(3) ), and by pickle - if type(value) is cls: - # For lookups like Color(Color.red) - value = value.value - #return value - # by-value search for a matching enum member - # see if it's in the reverse mapping (for hashable values) - try: - if value in cls._value2member_map_: - return cls._value2member_map_[value] - except TypeError: - # not there, now do long search -- O(n) behavior - for member in cls._member_map_.values(): - if member.value == value: - return member - raise ValueError("%s is not a valid %s" % (value, cls.__name__)) -temp_enum_dict['__new__'] = __new__ -del __new__ - -def __repr__(self): - return "<%s.%s: %r>" % ( - self.__class__.__name__, self._name_, self._value_) -temp_enum_dict['__repr__'] = __repr__ -del __repr__ - -def __str__(self): - return "%s.%s" % (self.__class__.__name__, self._name_) -temp_enum_dict['__str__'] = __str__ -del __str__ - -if pyver >= 3.0: - def __dir__(self): - added_behavior = [ - m - for cls in self.__class__.mro() - for m in cls.__dict__ - if m[0] != '_' and m not in self._member_map_ - ] - return (['__class__', '__doc__', '__module__', ] + added_behavior) - temp_enum_dict['__dir__'] = __dir__ - del __dir__ - -def __format__(self, format_spec): - # mixed-in Enums should use the mixed-in type's __format__, otherwise - # we can get strange results with the Enum name showing up instead of - # the value - - # pure Enum branch - if self._member_type_ is object: - cls = str - val = str(self) - # mix-in branch - else: - cls = self._member_type_ - val = self.value - return cls.__format__(val, format_spec) -temp_enum_dict['__format__'] = __format__ -del __format__ - - -#################################### -# Python's less than 2.6 use __cmp__ - -if pyver < 2.6: - - def __cmp__(self, other): - if type(other) is self.__class__: - if self is other: - return 0 - return -1 - return NotImplemented - raise TypeError("unorderable types: %s() and %s()" % (self.__class__.__name__, other.__class__.__name__)) - temp_enum_dict['__cmp__'] = __cmp__ - del __cmp__ - -else: - - def __le__(self, other): - raise TypeError("unorderable types: %s() <= %s()" % (self.__class__.__name__, other.__class__.__name__)) - temp_enum_dict['__le__'] = __le__ - del __le__ - - def __lt__(self, other): - raise TypeError("unorderable types: %s() < %s()" % (self.__class__.__name__, other.__class__.__name__)) - temp_enum_dict['__lt__'] = __lt__ - del __lt__ - - def __ge__(self, other): - raise TypeError("unorderable types: %s() >= %s()" % (self.__class__.__name__, other.__class__.__name__)) - temp_enum_dict['__ge__'] = __ge__ - del __ge__ - - def __gt__(self, other): - raise TypeError("unorderable types: %s() > %s()" % (self.__class__.__name__, other.__class__.__name__)) - temp_enum_dict['__gt__'] = __gt__ - del __gt__ - - -def __eq__(self, other): - if type(other) is self.__class__: - return self is other - return NotImplemented -temp_enum_dict['__eq__'] = __eq__ -del __eq__ - -def __ne__(self, other): - if type(other) is self.__class__: - return self is not other - return NotImplemented -temp_enum_dict['__ne__'] = __ne__ -del __ne__ - -def __hash__(self): - return hash(self._name_) -temp_enum_dict['__hash__'] = __hash__ -del __hash__ - -def __reduce_ex__(self, proto): - return self.__class__, (self._value_, ) -temp_enum_dict['__reduce_ex__'] = __reduce_ex__ -del __reduce_ex__ - -# _RouteClassAttributeToGetattr is used to provide access to the `name` -# and `value` properties of enum members while keeping some measure of -# protection from modification, while still allowing for an enumeration -# to have members named `name` and `value`. This works because enumeration -# members are not set directly on the enum class -- __getattr__ is -# used to look them up. - -@_RouteClassAttributeToGetattr -def name(self): - return self._name_ -temp_enum_dict['name'] = name -del name - -@_RouteClassAttributeToGetattr -def value(self): - return self._value_ -temp_enum_dict['value'] = value -del value - -@classmethod -def _convert(cls, name, module, filter, source=None): - """ - Create a new Enum subclass that replaces a collection of global constants - """ - # convert all constants from source (or module) that pass filter() to - # a new Enum called name, and export the enum and its members back to - # module; - # also, replace the __reduce_ex__ method so unpickling works in - # previous Python versions - module_globals = vars(_sys.modules[module]) - if source: - source = vars(source) - else: - source = module_globals - members = dict((name, value) for name, value in source.items() if filter(name)) - cls = cls(name, members, module=module) - cls.__reduce_ex__ = _reduce_ex_by_name - module_globals.update(cls.__members__) - module_globals[name] = cls - return cls -temp_enum_dict['_convert'] = _convert -del _convert - -Enum = EnumMeta('Enum', (object, ), temp_enum_dict) -del temp_enum_dict - -# Enum has now been created -########################### - -class IntEnum(int, Enum): - """Enum where members are also (and must be) ints""" - -def _reduce_ex_by_name(self, proto): - return self.name - -def unique(enumeration): - """Class decorator that ensures only unique members exist in an enumeration.""" - duplicates = [] - for name, member in enumeration.__members__.items(): - if name != member.name: - duplicates.append((name, member.name)) - if duplicates: - duplicate_names = ', '.join( - ["%s -> %s" % (alias, name) for (alias, name) in duplicates] - ) - raise ValueError('duplicate names found in %r: %s' % - (enumeration, duplicate_names) - ) - return enumeration diff --git a/libs/enum2.7/doc/enum.pdf b/libs/enum2.7/doc/enum.pdf deleted file mode 100644 index 3fb6ec264..000000000 --- a/libs/enum2.7/doc/enum.pdf +++ /dev/null @@ -1,2237 +0,0 @@ -%PDF-1.4 -%���� ReportLab Generated PDF document http://www.reportlab.com -1 0 obj -<< /F1 2 0 R /F2 3 0 R /F3 4 0 R /F4 5 0 R /F5 8 0 R /F6 15 0 R >> -endobj -2 0 obj -<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> -endobj -3 0 obj -<< /BaseFont /Courier-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font >> -endobj -4 0 obj -<< /BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F3 /Subtype /Type1 /Type /Font >> -endobj -5 0 obj -<< /BaseFont /Courier /Encoding /WinAnsiEncoding /Name /F4 /Subtype /Type1 /Type /Font >> -endobj -6 0 obj -<< /Border [ 0 0 0 ] /Contents () /Dest [ 22 0 R /XYZ 62.69291 639.3236 0 ] /Rect [ 335.1805 574.4272 405.2473 586.4272 ] /Subtype /Link /Type /Annot >> -endobj -7 0 obj -<< /Border [ 0 0 0 ] /Contents () /Dest [ 22 0 R /XYZ 62.69291 639.3236 0 ] /Rect [ 255.9742 427.4272 321.5378 439.4272 ] /Subtype /Link /Type /Annot >> -endobj -8 0 obj -<< /BaseFont /Helvetica-Oblique /Encoding /WinAnsiEncoding /Name /F5 /Subtype /Type1 /Type /Font >> -endobj -9 0 obj -<< /Border [ 0 0 0 ] /Contents () /Dest [ 26 0 R /XYZ 62.69291 278.1236 0 ] /Rect [ 82.69291 182.2272 201.0729 194.2272 ] /Subtype /Link /Type /Annot >> -endobj -10 0 obj -<< /Annots [ 6 0 R 7 0 R 9 0 R ] /Contents 56 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 55 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 - /Trans << >> /Type /Page >> -endobj -11 0 obj -<< /Contents 57 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 55 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 /Trans << >> - /Type /Page >> -endobj -12 0 obj -<< /Contents 58 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 55 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 /Trans << >> - /Type /Page >> -endobj -13 0 obj -<< /Contents 59 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 55 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 /Trans << >> - /Type /Page >> -endobj -14 0 obj -<< /Border [ 0 0 0 ] /Contents () /Dest [ 22 0 R /XYZ 62.69291 157.2236 0 ] /Rect [ 101.6029 741.7736 141.6229 753.7736 ] /Subtype /Link /Type /Annot >> -endobj -15 0 obj -<< /BaseFont /Helvetica-BoldOblique /Encoding /WinAnsiEncoding /Name /F6 /Subtype /Type1 /Type /Font >> -endobj -16 0 obj -<< /Border [ 0 0 0 ] /Contents () /Dest [ 25 0 R /XYZ 62.69291 359.6236 0 ] /Rect [ 327.1529 585.5736 392.7329 597.5736 ] /Subtype /Link /Type /Annot >> -endobj -17 0 obj -<< /Border [ 0 0 0 ] /Contents () /Dest [ 22 0 R /XYZ 62.69291 639.3236 0 ] /Rect [ 181.1917 312.1736 246.4634 324.1736 ] /Subtype /Link /Type /Annot >> -endobj -18 0 obj -<< /Annots [ 14 0 R 16 0 R 17 0 R ] /Contents 60 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 55 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 - /Trans << >> /Type /Page >> -endobj -19 0 obj -<< /Border [ 0 0 0 ] /Contents () /Dest [ 26 0 R /XYZ 62.69291 610.8236 0 ] /Rect [ 326.1329 511.3736 357.2629 523.3736 ] /Subtype /Link /Type /Annot >> -endobj -20 0 obj -<< /Border [ 0 0 0 ] /Contents () /Dest [ 25 0 R /XYZ 62.69291 359.6236 0 ] /Rect [ 241.1229 185.9736 306.7029 197.9736 ] /Subtype /Link /Type /Annot >> -endobj -21 0 obj -<< /Annots [ 19 0 R 20 0 R ] /Contents 61 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 55 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 - /Trans << >> /Type /Page >> -endobj -22 0 obj -<< /Contents 62 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 55 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 /Trans << >> - /Type /Page >> -endobj -23 0 obj -<< /Contents 63 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 55 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 /Trans << >> - /Type /Page >> -endobj -24 0 obj -<< /Contents 64 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 55 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 /Trans << >> - /Type /Page >> -endobj -25 0 obj -<< /Contents 65 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 55 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 /Trans << >> - /Type /Page >> -endobj -26 0 obj -<< /Contents 66 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 55 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 /Trans << >> - /Type /Page >> -endobj -27 0 obj -<< /Border [ 0 0 0 ] /Contents () /Dest [ 26 0 R /XYZ 62.69291 610.8236 0 ] /Rect [ 309.4094 299.5736 340.6461 311.5736 ] /Subtype /Link /Type /Annot >> -endobj -28 0 obj -<< /Annots [ 27 0 R ] /Contents 67 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 55 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 - /Trans << >> /Type /Page >> -endobj -29 0 obj -<< /Outlines 31 0 R /PageLabels 68 0 R /PageMode /UseNone /Pages 55 0 R /Type /Catalog >> -endobj -30 0 obj -<< /Author () /CreationDate (D:20160515202831+08'00') /Creator (\(unspecified\)) /Keywords () /Producer (ReportLab PDF Library - www.reportlab.com) /Subject (\(unspecified\)) - /Title (enum --- support for enumerations) >> -endobj -31 0 obj -<< /Count 27 /First 32 0 R /Last 51 0 R /Type /Outlines >> -endobj -32 0 obj -<< /Dest [ 10 0 R /XYZ 62.69291 660.6772 0 ] /Next 33 0 R /Parent 31 0 R /Title (Module Contents) >> -endobj -33 0 obj -<< /Dest [ 10 0 R /XYZ 62.69291 477.6772 0 ] /Next 34 0 R /Parent 31 0 R /Prev 32 0 R /Title (Creating an Enum) >> -endobj -34 0 obj -<< /Dest [ 11 0 R /XYZ 62.69291 197.0236 0 ] /Next 35 0 R /Parent 31 0 R /Prev 33 0 R /Title (Programmatic access to enumeration members and their attributes) >> -endobj -35 0 obj -<< /Dest [ 12 0 R /XYZ 62.69291 501.4236 0 ] /Next 36 0 R /Parent 31 0 R /Prev 34 0 R /Title (Duplicating enum members and values) >> -endobj -36 0 obj -<< /Dest [ 13 0 R /XYZ 62.69291 215.0236 0 ] /Next 37 0 R /Parent 31 0 R /Prev 35 0 R /Title (Comparisons) >> -endobj -37 0 obj -<< /Dest [ 18 0 R /XYZ 62.69291 362.4236 0 ] /Next 38 0 R /Parent 31 0 R /Prev 36 0 R /Title (Allowed members and attributes of enumerations) >> -endobj -38 0 obj -<< /Dest [ 21 0 R /XYZ 62.69291 498.6236 0 ] /Next 39 0 R /Parent 31 0 R /Prev 37 0 R /Title (Restricted subclassing of enumerations) >> -endobj -39 0 obj -<< /Dest [ 21 0 R /XYZ 62.69291 173.2236 0 ] /Next 40 0 R /Parent 31 0 R /Prev 38 0 R /Title (Pickling) >> -endobj -40 0 obj -<< /Dest [ 22 0 R /XYZ 62.69291 635.8236 0 ] /Next 41 0 R /Parent 31 0 R /Prev 39 0 R /Title (Functional API) >> -endobj -41 0 obj -<< /Count 2 /Dest [ 22 0 R /XYZ 62.69291 187.2236 0 ] /First 42 0 R /Last 43 0 R /Next 44 0 R /Parent 31 0 R - /Prev 40 0 R /Title (Derived Enumerations) >> -endobj -42 0 obj -<< /Dest [ 22 0 R /XYZ 62.69291 154.2236 0 ] /Next 43 0 R /Parent 41 0 R /Title (IntEnum) >> -endobj -43 0 obj -<< /Dest [ 23 0 R /XYZ 62.69291 217.4236 0 ] /Parent 41 0 R /Prev 42 0 R /Title (Others) >> -endobj -44 0 obj -<< /Count 1 /Dest [ 24 0 R /XYZ 62.69291 567.0236 0 ] /First 45 0 R /Last 45 0 R /Next 46 0 R /Parent 31 0 R - /Prev 41 0 R /Title (Decorators) >> -endobj -45 0 obj -<< /Dest [ 24 0 R /XYZ 62.69291 534.0236 0 ] /Parent 44 0 R /Title (unique) >> -endobj -46 0 obj -<< /Count 4 /Dest [ 24 0 R /XYZ 62.69291 356.8236 0 ] /First 47 0 R /Last 50 0 R /Next 51 0 R /Parent 31 0 R - /Prev 44 0 R /Title (Interesting examples) >> -endobj -47 0 obj -<< /Dest [ 24 0 R /XYZ 62.69291 281.8236 0 ] /Next 48 0 R /Parent 46 0 R /Title (AutoNumber) >> -endobj -48 0 obj -<< /Dest [ 25 0 R /XYZ 62.69291 641.8236 0 ] /Next 49 0 R /Parent 46 0 R /Prev 47 0 R /Title (UniqueEnum) >> -endobj -49 0 obj -<< /Dest [ 25 0 R /XYZ 62.69291 356.6236 0 ] /Next 50 0 R /Parent 46 0 R /Prev 48 0 R /Title (OrderedEnum) >> -endobj -50 0 obj -<< /Dest [ 26 0 R /XYZ 62.69291 607.8236 0 ] /Parent 46 0 R /Prev 49 0 R /Title (Planet) >> -endobj -51 0 obj -<< /Count 3 /Dest [ 26 0 R /XYZ 62.69291 274.6236 0 ] /First 52 0 R /Last 54 0 R /Parent 31 0 R /Prev 46 0 R - /Title (How are Enums different?) >> -endobj -52 0 obj -<< /Dest [ 26 0 R /XYZ 62.69291 211.6236 0 ] /Next 53 0 R /Parent 51 0 R /Title (Enum Classes) >> -endobj -53 0 obj -<< /Dest [ 28 0 R /XYZ 62.69291 664.0236 0 ] /Next 54 0 R /Parent 51 0 R /Prev 52 0 R /Title (Enum Members \(aka instances\)) >> -endobj -54 0 obj -<< /Dest [ 28 0 R /XYZ 62.69291 592.0236 0 ] /Parent 51 0 R /Prev 53 0 R /Title (Finer Points) >> -endobj -55 0 obj -<< /Count 12 /Kids [ 10 0 R 11 0 R 12 0 R 13 0 R 18 0 R 21 0 R 22 0 R 23 0 R 24 0 R 25 0 R - 26 0 R 28 0 R ] /Type /Pages >> -endobj -56 0 obj -<< /Length 6458 >> -stream -1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET -q -1 0 0 1 62.69291 741.0236 cm -q -BT 1 0 0 1 0 4 Tm 73.71488 0 Td 24 TL /F2 20 Tf 0 0 0 rg (enum ) Tj /F3 20 Tf 0 0 0 rg (--- support for enumerations) Tj T* -73.71488 0 Td ET -Q -Q -q -1 0 0 1 62.69291 702.6772 cm -n 0 14.17323 m 469.8898 14.17323 l S -Q -q -1 0 0 1 62.69291 672.6772 cm -q -BT 1 0 0 1 0 14 Tm 2.091318 Tw 12 TL /F1 10 Tf 0 0 0 rg (An enumeration is a set of symbolic names \(members\) bound to unique, constant values. Within an) Tj T* 0 Tw (enumeration, the members can be compared by identity, and the enumeration itself can be iterated over.) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 639.6772 cm -q -BT 1 0 0 1 0 3.5 Tm 21 TL /F3 17.5 Tf 0 0 0 rg (Module Contents) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 609.6772 cm -q -BT 1 0 0 1 0 14 Tm 2.027485 Tw 12 TL /F1 10 Tf 0 0 0 rg (This module defines two enumeration classes that can be used to define unique sets of names and) Tj T* 0 Tw (values: ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (and ) Tj /F4 10 Tf 0 0 0 rg (IntEnum) Tj /F1 10 Tf 0 0 0 rg (. It also defines one decorator, ) Tj /F4 10 Tf 0 0 0 rg (unique) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 591.6772 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F4 10 Tf 12 TL (Enum) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 561.6772 cm -q -BT 1 0 0 1 0 14 Tm 1.128443 Tw 12 TL /F1 10 Tf 0 0 0 rg (Base class for creating enumerated constants. See section ) Tj 0 0 .501961 rg (Functional API ) Tj 0 0 0 rg (for an alternate construction) Tj T* 0 Tw (syntax.) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 543.6772 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F4 10 Tf 12 TL (IntEnum) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 525.6772 cm -q -BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Base class for creating enumerated constants that are also subclasses of ) Tj /F4 10 Tf 0 0 0 rg (int) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 507.6772 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F4 10 Tf 12 TL (unique) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 489.6772 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Enum class decorator that ensures only one name is bound to any one value.) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 456.6772 cm -q -BT 1 0 0 1 0 3.5 Tm 21 TL /F3 17.5 Tf 0 0 0 rg (Creating an Enum) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 414.6772 cm -q -BT 1 0 0 1 0 26 Tm 2.432651 Tw 12 TL /F1 10 Tf 0 0 0 rg (Enumerations are created using the ) Tj /F4 10 Tf 0 0 0 rg (class ) Tj /F1 10 Tf 0 0 0 rg (syntax, which makes them easy to read and write. An) Tj T* 0 Tw .533555 Tw (alternative creation method is described in ) Tj 0 0 .501961 rg (Functional API) Tj 0 0 0 rg (. To define an enumeration, subclass ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (as) Tj T* 0 Tw (follows:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 333.4772 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 72 re B* -Q -q -BT 1 0 0 1 0 50 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( from enum import Enum) Tj T* (>) Tj (>) Tj (>) Tj ( class Color\(Enum\):) Tj T* (... red = 1) Tj T* (... green = 2) Tj T* (... blue = 3) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 313.4772 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Note: Nomenclature) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 307.4772 cm -Q -q -1 0 0 1 62.69291 229.4772 cm -0 0 0 rg -BT /F1 10 Tf 12 TL ET -BT 1 0 0 1 0 2 Tm T* ET -q -1 0 0 1 20 72 cm -Q -q -1 0 0 1 20 72 cm -Q -q -1 0 0 1 20 60 cm -0 0 0 rg -BT /F1 10 Tf 12 TL ET -q -1 0 0 1 6 -3 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL 10.5 0 Td (\177) Tj T* -10.5 0 Td ET -Q -Q -q -1 0 0 1 23 -3 cm -q -BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (The class ) Tj /F4 10 Tf 0 0 0 rg (Color ) Tj /F1 10 Tf 0 0 0 rg (is an ) Tj /F5 10 Tf (enumeration ) Tj /F1 10 Tf (\(or ) Tj /F5 10 Tf (enum) Tj /F1 10 Tf (\)) Tj T* ET -Q -Q -q -Q -Q -q -1 0 0 1 20 54 cm -Q -q -1 0 0 1 20 30 cm -0 0 0 rg -BT /F1 10 Tf 12 TL ET -q -1 0 0 1 6 9 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL 10.5 0 Td (\177) Tj T* -10.5 0 Td ET -Q -Q -q -1 0 0 1 23 -3 cm -q -BT 1 0 0 1 0 14 Tm 5.568863 Tw 12 TL /F1 10 Tf 0 0 0 rg (The attributes ) Tj /F4 10 Tf 0 0 0 rg (Color.red) Tj /F1 10 Tf 0 0 0 rg (, ) Tj /F4 10 Tf 0 0 0 rg (Color.green) Tj /F1 10 Tf 0 0 0 rg (, etc., are ) Tj /F5 10 Tf (enumeration members ) Tj /F1 10 Tf (\(or ) Tj /F5 10 Tf (enum) Tj T* 0 Tw (members) Tj /F1 10 Tf (\).) Tj T* ET -Q -Q -q -Q -Q -q -1 0 0 1 20 24 cm -Q -q -1 0 0 1 20 0 cm -0 0 0 rg -BT /F1 10 Tf 12 TL ET -q -1 0 0 1 6 9 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL 10.5 0 Td (\177) Tj T* -10.5 0 Td ET -Q -Q -q -1 0 0 1 23 -3 cm -q -BT 1 0 0 1 0 14 Tm 1.471318 Tw 12 TL /F1 10 Tf 0 0 0 rg (The enum members have ) Tj /F5 10 Tf (names ) Tj /F1 10 Tf (and ) Tj /F5 10 Tf (values ) Tj /F1 10 Tf (\(the name of ) Tj /F4 10 Tf 0 0 0 rg (Color.red ) Tj /F1 10 Tf 0 0 0 rg (is ) Tj /F4 10 Tf 0 0 0 rg (red) Tj /F1 10 Tf 0 0 0 rg (, the value of) Tj T* 0 Tw /F4 10 Tf 0 0 0 rg (Color.blue ) Tj /F1 10 Tf 0 0 0 rg (is ) Tj /F4 10 Tf 0 0 0 rg (3) Tj /F1 10 Tf 0 0 0 rg (, etc.\)) Tj T* ET -Q -Q -q -Q -Q -q -1 0 0 1 20 0 cm -Q -q -Q -Q -q -1 0 0 1 62.69291 229.4772 cm -Q -q -1 0 0 1 62.69291 211.4772 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Note:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 205.4772 cm -Q -q -1 0 0 1 62.69291 181.4772 cm -0 0 0 rg -BT /F1 10 Tf 12 TL ET -BT 1 0 0 1 0 2 Tm T* ET -q -1 0 0 1 20 0 cm -q -BT 1 0 0 1 0 14 Tm .126235 Tw 12 TL /F1 10 Tf 0 0 0 rg (Even though we use the ) Tj /F4 10 Tf 0 0 0 rg (class ) Tj /F1 10 Tf 0 0 0 rg (syntax to create Enums, Enums are not normal Python classes. See) Tj T* 0 Tw 0 0 .501961 rg (How are Enums different? ) Tj 0 0 0 rg (for more details.) Tj T* ET -Q -Q -q -Q -Q -q -1 0 0 1 62.69291 181.4772 cm -Q -q -1 0 0 1 62.69291 163.4772 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Enumeration members have human readable string representations:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 118.2772 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 36 re B* -Q -q -BT 1 0 0 1 0 14 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( print\(Color.red\)) Tj T* (Color.red) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 98.27717 cm -q -BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (...while their ) Tj /F4 10 Tf 0 0 0 rg (repr ) Tj /F1 10 Tf 0 0 0 rg (has more information:) Tj T* ET -Q -Q - -endstream -endobj -57 0 obj -<< /Length 4174 >> -stream -1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET -q -1 0 0 1 62.69291 727.8236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 36 re B* -Q -q -BT 1 0 0 1 0 14 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( print\(repr\(Color.red\)\)) Tj T* (<) Tj (Color.red: 1) Tj (>) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 707.8236 cm -q -BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (The ) Tj /F5 10 Tf (type ) Tj /F1 10 Tf (of an enumeration member is the enumeration it belongs to:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 626.6236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 72 re B* -Q -q -BT 1 0 0 1 0 50 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( type\(Color.red\)) Tj T* (<) Tj (enum 'Color') Tj (>) Tj T* (>) Tj (>) Tj (>) Tj ( isinstance\(Color.green, Color\)) Tj T* (True) Tj T* (>) Tj (>) Tj (>) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 606.6236 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Enum members also have a property that contains just their item name:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 561.4236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 36 re B* -Q -q -BT 1 0 0 1 0 14 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( print\(Color.red.name\)) Tj T* (red) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 529.4236 cm -q -BT 1 0 0 1 0 14 Tm .464985 Tw 12 TL /F1 10 Tf 0 0 0 rg (Enumerations support iteration. In Python 3.x definition order is used; in Python 2.x the definition order is) Tj T* 0 Tw (not available, but class attribute ) Tj /F4 10 Tf 0 0 0 rg (__order__ ) Tj /F1 10 Tf 0 0 0 rg (is supported; otherwise, value order is used:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 340.2236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 180 re B* -Q -q -BT 1 0 0 1 0 158 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( class Shake\(Enum\):) Tj T* (... __order__ = 'vanilla chocolate cookies mint' # only needed in 2.x) Tj T* (... vanilla = 7) Tj T* (... chocolate = 4) Tj T* (... cookies = 9) Tj T* (... mint = 3) Tj T* (...) Tj T* (>) Tj (>) Tj (>) Tj ( for shake in Shake:) Tj T* (... print\(shake\)) Tj T* (...) Tj T* (Shake.vanilla) Tj T* (Shake.chocolate) Tj T* (Shake.cookies) Tj T* (Shake.mint) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 308.2236 cm -q -BT 1 0 0 1 0 14 Tm 1.893735 Tw 12 TL /F1 10 Tf 0 0 0 rg (The ) Tj /F4 10 Tf 0 0 0 rg (__order__ ) Tj /F1 10 Tf 0 0 0 rg (attribute is always removed, and in 3.x it is also ignored \(order is definition order\);) Tj T* 0 Tw (however, in the stdlib version it will be ignored but not removed.) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 290.2236 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Enumeration members are hashable, so they can be used in dictionaries and sets:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 209.0236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 72 re B* -Q -q -BT 1 0 0 1 0 50 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( apples = {}) Tj T* (>) Tj (>) Tj (>) Tj ( apples[Color.red] = 'red delicious') Tj T* (>) Tj (>) Tj (>) Tj ( apples[Color.green] = 'granny smith') Tj T* (>) Tj (>) Tj (>) Tj ( apples == {Color.red: 'red delicious', Color.green: 'granny smith'}) Tj T* (True) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 155.0236 cm -q -BT 1 0 0 1 0 24.5 Tm 21 TL /F3 17.5 Tf 0 0 0 rg (Programmatic access to enumeration members and) Tj T* (their attributes) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 113.0236 cm -q -BT 1 0 0 1 0 26 Tm 3.541797 Tw 12 TL /F1 10 Tf 0 0 0 rg (Sometimes it's useful to access members in enumerations programmatically \(i.e. situations where) Tj T* 0 Tw .922651 Tw /F4 10 Tf 0 0 0 rg (Color.red ) Tj /F1 10 Tf 0 0 0 rg (won't do because the exact color is not known at program-writing time\). ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (allows such) Tj T* 0 Tw (access:) Tj T* ET -Q -Q - -endstream -endobj -58 0 obj -<< /Length 3791 >> -stream -1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET -q -1 0 0 1 62.69291 703.8236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 60 re B* -Q -q -BT 1 0 0 1 0 38 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( Color\(1\)) Tj T* (<) Tj (Color.red: 1) Tj (>) Tj T* (>) Tj (>) Tj (>) Tj ( Color\(3\)) Tj T* (<) Tj (Color.blue: 3) Tj (>) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 683.8236 cm -q -BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (If you want to access enum members by ) Tj /F5 10 Tf (name) Tj /F1 10 Tf (, use item access:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 614.6236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 60 re B* -Q -q -BT 1 0 0 1 0 38 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( Color['red']) Tj T* (<) Tj (Color.red: 1) Tj (>) Tj T* (>) Tj (>) Tj (>) Tj ( Color['green']) Tj T* (<) Tj (Color.green: 2) Tj (>) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 594.6236 cm -q -BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (If have an enum member and need its ) Tj /F4 10 Tf 0 0 0 rg (name ) Tj /F1 10 Tf 0 0 0 rg (or ) Tj /F4 10 Tf 0 0 0 rg (value) Tj /F1 10 Tf 0 0 0 rg (:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 513.4236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 72 re B* -Q -q -BT 1 0 0 1 0 50 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( member = Color.red) Tj T* (>) Tj (>) Tj (>) Tj ( member.name) Tj T* ('red') Tj T* (>) Tj (>) Tj (>) Tj ( member.value) Tj T* (1) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 480.4236 cm -q -BT 1 0 0 1 0 3.5 Tm 21 TL /F3 17.5 Tf 0 0 0 rg (Duplicating enum members and values) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 450.4236 cm -q -0 0 0 rg -BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL .13832 Tw (Having two enum members \(or any other attribute\) with the same name is invalid; in Python 3.x this would) Tj T* 0 Tw (raise an error, but in Python 2.x the second member simply overwrites the first:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 249.2236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 192 re B* -Q -q -BT 1 0 0 1 0 170 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( # python 2.x) Tj T* (>) Tj (>) Tj (>) Tj ( class Shape\(Enum\):) Tj T* (... square = 2) Tj T* (... square = 3) Tj T* (...) Tj T* (>) Tj (>) Tj (>) Tj ( Shape.square) Tj T* (<) Tj (Shape.square: 3) Tj (>) Tj T* T* (>) Tj (>) Tj (>) Tj ( # python 3.x) Tj T* (>) Tj (>) Tj (>) Tj ( class Shape\(Enum\):) Tj T* (... square = 2) Tj T* (... square = 3) Tj T* (Traceback \(most recent call last\):) Tj T* (...) Tj T* (TypeError: Attempted to reuse key: 'square') Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 205.2236 cm -q -0 0 0 rg -BT 1 0 0 1 0 26 Tm /F1 10 Tf 12 TL .384987 Tw (However, two enum members are allowed to have the same value. Given two members A and B with the) Tj T* 0 Tw .444772 Tw (same value \(and A defined first\), B is an alias to A. By-value lookup of the value of A and B will return A.) Tj T* 0 Tw (By-name lookup of B will also return A:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 88.02362 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 492 108 re B* -Q -q -BT 1 0 0 1 0 86 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( class Shape\(Enum\):) Tj T* (... __order__ = 'square diamond circle alias_for_square' # only needed in 2.x) Tj T* (... square = 2) Tj T* (... diamond = 1) Tj T* (... circle = 3) Tj T* (... alias_for_square = 2) Tj T* (...) Tj T* (>) Tj (>) Tj (>) Tj ( Shape.square) Tj T* ET -Q -Q -Q -Q -Q - -endstream -endobj -59 0 obj -<< /Length 4406 >> -stream -1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET -q -1 0 0 1 62.69291 691.8236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 72 re B* -Q -q -BT 1 0 0 1 0 50 Tm 12 TL /F4 10 Tf 0 0 0 rg (<) Tj (Shape.square: 2) Tj (>) Tj T* (>) Tj (>) Tj (>) Tj ( Shape.alias_for_square) Tj T* (<) Tj (Shape.square: 2) Tj (>) Tj T* (>) Tj (>) Tj (>) Tj ( Shape\(2\)) Tj T* (<) Tj (Shape.square: 2) Tj (>) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 659.8236 cm -q -BT 1 0 0 1 0 14 Tm 1.074104 Tw 12 TL /F1 10 Tf 0 0 0 rg (Allowing aliases is not always desirable. ) Tj /F4 10 Tf 0 0 0 rg (unique ) Tj /F1 10 Tf 0 0 0 rg (can be used to ensure that none exist in a particular) Tj T* 0 Tw (enumeration:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 506.6236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 144 re B* -Q -q -BT 1 0 0 1 0 122 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( from enum import unique) Tj T* (>) Tj (>) Tj (>) Tj ( @unique) Tj T* (... class Mistake\(Enum\):) Tj T* (... __order__ = 'one two three four' # only needed in 2.x) Tj T* (... one = 1) Tj T* (... two = 2) Tj T* (... three = 3) Tj T* (... four = 3) Tj T* (Traceback \(most recent call last\):) Tj T* (...) Tj T* (ValueError: duplicate names found in ) Tj (<) Tj (enum 'Mistake') Tj (>) Tj (: four -) Tj (>) Tj ( three) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 486.6236 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Iterating over the members of an enum does not provide the aliases:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 441.4236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 36 re B* -Q -q -BT 1 0 0 1 0 14 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( list\(Shape\)) Tj T* ([) Tj (<) Tj (Shape.square: 2) Tj (>) Tj (, ) Tj (<) Tj (Shape.diamond: 1) Tj (>) Tj (, ) Tj (<) Tj (Shape.circle: 3) Tj (>) Tj (]) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 409.4236 cm -q -BT 1 0 0 1 0 14 Tm 1.307126 Tw 12 TL /F1 10 Tf 0 0 0 rg (The special attribute ) Tj /F4 10 Tf 0 0 0 rg (__members__ ) Tj /F1 10 Tf 0 0 0 rg (is a dictionary mapping names to members. It includes all names) Tj T* 0 Tw (defined in the enumeration, including the aliases:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 304.2236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 96 re B* -Q -q -BT 1 0 0 1 0 74 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( for name, member in sorted\(Shape.__members__.items\(\)\):) Tj T* (... name, member) Tj T* (...) Tj T* (\('alias_for_square', ) Tj (<) Tj (Shape.square: 2) Tj (>) Tj (\)) Tj T* (\('circle', ) Tj (<) Tj (Shape.circle: 3) Tj (>) Tj (\)) Tj T* (\('diamond', ) Tj (<) Tj (Shape.diamond: 1) Tj (>) Tj (\)) Tj T* (\('square', ) Tj (<) Tj (Shape.square: 2) Tj (>) Tj (\)) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 272.2236 cm -q -BT 1 0 0 1 0 14 Tm .080751 Tw 12 TL /F1 10 Tf 0 0 0 rg (The ) Tj /F4 10 Tf 0 0 0 rg (__members__ ) Tj /F1 10 Tf 0 0 0 rg (attribute can be used for detailed programmatic access to the enumeration members.) Tj T* 0 Tw (For example, finding all the aliases:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 227.0236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 486 36 re B* -Q -q -BT 1 0 0 1 0 14 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( [name for name, member in Shape.__members__.items\(\) if member.name != name]) Tj T* (['alias_for_square']) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 194.0236 cm -q -BT 1 0 0 1 0 3.5 Tm 21 TL /F3 17.5 Tf 0 0 0 rg (Comparisons) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 176.0236 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Enumeration members are compared by identity:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 82.82362 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 84 re B* -Q -q -BT 1 0 0 1 0 62 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( Color.red is Color.red) Tj T* (True) Tj T* (>) Tj (>) Tj (>) Tj ( Color.red is Color.blue) Tj T* (False) Tj T* (>) Tj (>) Tj (>) Tj ( Color.red is not Color.blue) Tj T* (True) Tj T* ET -Q -Q -Q -Q -Q - -endstream -endobj -60 0 obj -<< /Length 4521 >> -stream -1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET -q -1 0 0 1 62.69291 741.0236 cm -q -BT 1 0 0 1 0 14 Tm 1.131647 Tw 12 TL /F1 10 Tf 0 0 0 rg (Ordered comparisons between enumeration values are ) Tj /F5 10 Tf (not ) Tj /F1 10 Tf (supported. Enum members are not integers) Tj T* 0 Tw (\(but see ) Tj 0 0 .501961 rg (IntEnum ) Tj 0 0 0 rg (below\):) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 671.8236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 60 re B* -Q -q -BT 1 0 0 1 0 38 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( Color.red ) Tj (<) Tj ( Color.blue) Tj T* (Traceback \(most recent call last\):) Tj T* ( File ") Tj (<) Tj (stdin) Tj (>) Tj (", line 1, in ) Tj (<) Tj (module) Tj (>) Tj T* (TypeError: unorderable types: Color\(\) ) Tj (<) Tj ( Color\(\)) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 651.8236 cm -Q -q -1 0 0 1 62.69291 568.8236 cm -.960784 .960784 .862745 rg -n 0 83 469.8898 -83 re f* -0 0 0 rg -BT /F1 10 Tf 12 TL ET -BT 1 0 0 1 6 57 Tm T* ET -q -1 0 0 1 16 52 cm -q -0 0 0 rg -BT 1 0 0 1 0 2.5 Tm /F6 12.5 Tf 15 TL (Warning) Tj T* ET -Q -Q -q -1 0 0 1 16 16 cm -q -BT 1 0 0 1 0 14 Tm .189398 Tw 12 TL /F1 10 Tf 0 0 0 rg (In Python 2 ) Tj /F5 10 Tf (everything ) Tj /F1 10 Tf (is ordered, even though the ordering may not make sense. If you want your) Tj T* 0 Tw (enumerations to have a sensible ordering check out the ) Tj 0 0 .501961 rg (OrderedEnum ) Tj 0 0 0 rg (recipe below.) Tj T* ET -Q -Q -q -1 J -1 j -.662745 .662745 .662745 RG -.5 w -n 0 83 m 469.8898 83 l S -n 0 0 m 469.8898 0 l S -n 0 0 m 0 83 l S -n 469.8898 0 m 469.8898 83 l S -Q -Q -q -1 0 0 1 62.69291 562.8236 cm -Q -q -1 0 0 1 62.69291 544.8236 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Equality comparisons are defined though:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 451.6236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 84 re B* -Q -q -BT 1 0 0 1 0 62 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( Color.blue == Color.red) Tj T* (False) Tj T* (>) Tj (>) Tj (>) Tj ( Color.blue != Color.red) Tj T* (True) Tj T* (>) Tj (>) Tj (>) Tj ( Color.blue == Color.blue) Tj T* (True) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 419.6236 cm -q -BT 1 0 0 1 0 14 Tm 2.582706 Tw 12 TL /F1 10 Tf 0 0 0 rg (Comparisons against non-enumeration values will always compare not equal \(again, ) Tj /F4 10 Tf 0 0 0 rg (IntEnum ) Tj /F1 10 Tf 0 0 0 rg (was) Tj T* 0 Tw (explicitly designed to behave differently, see below\):) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 374.4236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 36 re B* -Q -q -BT 1 0 0 1 0 14 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( Color.blue == 2) Tj T* (False) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 341.4236 cm -q -BT 1 0 0 1 0 3.5 Tm 21 TL /F3 17.5 Tf 0 0 0 rg (Allowed members and attributes of enumerations) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 287.4236 cm -q -BT 1 0 0 1 0 38 Tm 2.755697 Tw 12 TL /F1 10 Tf 0 0 0 rg (The examples above use integers for enumeration values. Using integers is short and handy \(and) Tj T* 0 Tw .241751 Tw (provided by default by the ) Tj 0 0 .501961 rg (Functional API) Tj 0 0 0 rg (\), but not strictly enforced. In the vast majority of use-cases, one) Tj T* 0 Tw .848221 Tw (doesn't care what the actual value of an enumeration is. But if the value ) Tj /F5 10 Tf (is ) Tj /F1 10 Tf (important, enumerations can) Tj T* 0 Tw (have arbitrary values.) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 257.4236 cm -q -0 0 0 rg -BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL .638735 Tw (Enumerations are Python classes, and can have methods and special methods as usual. If we have this) Tj T* 0 Tw (enumeration:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 80.22362 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 168 re B* -Q -q -BT 1 0 0 1 0 146 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( class Mood\(Enum\):) Tj T* (... funky = 1) Tj T* (... happy = 3) Tj T* (...) Tj T* (... def describe\(self\):) Tj T* (... # self is the member here) Tj T* (... return self.name, self.value) Tj T* (...) Tj T* (... def __str__\(self\):) Tj T* (... return 'my custom str! {0}'.format\(self.value\)) Tj T* (...) Tj T* (... @classmethod) Tj T* (... def favorite_mood\(cls\):) Tj T* ET -Q -Q -Q -Q -Q - -endstream -endobj -61 0 obj -<< /Length 4627 >> -stream -1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET -q -1 0 0 1 62.69291 727.8236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 36 re B* -Q -q -BT 1 0 0 1 0 14 Tm 12 TL /F4 10 Tf 0 0 0 rg (... # cls here is the enumeration) Tj T* (... return cls.happy) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 707.8236 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Then:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 614.6236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 84 re B* -Q -q -BT 1 0 0 1 0 62 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( Mood.favorite_mood\(\)) Tj T* (<) Tj (Mood.happy: 3) Tj (>) Tj T* (>) Tj (>) Tj (>) Tj ( Mood.happy.describe\(\)) Tj T* (\('happy', 3\)) Tj T* (>) Tj (>) Tj (>) Tj ( str\(Mood.funky\)) Tj T* ('my custom str! 1') Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 558.6236 cm -q -BT 1 0 0 1 0 38 Tm 3.14186 Tw 12 TL /F1 10 Tf 0 0 0 rg (The rules for what is allowed are as follows: _sunder_ names \(starting and ending with a single) Tj T* 0 Tw .310651 Tw (underscore\) are reserved by enum and cannot be used; all other attributes defined within an enumeration) Tj T* 0 Tw 2.199213 Tw (will become members of this enumeration, with the exception of ) Tj /F5 10 Tf (__dunder__ ) Tj /F1 10 Tf (names and descriptors) Tj T* 0 Tw (\(methods are also descriptors\).) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 540.6236 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Note:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 534.6236 cm -Q -q -1 0 0 1 62.69291 510.6236 cm -0 0 0 rg -BT /F1 10 Tf 12 TL ET -BT 1 0 0 1 0 2 Tm T* ET -q -1 0 0 1 20 0 cm -q -BT 1 0 0 1 0 14 Tm .979213 Tw 12 TL /F1 10 Tf 0 0 0 rg (If your enumeration defines ) Tj /F4 10 Tf 0 0 0 rg (__new__ ) Tj /F1 10 Tf 0 0 0 rg (and/or ) Tj /F4 10 Tf 0 0 0 rg (__init__ ) Tj /F1 10 Tf 0 0 0 rg (then whatever value\(s\) were given to the) Tj T* 0 Tw (enum member will be passed into those methods. See ) Tj 0 0 .501961 rg (Planet ) Tj 0 0 0 rg (for an example.) Tj T* ET -Q -Q -q -Q -Q -q -1 0 0 1 62.69291 510.6236 cm -Q -q -1 0 0 1 62.69291 477.6236 cm -q -BT 1 0 0 1 0 3.5 Tm 21 TL /F3 17.5 Tf 0 0 0 rg (Restricted subclassing of enumerations) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 447.6236 cm -q -0 0 0 rg -BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL .778735 Tw (Subclassing an enumeration is allowed only if the enumeration does not define any members. So this is) Tj T* 0 Tw (forbidden:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 366.4236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 72 re B* -Q -q -BT 1 0 0 1 0 50 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( class MoreColor\(Color\):) Tj T* (... pink = 17) Tj T* (Traceback \(most recent call last\):) Tj T* (...) Tj T* (TypeError: Cannot extend enumerations) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 346.4236 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (But this is allowed:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 229.2236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 108 re B* -Q -q -BT 1 0 0 1 0 86 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( class Foo\(Enum\):) Tj T* (... def some_behavior\(self\):) Tj T* (... pass) Tj T* (...) Tj T* (>) Tj (>) Tj (>) Tj ( class Bar\(Foo\):) Tj T* (... happy = 1) Tj T* (... sad = 2) Tj T* (...) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 185.2236 cm -q -BT 1 0 0 1 0 26 Tm .127984 Tw 12 TL /F1 10 Tf 0 0 0 rg (Allowing subclassing of enums that define members would lead to a violation of some important invariants) Tj T* 0 Tw 1.889985 Tw (of types and instances. On the other hand, it makes sense to allow sharing some common behavior) Tj T* 0 Tw (between a group of enumerations. \(See ) Tj 0 0 .501961 rg (OrderedEnum ) Tj 0 0 0 rg (for an example.\)) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 152.2236 cm -q -BT 1 0 0 1 0 3.5 Tm 21 TL /F3 17.5 Tf 0 0 0 rg (Pickling) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 134.2236 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Enumerations can be pickled and unpickled:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 89.02362 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 36 re B* -Q -q -BT 1 0 0 1 0 14 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( from enum.test_enum import Fruit) Tj T* (>) Tj (>) Tj (>) Tj ( from pickle import dumps, loads) Tj T* ET -Q -Q -Q -Q -Q - -endstream -endobj -62 0 obj -<< /Length 5372 >> -stream -1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET -q -1 0 0 1 62.69291 727.8236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 36 re B* -Q -q -BT 1 0 0 1 0 14 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( Fruit.tomato is loads\(dumps\(Fruit.tomato, 2\)\)) Tj T* (True) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 695.8236 cm -q -0 0 0 rg -BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL 1.256457 Tw (The usual restrictions for pickling apply: picklable enums must be defined in the top level of a module,) Tj T* 0 Tw (since unpickling requires them to be importable from that module.) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 677.8236 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Note:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 671.8236 cm -Q -q -1 0 0 1 62.69291 647.8236 cm -0 0 0 rg -BT /F1 10 Tf 12 TL ET -BT 1 0 0 1 0 2 Tm T* ET -q -1 0 0 1 20 0 cm -q -0 0 0 rg -BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL .081163 Tw (With pickle protocol version 4 \(introduced in Python 3.4\) it is possible to easily pickle enums nested in) Tj T* 0 Tw (other classes.) Tj T* ET -Q -Q -q -Q -Q -q -1 0 0 1 62.69291 647.8236 cm -Q -q -1 0 0 1 62.69291 614.8236 cm -q -BT 1 0 0 1 0 3.5 Tm 21 TL /F3 17.5 Tf 0 0 0 rg (Functional API) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 596.8236 cm -q -BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (The ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (class is callable, providing the following functional API:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 467.6236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 120 re B* -Q -q -BT 1 0 0 1 0 98 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( Animal = Enum\('Animal', 'ant bee cat dog'\)) Tj T* (>) Tj (>) Tj (>) Tj ( Animal) Tj T* (<) Tj (enum 'Animal') Tj (>) Tj T* (>) Tj (>) Tj (>) Tj ( Animal.ant) Tj T* (<) Tj (Animal.ant: 1) Tj (>) Tj T* (>) Tj (>) Tj (>) Tj ( Animal.ant.value) Tj T* (1) Tj T* (>) Tj (>) Tj (>) Tj ( list\(Animal\)) Tj T* ([) Tj (<) Tj (Animal.ant: 1) Tj (>) Tj (, ) Tj (<) Tj (Animal.bee: 2) Tj (>) Tj (, ) Tj (<) Tj (Animal.cat: 3) Tj (>) Tj (, ) Tj (<) Tj (Animal.dog: 4) Tj (>) Tj (]) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 435.6236 cm -q -BT 1 0 0 1 0 14 Tm .602209 Tw 12 TL /F1 10 Tf 0 0 0 rg (The semantics of this API resemble ) Tj /F4 10 Tf 0 0 0 rg (namedtuple) Tj /F1 10 Tf 0 0 0 rg (. The first argument of the call to ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (is the name of) Tj T* 0 Tw (the enumeration.) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 369.6236 cm -q -BT 1 0 0 1 0 50 Tm 1.326412 Tw 12 TL /F1 10 Tf 0 0 0 rg (The second argument is the ) Tj /F5 10 Tf (source ) Tj /F1 10 Tf (of enumeration member names. It can be a whitespace-separated) Tj T* 0 Tw .993516 Tw (string of names, a sequence of names, a sequence of 2-tuples with key/value pairs, or a mapping \(e.g.) Tj T* 0 Tw 1.168555 Tw (dictionary\) of names to values. The last two options enable assigning arbitrary values to enumerations;) Tj T* 0 Tw .537485 Tw (the others auto-assign increasing integers starting with 1. A new class derived from ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (is returned. In) Tj T* 0 Tw (other words, the above assignment to ) Tj /F4 10 Tf 0 0 0 rg (Animal ) Tj /F1 10 Tf 0 0 0 rg (is equivalent to:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 288.4236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 72 re B* -Q -q -BT 1 0 0 1 0 50 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( class Animals\(Enum\):) Tj T* (... ant = 1) Tj T* (... bee = 2) Tj T* (... cat = 3) Tj T* (... dog = 4) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 232.4236 cm -q -0 0 0 rg -BT 1 0 0 1 0 38 Tm /F1 10 Tf 12 TL 1.239984 Tw (Pickling enums created with the functional API can be tricky as frame stack implementation details are) Tj T* 0 Tw .937132 Tw (used to try and figure out which module the enumeration is being created in \(e.g. it will fail if you use a) Tj T* 0 Tw 1.321163 Tw (utility function in separate module, and also may not work on IronPython or Jython\). The solution is to) Tj T* 0 Tw (specify the module name explicitly as follows:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 199.2236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 24 re B* -Q -q -BT 1 0 0 1 0 2 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( Animals = Enum\('Animals', 'ant bee cat dog', module=__name__\)) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 166.2236 cm -q -BT 1 0 0 1 0 3.5 Tm 21 TL /F3 17.5 Tf 0 0 0 rg (Derived Enumerations) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 136.2236 cm -q -BT 1 0 0 1 0 3 Tm 18 TL /F3 15 Tf 0 0 0 rg (IntEnum) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 94.22362 cm -q -BT 1 0 0 1 0 26 Tm 1.99832 Tw 12 TL /F1 10 Tf 0 0 0 rg (A variation of ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (is provided which is also a subclass of ) Tj /F4 10 Tf 0 0 0 rg (int) Tj /F1 10 Tf 0 0 0 rg (. Members of an ) Tj /F4 10 Tf 0 0 0 rg (IntEnum ) Tj /F1 10 Tf 0 0 0 rg (can be) Tj T* 0 Tw .087984 Tw (compared to integers; by extension, integer enumerations of different types can also be compared to each) Tj T* 0 Tw (other:) Tj T* ET -Q -Q - -endstream -endobj -63 0 obj -<< /Length 4141 >> -stream -1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET -q -1 0 0 1 62.69291 571.8236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 192 re B* -Q -q -BT 1 0 0 1 0 170 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( from enum import IntEnum) Tj T* (>) Tj (>) Tj (>) Tj ( class Shape\(IntEnum\):) Tj T* (... circle = 1) Tj T* (... square = 2) Tj T* (...) Tj T* (>) Tj (>) Tj (>) Tj ( class Request\(IntEnum\):) Tj T* (... post = 1) Tj T* (... get = 2) Tj T* (...) Tj T* (>) Tj (>) Tj (>) Tj ( Shape == 1) Tj T* (False) Tj T* (>) Tj (>) Tj (>) Tj ( Shape.circle == 1) Tj T* (True) Tj T* (>) Tj (>) Tj (>) Tj ( Shape.circle == Request.post) Tj T* (True) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 551.8236 cm -q -BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (However, they still can't be compared to standard ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (enumerations:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 410.6236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 132 re B* -Q -q -BT 1 0 0 1 0 110 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( class Shape\(IntEnum\):) Tj T* (... circle = 1) Tj T* (... square = 2) Tj T* (...) Tj T* (>) Tj (>) Tj (>) Tj ( class Color\(Enum\):) Tj T* (... red = 1) Tj T* (... green = 2) Tj T* (...) Tj T* (>) Tj (>) Tj (>) Tj ( Shape.circle == Color.red) Tj T* (False) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 390.6236 cm -q -BT 1 0 0 1 0 2 Tm 12 TL /F4 10 Tf 0 0 0 rg (IntEnum ) Tj /F1 10 Tf 0 0 0 rg (values behave like integers in other ways you'd expect:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 297.4236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 84 re B* -Q -q -BT 1 0 0 1 0 62 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( int\(Shape.circle\)) Tj T* (1) Tj T* (>) Tj (>) Tj (>) Tj ( ['a', 'b', 'c'][Shape.circle]) Tj T* ('b') Tj T* (>) Tj (>) Tj (>) Tj ( [i for i in range\(Shape.square\)]) Tj T* ([0, 1]) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 229.4236 cm -q -BT 1 0 0 1 0 50 Tm 1.197126 Tw 12 TL /F1 10 Tf 0 0 0 rg (For the vast majority of code, ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (is strongly recommended, since ) Tj /F4 10 Tf 0 0 0 rg (IntEnum ) Tj /F1 10 Tf 0 0 0 rg (breaks some semantic) Tj T* 0 Tw .793318 Tw (promises of an enumeration \(by being comparable to integers, and thus by transitivity to other unrelated) Tj T* 0 Tw .554985 Tw (enumerations\). It should be used only in special cases where there's no other choice; for example, when) Tj T* 0 Tw .746136 Tw (integer constants are replaced with enumerations and backwards compatibility is required with code that) Tj T* 0 Tw (still expects integers.) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 199.4236 cm -q -BT 1 0 0 1 0 3 Tm 18 TL /F3 15 Tf 0 0 0 rg (Others) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 181.4236 cm -q -BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (While ) Tj /F4 10 Tf 0 0 0 rg (IntEnum ) Tj /F1 10 Tf 0 0 0 rg (is part of the ) Tj /F4 10 Tf 0 0 0 rg (enum ) Tj /F1 10 Tf 0 0 0 rg (module, it would be very simple to implement independently:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 136.2236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 36 re B* -Q -q -0 0 0 rg -BT 1 0 0 1 0 14 Tm /F4 10 Tf 12 TL (class IntEnum\(int, Enum\):) Tj T* ( pass) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 104.2236 cm -q -BT 1 0 0 1 0 14 Tm .361412 Tw 12 TL /F1 10 Tf 0 0 0 rg (This demonstrates how similar derived enumerations can be defined; for example a ) Tj /F4 10 Tf 0 0 0 rg (StrEnum ) Tj /F1 10 Tf 0 0 0 rg (that mixes) Tj T* 0 Tw (in ) Tj /F4 10 Tf 0 0 0 rg (str ) Tj /F1 10 Tf 0 0 0 rg (instead of ) Tj /F4 10 Tf 0 0 0 rg (int) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 86.22362 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Some rules:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 80.22362 cm -Q -q -1 0 0 1 62.69291 80.22362 cm -Q - -endstream -endobj -64 0 obj -<< /Length 7108 >> -stream -1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET -q -1 0 0 1 62.69291 741.0236 cm -0 0 0 rg -BT /F1 10 Tf 12 TL ET -q -1 0 0 1 6 9 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL 5.66 0 Td (1.) Tj T* -5.66 0 Td ET -Q -Q -q -1 0 0 1 23 -3 cm -q -BT 1 0 0 1 0 14 Tm .477318 Tw 12 TL /F1 10 Tf 0 0 0 rg (When subclassing ) Tj /F4 10 Tf 0 0 0 rg (Enum) Tj /F1 10 Tf 0 0 0 rg (, mix-in types must appear before ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (itself in the sequence of bases, as) Tj T* 0 Tw (in the ) Tj /F4 10 Tf 0 0 0 rg (IntEnum ) Tj /F1 10 Tf 0 0 0 rg (example above.) Tj T* ET -Q -Q -q -Q -Q -q -1 0 0 1 62.69291 735.0236 cm -Q -q -1 0 0 1 62.69291 699.0236 cm -0 0 0 rg -BT /F1 10 Tf 12 TL ET -q -1 0 0 1 6 21 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL 5.66 0 Td (2.) Tj T* -5.66 0 Td ET -Q -Q -q -1 0 0 1 23 -3 cm -q -BT 1 0 0 1 0 26 Tm 1.147045 Tw 12 TL /F1 10 Tf 0 0 0 rg (While ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (can have members of any type, once you mix in an additional type, all the members) Tj T* 0 Tw .420574 Tw (must have values of that type, e.g. ) Tj /F4 10 Tf 0 0 0 rg (int ) Tj /F1 10 Tf 0 0 0 rg (above. This restriction does not apply to mix-ins which only) Tj T* 0 Tw (add methods and don't specify another data type such as ) Tj /F4 10 Tf 0 0 0 rg (int ) Tj /F1 10 Tf 0 0 0 rg (or ) Tj /F4 10 Tf 0 0 0 rg (str) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET -Q -Q -q -Q -Q -q -1 0 0 1 62.69291 693.0236 cm -Q -q -1 0 0 1 62.69291 669.0236 cm -0 0 0 rg -BT /F1 10 Tf 12 TL ET -q -1 0 0 1 6 9 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL 5.66 0 Td (3.) Tj T* -5.66 0 Td ET -Q -Q -q -1 0 0 1 23 -3 cm -q -BT 1 0 0 1 0 14 Tm .100542 Tw 12 TL /F1 10 Tf 0 0 0 rg (When another data type is mixed in, the ) Tj /F4 10 Tf 0 0 0 rg (value ) Tj /F1 10 Tf 0 0 0 rg (attribute is ) Tj /F5 10 Tf (not the same ) Tj /F1 10 Tf (as the enum member itself,) Tj T* 0 Tw (although it is equivalant and will compare equal.) Tj T* ET -Q -Q -q -Q -Q -q -1 0 0 1 62.69291 663.0236 cm -Q -q -1 0 0 1 62.69291 609.0236 cm -0 0 0 rg -BT /F1 10 Tf 12 TL ET -q -1 0 0 1 6 39 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL 5.66 0 Td (4.) Tj T* -5.66 0 Td ET -Q -Q -q -1 0 0 1 23 27 cm -q -BT 1 0 0 1 0 14 Tm 1.85998 Tw 12 TL /F1 10 Tf 0 0 0 rg (%-style formatting: ) Tj /F4 10 Tf 0 0 0 rg (%s ) Tj /F1 10 Tf 0 0 0 rg (and ) Tj /F4 10 Tf 0 0 0 rg (%r ) Tj /F1 10 Tf 0 0 0 rg (call ) Tj /F4 10 Tf 0 0 0 rg (Enum) Tj /F1 10 Tf 0 0 0 rg ('s ) Tj /F4 10 Tf 0 0 0 rg (__str__ ) Tj /F1 10 Tf 0 0 0 rg (and ) Tj /F4 10 Tf 0 0 0 rg (__repr__ ) Tj /F1 10 Tf 0 0 0 rg (respectively; other codes) Tj T* 0 Tw (\(such as ) Tj /F4 10 Tf 0 0 0 rg (%i ) Tj /F1 10 Tf 0 0 0 rg (or ) Tj /F4 10 Tf 0 0 0 rg (%h ) Tj /F1 10 Tf 0 0 0 rg (for IntEnum\) treat the enum member as its mixed-in type.) Tj T* ET -Q -Q -q -1 0 0 1 23 -3 cm -q -BT 1 0 0 1 0 14 Tm .067045 Tw 12 TL /F1 10 Tf 0 0 0 rg (Note: Prior to Python 3.4 there is a bug in ) Tj /F4 10 Tf 0 0 0 rg (str) Tj /F1 10 Tf 0 0 0 rg ('s %-formatting: ) Tj /F4 10 Tf 0 0 0 rg (int ) Tj /F1 10 Tf 0 0 0 rg (subclasses are printed as strings) Tj T* 0 Tw (and not numbers when the ) Tj /F4 10 Tf 0 0 0 rg (%d) Tj /F1 10 Tf 0 0 0 rg (, ) Tj /F4 10 Tf 0 0 0 rg (%i) Tj /F1 10 Tf 0 0 0 rg (, or ) Tj /F4 10 Tf 0 0 0 rg (%u ) Tj /F1 10 Tf 0 0 0 rg (codes are used.) Tj T* ET -Q -Q -q -Q -Q -q -1 0 0 1 62.69291 603.0236 cm -Q -q -1 0 0 1 62.69291 579.0236 cm -0 0 0 rg -BT /F1 10 Tf 12 TL ET -q -1 0 0 1 6 9 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL 5.66 0 Td (5.) Tj T* -5.66 0 Td ET -Q -Q -q -1 0 0 1 23 -3 cm -q -BT 1 0 0 1 0 14 Tm 1.880751 Tw 12 TL /F4 10 Tf 0 0 0 rg (str.__format__ ) Tj /F1 10 Tf 0 0 0 rg (\(or ) Tj /F4 10 Tf 0 0 0 rg (format) Tj /F1 10 Tf 0 0 0 rg (\) will use the mixed-in type's ) Tj /F4 10 Tf 0 0 0 rg (__format__) Tj /F1 10 Tf 0 0 0 rg (. If the ) Tj /F4 10 Tf 0 0 0 rg (Enum) Tj /F1 10 Tf 0 0 0 rg ('s ) Tj /F4 10 Tf 0 0 0 rg (str ) Tj /F1 10 Tf 0 0 0 rg (or) Tj T* 0 Tw /F4 10 Tf 0 0 0 rg (repr ) Tj /F1 10 Tf 0 0 0 rg (is desired use the ) Tj /F4 10 Tf 0 0 0 rg (!s ) Tj /F1 10 Tf 0 0 0 rg (or ) Tj /F4 10 Tf 0 0 0 rg (!r) Tj /F1 10 Tf 0 0 0 rg ( ) Tj /F4 10 Tf 0 0 0 rg (str ) Tj /F1 10 Tf 0 0 0 rg (format codes.) Tj T* ET -Q -Q -q -Q -Q -q -1 0 0 1 62.69291 579.0236 cm -Q -q -1 0 0 1 62.69291 546.0236 cm -q -BT 1 0 0 1 0 3.5 Tm 21 TL /F3 17.5 Tf 0 0 0 rg (Decorators) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 516.0236 cm -q -BT 1 0 0 1 0 3 Tm 18 TL /F3 15 Tf 0 0 0 rg (unique) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 486.0236 cm -q -BT 1 0 0 1 0 14 Tm .287251 Tw 12 TL /F1 10 Tf 0 0 0 rg (A ) Tj /F4 10 Tf 0 0 0 rg (class ) Tj /F1 10 Tf 0 0 0 rg (decorator specifically for enumerations. It searches an enumeration's ) Tj /F4 10 Tf 0 0 0 rg (__members__ ) Tj /F1 10 Tf 0 0 0 rg (gathering) Tj T* 0 Tw (any aliases it finds; if any are found ) Tj /F4 10 Tf 0 0 0 rg (ValueError ) Tj /F1 10 Tf 0 0 0 rg (is raised with the details:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 368.8236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 108 re B* -Q -q -BT 1 0 0 1 0 86 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( @unique) Tj T* (... class NoDupes\(Enum\):) Tj T* (... first = 'one') Tj T* (... second = 'two') Tj T* (... third = 'two') Tj T* (Traceback \(most recent call last\):) Tj T* (...) Tj T* (ValueError: duplicate names found in ) Tj (<) Tj (enum 'NoDupes') Tj (>) Tj (: third -) Tj (>) Tj ( second) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 335.8236 cm -q -BT 1 0 0 1 0 3.5 Tm 21 TL /F3 17.5 Tf 0 0 0 rg (Interesting examples) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 293.8236 cm -q -BT 1 0 0 1 0 26 Tm .593735 Tw 12 TL /F1 10 Tf 0 0 0 rg (While ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (and ) Tj /F4 10 Tf 0 0 0 rg (IntEnum ) Tj /F1 10 Tf 0 0 0 rg (are expected to cover the majority of use-cases, they cannot cover them all.) Tj T* 0 Tw .897045 Tw (Here are recipes for some different types of enumerations that can be used directly, or as examples for) Tj T* 0 Tw (creating one's own.) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 263.8236 cm -q -BT 1 0 0 1 0 3 Tm 18 TL /F3 15 Tf 0 0 0 rg (AutoNumber) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 245.8236 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Avoids having to specify the value for each enumeration member:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 80.62362 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 156 re B* -Q -q -BT 1 0 0 1 0 134 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( class AutoNumber\(Enum\):) Tj T* (... def __new__\(cls\):) Tj T* (... value = len\(cls.__members__\) + 1) Tj T* (... obj = object.__new__\(cls\)) Tj T* (... obj._value_ = value) Tj T* (... return obj) Tj T* (...) Tj T* (>) Tj (>) Tj (>) Tj ( class Color\(AutoNumber\):) Tj T* (... __order__ = "red green blue" # only needed in 2.x) Tj T* (... red = \(\)) Tj T* (... green = \(\)) Tj T* (... blue = \(\)) Tj T* ET -Q -Q -Q -Q -Q - -endstream -endobj -65 0 obj -<< /Length 4158 >> -stream -1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET -q -1 0 0 1 62.69291 715.8236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 48 re B* -Q -q -BT 1 0 0 1 0 26 Tm 12 TL /F4 10 Tf 0 0 0 rg (...) Tj T* (>) Tj (>) Tj (>) Tj ( Color.green.value == 2) Tj T* (True) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 695.8236 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Note:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 689.8236 cm -Q -q -1 0 0 1 62.69291 653.8236 cm -0 0 0 rg -BT /F1 10 Tf 12 TL ET -BT 1 0 0 1 0 2 Tm T* ET -q -1 0 0 1 20 0 cm -q -BT 1 0 0 1 0 26 Tm .144104 Tw 12 TL /F1 10 Tf 0 0 0 rg (The ) Tj /F5 10 Tf 0 0 0 rg (__new__ ) Tj /F1 10 Tf 0 0 0 rg (method, if defined, is used during creation of the Enum members; it is then replaced by) Tj T* 0 Tw .799985 Tw (Enum's ) Tj /F5 10 Tf 0 0 0 rg (__new__ ) Tj /F1 10 Tf 0 0 0 rg (which is used after class creation for lookup of existing members. Due to the way) Tj T* 0 Tw (Enums are supposed to behave, there is no way to customize Enum's ) Tj /F5 10 Tf 0 0 0 rg (__new__) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET -Q -Q -q -Q -Q -q -1 0 0 1 62.69291 653.8236 cm -Q -q -1 0 0 1 62.69291 623.8236 cm -q -BT 1 0 0 1 0 3 Tm 18 TL /F3 15 Tf 0 0 0 rg (UniqueEnum) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 605.8236 cm -q -0 0 0 rg -BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Raises an error if a duplicate member name is found instead of creating an alias:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 368.6236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 228 re B* -Q -q -BT 1 0 0 1 0 206 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( class UniqueEnum\(Enum\):) Tj T* (... def __init__\(self, *args\):) Tj T* (... cls = self.__class__) Tj T* (... if any\(self.value == e.value for e in cls\):) Tj T* (... a = self.name) Tj T* (... e = cls\(self.value\).name) Tj T* (... raise ValueError\() Tj T* (... "aliases not allowed in UniqueEnum: %r --) Tj (>) Tj ( %r") Tj T* (... % \(a, e\)\)) Tj T* (...) Tj T* (>) Tj (>) Tj (>) Tj ( class Color\(UniqueEnum\):) Tj T* (... red = 1) Tj T* (... green = 2) Tj T* (... blue = 3) Tj T* (... grene = 2) Tj T* (Traceback \(most recent call last\):) Tj T* (...) Tj T* (ValueError: aliases not allowed in UniqueEnum: 'grene' --) Tj (>) Tj ( 'green') Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 338.6236 cm -q -BT 1 0 0 1 0 3 Tm 18 TL /F3 15 Tf 0 0 0 rg (OrderedEnum) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 308.6236 cm -q -BT 1 0 0 1 0 14 Tm 1.335984 Tw 12 TL /F1 10 Tf 0 0 0 rg (An ordered enumeration that is not based on ) Tj /F4 10 Tf 0 0 0 rg (IntEnum ) Tj /F1 10 Tf 0 0 0 rg (and so maintains the normal ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (invariants) Tj T* 0 Tw (\(such as not being comparable to other enumerations\):) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 83.42362 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 216 re B* -Q -q -BT 1 0 0 1 0 194 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( class OrderedEnum\(Enum\):) Tj T* (... def __ge__\(self, other\):) Tj T* (... if self.__class__ is other.__class__:) Tj T* (... return self._value_ ) Tj (>) Tj (= other._value_) Tj T* (... return NotImplemented) Tj T* (... def __gt__\(self, other\):) Tj T* (... if self.__class__ is other.__class__:) Tj T* (... return self._value_ ) Tj (>) Tj ( other._value_) Tj T* (... return NotImplemented) Tj T* (... def __le__\(self, other\):) Tj T* (... if self.__class__ is other.__class__:) Tj T* (... return self._value_ ) Tj (<) Tj (= other._value_) Tj T* (... return NotImplemented) Tj T* (... def __lt__\(self, other\):) Tj T* (... if self.__class__ is other.__class__:) Tj T* (... return self._value_ ) Tj (<) Tj ( other._value_) Tj T* (... return NotImplemented) Tj T* ET -Q -Q -Q -Q -Q - -endstream -endobj -66 0 obj -<< /Length 4039 >> -stream -1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET -q -1 0 0 1 62.69291 619.8236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 144 re B* -Q -q -BT 1 0 0 1 0 122 Tm 12 TL /F4 10 Tf 0 0 0 rg (...) Tj T* (>) Tj (>) Tj (>) Tj ( class Grade\(OrderedEnum\):) Tj T* (... __ordered__ = 'A B C D F') Tj T* (... A = 5) Tj T* (... B = 4) Tj T* (... C = 3) Tj T* (... D = 2) Tj T* (... F = 1) Tj T* (...) Tj T* (>) Tj (>) Tj (>) Tj ( Grade.C ) Tj (<) Tj ( Grade.A) Tj T* (True) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 589.8236 cm -q -BT 1 0 0 1 0 3 Tm 18 TL /F3 15 Tf 0 0 0 rg (Planet) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 571.8236 cm -q -BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (If ) Tj /F4 10 Tf 0 0 0 rg (__new__ ) Tj /F1 10 Tf 0 0 0 rg (or ) Tj /F4 10 Tf 0 0 0 rg (__init__ ) Tj /F1 10 Tf 0 0 0 rg (is defined the value of the enum member will be passed to those methods:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 286.6236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 276 re B* -Q -q -BT 1 0 0 1 0 254 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( class Planet\(Enum\):) Tj T* (... MERCURY = \(3.303e+23, 2.4397e6\)) Tj T* (... VENUS = \(4.869e+24, 6.0518e6\)) Tj T* (... EARTH = \(5.976e+24, 6.37814e6\)) Tj T* (... MARS = \(6.421e+23, 3.3972e6\)) Tj T* (... JUPITER = \(1.9e+27, 7.1492e7\)) Tj T* (... SATURN = \(5.688e+26, 6.0268e7\)) Tj T* (... URANUS = \(8.686e+25, 2.5559e7\)) Tj T* (... NEPTUNE = \(1.024e+26, 2.4746e7\)) Tj T* (... def __init__\(self, mass, radius\):) Tj T* (... self.mass = mass # in kilograms) Tj T* (... self.radius = radius # in meters) Tj T* (... @property) Tj T* (... def surface_gravity\(self\):) Tj T* (... # universal gravitational constant \(m3 kg-1 s-2\)) Tj T* (... G = 6.67300E-11) Tj T* (... return G * self.mass / \(self.radius * self.radius\)) Tj T* (...) Tj T* (>) Tj (>) Tj (>) Tj ( Planet.EARTH.value) Tj T* (\(5.976e+24, 6378140.0\)) Tj T* (>) Tj (>) Tj (>) Tj ( Planet.EARTH.surface_gravity) Tj T* (9.802652743337129) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 253.6236 cm -q -BT 1 0 0 1 0 3.5 Tm 21 TL /F3 17.5 Tf 0 0 0 rg (How are Enums different?) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 223.6236 cm -q -0 0 0 rg -BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL 2.090651 Tw (Enums have a custom metaclass that affects many aspects of both derived Enum classes and their) Tj T* 0 Tw (instances \(members\).) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 193.6236 cm -q -BT 1 0 0 1 0 3 Tm 18 TL /F3 15 Tf 0 0 0 rg (Enum Classes) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 127.6236 cm -q -BT 1 0 0 1 0 50 Tm 1.263615 Tw 12 TL /F1 10 Tf 0 0 0 rg (The ) Tj /F4 10 Tf 0 0 0 rg (EnumMeta ) Tj /F1 10 Tf 0 0 0 rg (metaclass is responsible for providing the ) Tj /F4 10 Tf 0 0 0 rg (__contains__) Tj /F1 10 Tf 0 0 0 rg (, ) Tj /F4 10 Tf 0 0 0 rg (__dir__) Tj /F1 10 Tf 0 0 0 rg (, ) Tj /F4 10 Tf 0 0 0 rg (__iter__ ) Tj /F1 10 Tf 0 0 0 rg (and) Tj T* 0 Tw 2.264724 Tw (other methods that allow one to do things with an ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (class that fail on a typical class, such as) Tj T* 0 Tw 2.594147 Tw /F4 10 Tf 0 0 0 rg (list\(Color\) ) Tj /F1 10 Tf 0 0 0 rg (or ) Tj /F4 10 Tf 0 0 0 rg (some_var) Tj ( ) Tj (in) Tj ( ) Tj (Color) Tj /F1 10 Tf 0 0 0 rg (. ) Tj /F4 10 Tf 0 0 0 rg (EnumMeta ) Tj /F1 10 Tf 0 0 0 rg (is responsible for ensuring that various other) Tj T* 0 Tw 2.196905 Tw (methods on the final ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (class are correct \(such as ) Tj /F4 10 Tf 0 0 0 rg (__new__) Tj /F1 10 Tf 0 0 0 rg (, ) Tj /F4 10 Tf 0 0 0 rg (__getnewargs__) Tj /F1 10 Tf 0 0 0 rg (, ) Tj /F4 10 Tf 0 0 0 rg (__str__ ) Tj /F1 10 Tf 0 0 0 rg (and) Tj T* 0 Tw /F4 10 Tf 0 0 0 rg (__repr__) Tj /F1 10 Tf 0 0 0 rg (\).) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 115.6236 cm -Q - -endstream -endobj -67 0 obj -<< /Length 5453 >> -stream -1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET -q -1 0 0 1 62.69291 682.0236 cm -.960784 .960784 .862745 rg -n 0 83 469.8898 -83 re f* -0 0 0 rg -BT /F1 10 Tf 12 TL ET -BT 1 0 0 1 6 57 Tm T* ET -q -1 0 0 1 16 52 cm -q -0 0 0 rg -BT 1 0 0 1 0 2.5 Tm /F6 12.5 Tf 15 TL (Note) Tj T* ET -Q -Q -q -1 0 0 1 16 16 cm -q -BT 1 0 0 1 0 14 Tm .686654 Tw 12 TL /F4 10 Tf 0 0 0 rg (__dir__ ) Tj /F1 10 Tf 0 0 0 rg (is not changed in the Python 2 line as it messes up some of the decorators included in) Tj T* 0 Tw (the stdlib.) Tj T* ET -Q -Q -q -1 J -1 j -.662745 .662745 .662745 RG -.5 w -n 0 83 m 469.8898 83 l S -n 0 0 m 469.8898 0 l S -n 0 0 m 0 83 l S -n 469.8898 0 m 469.8898 83 l S -Q -Q -q -1 0 0 1 62.69291 676.0236 cm -Q -q -1 0 0 1 62.69291 646.0236 cm -q -BT 1 0 0 1 0 3 Tm 18 TL /F3 15 Tf 0 0 0 rg (Enum Members \(aka instances\)) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 604.0236 cm -q -BT 1 0 0 1 0 26 Tm .491984 Tw 12 TL /F1 10 Tf 0 0 0 rg (The most interesting thing about Enum members is that they are singletons. ) Tj /F4 10 Tf 0 0 0 rg (EnumMeta ) Tj /F1 10 Tf 0 0 0 rg (creates them all) Tj T* 0 Tw .084988 Tw (while it is creating the ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (class itself, and then puts a custom ) Tj /F4 10 Tf 0 0 0 rg (__new__ ) Tj /F1 10 Tf 0 0 0 rg (in place to ensure that no new) Tj T* 0 Tw (ones are ever instantiated by returning only the existing member instances.) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 574.0236 cm -q -BT 1 0 0 1 0 3 Tm 18 TL /F3 15 Tf 0 0 0 rg (Finer Points) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 520.0236 cm -q -BT 1 0 0 1 0 38 Tm 5.488555 Tw 12 TL /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (members are instances of an ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (class, and even though they are accessible as) Tj T* 0 Tw 1.504147 Tw /F5 10 Tf 0 0 0 rg (EnumClass.member1.member2) Tj /F1 10 Tf 0 0 0 rg (, they should not be accessed directly from the member as that lookup) Tj T* 0 Tw .329985 Tw (may fail or, worse, return something besides the ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (member you were looking for \(changed in version) Tj T* 0 Tw (1.1.1\):) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 390.8236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 120 re B* -Q -q -BT 1 0 0 1 0 98 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( class FieldTypes\(Enum\):) Tj T* (... name = 1) Tj T* (... value = 2) Tj T* (... size = 3) Tj T* (...) Tj T* (>) Tj (>) Tj (>) Tj ( FieldTypes.value.size) Tj T* (<) Tj (FieldTypes.size: 3) Tj (>) Tj T* (>) Tj (>) Tj (>) Tj ( FieldTypes.size.value) Tj T* (3) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 370.8236 cm -q -BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (The ) Tj /F4 10 Tf 0 0 0 rg (__members__ ) Tj /F1 10 Tf 0 0 0 rg (attribute is only available on the class.) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 316.8236 cm -q -BT 1 0 0 1 0 38 Tm 1.374651 Tw 12 TL /F1 10 Tf 0 0 0 rg (In Python 3.x ) Tj /F4 10 Tf 0 0 0 rg (__members__ ) Tj /F1 10 Tf 0 0 0 rg (is always an ) Tj /F4 10 Tf 0 0 0 rg (OrderedDict) Tj /F1 10 Tf 0 0 0 rg (, with the order being the definition order. In) Tj T* 0 Tw 3.009213 Tw (Python 2.7 ) Tj /F4 10 Tf 0 0 0 rg (__members__ ) Tj /F1 10 Tf 0 0 0 rg (is an ) Tj /F4 10 Tf 0 0 0 rg (OrderedDict ) Tj /F1 10 Tf 0 0 0 rg (if ) Tj /F4 10 Tf 0 0 0 rg (__order__ ) Tj /F1 10 Tf 0 0 0 rg (was specified, and a plain ) Tj /F4 10 Tf 0 0 0 rg (dict) Tj T* 0 Tw 1.851318 Tw /F1 10 Tf 0 0 0 rg (otherwise. In all other Python 2.x versions ) Tj /F4 10 Tf 0 0 0 rg (__members__ ) Tj /F1 10 Tf 0 0 0 rg (is a plain ) Tj /F4 10 Tf 0 0 0 rg (dict ) Tj /F1 10 Tf 0 0 0 rg (even if ) Tj /F4 10 Tf 0 0 0 rg (__order__ ) Tj /F1 10 Tf 0 0 0 rg (was) Tj T* 0 Tw (specified as the ) Tj /F4 10 Tf 0 0 0 rg (OrderedDict ) Tj /F1 10 Tf 0 0 0 rg (type didn't exist yet.) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 286.8236 cm -q -BT 1 0 0 1 0 14 Tm .106654 Tw 12 TL /F1 10 Tf 0 0 0 rg (If you give your ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (subclass extra methods, like the ) Tj 0 0 .501961 rg (Planet ) Tj 0 0 0 rg (class above, those methods will show up in) Tj T* 0 Tw (a ) Tj /F5 10 Tf 0 0 0 rg (dir ) Tj /F1 10 Tf 0 0 0 rg (of the member, but not of the class:) Tj T* ET -Q -Q -q -1 0 0 1 62.69291 205.6236 cm -q -q -1 0 0 1 0 0 cm -q -1 0 0 1 6.6 6.6 cm -q -.662745 .662745 .662745 RG -.5 w -.960784 .960784 .862745 rg -n -6 -6 468.6898 72 re B* -Q -q -BT 1 0 0 1 0 50 Tm 12 TL /F4 10 Tf 0 0 0 rg (>) Tj (>) Tj (>) Tj ( dir\(Planet\)) Tj T* (['EARTH', 'JUPITER', 'MARS', 'MERCURY', 'NEPTUNE', 'SATURN', 'URANUS',) Tj T* ('VENUS', '__class__', '__doc__', '__members__', '__module__']) Tj T* (>) Tj (>) Tj (>) Tj ( dir\(Planet.EARTH\)) Tj T* (['__class__', '__doc__', '__module__', 'name', 'surface_gravity', 'value']) Tj T* ET -Q -Q -Q -Q -Q -q -1 0 0 1 62.69291 161.6236 cm -q -BT 1 0 0 1 0 26 Tm .938935 Tw 12 TL /F1 10 Tf 0 0 0 rg (A ) Tj /F4 10 Tf 0 0 0 rg (__new__ ) Tj /F1 10 Tf 0 0 0 rg (method will only be used for the creation of the ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (members -- after that it is replaced.) Tj T* 0 Tw .949461 Tw (This means if you wish to change how ) Tj /F4 10 Tf 0 0 0 rg (Enum ) Tj /F1 10 Tf 0 0 0 rg (members are looked up you either have to write a helper) Tj T* 0 Tw (function or a ) Tj /F4 10 Tf 0 0 0 rg (classmethod) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET -Q -Q - -endstream -endobj -68 0 obj -<< /Nums [ 0 69 0 R 1 70 0 R 2 71 0 R 3 72 0 R 4 73 0 R - 5 74 0 R 6 75 0 R 7 76 0 R 8 77 0 R 9 78 0 R - 10 79 0 R 11 80 0 R ] >> -endobj -69 0 obj -<< /S /D /St 1 >> -endobj -70 0 obj -<< /S /D /St 2 >> -endobj -71 0 obj -<< /S /D /St 3 >> -endobj -72 0 obj -<< /S /D /St 4 >> -endobj -73 0 obj -<< /S /D /St 5 >> -endobj -74 0 obj -<< /S /D /St 6 >> -endobj -75 0 obj -<< /S /D /St 7 >> -endobj -76 0 obj -<< /S /D /St 8 >> -endobj -77 0 obj -<< /S /D /St 9 >> -endobj -78 0 obj -<< /S /D /St 10 >> -endobj -79 0 obj -<< /S /D /St 11 >> -endobj -80 0 obj -<< /S /D /St 12 >> -endobj -xref -0 81 -0000000000 65535 f -0000000075 00000 n -0000000160 00000 n -0000000270 00000 n -0000000383 00000 n -0000000498 00000 n -0000000606 00000 n -0000000777 00000 n -0000000948 00000 n -0000001066 00000 n -0000001237 00000 n -0000001477 00000 n -0000001687 00000 n -0000001897 00000 n -0000002107 00000 n -0000002279 00000 n -0000002402 00000 n -0000002574 00000 n -0000002746 00000 n -0000002989 00000 n -0000003161 00000 n -0000003333 00000 n -0000003569 00000 n -0000003779 00000 n -0000003989 00000 n -0000004199 00000 n -0000004409 00000 n -0000004619 00000 n -0000004791 00000 n -0000005020 00000 n -0000005129 00000 n -0000005373 00000 n -0000005451 00000 n -0000005571 00000 n -0000005705 00000 n -0000005886 00000 n -0000006039 00000 n -0000006168 00000 n -0000006332 00000 n -0000006488 00000 n -0000006614 00000 n -0000006746 00000 n -0000006924 00000 n -0000007036 00000 n -0000007147 00000 n -0000007315 00000 n -0000007413 00000 n -0000007591 00000 n -0000007706 00000 n -0000007834 00000 n -0000007963 00000 n -0000008074 00000 n -0000008243 00000 n -0000008360 00000 n -0000008508 00000 n -0000008625 00000 n -0000008771 00000 n -0000015286 00000 n -0000019517 00000 n -0000023365 00000 n -0000027828 00000 n -0000032406 00000 n -0000037090 00000 n -0000042519 00000 n -0000046717 00000 n -0000053882 00000 n -0000058097 00000 n -0000062193 00000 n -0000067703 00000 n -0000067856 00000 n -0000067893 00000 n -0000067930 00000 n -0000067967 00000 n -0000068004 00000 n -0000068041 00000 n -0000068078 00000 n -0000068115 00000 n -0000068152 00000 n -0000068189 00000 n -0000068227 00000 n -0000068265 00000 n -trailer -<< /ID - % ReportLab generated PDF document -- digest (http://www.reportlab.com) - [(<}|~gm\352\320\235=\001p\220v\224\336) (<}|~gm\352\320\235=\001p\220v\224\336)] - /Info 30 0 R /Root 29 0 R /Size 81 >> -startxref -68303 -%%EOF diff --git a/libs/enum2.7/doc/enum.rst b/libs/enum2.7/doc/enum.rst deleted file mode 100644 index 3afc23821..000000000 --- a/libs/enum2.7/doc/enum.rst +++ /dev/null @@ -1,735 +0,0 @@ -``enum`` --- support for enumerations -======================================== - -.. :synopsis: enumerations are sets of symbolic names bound to unique, constant - values. -.. :moduleauthor:: Ethan Furman <[email protected]> -.. :sectionauthor:: Barry Warsaw <[email protected]>, -.. :sectionauthor:: Eli Bendersky <[email protected]>, -.. :sectionauthor:: Ethan Furman <[email protected]> - ----------------- - -An enumeration is a set of symbolic names (members) bound to unique, constant -values. Within an enumeration, the members can be compared by identity, and -the enumeration itself can be iterated over. - - -Module Contents ---------------- - -This module defines two enumeration classes that can be used to define unique -sets of names and values: ``Enum`` and ``IntEnum``. It also defines -one decorator, ``unique``. - -``Enum`` - -Base class for creating enumerated constants. See section `Functional API`_ -for an alternate construction syntax. - -``IntEnum`` - -Base class for creating enumerated constants that are also subclasses of ``int``. - -``unique`` - -Enum class decorator that ensures only one name is bound to any one value. - - -Creating an Enum ----------------- - -Enumerations are created using the ``class`` syntax, which makes them -easy to read and write. An alternative creation method is described in -`Functional API`_. To define an enumeration, subclass ``Enum`` as -follows:: - - >>> from enum import Enum - >>> class Color(Enum): - ... red = 1 - ... green = 2 - ... blue = 3 - -Note: Nomenclature - - - The class ``Color`` is an *enumeration* (or *enum*) - - The attributes ``Color.red``, ``Color.green``, etc., are - *enumeration members* (or *enum members*). - - The enum members have *names* and *values* (the name of - ``Color.red`` is ``red``, the value of ``Color.blue`` is - ``3``, etc.) - -Note: - - Even though we use the ``class`` syntax to create Enums, Enums - are not normal Python classes. See `How are Enums different?`_ for - more details. - -Enumeration members have human readable string representations:: - - >>> print(Color.red) - Color.red - -...while their ``repr`` has more information:: - - >>> print(repr(Color.red)) - <Color.red: 1> - -The *type* of an enumeration member is the enumeration it belongs to:: - - >>> type(Color.red) - <enum 'Color'> - >>> isinstance(Color.green, Color) - True - >>> - -Enum members also have a property that contains just their item name:: - - >>> print(Color.red.name) - red - -Enumerations support iteration. In Python 3.x definition order is used; in -Python 2.x the definition order is not available, but class attribute -``__order__`` is supported; otherwise, value order is used:: - - >>> class Shake(Enum): - ... __order__ = 'vanilla chocolate cookies mint' # only needed in 2.x - ... vanilla = 7 - ... chocolate = 4 - ... cookies = 9 - ... mint = 3 - ... - >>> for shake in Shake: - ... print(shake) - ... - Shake.vanilla - Shake.chocolate - Shake.cookies - Shake.mint - -The ``__order__`` attribute is always removed, and in 3.x it is also ignored -(order is definition order); however, in the stdlib version it will be ignored -but not removed. - -Enumeration members are hashable, so they can be used in dictionaries and sets:: - - >>> apples = {} - >>> apples[Color.red] = 'red delicious' - >>> apples[Color.green] = 'granny smith' - >>> apples == {Color.red: 'red delicious', Color.green: 'granny smith'} - True - - -Programmatic access to enumeration members and their attributes ---------------------------------------------------------------- - -Sometimes it's useful to access members in enumerations programmatically (i.e. -situations where ``Color.red`` won't do because the exact color is not known -at program-writing time). ``Enum`` allows such access:: - - >>> Color(1) - <Color.red: 1> - >>> Color(3) - <Color.blue: 3> - -If you want to access enum members by *name*, use item access:: - - >>> Color['red'] - <Color.red: 1> - >>> Color['green'] - <Color.green: 2> - -If have an enum member and need its ``name`` or ``value``:: - - >>> member = Color.red - >>> member.name - 'red' - >>> member.value - 1 - - -Duplicating enum members and values ------------------------------------ - -Having two enum members (or any other attribute) with the same name is invalid; -in Python 3.x this would raise an error, but in Python 2.x the second member -simply overwrites the first:: - - >>> # python 2.x - >>> class Shape(Enum): - ... square = 2 - ... square = 3 - ... - >>> Shape.square - <Shape.square: 3> - - >>> # python 3.x - >>> class Shape(Enum): - ... square = 2 - ... square = 3 - Traceback (most recent call last): - ... - TypeError: Attempted to reuse key: 'square' - -However, two enum members are allowed to have the same value. Given two members -A and B with the same value (and A defined first), B is an alias to A. By-value -lookup of the value of A and B will return A. By-name lookup of B will also -return A:: - - >>> class Shape(Enum): - ... __order__ = 'square diamond circle alias_for_square' # only needed in 2.x - ... square = 2 - ... diamond = 1 - ... circle = 3 - ... alias_for_square = 2 - ... - >>> Shape.square - <Shape.square: 2> - >>> Shape.alias_for_square - <Shape.square: 2> - >>> Shape(2) - <Shape.square: 2> - - -Allowing aliases is not always desirable. ``unique`` can be used to ensure -that none exist in a particular enumeration:: - - >>> from enum import unique - >>> @unique - ... class Mistake(Enum): - ... __order__ = 'one two three four' # only needed in 2.x - ... one = 1 - ... two = 2 - ... three = 3 - ... four = 3 - Traceback (most recent call last): - ... - ValueError: duplicate names found in <enum 'Mistake'>: four -> three - -Iterating over the members of an enum does not provide the aliases:: - - >>> list(Shape) - [<Shape.square: 2>, <Shape.diamond: 1>, <Shape.circle: 3>] - -The special attribute ``__members__`` is a dictionary mapping names to members. -It includes all names defined in the enumeration, including the aliases:: - - >>> for name, member in sorted(Shape.__members__.items()): - ... name, member - ... - ('alias_for_square', <Shape.square: 2>) - ('circle', <Shape.circle: 3>) - ('diamond', <Shape.diamond: 1>) - ('square', <Shape.square: 2>) - -The ``__members__`` attribute can be used for detailed programmatic access to -the enumeration members. For example, finding all the aliases:: - - >>> [name for name, member in Shape.__members__.items() if member.name != name] - ['alias_for_square'] - -Comparisons ------------ - -Enumeration members are compared by identity:: - - >>> Color.red is Color.red - True - >>> Color.red is Color.blue - False - >>> Color.red is not Color.blue - True - -Ordered comparisons between enumeration values are *not* supported. Enum -members are not integers (but see `IntEnum`_ below):: - - >>> Color.red < Color.blue - Traceback (most recent call last): - File "<stdin>", line 1, in <module> - TypeError: unorderable types: Color() < Color() - -.. warning:: - - In Python 2 *everything* is ordered, even though the ordering may not - make sense. If you want your enumerations to have a sensible ordering - check out the `OrderedEnum`_ recipe below. - - -Equality comparisons are defined though:: - - >>> Color.blue == Color.red - False - >>> Color.blue != Color.red - True - >>> Color.blue == Color.blue - True - -Comparisons against non-enumeration values will always compare not equal -(again, ``IntEnum`` was explicitly designed to behave differently, see -below):: - - >>> Color.blue == 2 - False - - -Allowed members and attributes of enumerations ----------------------------------------------- - -The examples above use integers for enumeration values. Using integers is -short and handy (and provided by default by the `Functional API`_), but not -strictly enforced. In the vast majority of use-cases, one doesn't care what -the actual value of an enumeration is. But if the value *is* important, -enumerations can have arbitrary values. - -Enumerations are Python classes, and can have methods and special methods as -usual. If we have this enumeration:: - - >>> class Mood(Enum): - ... funky = 1 - ... happy = 3 - ... - ... def describe(self): - ... # self is the member here - ... return self.name, self.value - ... - ... def __str__(self): - ... return 'my custom str! {0}'.format(self.value) - ... - ... @classmethod - ... def favorite_mood(cls): - ... # cls here is the enumeration - ... return cls.happy - -Then:: - - >>> Mood.favorite_mood() - <Mood.happy: 3> - >>> Mood.happy.describe() - ('happy', 3) - >>> str(Mood.funky) - 'my custom str! 1' - -The rules for what is allowed are as follows: _sunder_ names (starting and -ending with a single underscore) are reserved by enum and cannot be used; -all other attributes defined within an enumeration will become members of this -enumeration, with the exception of *__dunder__* names and descriptors (methods -are also descriptors). - -Note: - - If your enumeration defines ``__new__`` and/or ``__init__`` then - whatever value(s) were given to the enum member will be passed into - those methods. See `Planet`_ for an example. - - -Restricted subclassing of enumerations --------------------------------------- - -Subclassing an enumeration is allowed only if the enumeration does not define -any members. So this is forbidden:: - - >>> class MoreColor(Color): - ... pink = 17 - Traceback (most recent call last): - ... - TypeError: Cannot extend enumerations - -But this is allowed:: - - >>> class Foo(Enum): - ... def some_behavior(self): - ... pass - ... - >>> class Bar(Foo): - ... happy = 1 - ... sad = 2 - ... - -Allowing subclassing of enums that define members would lead to a violation of -some important invariants of types and instances. On the other hand, it makes -sense to allow sharing some common behavior between a group of enumerations. -(See `OrderedEnum`_ for an example.) - - -Pickling --------- - -Enumerations can be pickled and unpickled:: - - >>> from enum.test_enum import Fruit - >>> from pickle import dumps, loads - >>> Fruit.tomato is loads(dumps(Fruit.tomato, 2)) - True - -The usual restrictions for pickling apply: picklable enums must be defined in -the top level of a module, since unpickling requires them to be importable -from that module. - -Note: - - With pickle protocol version 4 (introduced in Python 3.4) it is possible - to easily pickle enums nested in other classes. - - - -Functional API --------------- - -The ``Enum`` class is callable, providing the following functional API:: - - >>> Animal = Enum('Animal', 'ant bee cat dog') - >>> Animal - <enum 'Animal'> - >>> Animal.ant - <Animal.ant: 1> - >>> Animal.ant.value - 1 - >>> list(Animal) - [<Animal.ant: 1>, <Animal.bee: 2>, <Animal.cat: 3>, <Animal.dog: 4>] - -The semantics of this API resemble ``namedtuple``. The first argument -of the call to ``Enum`` is the name of the enumeration. - -The second argument is the *source* of enumeration member names. It can be a -whitespace-separated string of names, a sequence of names, a sequence of -2-tuples with key/value pairs, or a mapping (e.g. dictionary) of names to -values. The last two options enable assigning arbitrary values to -enumerations; the others auto-assign increasing integers starting with 1. A -new class derived from ``Enum`` is returned. In other words, the above -assignment to ``Animal`` is equivalent to:: - - >>> class Animals(Enum): - ... ant = 1 - ... bee = 2 - ... cat = 3 - ... dog = 4 - -Pickling enums created with the functional API can be tricky as frame stack -implementation details are used to try and figure out which module the -enumeration is being created in (e.g. it will fail if you use a utility -function in separate module, and also may not work on IronPython or Jython). -The solution is to specify the module name explicitly as follows:: - - >>> Animals = Enum('Animals', 'ant bee cat dog', module=__name__) - -Derived Enumerations --------------------- - -IntEnum -^^^^^^^ - -A variation of ``Enum`` is provided which is also a subclass of -``int``. Members of an ``IntEnum`` can be compared to integers; -by extension, integer enumerations of different types can also be compared -to each other:: - - >>> from enum import IntEnum - >>> class Shape(IntEnum): - ... circle = 1 - ... square = 2 - ... - >>> class Request(IntEnum): - ... post = 1 - ... get = 2 - ... - >>> Shape == 1 - False - >>> Shape.circle == 1 - True - >>> Shape.circle == Request.post - True - -However, they still can't be compared to standard ``Enum`` enumerations:: - - >>> class Shape(IntEnum): - ... circle = 1 - ... square = 2 - ... - >>> class Color(Enum): - ... red = 1 - ... green = 2 - ... - >>> Shape.circle == Color.red - False - -``IntEnum`` values behave like integers in other ways you'd expect:: - - >>> int(Shape.circle) - 1 - >>> ['a', 'b', 'c'][Shape.circle] - 'b' - >>> [i for i in range(Shape.square)] - [0, 1] - -For the vast majority of code, ``Enum`` is strongly recommended, -since ``IntEnum`` breaks some semantic promises of an enumeration (by -being comparable to integers, and thus by transitivity to other -unrelated enumerations). It should be used only in special cases where -there's no other choice; for example, when integer constants are -replaced with enumerations and backwards compatibility is required with code -that still expects integers. - - -Others -^^^^^^ - -While ``IntEnum`` is part of the ``enum`` module, it would be very -simple to implement independently:: - - class IntEnum(int, Enum): - pass - -This demonstrates how similar derived enumerations can be defined; for example -a ``StrEnum`` that mixes in ``str`` instead of ``int``. - -Some rules: - -1. When subclassing ``Enum``, mix-in types must appear before - ``Enum`` itself in the sequence of bases, as in the ``IntEnum`` - example above. -2. While ``Enum`` can have members of any type, once you mix in an - additional type, all the members must have values of that type, e.g. - ``int`` above. This restriction does not apply to mix-ins which only - add methods and don't specify another data type such as ``int`` or - ``str``. -3. When another data type is mixed in, the ``value`` attribute is *not the - same* as the enum member itself, although it is equivalant and will compare - equal. -4. %-style formatting: ``%s`` and ``%r`` call ``Enum``'s ``__str__`` and - ``__repr__`` respectively; other codes (such as ``%i`` or ``%h`` for - IntEnum) treat the enum member as its mixed-in type. - - Note: Prior to Python 3.4 there is a bug in ``str``'s %-formatting: ``int`` - subclasses are printed as strings and not numbers when the ``%d``, ``%i``, - or ``%u`` codes are used. -5. ``str.__format__`` (or ``format``) will use the mixed-in - type's ``__format__``. If the ``Enum``'s ``str`` or - ``repr`` is desired use the ``!s`` or ``!r`` ``str`` format codes. - - -Decorators ----------- - -unique -^^^^^^ - -A ``class`` decorator specifically for enumerations. It searches an -enumeration's ``__members__`` gathering any aliases it finds; if any are -found ``ValueError`` is raised with the details:: - - >>> @unique - ... class NoDupes(Enum): - ... first = 'one' - ... second = 'two' - ... third = 'two' - Traceback (most recent call last): - ... - ValueError: duplicate names found in <enum 'NoDupes'>: third -> second - - -Interesting examples --------------------- - -While ``Enum`` and ``IntEnum`` are expected to cover the majority of -use-cases, they cannot cover them all. Here are recipes for some different -types of enumerations that can be used directly, or as examples for creating -one's own. - - -AutoNumber -^^^^^^^^^^ - -Avoids having to specify the value for each enumeration member:: - - >>> class AutoNumber(Enum): - ... def __new__(cls): - ... value = len(cls.__members__) + 1 - ... obj = object.__new__(cls) - ... obj._value_ = value - ... return obj - ... - >>> class Color(AutoNumber): - ... __order__ = "red green blue" # only needed in 2.x - ... red = () - ... green = () - ... blue = () - ... - >>> Color.green.value == 2 - True - -Note: - - The `__new__` method, if defined, is used during creation of the Enum - members; it is then replaced by Enum's `__new__` which is used after - class creation for lookup of existing members. Due to the way Enums are - supposed to behave, there is no way to customize Enum's `__new__`. - - -UniqueEnum -^^^^^^^^^^ - -Raises an error if a duplicate member name is found instead of creating an -alias:: - - >>> class UniqueEnum(Enum): - ... def __init__(self, *args): - ... cls = self.__class__ - ... if any(self.value == e.value for e in cls): - ... a = self.name - ... e = cls(self.value).name - ... raise ValueError( - ... "aliases not allowed in UniqueEnum: %r --> %r" - ... % (a, e)) - ... - >>> class Color(UniqueEnum): - ... red = 1 - ... green = 2 - ... blue = 3 - ... grene = 2 - Traceback (most recent call last): - ... - ValueError: aliases not allowed in UniqueEnum: 'grene' --> 'green' - - -OrderedEnum -^^^^^^^^^^^ - -An ordered enumeration that is not based on ``IntEnum`` and so maintains -the normal ``Enum`` invariants (such as not being comparable to other -enumerations):: - - >>> class OrderedEnum(Enum): - ... def __ge__(self, other): - ... if self.__class__ is other.__class__: - ... return self._value_ >= other._value_ - ... return NotImplemented - ... def __gt__(self, other): - ... if self.__class__ is other.__class__: - ... return self._value_ > other._value_ - ... return NotImplemented - ... def __le__(self, other): - ... if self.__class__ is other.__class__: - ... return self._value_ <= other._value_ - ... return NotImplemented - ... def __lt__(self, other): - ... if self.__class__ is other.__class__: - ... return self._value_ < other._value_ - ... return NotImplemented - ... - >>> class Grade(OrderedEnum): - ... __ordered__ = 'A B C D F' - ... A = 5 - ... B = 4 - ... C = 3 - ... D = 2 - ... F = 1 - ... - >>> Grade.C < Grade.A - True - - -Planet -^^^^^^ - -If ``__new__`` or ``__init__`` is defined the value of the enum member -will be passed to those methods:: - - >>> class Planet(Enum): - ... MERCURY = (3.303e+23, 2.4397e6) - ... VENUS = (4.869e+24, 6.0518e6) - ... EARTH = (5.976e+24, 6.37814e6) - ... MARS = (6.421e+23, 3.3972e6) - ... JUPITER = (1.9e+27, 7.1492e7) - ... SATURN = (5.688e+26, 6.0268e7) - ... URANUS = (8.686e+25, 2.5559e7) - ... NEPTUNE = (1.024e+26, 2.4746e7) - ... def __init__(self, mass, radius): - ... self.mass = mass # in kilograms - ... self.radius = radius # in meters - ... @property - ... def surface_gravity(self): - ... # universal gravitational constant (m3 kg-1 s-2) - ... G = 6.67300E-11 - ... return G * self.mass / (self.radius * self.radius) - ... - >>> Planet.EARTH.value - (5.976e+24, 6378140.0) - >>> Planet.EARTH.surface_gravity - 9.802652743337129 - - -How are Enums different? ------------------------- - -Enums have a custom metaclass that affects many aspects of both derived Enum -classes and their instances (members). - - -Enum Classes -^^^^^^^^^^^^ - -The ``EnumMeta`` metaclass is responsible for providing the -``__contains__``, ``__dir__``, ``__iter__`` and other methods that -allow one to do things with an ``Enum`` class that fail on a typical -class, such as ``list(Color)`` or ``some_var in Color``. ``EnumMeta`` is -responsible for ensuring that various other methods on the final ``Enum`` -class are correct (such as ``__new__``, ``__getnewargs__``, -``__str__`` and ``__repr__``). - -.. note:: - - ``__dir__`` is not changed in the Python 2 line as it messes up some - of the decorators included in the stdlib. - - -Enum Members (aka instances) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The most interesting thing about Enum members is that they are singletons. -``EnumMeta`` creates them all while it is creating the ``Enum`` -class itself, and then puts a custom ``__new__`` in place to ensure -that no new ones are ever instantiated by returning only the existing -member instances. - - -Finer Points -^^^^^^^^^^^^ - -``Enum`` members are instances of an ``Enum`` class, and even though they -are accessible as `EnumClass.member1.member2`, they should not be -accessed directly from the member as that lookup may fail or, worse, -return something besides the ``Enum`` member you were looking for -(changed in version 1.1.1):: - - >>> class FieldTypes(Enum): - ... name = 1 - ... value = 2 - ... size = 3 - ... - >>> FieldTypes.value.size - <FieldTypes.size: 3> - >>> FieldTypes.size.value - 3 - -The ``__members__`` attribute is only available on the class. - -In Python 3.x ``__members__`` is always an ``OrderedDict``, with the order being -the definition order. In Python 2.7 ``__members__`` is an ``OrderedDict`` if -``__order__`` was specified, and a plain ``dict`` otherwise. In all other Python -2.x versions ``__members__`` is a plain ``dict`` even if ``__order__`` was specified -as the ``OrderedDict`` type didn't exist yet. - -If you give your ``Enum`` subclass extra methods, like the `Planet`_ -class above, those methods will show up in a `dir` of the member, -but not of the class:: - - >>> dir(Planet) - ['EARTH', 'JUPITER', 'MARS', 'MERCURY', 'NEPTUNE', 'SATURN', 'URANUS', - 'VENUS', '__class__', '__doc__', '__members__', '__module__'] - >>> dir(Planet.EARTH) - ['__class__', '__doc__', '__module__', 'name', 'surface_gravity', 'value'] - -A ``__new__`` method will only be used for the creation of the -``Enum`` members -- after that it is replaced. This means if you wish to -change how ``Enum`` members are looked up you either have to write a -helper function or a ``classmethod``. diff --git a/libs/enum2.7/test.py b/libs/enum2.7/test.py deleted file mode 100644 index d9edfaee4..000000000 --- a/libs/enum2.7/test.py +++ /dev/null @@ -1,1820 +0,0 @@ -from pickle import dumps, loads, PicklingError, HIGHEST_PROTOCOL -import sys -import unittest -pyver = float('%s.%s' % sys.version_info[:2]) -if pyver < 2.5: - sys.path.insert(0, '.') -import enum -from enum import Enum, IntEnum, unique, EnumMeta - -if pyver < 2.6: - from __builtin__ import enumerate as bltin_enumerate - def enumerate(thing, start=0): - result = [] - for i, item in bltin_enumerate(thing): - i = i + start - result.append((i, item)) - return result - -try: - any -except NameError: - def any(iterable): - for element in iterable: - if element: - return True - return False - -try: - unicode -except NameError: - unicode = str - -try: - from collections import OrderedDict -except ImportError: - OrderedDict = None - -# for pickle tests -try: - class Stooges(Enum): - LARRY = 1 - CURLY = 2 - MOE = 3 -except Exception: - Stooges = sys.exc_info()[1] - -try: - class IntStooges(int, Enum): - LARRY = 1 - CURLY = 2 - MOE = 3 -except Exception: - IntStooges = sys.exc_info()[1] - -try: - class FloatStooges(float, Enum): - LARRY = 1.39 - CURLY = 2.72 - MOE = 3.142596 -except Exception: - FloatStooges = sys.exc_info()[1] - -# for pickle test and subclass tests -try: - class StrEnum(str, Enum): - 'accepts only string values' - class Name(StrEnum): - BDFL = 'Guido van Rossum' - FLUFL = 'Barry Warsaw' -except Exception: - Name = sys.exc_info()[1] - -try: - Question = Enum('Question', 'who what when where why', module=__name__) -except Exception: - Question = sys.exc_info()[1] - -try: - Answer = Enum('Answer', 'him this then there because') -except Exception: - Answer = sys.exc_info()[1] - -try: - Theory = Enum('Theory', 'rule law supposition', qualname='spanish_inquisition') -except Exception: - Theory = sys.exc_info()[1] - -# for doctests -try: - class Fruit(Enum): - tomato = 1 - banana = 2 - cherry = 3 -except Exception: - pass - -def test_pickle_dump_load(assertion, source, target=None, - protocol=(0, HIGHEST_PROTOCOL)): - start, stop = protocol - failures = [] - for protocol in range(start, stop+1): - try: - if target is None: - assertion(loads(dumps(source, protocol=protocol)) is source) - else: - assertion(loads(dumps(source, protocol=protocol)), target) - except Exception: - exc, tb = sys.exc_info()[1:] - failures.append('%2d: %s' %(protocol, exc)) - if failures: - raise ValueError('Failed with protocols: %s' % ', '.join(failures)) - -def test_pickle_exception(assertion, exception, obj, - protocol=(0, HIGHEST_PROTOCOL)): - start, stop = protocol - failures = [] - for protocol in range(start, stop+1): - try: - assertion(exception, dumps, obj, protocol=protocol) - except Exception: - exc = sys.exc_info()[1] - failures.append('%d: %s %s' % (protocol, exc.__class__.__name__, exc)) - if failures: - raise ValueError('Failed with protocols: %s' % ', '.join(failures)) - - -class TestHelpers(unittest.TestCase): - # _is_descriptor, _is_sunder, _is_dunder - - def test_is_descriptor(self): - class foo: - pass - for attr in ('__get__','__set__','__delete__'): - obj = foo() - self.assertFalse(enum._is_descriptor(obj)) - setattr(obj, attr, 1) - self.assertTrue(enum._is_descriptor(obj)) - - def test_is_sunder(self): - for s in ('_a_', '_aa_'): - self.assertTrue(enum._is_sunder(s)) - - for s in ('a', 'a_', '_a', '__a', 'a__', '__a__', '_a__', '__a_', '_', - '__', '___', '____', '_____',): - self.assertFalse(enum._is_sunder(s)) - - def test_is_dunder(self): - for s in ('__a__', '__aa__'): - self.assertTrue(enum._is_dunder(s)) - for s in ('a', 'a_', '_a', '__a', 'a__', '_a_', '_a__', '__a_', '_', - '__', '___', '____', '_____',): - self.assertFalse(enum._is_dunder(s)) - - -class TestEnum(unittest.TestCase): - def setUp(self): - class Season(Enum): - SPRING = 1 - SUMMER = 2 - AUTUMN = 3 - WINTER = 4 - self.Season = Season - - class Konstants(float, Enum): - E = 2.7182818 - PI = 3.1415926 - TAU = 2 * PI - self.Konstants = Konstants - - class Grades(IntEnum): - A = 5 - B = 4 - C = 3 - D = 2 - F = 0 - self.Grades = Grades - - class Directional(str, Enum): - EAST = 'east' - WEST = 'west' - NORTH = 'north' - SOUTH = 'south' - self.Directional = Directional - - from datetime import date - class Holiday(date, Enum): - NEW_YEAR = 2013, 1, 1 - IDES_OF_MARCH = 2013, 3, 15 - self.Holiday = Holiday - - if pyver >= 3.0: # do not specify custom `dir` on previous versions - def test_dir_on_class(self): - Season = self.Season - self.assertEqual( - set(dir(Season)), - set(['__class__', '__doc__', '__members__', '__module__', - 'SPRING', 'SUMMER', 'AUTUMN', 'WINTER']), - ) - - def test_dir_on_item(self): - Season = self.Season - self.assertEqual( - set(dir(Season.WINTER)), - set(['__class__', '__doc__', '__module__', 'name', 'value']), - ) - - def test_dir_with_added_behavior(self): - class Test(Enum): - this = 'that' - these = 'those' - def wowser(self): - return ("Wowser! I'm %s!" % self.name) - self.assertEqual( - set(dir(Test)), - set(['__class__', '__doc__', '__members__', '__module__', 'this', 'these']), - ) - self.assertEqual( - set(dir(Test.this)), - set(['__class__', '__doc__', '__module__', 'name', 'value', 'wowser']), - ) - - def test_dir_on_sub_with_behavior_on_super(self): - # see issue22506 - class SuperEnum(Enum): - def invisible(self): - return "did you see me?" - class SubEnum(SuperEnum): - sample = 5 - self.assertEqual( - set(dir(SubEnum.sample)), - set(['__class__', '__doc__', '__module__', 'name', 'value', 'invisible']), - ) - - if pyver >= 2.7: # OrderedDict first available here - def test_members_is_ordereddict_if_ordered(self): - class Ordered(Enum): - __order__ = 'first second third' - first = 'bippity' - second = 'boppity' - third = 'boo' - self.assertTrue(type(Ordered.__members__) is OrderedDict) - - def test_members_is_ordereddict_if_not_ordered(self): - class Unordered(Enum): - this = 'that' - these = 'those' - self.assertTrue(type(Unordered.__members__) is OrderedDict) - - if pyver >= 3.0: # all objects are ordered in Python 2.x - def test_members_is_always_ordered(self): - class AlwaysOrdered(Enum): - first = 1 - second = 2 - third = 3 - self.assertTrue(type(AlwaysOrdered.__members__) is OrderedDict) - - def test_comparisons(self): - def bad_compare(): - Season.SPRING > 4 - Season = self.Season - self.assertNotEqual(Season.SPRING, 1) - self.assertRaises(TypeError, bad_compare) - - class Part(Enum): - SPRING = 1 - CLIP = 2 - BARREL = 3 - - self.assertNotEqual(Season.SPRING, Part.SPRING) - def bad_compare(): - Season.SPRING < Part.CLIP - self.assertRaises(TypeError, bad_compare) - - def test_enum_in_enum_out(self): - Season = self.Season - self.assertTrue(Season(Season.WINTER) is Season.WINTER) - - def test_enum_value(self): - Season = self.Season - self.assertEqual(Season.SPRING.value, 1) - - def test_intenum_value(self): - self.assertEqual(IntStooges.CURLY.value, 2) - - def test_enum(self): - Season = self.Season - lst = list(Season) - self.assertEqual(len(lst), len(Season)) - self.assertEqual(len(Season), 4, Season) - self.assertEqual( - [Season.SPRING, Season.SUMMER, Season.AUTUMN, Season.WINTER], lst) - - for i, season in enumerate('SPRING SUMMER AUTUMN WINTER'.split()): - i += 1 - e = Season(i) - self.assertEqual(e, getattr(Season, season)) - self.assertEqual(e.value, i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, season) - self.assertTrue(e in Season) - self.assertTrue(type(e) is Season) - self.assertTrue(isinstance(e, Season)) - self.assertEqual(str(e), 'Season.' + season) - self.assertEqual( - repr(e), - '<Season.%s: %s>' % (season, i), - ) - - def test_value_name(self): - Season = self.Season - self.assertEqual(Season.SPRING.name, 'SPRING') - self.assertEqual(Season.SPRING.value, 1) - def set_name(obj, new_value): - obj.name = new_value - def set_value(obj, new_value): - obj.value = new_value - self.assertRaises(AttributeError, set_name, Season.SPRING, 'invierno', ) - self.assertRaises(AttributeError, set_value, Season.SPRING, 2) - - def test_attribute_deletion(self): - class Season(Enum): - SPRING = 1 - SUMMER = 2 - AUTUMN = 3 - WINTER = 4 - - def spam(cls): - pass - - self.assertTrue(hasattr(Season, 'spam')) - del Season.spam - self.assertFalse(hasattr(Season, 'spam')) - - self.assertRaises(AttributeError, delattr, Season, 'SPRING') - self.assertRaises(AttributeError, delattr, Season, 'DRY') - self.assertRaises(AttributeError, delattr, Season.SPRING, 'name') - - def test_bool_of_class(self): - class Empty(Enum): - pass - self.assertTrue(bool(Empty)) - - def test_bool_of_member(self): - class Count(Enum): - zero = 0 - one = 1 - two = 2 - for member in Count: - self.assertTrue(bool(member)) - - def test_invalid_names(self): - def create_bad_class_1(): - class Wrong(Enum): - mro = 9 - def create_bad_class_2(): - class Wrong(Enum): - _reserved_ = 3 - self.assertRaises(ValueError, create_bad_class_1) - self.assertRaises(ValueError, create_bad_class_2) - - def test_contains(self): - Season = self.Season - self.assertTrue(Season.AUTUMN in Season) - self.assertTrue(3 not in Season) - - val = Season(3) - self.assertTrue(val in Season) - - class OtherEnum(Enum): - one = 1; two = 2 - self.assertTrue(OtherEnum.two not in Season) - - if pyver >= 2.6: # when `format` came into being - - def test_format_enum(self): - Season = self.Season - self.assertEqual('{0}'.format(Season.SPRING), - '{0}'.format(str(Season.SPRING))) - self.assertEqual( '{0:}'.format(Season.SPRING), - '{0:}'.format(str(Season.SPRING))) - self.assertEqual('{0:20}'.format(Season.SPRING), - '{0:20}'.format(str(Season.SPRING))) - self.assertEqual('{0:^20}'.format(Season.SPRING), - '{0:^20}'.format(str(Season.SPRING))) - self.assertEqual('{0:>20}'.format(Season.SPRING), - '{0:>20}'.format(str(Season.SPRING))) - self.assertEqual('{0:<20}'.format(Season.SPRING), - '{0:<20}'.format(str(Season.SPRING))) - - def test_format_enum_custom(self): - class TestFloat(float, Enum): - one = 1.0 - two = 2.0 - def __format__(self, spec): - return 'TestFloat success!' - self.assertEqual('{0}'.format(TestFloat.one), 'TestFloat success!') - - def assertFormatIsValue(self, spec, member): - self.assertEqual(spec.format(member), spec.format(member.value)) - - def test_format_enum_date(self): - Holiday = self.Holiday - self.assertFormatIsValue('{0}', Holiday.IDES_OF_MARCH) - self.assertFormatIsValue('{0:}', Holiday.IDES_OF_MARCH) - self.assertFormatIsValue('{0:20}', Holiday.IDES_OF_MARCH) - self.assertFormatIsValue('{0:^20}', Holiday.IDES_OF_MARCH) - self.assertFormatIsValue('{0:>20}', Holiday.IDES_OF_MARCH) - self.assertFormatIsValue('{0:<20}', Holiday.IDES_OF_MARCH) - self.assertFormatIsValue('{0:%Y %m}', Holiday.IDES_OF_MARCH) - self.assertFormatIsValue('{0:%Y %m %M:00}', Holiday.IDES_OF_MARCH) - - def test_format_enum_float(self): - Konstants = self.Konstants - self.assertFormatIsValue('{0}', Konstants.TAU) - self.assertFormatIsValue('{0:}', Konstants.TAU) - self.assertFormatIsValue('{0:20}', Konstants.TAU) - self.assertFormatIsValue('{0:^20}', Konstants.TAU) - self.assertFormatIsValue('{0:>20}', Konstants.TAU) - self.assertFormatIsValue('{0:<20}', Konstants.TAU) - self.assertFormatIsValue('{0:n}', Konstants.TAU) - self.assertFormatIsValue('{0:5.2}', Konstants.TAU) - self.assertFormatIsValue('{0:f}', Konstants.TAU) - - def test_format_enum_int(self): - Grades = self.Grades - self.assertFormatIsValue('{0}', Grades.C) - self.assertFormatIsValue('{0:}', Grades.C) - self.assertFormatIsValue('{0:20}', Grades.C) - self.assertFormatIsValue('{0:^20}', Grades.C) - self.assertFormatIsValue('{0:>20}', Grades.C) - self.assertFormatIsValue('{0:<20}', Grades.C) - self.assertFormatIsValue('{0:+}', Grades.C) - self.assertFormatIsValue('{0:08X}', Grades.C) - self.assertFormatIsValue('{0:b}', Grades.C) - - def test_format_enum_str(self): - Directional = self.Directional - self.assertFormatIsValue('{0}', Directional.WEST) - self.assertFormatIsValue('{0:}', Directional.WEST) - self.assertFormatIsValue('{0:20}', Directional.WEST) - self.assertFormatIsValue('{0:^20}', Directional.WEST) - self.assertFormatIsValue('{0:>20}', Directional.WEST) - self.assertFormatIsValue('{0:<20}', Directional.WEST) - - def test_hash(self): - Season = self.Season - dates = {} - dates[Season.WINTER] = '1225' - dates[Season.SPRING] = '0315' - dates[Season.SUMMER] = '0704' - dates[Season.AUTUMN] = '1031' - self.assertEqual(dates[Season.AUTUMN], '1031') - - def test_enum_duplicates(self): - _order_ = "SPRING SUMMER AUTUMN WINTER" - class Season(Enum): - SPRING = 1 - SUMMER = 2 - AUTUMN = FALL = 3 - WINTER = 4 - ANOTHER_SPRING = 1 - lst = list(Season) - self.assertEqual( - lst, - [Season.SPRING, Season.SUMMER, - Season.AUTUMN, Season.WINTER, - ]) - self.assertTrue(Season.FALL is Season.AUTUMN) - self.assertEqual(Season.FALL.value, 3) - self.assertEqual(Season.AUTUMN.value, 3) - self.assertTrue(Season(3) is Season.AUTUMN) - self.assertTrue(Season(1) is Season.SPRING) - self.assertEqual(Season.FALL.name, 'AUTUMN') - self.assertEqual( - set([k for k,v in Season.__members__.items() if v.name != k]), - set(['FALL', 'ANOTHER_SPRING']), - ) - - if pyver >= 3.0: - cls = vars() - result = {'Enum':Enum} - exec("""def test_duplicate_name(self): - with self.assertRaises(TypeError): - class Color(Enum): - red = 1 - green = 2 - blue = 3 - red = 4 - - with self.assertRaises(TypeError): - class Color(Enum): - red = 1 - green = 2 - blue = 3 - def red(self): - return 'red' - - with self.assertRaises(TypeError): - class Color(Enum): - @property - - def red(self): - return 'redder' - red = 1 - green = 2 - blue = 3""", - result) - cls['test_duplicate_name'] = result['test_duplicate_name'] - - def test_enum_with_value_name(self): - class Huh(Enum): - name = 1 - value = 2 - self.assertEqual( - list(Huh), - [Huh.name, Huh.value], - ) - self.assertTrue(type(Huh.name) is Huh) - self.assertEqual(Huh.name.name, 'name') - self.assertEqual(Huh.name.value, 1) - - def test_intenum_from_scratch(self): - class phy(int, Enum): - pi = 3 - tau = 2 * pi - self.assertTrue(phy.pi < phy.tau) - - def test_intenum_inherited(self): - class IntEnum(int, Enum): - pass - class phy(IntEnum): - pi = 3 - tau = 2 * pi - self.assertTrue(phy.pi < phy.tau) - - def test_floatenum_from_scratch(self): - class phy(float, Enum): - pi = 3.1415926 - tau = 2 * pi - self.assertTrue(phy.pi < phy.tau) - - def test_floatenum_inherited(self): - class FloatEnum(float, Enum): - pass - class phy(FloatEnum): - pi = 3.1415926 - tau = 2 * pi - self.assertTrue(phy.pi < phy.tau) - - def test_strenum_from_scratch(self): - class phy(str, Enum): - pi = 'Pi' - tau = 'Tau' - self.assertTrue(phy.pi < phy.tau) - - def test_strenum_inherited(self): - class StrEnum(str, Enum): - pass - class phy(StrEnum): - pi = 'Pi' - tau = 'Tau' - self.assertTrue(phy.pi < phy.tau) - - def test_intenum(self): - class WeekDay(IntEnum): - SUNDAY = 1 - MONDAY = 2 - TUESDAY = 3 - WEDNESDAY = 4 - THURSDAY = 5 - FRIDAY = 6 - SATURDAY = 7 - - self.assertEqual(['a', 'b', 'c'][WeekDay.MONDAY], 'c') - self.assertEqual([i for i in range(WeekDay.TUESDAY)], [0, 1, 2]) - - lst = list(WeekDay) - self.assertEqual(len(lst), len(WeekDay)) - self.assertEqual(len(WeekDay), 7) - target = 'SUNDAY MONDAY TUESDAY WEDNESDAY THURSDAY FRIDAY SATURDAY' - target = target.split() - for i, weekday in enumerate(target): - i += 1 - e = WeekDay(i) - self.assertEqual(e, i) - self.assertEqual(int(e), i) - self.assertEqual(e.name, weekday) - self.assertTrue(e in WeekDay) - self.assertEqual(lst.index(e)+1, i) - self.assertTrue(0 < e < 8) - self.assertTrue(type(e) is WeekDay) - self.assertTrue(isinstance(e, int)) - self.assertTrue(isinstance(e, Enum)) - - def test_intenum_duplicates(self): - class WeekDay(IntEnum): - __order__ = 'SUNDAY MONDAY TUESDAY WEDNESDAY THURSDAY FRIDAY SATURDAY' - SUNDAY = 1 - MONDAY = 2 - TUESDAY = TEUSDAY = 3 - WEDNESDAY = 4 - THURSDAY = 5 - FRIDAY = 6 - SATURDAY = 7 - self.assertTrue(WeekDay.TEUSDAY is WeekDay.TUESDAY) - self.assertEqual(WeekDay(3).name, 'TUESDAY') - self.assertEqual([k for k,v in WeekDay.__members__.items() - if v.name != k], ['TEUSDAY', ]) - - def test_pickle_enum(self): - if isinstance(Stooges, Exception): - raise Stooges - test_pickle_dump_load(self.assertTrue, Stooges.CURLY) - test_pickle_dump_load(self.assertTrue, Stooges) - - def test_pickle_int(self): - if isinstance(IntStooges, Exception): - raise IntStooges - test_pickle_dump_load(self.assertTrue, IntStooges.CURLY) - test_pickle_dump_load(self.assertTrue, IntStooges) - - def test_pickle_float(self): - if isinstance(FloatStooges, Exception): - raise FloatStooges - test_pickle_dump_load(self.assertTrue, FloatStooges.CURLY) - test_pickle_dump_load(self.assertTrue, FloatStooges) - - def test_pickle_enum_function(self): - if isinstance(Answer, Exception): - raise Answer - test_pickle_dump_load(self.assertTrue, Answer.him) - test_pickle_dump_load(self.assertTrue, Answer) - - def test_pickle_enum_function_with_module(self): - if isinstance(Question, Exception): - raise Question - test_pickle_dump_load(self.assertTrue, Question.who) - test_pickle_dump_load(self.assertTrue, Question) - - if pyver == 3.4: - def test_class_nested_enum_and_pickle_protocol_four(self): - # would normally just have this directly in the class namespace - class NestedEnum(Enum): - twigs = 'common' - shiny = 'rare' - - self.__class__.NestedEnum = NestedEnum - self.NestedEnum.__qualname__ = '%s.NestedEnum' % self.__class__.__name__ - test_pickle_exception( - self.assertRaises, PicklingError, self.NestedEnum.twigs, - protocol=(0, 3)) - test_pickle_dump_load(self.assertTrue, self.NestedEnum.twigs, - protocol=(4, HIGHEST_PROTOCOL)) - - elif pyver == 3.5: - def test_class_nested_enum_and_pickle_protocol_four(self): - # would normally just have this directly in the class namespace - class NestedEnum(Enum): - twigs = 'common' - shiny = 'rare' - - self.__class__.NestedEnum = NestedEnum - self.NestedEnum.__qualname__ = '%s.NestedEnum' % self.__class__.__name__ - test_pickle_dump_load(self.assertTrue, self.NestedEnum.twigs, - protocol=(0, HIGHEST_PROTOCOL)) - - def test_exploding_pickle(self): - BadPickle = Enum('BadPickle', 'dill sweet bread-n-butter') - enum._make_class_unpicklable(BadPickle) - globals()['BadPickle'] = BadPickle - test_pickle_exception(self.assertRaises, TypeError, BadPickle.dill) - test_pickle_exception(self.assertRaises, PicklingError, BadPickle) - - def test_string_enum(self): - class SkillLevel(str, Enum): - master = 'what is the sound of one hand clapping?' - journeyman = 'why did the chicken cross the road?' - apprentice = 'knock, knock!' - self.assertEqual(SkillLevel.apprentice, 'knock, knock!') - - def test_getattr_getitem(self): - class Period(Enum): - morning = 1 - noon = 2 - evening = 3 - night = 4 - self.assertTrue(Period(2) is Period.noon) - self.assertTrue(getattr(Period, 'night') is Period.night) - self.assertTrue(Period['morning'] is Period.morning) - - def test_getattr_dunder(self): - Season = self.Season - self.assertTrue(getattr(Season, '__hash__')) - - def test_iteration_order(self): - class Season(Enum): - _order_ = 'SUMMER WINTER AUTUMN SPRING' - SUMMER = 2 - WINTER = 4 - AUTUMN = 3 - SPRING = 1 - self.assertEqual( - list(Season), - [Season.SUMMER, Season.WINTER, Season.AUTUMN, Season.SPRING], - ) - - def test_iteration_order_reversed(self): - self.assertEqual( - list(reversed(self.Season)), - [self.Season.WINTER, self.Season.AUTUMN, self.Season.SUMMER, - self.Season.SPRING] - ) - - def test_iteration_order_with_unorderable_values(self): - class Complex(Enum): - a = complex(7, 9) - b = complex(3.14, 2) - c = complex(1, -1) - d = complex(-77, 32) - self.assertEqual( - list(Complex), - [Complex.a, Complex.b, Complex.c, Complex.d], - ) - - def test_programatic_function_string(self): - SummerMonth = Enum('SummerMonth', 'june july august') - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate('june july august'.split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_string_with_start(self): - SummerMonth = Enum('SummerMonth', 'june july august', start=10) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate('june july august'.split(), 10): - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_string_list(self): - SummerMonth = Enum('SummerMonth', ['june', 'july', 'august']) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate('june july august'.split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_string_list_with_start(self): - SummerMonth = Enum('SummerMonth', ['june', 'july', 'august'], start=20) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate('june july august'.split(), 20): - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_iterable(self): - SummerMonth = Enum( - 'SummerMonth', - (('june', 1), ('july', 2), ('august', 3)) - ) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate('june july august'.split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_from_dict(self): - SummerMonth = Enum( - 'SummerMonth', - dict((('june', 1), ('july', 2), ('august', 3))) - ) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - if pyver < 3.0: - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate('june july august'.split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_type(self): - SummerMonth = Enum('SummerMonth', 'june july august', type=int) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate('june july august'.split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_type_with_start(self): - SummerMonth = Enum('SummerMonth', 'june july august', type=int, start=30) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate('june july august'.split(), 30): - e = SummerMonth(i) - self.assertEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_type_from_subclass(self): - SummerMonth = IntEnum('SummerMonth', 'june july august') - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate('june july august'.split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_type_from_subclass_with_start(self): - SummerMonth = IntEnum('SummerMonth', 'june july august', start=40) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate('june july august'.split(), 40): - e = SummerMonth(i) - self.assertEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_unicode(self): - SummerMonth = Enum('SummerMonth', unicode('june july august')) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate(unicode('june july august').split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_unicode_list(self): - SummerMonth = Enum('SummerMonth', [unicode('june'), unicode('july'), unicode('august')]) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate(unicode('june july august').split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_unicode_iterable(self): - SummerMonth = Enum( - 'SummerMonth', - ((unicode('june'), 1), (unicode('july'), 2), (unicode('august'), 3)) - ) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate(unicode('june july august').split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_from_unicode_dict(self): - SummerMonth = Enum( - 'SummerMonth', - dict(((unicode('june'), 1), (unicode('july'), 2), (unicode('august'), 3))) - ) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - if pyver < 3.0: - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate(unicode('june july august').split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(int(e.value), i) - self.assertNotEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_unicode_type(self): - SummerMonth = Enum('SummerMonth', unicode('june july august'), type=int) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate(unicode('june july august').split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programatic_function_unicode_type_from_subclass(self): - SummerMonth = IntEnum('SummerMonth', unicode('june july august')) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate(unicode('june july august').split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(e, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_programmatic_function_unicode_class(self): - if pyver < 3.0: - class_names = unicode('SummerMonth'), 'S\xfcmm\xe9rM\xf6nth'.decode('latin1') - else: - class_names = 'SummerMonth', 'S\xfcmm\xe9rM\xf6nth' - for i, class_name in enumerate(class_names): - if pyver < 3.0 and i == 1: - self.assertRaises(TypeError, Enum, class_name, unicode('june july august')) - else: - SummerMonth = Enum(class_name, unicode('june july august')) - lst = list(SummerMonth) - self.assertEqual(len(lst), len(SummerMonth)) - self.assertEqual(len(SummerMonth), 3, SummerMonth) - self.assertEqual( - [SummerMonth.june, SummerMonth.july, SummerMonth.august], - lst, - ) - for i, month in enumerate(unicode('june july august').split()): - i += 1 - e = SummerMonth(i) - self.assertEqual(e.value, i) - self.assertEqual(e.name, month) - self.assertTrue(e in SummerMonth) - self.assertTrue(type(e) is SummerMonth) - - def test_subclassing(self): - if isinstance(Name, Exception): - raise Name - self.assertEqual(Name.BDFL, 'Guido van Rossum') - self.assertTrue(Name.BDFL, Name('Guido van Rossum')) - self.assertTrue(Name.BDFL is getattr(Name, 'BDFL')) - test_pickle_dump_load(self.assertTrue, Name.BDFL) - - def test_extending(self): - def bad_extension(): - class Color(Enum): - red = 1 - green = 2 - blue = 3 - class MoreColor(Color): - cyan = 4 - magenta = 5 - yellow = 6 - self.assertRaises(TypeError, bad_extension) - - def test_exclude_methods(self): - class whatever(Enum): - this = 'that' - these = 'those' - def really(self): - return 'no, not %s' % self.value - self.assertFalse(type(whatever.really) is whatever) - self.assertEqual(whatever.this.really(), 'no, not that') - - def test_wrong_inheritance_order(self): - def wrong_inherit(): - class Wrong(Enum, str): - NotHere = 'error before this point' - self.assertRaises(TypeError, wrong_inherit) - - def test_intenum_transitivity(self): - class number(IntEnum): - one = 1 - two = 2 - three = 3 - class numero(IntEnum): - uno = 1 - dos = 2 - tres = 3 - self.assertEqual(number.one, numero.uno) - self.assertEqual(number.two, numero.dos) - self.assertEqual(number.three, numero.tres) - - def test_introspection(self): - class Number(IntEnum): - one = 100 - two = 200 - self.assertTrue(Number.one._member_type_ is int) - self.assertTrue(Number._member_type_ is int) - class String(str, Enum): - yarn = 'soft' - rope = 'rough' - wire = 'hard' - self.assertTrue(String.yarn._member_type_ is str) - self.assertTrue(String._member_type_ is str) - class Plain(Enum): - vanilla = 'white' - one = 1 - self.assertTrue(Plain.vanilla._member_type_ is object) - self.assertTrue(Plain._member_type_ is object) - - def test_wrong_enum_in_call(self): - class Monochrome(Enum): - black = 0 - white = 1 - class Gender(Enum): - male = 0 - female = 1 - self.assertRaises(ValueError, Monochrome, Gender.male) - - def test_wrong_enum_in_mixed_call(self): - class Monochrome(IntEnum): - black = 0 - white = 1 - class Gender(Enum): - male = 0 - female = 1 - self.assertRaises(ValueError, Monochrome, Gender.male) - - def test_mixed_enum_in_call_1(self): - class Monochrome(IntEnum): - black = 0 - white = 1 - class Gender(IntEnum): - male = 0 - female = 1 - self.assertTrue(Monochrome(Gender.female) is Monochrome.white) - - def test_mixed_enum_in_call_2(self): - class Monochrome(Enum): - black = 0 - white = 1 - class Gender(IntEnum): - male = 0 - female = 1 - self.assertTrue(Monochrome(Gender.male) is Monochrome.black) - - def test_flufl_enum(self): - class Fluflnum(Enum): - def __int__(self): - return int(self.value) - class MailManOptions(Fluflnum): - option1 = 1 - option2 = 2 - option3 = 3 - self.assertEqual(int(MailManOptions.option1), 1) - - def test_no_such_enum_member(self): - class Color(Enum): - red = 1 - green = 2 - blue = 3 - self.assertRaises(ValueError, Color, 4) - self.assertRaises(KeyError, Color.__getitem__, 'chartreuse') - - def test_new_repr(self): - class Color(Enum): - red = 1 - green = 2 - blue = 3 - def __repr__(self): - return "don't you just love shades of %s?" % self.name - self.assertEqual( - repr(Color.blue), - "don't you just love shades of blue?", - ) - - def test_inherited_repr(self): - class MyEnum(Enum): - def __repr__(self): - return "My name is %s." % self.name - class MyIntEnum(int, MyEnum): - this = 1 - that = 2 - theother = 3 - self.assertEqual(repr(MyIntEnum.that), "My name is that.") - - def test_multiple_mixin_mro(self): - class auto_enum(EnumMeta): - def __new__(metacls, cls, bases, classdict): - original_dict = classdict - classdict = enum._EnumDict() - for k, v in original_dict.items(): - classdict[k] = v - temp = type(classdict)() - names = set(classdict._member_names) - i = 0 - for k in classdict._member_names: - v = classdict[k] - if v == (): - v = i - else: - i = v - i += 1 - temp[k] = v - for k, v in classdict.items(): - if k not in names: - temp[k] = v - return super(auto_enum, metacls).__new__( - metacls, cls, bases, temp) - - AutoNumberedEnum = auto_enum('AutoNumberedEnum', (Enum,), {}) - - AutoIntEnum = auto_enum('AutoIntEnum', (IntEnum,), {}) - - class TestAutoNumber(AutoNumberedEnum): - a = () - b = 3 - c = () - - class TestAutoInt(AutoIntEnum): - a = () - b = 3 - c = () - - def test_subclasses_with_getnewargs(self): - class NamedInt(int): - __qualname__ = 'NamedInt' # needed for pickle protocol 4 - def __new__(cls, *args): - _args = args - if len(args) < 1: - raise TypeError("name and value must be specified") - name, args = args[0], args[1:] - self = int.__new__(cls, *args) - self._intname = name - self._args = _args - return self - def __getnewargs__(self): - return self._args - @property - def __name__(self): - return self._intname - def __repr__(self): - # repr() is updated to include the name and type info - return "%s(%r, %s)" % (type(self).__name__, - self.__name__, - int.__repr__(self)) - def __str__(self): - # str() is unchanged, even if it relies on the repr() fallback - base = int - base_str = base.__str__ - if base_str.__objclass__ is object: - return base.__repr__(self) - return base_str(self) - # for simplicity, we only define one operator that - # propagates expressions - def __add__(self, other): - temp = int(self) + int( other) - if isinstance(self, NamedInt) and isinstance(other, NamedInt): - return NamedInt( - '(%s + %s)' % (self.__name__, other.__name__), - temp ) - else: - return temp - - class NEI(NamedInt, Enum): - __qualname__ = 'NEI' # needed for pickle protocol 4 - x = ('the-x', 1) - y = ('the-y', 2) - - self.assertTrue(NEI.__new__ is Enum.__new__) - self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") - globals()['NamedInt'] = NamedInt - globals()['NEI'] = NEI - NI5 = NamedInt('test', 5) - self.assertEqual(NI5, 5) - test_pickle_dump_load(self.assertTrue, NI5, 5) - self.assertEqual(NEI.y.value, 2) - test_pickle_dump_load(self.assertTrue, NEI.y) - - if pyver >= 3.4: - def test_subclasses_with_getnewargs_ex(self): - class NamedInt(int): - __qualname__ = 'NamedInt' # needed for pickle protocol 4 - def __new__(cls, *args): - _args = args - if len(args) < 2: - raise TypeError("name and value must be specified") - name, args = args[0], args[1:] - self = int.__new__(cls, *args) - self._intname = name - self._args = _args - return self - def __getnewargs_ex__(self): - return self._args, {} - @property - def __name__(self): - return self._intname - def __repr__(self): - # repr() is updated to include the name and type info - return "{}({!r}, {})".format(type(self).__name__, - self.__name__, - int.__repr__(self)) - def __str__(self): - # str() is unchanged, even if it relies on the repr() fallback - base = int - base_str = base.__str__ - if base_str.__objclass__ is object: - return base.__repr__(self) - return base_str(self) - # for simplicity, we only define one operator that - # propagates expressions - def __add__(self, other): - temp = int(self) + int( other) - if isinstance(self, NamedInt) and isinstance(other, NamedInt): - return NamedInt( - '({0} + {1})'.format(self.__name__, other.__name__), - temp ) - else: - return temp - - class NEI(NamedInt, Enum): - __qualname__ = 'NEI' # needed for pickle protocol 4 - x = ('the-x', 1) - y = ('the-y', 2) - - - self.assertIs(NEI.__new__, Enum.__new__) - self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") - globals()['NamedInt'] = NamedInt - globals()['NEI'] = NEI - NI5 = NamedInt('test', 5) - self.assertEqual(NI5, 5) - test_pickle_dump_load(self.assertEqual, NI5, 5, protocol=(4, HIGHEST_PROTOCOL)) - self.assertEqual(NEI.y.value, 2) - test_pickle_dump_load(self.assertTrue, NEI.y, protocol=(4, HIGHEST_PROTOCOL)) - - def test_subclasses_with_reduce(self): - class NamedInt(int): - __qualname__ = 'NamedInt' # needed for pickle protocol 4 - def __new__(cls, *args): - _args = args - if len(args) < 1: - raise TypeError("name and value must be specified") - name, args = args[0], args[1:] - self = int.__new__(cls, *args) - self._intname = name - self._args = _args - return self - def __reduce__(self): - return self.__class__, self._args - @property - def __name__(self): - return self._intname - def __repr__(self): - # repr() is updated to include the name and type info - return "%s(%r, %s)" % (type(self).__name__, - self.__name__, - int.__repr__(self)) - def __str__(self): - # str() is unchanged, even if it relies on the repr() fallback - base = int - base_str = base.__str__ - if base_str.__objclass__ is object: - return base.__repr__(self) - return base_str(self) - # for simplicity, we only define one operator that - # propagates expressions - def __add__(self, other): - temp = int(self) + int( other) - if isinstance(self, NamedInt) and isinstance(other, NamedInt): - return NamedInt( - '(%s + %s)' % (self.__name__, other.__name__), - temp ) - else: - return temp - - class NEI(NamedInt, Enum): - __qualname__ = 'NEI' # needed for pickle protocol 4 - x = ('the-x', 1) - y = ('the-y', 2) - - - self.assertTrue(NEI.__new__ is Enum.__new__) - self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") - globals()['NamedInt'] = NamedInt - globals()['NEI'] = NEI - NI5 = NamedInt('test', 5) - self.assertEqual(NI5, 5) - test_pickle_dump_load(self.assertEqual, NI5, 5) - self.assertEqual(NEI.y.value, 2) - test_pickle_dump_load(self.assertTrue, NEI.y) - - def test_subclasses_with_reduce_ex(self): - class NamedInt(int): - __qualname__ = 'NamedInt' # needed for pickle protocol 4 - def __new__(cls, *args): - _args = args - if len(args) < 1: - raise TypeError("name and value must be specified") - name, args = args[0], args[1:] - self = int.__new__(cls, *args) - self._intname = name - self._args = _args - return self - def __reduce_ex__(self, proto): - return self.__class__, self._args - @property - def __name__(self): - return self._intname - def __repr__(self): - # repr() is updated to include the name and type info - return "%s(%r, %s)" % (type(self).__name__, - self.__name__, - int.__repr__(self)) - def __str__(self): - # str() is unchanged, even if it relies on the repr() fallback - base = int - base_str = base.__str__ - if base_str.__objclass__ is object: - return base.__repr__(self) - return base_str(self) - # for simplicity, we only define one operator that - # propagates expressions - def __add__(self, other): - temp = int(self) + int( other) - if isinstance(self, NamedInt) and isinstance(other, NamedInt): - return NamedInt( - '(%s + %s)' % (self.__name__, other.__name__), - temp ) - else: - return temp - - class NEI(NamedInt, Enum): - __qualname__ = 'NEI' # needed for pickle protocol 4 - x = ('the-x', 1) - y = ('the-y', 2) - - - self.assertTrue(NEI.__new__ is Enum.__new__) - self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") - globals()['NamedInt'] = NamedInt - globals()['NEI'] = NEI - NI5 = NamedInt('test', 5) - self.assertEqual(NI5, 5) - test_pickle_dump_load(self.assertEqual, NI5, 5) - self.assertEqual(NEI.y.value, 2) - test_pickle_dump_load(self.assertTrue, NEI.y) - - def test_subclasses_without_direct_pickle_support(self): - class NamedInt(int): - __qualname__ = 'NamedInt' - def __new__(cls, *args): - _args = args - name, args = args[0], args[1:] - if len(args) == 0: - raise TypeError("name and value must be specified") - self = int.__new__(cls, *args) - self._intname = name - self._args = _args - return self - @property - def __name__(self): - return self._intname - def __repr__(self): - # repr() is updated to include the name and type info - return "%s(%r, %s)" % (type(self).__name__, - self.__name__, - int.__repr__(self)) - def __str__(self): - # str() is unchanged, even if it relies on the repr() fallback - base = int - base_str = base.__str__ - if base_str.__objclass__ is object: - return base.__repr__(self) - return base_str(self) - # for simplicity, we only define one operator that - # propagates expressions - def __add__(self, other): - temp = int(self) + int( other) - if isinstance(self, NamedInt) and isinstance(other, NamedInt): - return NamedInt( - '(%s + %s)' % (self.__name__, other.__name__), - temp ) - else: - return temp - - class NEI(NamedInt, Enum): - __qualname__ = 'NEI' - x = ('the-x', 1) - y = ('the-y', 2) - - self.assertTrue(NEI.__new__ is Enum.__new__) - self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") - globals()['NamedInt'] = NamedInt - globals()['NEI'] = NEI - NI5 = NamedInt('test', 5) - self.assertEqual(NI5, 5) - self.assertEqual(NEI.y.value, 2) - test_pickle_exception(self.assertRaises, TypeError, NEI.x) - test_pickle_exception(self.assertRaises, PicklingError, NEI) - - def test_subclasses_without_direct_pickle_support_using_name(self): - class NamedInt(int): - __qualname__ = 'NamedInt' - def __new__(cls, *args): - _args = args - name, args = args[0], args[1:] - if len(args) == 0: - raise TypeError("name and value must be specified") - self = int.__new__(cls, *args) - self._intname = name - self._args = _args - return self - @property - def __name__(self): - return self._intname - def __repr__(self): - # repr() is updated to include the name and type info - return "%s(%r, %s)" % (type(self).__name__, - self.__name__, - int.__repr__(self)) - def __str__(self): - # str() is unchanged, even if it relies on the repr() fallback - base = int - base_str = base.__str__ - if base_str.__objclass__ is object: - return base.__repr__(self) - return base_str(self) - # for simplicity, we only define one operator that - # propagates expressions - def __add__(self, other): - temp = int(self) + int( other) - if isinstance(self, NamedInt) and isinstance(other, NamedInt): - return NamedInt( - '(%s + %s)' % (self.__name__, other.__name__), - temp ) - else: - return temp - - class NEI(NamedInt, Enum): - __qualname__ = 'NEI' - x = ('the-x', 1) - y = ('the-y', 2) - def __reduce_ex__(self, proto): - return getattr, (self.__class__, self._name_) - - self.assertTrue(NEI.__new__ is Enum.__new__) - self.assertEqual(repr(NEI.x + NEI.y), "NamedInt('(the-x + the-y)', 3)") - globals()['NamedInt'] = NamedInt - globals()['NEI'] = NEI - NI5 = NamedInt('test', 5) - self.assertEqual(NI5, 5) - self.assertEqual(NEI.y.value, 2) - test_pickle_dump_load(self.assertTrue, NEI.y) - test_pickle_dump_load(self.assertTrue, NEI) - - def test_tuple_subclass(self): - class SomeTuple(tuple, Enum): - __qualname__ = 'SomeTuple' - first = (1, 'for the money') - second = (2, 'for the show') - third = (3, 'for the music') - self.assertTrue(type(SomeTuple.first) is SomeTuple) - self.assertTrue(isinstance(SomeTuple.second, tuple)) - self.assertEqual(SomeTuple.third, (3, 'for the music')) - globals()['SomeTuple'] = SomeTuple - test_pickle_dump_load(self.assertTrue, SomeTuple.first) - - def test_duplicate_values_give_unique_enum_items(self): - class AutoNumber(Enum): - __order__ = 'enum_m enum_d enum_y' - enum_m = () - enum_d = () - enum_y = () - def __new__(cls): - value = len(cls.__members__) + 1 - obj = object.__new__(cls) - obj._value_ = value - return obj - def __int__(self): - return int(self._value_) - self.assertEqual(int(AutoNumber.enum_d), 2) - self.assertEqual(AutoNumber.enum_y.value, 3) - self.assertTrue(AutoNumber(1) is AutoNumber.enum_m) - self.assertEqual( - list(AutoNumber), - [AutoNumber.enum_m, AutoNumber.enum_d, AutoNumber.enum_y], - ) - - def test_inherited_new_from_enhanced_enum(self): - class AutoNumber2(Enum): - def __new__(cls): - value = len(cls.__members__) + 1 - obj = object.__new__(cls) - obj._value_ = value - return obj - def __int__(self): - return int(self._value_) - class Color(AutoNumber2): - _order_ = 'red green blue' - red = () - green = () - blue = () - self.assertEqual(len(Color), 3, "wrong number of elements: %d (should be %d)" % (len(Color), 3)) - self.assertEqual(list(Color), [Color.red, Color.green, Color.blue]) - if pyver >= 3.0: - self.assertEqual(list(map(int, Color)), [1, 2, 3]) - - def test_inherited_new_from_mixed_enum(self): - class AutoNumber3(IntEnum): - def __new__(cls): - value = len(cls.__members__) + 1 - obj = int.__new__(cls, value) - obj._value_ = value - return obj - class Color(AutoNumber3): - red = () - green = () - blue = () - self.assertEqual(len(Color), 3, "wrong number of elements: %d (should be %d)" % (len(Color), 3)) - Color.red - Color.green - Color.blue - - def test_equality(self): - class AlwaysEqual: - def __eq__(self, other): - return True - class OrdinaryEnum(Enum): - a = 1 - self.assertEqual(AlwaysEqual(), OrdinaryEnum.a) - self.assertEqual(OrdinaryEnum.a, AlwaysEqual()) - - def test_ordered_mixin(self): - class OrderedEnum(Enum): - def __ge__(self, other): - if self.__class__ is other.__class__: - return self._value_ >= other._value_ - return NotImplemented - def __gt__(self, other): - if self.__class__ is other.__class__: - return self._value_ > other._value_ - return NotImplemented - def __le__(self, other): - if self.__class__ is other.__class__: - return self._value_ <= other._value_ - return NotImplemented - def __lt__(self, other): - if self.__class__ is other.__class__: - return self._value_ < other._value_ - return NotImplemented - class Grade(OrderedEnum): - __order__ = 'A B C D F' - A = 5 - B = 4 - C = 3 - D = 2 - F = 1 - self.assertEqual(list(Grade), [Grade.A, Grade.B, Grade.C, Grade.D, Grade.F]) - self.assertTrue(Grade.A > Grade.B) - self.assertTrue(Grade.F <= Grade.C) - self.assertTrue(Grade.D < Grade.A) - self.assertTrue(Grade.B >= Grade.B) - - def test_extending2(self): - def bad_extension(): - class Shade(Enum): - def shade(self): - print(self.name) - class Color(Shade): - red = 1 - green = 2 - blue = 3 - class MoreColor(Color): - cyan = 4 - magenta = 5 - yellow = 6 - self.assertRaises(TypeError, bad_extension) - - def test_extending3(self): - class Shade(Enum): - def shade(self): - return self.name - class Color(Shade): - def hex(self): - return '%s hexlified!' % self.value - class MoreColor(Color): - cyan = 4 - magenta = 5 - yellow = 6 - self.assertEqual(MoreColor.magenta.hex(), '5 hexlified!') - - def test_no_duplicates(self): - def bad_duplicates(): - class UniqueEnum(Enum): - def __init__(self, *args): - cls = self.__class__ - if any(self.value == e.value for e in cls): - a = self.name - e = cls(self.value).name - raise ValueError( - "aliases not allowed in UniqueEnum: %r --> %r" - % (a, e) - ) - class Color(UniqueEnum): - red = 1 - green = 2 - blue = 3 - class Color(UniqueEnum): - red = 1 - green = 2 - blue = 3 - grene = 2 - self.assertRaises(ValueError, bad_duplicates) - - def test_init(self): - class Planet(Enum): - MERCURY = (3.303e+23, 2.4397e6) - VENUS = (4.869e+24, 6.0518e6) - EARTH = (5.976e+24, 6.37814e6) - MARS = (6.421e+23, 3.3972e6) - JUPITER = (1.9e+27, 7.1492e7) - SATURN = (5.688e+26, 6.0268e7) - URANUS = (8.686e+25, 2.5559e7) - NEPTUNE = (1.024e+26, 2.4746e7) - def __init__(self, mass, radius): - self.mass = mass # in kilograms - self.radius = radius # in meters - @property - def surface_gravity(self): - # universal gravitational constant (m3 kg-1 s-2) - G = 6.67300E-11 - return G * self.mass / (self.radius * self.radius) - self.assertEqual(round(Planet.EARTH.surface_gravity, 2), 9.80) - self.assertEqual(Planet.EARTH.value, (5.976e+24, 6.37814e6)) - - def test_nonhash_value(self): - class AutoNumberInAList(Enum): - def __new__(cls): - value = [len(cls.__members__) + 1] - obj = object.__new__(cls) - obj._value_ = value - return obj - class ColorInAList(AutoNumberInAList): - _order_ = 'red green blue' - red = () - green = () - blue = () - self.assertEqual(list(ColorInAList), [ColorInAList.red, ColorInAList.green, ColorInAList.blue]) - self.assertEqual(ColorInAList.red.value, [1]) - self.assertEqual(ColorInAList([1]), ColorInAList.red) - - def test_conflicting_types_resolved_in_new(self): - class LabelledIntEnum(int, Enum): - def __new__(cls, *args): - value, label = args - obj = int.__new__(cls, value) - obj.label = label - obj._value_ = value - return obj - - class LabelledList(LabelledIntEnum): - unprocessed = (1, "Unprocessed") - payment_complete = (2, "Payment Complete") - - self.assertEqual(list(LabelledList), [LabelledList.unprocessed, LabelledList.payment_complete]) - self.assertEqual(LabelledList.unprocessed, 1) - self.assertEqual(LabelledList(1), LabelledList.unprocessed) - - def test_empty_with_functional_api(self): - empty = enum.IntEnum('Foo', {}) - self.assertEqual(len(empty), 0) - - -class TestUnique(unittest.TestCase): - """2.4 doesn't allow class decorators, use function syntax.""" - - def test_unique_clean(self): - class Clean(Enum): - one = 1 - two = 'dos' - tres = 4.0 - unique(Clean) - class Cleaner(IntEnum): - single = 1 - double = 2 - triple = 3 - unique(Cleaner) - - def test_unique_dirty(self): - try: - class Dirty(Enum): - __order__ = 'one two tres' - one = 1 - two = 'dos' - tres = 1 - unique(Dirty) - except ValueError: - exc = sys.exc_info()[1] - message = exc.args[0] - self.assertTrue('tres -> one' in message) - - try: - class Dirtier(IntEnum): - _order_ = 'single double triple turkey' - single = 1 - double = 1 - triple = 3 - turkey = 3 - unique(Dirtier) - except ValueError: - exc = sys.exc_info()[1] - message = exc.args[0] - self.assertTrue('double -> single' in message) - self.assertTrue('turkey -> triple' in message) - - -class TestMe(unittest.TestCase): - - pass - -if __name__ == '__main__': - unittest.main() diff --git a/libs/pretty/__init__.py b/libs/pretty/__init__.py index 64b586338..424d1537c 100644 --- a/libs/pretty/__init__.py +++ b/libs/pretty/__init__.py @@ -8,10 +8,16 @@ __copyright__ = "Copyright 2010, S Anand" __license__ = "WTFPL" from datetime import datetime +from six import PY3 def _df(seconds, denominator=1, text='', past=True): - if past: return str(round(seconds / denominator)) + text + ' ago' - else: return 'in ' + str(round(seconds / denominator)) + text + if PY3: + result = str(round(seconds / denominator)) + else: + result = str((seconds + denominator/2)/ denominator) + + if past: return result + text + ' ago' + else: return 'in ' + result + text def date(time=False, asdays=False, short=False): '''Returns a pretty formatted date. diff --git a/libs/pyprobe/pyprobe.py b/libs/pyprobe/pyprobe.py index d47791ac8..5afccfbe4 100644 --- a/libs/pyprobe/pyprobe.py +++ b/libs/pyprobe/pyprobe.py @@ -1,4 +1,5 @@ from __future__ import absolute_import +from six import PY3 import json import subprocess from os import path @@ -176,14 +177,25 @@ class VideoFileParser: """ command = [parser] + commandArgs + [inputFile] - completedProcess = subprocess.run( - command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8" - ) - if completedProcess.returncode != 0: - raise IOError( - "Error occurred during execution - " + completedProcess.stderr + if PY3: + completedProcess = subprocess.run( + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8" ) - return completedProcess.stdout + if completedProcess.returncode != 0: + raise IOError( + "Error occurred during execution - " + completedProcess.stderr + ) + return completedProcess.stdout + else: + try: + completedProcess = subprocess.check_output( + command, stderr=subprocess.STDOUT + ) + except subprocess.CalledProcessError as e: + raise IOError( + "Error occurred during execution - " + e.output + ) + return completedProcess @staticmethod def _checkExecutable(executable): diff --git a/libs/subscene_api/subscene.py b/libs/subscene_api/subscene.py index 099370a48..633b72f36 100644 --- a/libs/subscene_api/subscene.py +++ b/libs/subscene_api/subscene.py @@ -26,7 +26,13 @@ this script that does the job by parsing the website"s pages. # imports import re -import enum +from six import PY3 + +if PY3: + import enum +else: + import enum2 as enum + import sys import requests import time diff --git a/libs/subzero/modification/mods/common.py b/libs/subzero/modification/mods/common.py index c0a4a3eb9..f4b22180c 100644 --- a/libs/subzero/modification/mods/common.py +++ b/libs/subzero/modification/mods/common.py @@ -1,6 +1,7 @@ # coding=utf-8 from __future__ import absolute_import +from __future__ import unicode_literals import re from subzero.language import Language diff --git a/libs/subzero/modification/mods/hearing_impaired.py b/libs/subzero/modification/mods/hearing_impaired.py index dd42b5514..b4de7c737 100644 --- a/libs/subzero/modification/mods/hearing_impaired.py +++ b/libs/subzero/modification/mods/hearing_impaired.py @@ -1,5 +1,6 @@ # coding=utf-8 from __future__ import absolute_import +from __future__ import unicode_literals import re from subzero.modification.mods import SubtitleTextModification, empty_line_post_processors, EmptyEntryError, TAG diff --git a/libs/yaml/__init__.py b/libs/yaml/__init__.py deleted file mode 100644 index 3f499d366..000000000 --- a/libs/yaml/__init__.py +++ /dev/null @@ -1,402 +0,0 @@ - -from .error import * - -from .tokens import * -from .events import * -from .nodes import * - -from .loader import * -from .dumper import * - -__version__ = '5.1.2' -try: - from .cyaml import * - __with_libyaml__ = True -except ImportError: - __with_libyaml__ = False - -import io - -#------------------------------------------------------------------------------ -# Warnings control -#------------------------------------------------------------------------------ - -# 'Global' warnings state: -_warnings_enabled = { - 'YAMLLoadWarning': True, -} - -# Get or set global warnings' state -def warnings(settings=None): - if settings is None: - return _warnings_enabled - - if type(settings) is dict: - for key in settings: - if key in _warnings_enabled: - _warnings_enabled[key] = settings[key] - -# Warn when load() is called without Loader=... -class YAMLLoadWarning(RuntimeWarning): - pass - -def load_warning(method): - if _warnings_enabled['YAMLLoadWarning'] is False: - return - - import warnings - - message = ( - "calling yaml.%s() without Loader=... is deprecated, as the " - "default Loader is unsafe. Please read " - "https://msg.pyyaml.org/load for full details." - ) % method - - warnings.warn(message, YAMLLoadWarning, stacklevel=3) - -#------------------------------------------------------------------------------ -def scan(stream, Loader=Loader): - """ - Scan a YAML stream and produce scanning tokens. - """ - loader = Loader(stream) - try: - while loader.check_token(): - yield loader.get_token() - finally: - loader.dispose() - -def parse(stream, Loader=Loader): - """ - Parse a YAML stream and produce parsing events. - """ - loader = Loader(stream) - try: - while loader.check_event(): - yield loader.get_event() - finally: - loader.dispose() - -def compose(stream, Loader=Loader): - """ - Parse the first YAML document in a stream - and produce the corresponding representation tree. - """ - loader = Loader(stream) - try: - return loader.get_single_node() - finally: - loader.dispose() - -def compose_all(stream, Loader=Loader): - """ - Parse all YAML documents in a stream - and produce corresponding representation trees. - """ - loader = Loader(stream) - try: - while loader.check_node(): - yield loader.get_node() - finally: - loader.dispose() - -def load(stream, Loader=None): - """ - Parse the first YAML document in a stream - and produce the corresponding Python object. - """ - if Loader is None: - load_warning('load') - Loader = FullLoader - - loader = Loader(stream) - try: - return loader.get_single_data() - finally: - loader.dispose() - -def load_all(stream, Loader=None): - """ - Parse all YAML documents in a stream - and produce corresponding Python objects. - """ - if Loader is None: - load_warning('load_all') - Loader = FullLoader - - loader = Loader(stream) - try: - while loader.check_data(): - yield loader.get_data() - finally: - loader.dispose() - -def full_load(stream): - """ - Parse the first YAML document in a stream - and produce the corresponding Python object. - - Resolve all tags except those known to be - unsafe on untrusted input. - """ - return load(stream, FullLoader) - -def full_load_all(stream): - """ - Parse all YAML documents in a stream - and produce corresponding Python objects. - - Resolve all tags except those known to be - unsafe on untrusted input. - """ - return load_all(stream, FullLoader) - -def safe_load(stream): - """ - Parse the first YAML document in a stream - and produce the corresponding Python object. - - Resolve only basic YAML tags. This is known - to be safe for untrusted input. - """ - return load(stream, SafeLoader) - -def safe_load_all(stream): - """ - Parse all YAML documents in a stream - and produce corresponding Python objects. - - Resolve only basic YAML tags. This is known - to be safe for untrusted input. - """ - return load_all(stream, SafeLoader) - -def unsafe_load(stream): - """ - Parse the first YAML document in a stream - and produce the corresponding Python object. - - Resolve all tags, even those known to be - unsafe on untrusted input. - """ - return load(stream, UnsafeLoader) - -def unsafe_load_all(stream): - """ - Parse all YAML documents in a stream - and produce corresponding Python objects. - - Resolve all tags, even those known to be - unsafe on untrusted input. - """ - return load_all(stream, UnsafeLoader) - -def emit(events, stream=None, Dumper=Dumper, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None): - """ - Emit YAML parsing events into a stream. - If stream is None, return the produced string instead. - """ - getvalue = None - if stream is None: - stream = io.StringIO() - getvalue = stream.getvalue - dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - try: - for event in events: - dumper.emit(event) - finally: - dumper.dispose() - if getvalue: - return getvalue() - -def serialize_all(nodes, stream=None, Dumper=Dumper, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None): - """ - Serialize a sequence of representation trees into a YAML stream. - If stream is None, return the produced string instead. - """ - getvalue = None - if stream is None: - if encoding is None: - stream = io.StringIO() - else: - stream = io.BytesIO() - getvalue = stream.getvalue - dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break, - encoding=encoding, version=version, tags=tags, - explicit_start=explicit_start, explicit_end=explicit_end) - try: - dumper.open() - for node in nodes: - dumper.serialize(node) - dumper.close() - finally: - dumper.dispose() - if getvalue: - return getvalue() - -def serialize(node, stream=None, Dumper=Dumper, **kwds): - """ - Serialize a representation tree into a YAML stream. - If stream is None, return the produced string instead. - """ - return serialize_all([node], stream, Dumper=Dumper, **kwds) - -def dump_all(documents, stream=None, Dumper=Dumper, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - """ - Serialize a sequence of Python objects into a YAML stream. - If stream is None, return the produced string instead. - """ - getvalue = None - if stream is None: - if encoding is None: - stream = io.StringIO() - else: - stream = io.BytesIO() - getvalue = stream.getvalue - dumper = Dumper(stream, default_style=default_style, - default_flow_style=default_flow_style, - canonical=canonical, indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break, - encoding=encoding, version=version, tags=tags, - explicit_start=explicit_start, explicit_end=explicit_end, sort_keys=sort_keys) - try: - dumper.open() - for data in documents: - dumper.represent(data) - dumper.close() - finally: - dumper.dispose() - if getvalue: - return getvalue() - -def dump(data, stream=None, Dumper=Dumper, **kwds): - """ - Serialize a Python object into a YAML stream. - If stream is None, return the produced string instead. - """ - return dump_all([data], stream, Dumper=Dumper, **kwds) - -def safe_dump_all(documents, stream=None, **kwds): - """ - Serialize a sequence of Python objects into a YAML stream. - Produce only basic YAML tags. - If stream is None, return the produced string instead. - """ - return dump_all(documents, stream, Dumper=SafeDumper, **kwds) - -def safe_dump(data, stream=None, **kwds): - """ - Serialize a Python object into a YAML stream. - Produce only basic YAML tags. - If stream is None, return the produced string instead. - """ - return dump_all([data], stream, Dumper=SafeDumper, **kwds) - -def add_implicit_resolver(tag, regexp, first=None, - Loader=Loader, Dumper=Dumper): - """ - Add an implicit scalar detector. - If an implicit scalar value matches the given regexp, - the corresponding tag is assigned to the scalar. - first is a sequence of possible initial characters or None. - """ - Loader.add_implicit_resolver(tag, regexp, first) - Dumper.add_implicit_resolver(tag, regexp, first) - -def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper): - """ - Add a path based resolver for the given tag. - A path is a list of keys that forms a path - to a node in the representation tree. - Keys can be string values, integers, or None. - """ - Loader.add_path_resolver(tag, path, kind) - Dumper.add_path_resolver(tag, path, kind) - -def add_constructor(tag, constructor, Loader=Loader): - """ - Add a constructor for the given tag. - Constructor is a function that accepts a Loader instance - and a node object and produces the corresponding Python object. - """ - Loader.add_constructor(tag, constructor) - -def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader): - """ - Add a multi-constructor for the given tag prefix. - Multi-constructor is called for a node if its tag starts with tag_prefix. - Multi-constructor accepts a Loader instance, a tag suffix, - and a node object and produces the corresponding Python object. - """ - Loader.add_multi_constructor(tag_prefix, multi_constructor) - -def add_representer(data_type, representer, Dumper=Dumper): - """ - Add a representer for the given type. - Representer is a function accepting a Dumper instance - and an instance of the given data type - and producing the corresponding representation node. - """ - Dumper.add_representer(data_type, representer) - -def add_multi_representer(data_type, multi_representer, Dumper=Dumper): - """ - Add a representer for the given type. - Multi-representer is a function accepting a Dumper instance - and an instance of the given data type or subtype - and producing the corresponding representation node. - """ - Dumper.add_multi_representer(data_type, multi_representer) - -class YAMLObjectMetaclass(type): - """ - The metaclass for YAMLObject. - """ - def __init__(cls, name, bases, kwds): - super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) - if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: - cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) - cls.yaml_dumper.add_representer(cls, cls.to_yaml) - -class YAMLObject(metaclass=YAMLObjectMetaclass): - """ - An object that can dump itself to a YAML stream - and load itself from a YAML stream. - """ - - __slots__ = () # no direct instantiation, so allow immutable subclasses - - yaml_loader = Loader - yaml_dumper = Dumper - - yaml_tag = None - yaml_flow_style = None - - @classmethod - def from_yaml(cls, loader, node): - """ - Convert a representation node to a Python object. - """ - return loader.construct_yaml_object(node, cls) - - @classmethod - def to_yaml(cls, dumper, data): - """ - Convert a Python object to a representation node. - """ - return dumper.represent_yaml_object(cls.yaml_tag, data, cls, - flow_style=cls.yaml_flow_style) - diff --git a/libs/yaml/composer.py b/libs/yaml/composer.py deleted file mode 100644 index 6d15cb40e..000000000 --- a/libs/yaml/composer.py +++ /dev/null @@ -1,139 +0,0 @@ - -__all__ = ['Composer', 'ComposerError'] - -from .error import MarkedYAMLError -from .events import * -from .nodes import * - -class ComposerError(MarkedYAMLError): - pass - -class Composer: - - def __init__(self): - self.anchors = {} - - def check_node(self): - # Drop the STREAM-START event. - if self.check_event(StreamStartEvent): - self.get_event() - - # If there are more documents available? - return not self.check_event(StreamEndEvent) - - def get_node(self): - # Get the root node of the next document. - if not self.check_event(StreamEndEvent): - return self.compose_document() - - def get_single_node(self): - # Drop the STREAM-START event. - self.get_event() - - # Compose a document if the stream is not empty. - document = None - if not self.check_event(StreamEndEvent): - document = self.compose_document() - - # Ensure that the stream contains no more documents. - if not self.check_event(StreamEndEvent): - event = self.get_event() - raise ComposerError("expected a single document in the stream", - document.start_mark, "but found another document", - event.start_mark) - - # Drop the STREAM-END event. - self.get_event() - - return document - - def compose_document(self): - # Drop the DOCUMENT-START event. - self.get_event() - - # Compose the root node. - node = self.compose_node(None, None) - - # Drop the DOCUMENT-END event. - self.get_event() - - self.anchors = {} - return node - - def compose_node(self, parent, index): - if self.check_event(AliasEvent): - event = self.get_event() - anchor = event.anchor - if anchor not in self.anchors: - raise ComposerError(None, None, "found undefined alias %r" - % anchor, event.start_mark) - return self.anchors[anchor] - event = self.peek_event() - anchor = event.anchor - if anchor is not None: - if anchor in self.anchors: - raise ComposerError("found duplicate anchor %r; first occurrence" - % anchor, self.anchors[anchor].start_mark, - "second occurrence", event.start_mark) - self.descend_resolver(parent, index) - if self.check_event(ScalarEvent): - node = self.compose_scalar_node(anchor) - elif self.check_event(SequenceStartEvent): - node = self.compose_sequence_node(anchor) - elif self.check_event(MappingStartEvent): - node = self.compose_mapping_node(anchor) - self.ascend_resolver() - return node - - def compose_scalar_node(self, anchor): - event = self.get_event() - tag = event.tag - if tag is None or tag == '!': - tag = self.resolve(ScalarNode, event.value, event.implicit) - node = ScalarNode(tag, event.value, - event.start_mark, event.end_mark, style=event.style) - if anchor is not None: - self.anchors[anchor] = node - return node - - def compose_sequence_node(self, anchor): - start_event = self.get_event() - tag = start_event.tag - if tag is None or tag == '!': - tag = self.resolve(SequenceNode, None, start_event.implicit) - node = SequenceNode(tag, [], - start_event.start_mark, None, - flow_style=start_event.flow_style) - if anchor is not None: - self.anchors[anchor] = node - index = 0 - while not self.check_event(SequenceEndEvent): - node.value.append(self.compose_node(node, index)) - index += 1 - end_event = self.get_event() - node.end_mark = end_event.end_mark - return node - - def compose_mapping_node(self, anchor): - start_event = self.get_event() - tag = start_event.tag - if tag is None or tag == '!': - tag = self.resolve(MappingNode, None, start_event.implicit) - node = MappingNode(tag, [], - start_event.start_mark, None, - flow_style=start_event.flow_style) - if anchor is not None: - self.anchors[anchor] = node - while not self.check_event(MappingEndEvent): - #key_event = self.peek_event() - item_key = self.compose_node(node, None) - #if item_key in node.value: - # raise ComposerError("while composing a mapping", start_event.start_mark, - # "found duplicate key", key_event.start_mark) - item_value = self.compose_node(node, item_key) - #node.value[item_key] = item_value - node.value.append((item_key, item_value)) - end_event = self.get_event() - node.end_mark = end_event.end_mark - return node - diff --git a/libs/yaml/constructor.py b/libs/yaml/constructor.py deleted file mode 100644 index 34fc1ae92..000000000 --- a/libs/yaml/constructor.py +++ /dev/null @@ -1,720 +0,0 @@ - -__all__ = [ - 'BaseConstructor', - 'SafeConstructor', - 'FullConstructor', - 'UnsafeConstructor', - 'Constructor', - 'ConstructorError' -] - -from .error import * -from .nodes import * - -import collections.abc, datetime, base64, binascii, re, sys, types - -class ConstructorError(MarkedYAMLError): - pass - -class BaseConstructor: - - yaml_constructors = {} - yaml_multi_constructors = {} - - def __init__(self): - self.constructed_objects = {} - self.recursive_objects = {} - self.state_generators = [] - self.deep_construct = False - - def check_data(self): - # If there are more documents available? - return self.check_node() - - def get_data(self): - # Construct and return the next document. - if self.check_node(): - return self.construct_document(self.get_node()) - - def get_single_data(self): - # Ensure that the stream contains a single document and construct it. - node = self.get_single_node() - if node is not None: - return self.construct_document(node) - return None - - def construct_document(self, node): - data = self.construct_object(node) - while self.state_generators: - state_generators = self.state_generators - self.state_generators = [] - for generator in state_generators: - for dummy in generator: - pass - self.constructed_objects = {} - self.recursive_objects = {} - self.deep_construct = False - return data - - def construct_object(self, node, deep=False): - if node in self.constructed_objects: - return self.constructed_objects[node] - if deep: - old_deep = self.deep_construct - self.deep_construct = True - if node in self.recursive_objects: - raise ConstructorError(None, None, - "found unconstructable recursive node", node.start_mark) - self.recursive_objects[node] = None - constructor = None - tag_suffix = None - if node.tag in self.yaml_constructors: - constructor = self.yaml_constructors[node.tag] - else: - for tag_prefix in self.yaml_multi_constructors: - if node.tag.startswith(tag_prefix): - tag_suffix = node.tag[len(tag_prefix):] - constructor = self.yaml_multi_constructors[tag_prefix] - break - else: - if None in self.yaml_multi_constructors: - tag_suffix = node.tag - constructor = self.yaml_multi_constructors[None] - elif None in self.yaml_constructors: - constructor = self.yaml_constructors[None] - elif isinstance(node, ScalarNode): - constructor = self.__class__.construct_scalar - elif isinstance(node, SequenceNode): - constructor = self.__class__.construct_sequence - elif isinstance(node, MappingNode): - constructor = self.__class__.construct_mapping - if tag_suffix is None: - data = constructor(self, node) - else: - data = constructor(self, tag_suffix, node) - if isinstance(data, types.GeneratorType): - generator = data - data = next(generator) - if self.deep_construct: - for dummy in generator: - pass - else: - self.state_generators.append(generator) - self.constructed_objects[node] = data - del self.recursive_objects[node] - if deep: - self.deep_construct = old_deep - return data - - def construct_scalar(self, node): - if not isinstance(node, ScalarNode): - raise ConstructorError(None, None, - "expected a scalar node, but found %s" % node.id, - node.start_mark) - return node.value - - def construct_sequence(self, node, deep=False): - if not isinstance(node, SequenceNode): - raise ConstructorError(None, None, - "expected a sequence node, but found %s" % node.id, - node.start_mark) - return [self.construct_object(child, deep=deep) - for child in node.value] - - def construct_mapping(self, node, deep=False): - if not isinstance(node, MappingNode): - raise ConstructorError(None, None, - "expected a mapping node, but found %s" % node.id, - node.start_mark) - mapping = {} - for key_node, value_node in node.value: - key = self.construct_object(key_node, deep=deep) - if not isinstance(key, collections.abc.Hashable): - raise ConstructorError("while constructing a mapping", node.start_mark, - "found unhashable key", key_node.start_mark) - value = self.construct_object(value_node, deep=deep) - mapping[key] = value - return mapping - - def construct_pairs(self, node, deep=False): - if not isinstance(node, MappingNode): - raise ConstructorError(None, None, - "expected a mapping node, but found %s" % node.id, - node.start_mark) - pairs = [] - for key_node, value_node in node.value: - key = self.construct_object(key_node, deep=deep) - value = self.construct_object(value_node, deep=deep) - pairs.append((key, value)) - return pairs - - @classmethod - def add_constructor(cls, tag, constructor): - if not 'yaml_constructors' in cls.__dict__: - cls.yaml_constructors = cls.yaml_constructors.copy() - cls.yaml_constructors[tag] = constructor - - @classmethod - def add_multi_constructor(cls, tag_prefix, multi_constructor): - if not 'yaml_multi_constructors' in cls.__dict__: - cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() - cls.yaml_multi_constructors[tag_prefix] = multi_constructor - -class SafeConstructor(BaseConstructor): - - def construct_scalar(self, node): - if isinstance(node, MappingNode): - for key_node, value_node in node.value: - if key_node.tag == 'tag:yaml.org,2002:value': - return self.construct_scalar(value_node) - return super().construct_scalar(node) - - def flatten_mapping(self, node): - merge = [] - index = 0 - while index < len(node.value): - key_node, value_node = node.value[index] - if key_node.tag == 'tag:yaml.org,2002:merge': - del node.value[index] - if isinstance(value_node, MappingNode): - self.flatten_mapping(value_node) - merge.extend(value_node.value) - elif isinstance(value_node, SequenceNode): - submerge = [] - for subnode in value_node.value: - if not isinstance(subnode, MappingNode): - raise ConstructorError("while constructing a mapping", - node.start_mark, - "expected a mapping for merging, but found %s" - % subnode.id, subnode.start_mark) - self.flatten_mapping(subnode) - submerge.append(subnode.value) - submerge.reverse() - for value in submerge: - merge.extend(value) - else: - raise ConstructorError("while constructing a mapping", node.start_mark, - "expected a mapping or list of mappings for merging, but found %s" - % value_node.id, value_node.start_mark) - elif key_node.tag == 'tag:yaml.org,2002:value': - key_node.tag = 'tag:yaml.org,2002:str' - index += 1 - else: - index += 1 - if merge: - node.value = merge + node.value - - def construct_mapping(self, node, deep=False): - if isinstance(node, MappingNode): - self.flatten_mapping(node) - return super().construct_mapping(node, deep=deep) - - def construct_yaml_null(self, node): - self.construct_scalar(node) - return None - - bool_values = { - 'yes': True, - 'no': False, - 'true': True, - 'false': False, - 'on': True, - 'off': False, - } - - def construct_yaml_bool(self, node): - value = self.construct_scalar(node) - return self.bool_values[value.lower()] - - def construct_yaml_int(self, node): - value = self.construct_scalar(node) - value = value.replace('_', '') - sign = +1 - if value[0] == '-': - sign = -1 - if value[0] in '+-': - value = value[1:] - if value == '0': - return 0 - elif value.startswith('0b'): - return sign*int(value[2:], 2) - elif value.startswith('0x'): - return sign*int(value[2:], 16) - elif value[0] == '0': - return sign*int(value, 8) - elif ':' in value: - digits = [int(part) for part in value.split(':')] - digits.reverse() - base = 1 - value = 0 - for digit in digits: - value += digit*base - base *= 60 - return sign*value - else: - return sign*int(value) - - inf_value = 1e300 - while inf_value != inf_value*inf_value: - inf_value *= inf_value - nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99). - - def construct_yaml_float(self, node): - value = self.construct_scalar(node) - value = value.replace('_', '').lower() - sign = +1 - if value[0] == '-': - sign = -1 - if value[0] in '+-': - value = value[1:] - if value == '.inf': - return sign*self.inf_value - elif value == '.nan': - return self.nan_value - elif ':' in value: - digits = [float(part) for part in value.split(':')] - digits.reverse() - base = 1 - value = 0.0 - for digit in digits: - value += digit*base - base *= 60 - return sign*value - else: - return sign*float(value) - - def construct_yaml_binary(self, node): - try: - value = self.construct_scalar(node).encode('ascii') - except UnicodeEncodeError as exc: - raise ConstructorError(None, None, - "failed to convert base64 data into ascii: %s" % exc, - node.start_mark) - try: - if hasattr(base64, 'decodebytes'): - return base64.decodebytes(value) - else: - return base64.decodestring(value) - except binascii.Error as exc: - raise ConstructorError(None, None, - "failed to decode base64 data: %s" % exc, node.start_mark) - - timestamp_regexp = re.compile( - r'''^(?P<year>[0-9][0-9][0-9][0-9]) - -(?P<month>[0-9][0-9]?) - -(?P<day>[0-9][0-9]?) - (?:(?:[Tt]|[ \t]+) - (?P<hour>[0-9][0-9]?) - :(?P<minute>[0-9][0-9]) - :(?P<second>[0-9][0-9]) - (?:\.(?P<fraction>[0-9]*))? - (?:[ \t]*(?P<tz>Z|(?P<tz_sign>[-+])(?P<tz_hour>[0-9][0-9]?) - (?::(?P<tz_minute>[0-9][0-9]))?))?)?$''', re.X) - - def construct_yaml_timestamp(self, node): - value = self.construct_scalar(node) - match = self.timestamp_regexp.match(node.value) - values = match.groupdict() - year = int(values['year']) - month = int(values['month']) - day = int(values['day']) - if not values['hour']: - return datetime.date(year, month, day) - hour = int(values['hour']) - minute = int(values['minute']) - second = int(values['second']) - fraction = 0 - if values['fraction']: - fraction = values['fraction'][:6] - while len(fraction) < 6: - fraction += '0' - fraction = int(fraction) - delta = None - if values['tz_sign']: - tz_hour = int(values['tz_hour']) - tz_minute = int(values['tz_minute'] or 0) - delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute) - if values['tz_sign'] == '-': - delta = -delta - data = datetime.datetime(year, month, day, hour, minute, second, fraction) - if delta: - data -= delta - return data - - def construct_yaml_omap(self, node): - # Note: we do not check for duplicate keys, because it's too - # CPU-expensive. - omap = [] - yield omap - if not isinstance(node, SequenceNode): - raise ConstructorError("while constructing an ordered map", node.start_mark, - "expected a sequence, but found %s" % node.id, node.start_mark) - for subnode in node.value: - if not isinstance(subnode, MappingNode): - raise ConstructorError("while constructing an ordered map", node.start_mark, - "expected a mapping of length 1, but found %s" % subnode.id, - subnode.start_mark) - if len(subnode.value) != 1: - raise ConstructorError("while constructing an ordered map", node.start_mark, - "expected a single mapping item, but found %d items" % len(subnode.value), - subnode.start_mark) - key_node, value_node = subnode.value[0] - key = self.construct_object(key_node) - value = self.construct_object(value_node) - omap.append((key, value)) - - def construct_yaml_pairs(self, node): - # Note: the same code as `construct_yaml_omap`. - pairs = [] - yield pairs - if not isinstance(node, SequenceNode): - raise ConstructorError("while constructing pairs", node.start_mark, - "expected a sequence, but found %s" % node.id, node.start_mark) - for subnode in node.value: - if not isinstance(subnode, MappingNode): - raise ConstructorError("while constructing pairs", node.start_mark, - "expected a mapping of length 1, but found %s" % subnode.id, - subnode.start_mark) - if len(subnode.value) != 1: - raise ConstructorError("while constructing pairs", node.start_mark, - "expected a single mapping item, but found %d items" % len(subnode.value), - subnode.start_mark) - key_node, value_node = subnode.value[0] - key = self.construct_object(key_node) - value = self.construct_object(value_node) - pairs.append((key, value)) - - def construct_yaml_set(self, node): - data = set() - yield data - value = self.construct_mapping(node) - data.update(value) - - def construct_yaml_str(self, node): - return self.construct_scalar(node) - - def construct_yaml_seq(self, node): - data = [] - yield data - data.extend(self.construct_sequence(node)) - - def construct_yaml_map(self, node): - data = {} - yield data - value = self.construct_mapping(node) - data.update(value) - - def construct_yaml_object(self, node, cls): - data = cls.__new__(cls) - yield data - if hasattr(data, '__setstate__'): - state = self.construct_mapping(node, deep=True) - data.__setstate__(state) - else: - state = self.construct_mapping(node) - data.__dict__.update(state) - - def construct_undefined(self, node): - raise ConstructorError(None, None, - "could not determine a constructor for the tag %r" % node.tag, - node.start_mark) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:null', - SafeConstructor.construct_yaml_null) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:bool', - SafeConstructor.construct_yaml_bool) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:int', - SafeConstructor.construct_yaml_int) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:float', - SafeConstructor.construct_yaml_float) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:binary', - SafeConstructor.construct_yaml_binary) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:timestamp', - SafeConstructor.construct_yaml_timestamp) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:omap', - SafeConstructor.construct_yaml_omap) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:pairs', - SafeConstructor.construct_yaml_pairs) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:set', - SafeConstructor.construct_yaml_set) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:str', - SafeConstructor.construct_yaml_str) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:seq', - SafeConstructor.construct_yaml_seq) - -SafeConstructor.add_constructor( - 'tag:yaml.org,2002:map', - SafeConstructor.construct_yaml_map) - -SafeConstructor.add_constructor(None, - SafeConstructor.construct_undefined) - -class FullConstructor(SafeConstructor): - - def construct_python_str(self, node): - return self.construct_scalar(node) - - def construct_python_unicode(self, node): - return self.construct_scalar(node) - - def construct_python_bytes(self, node): - try: - value = self.construct_scalar(node).encode('ascii') - except UnicodeEncodeError as exc: - raise ConstructorError(None, None, - "failed to convert base64 data into ascii: %s" % exc, - node.start_mark) - try: - if hasattr(base64, 'decodebytes'): - return base64.decodebytes(value) - else: - return base64.decodestring(value) - except binascii.Error as exc: - raise ConstructorError(None, None, - "failed to decode base64 data: %s" % exc, node.start_mark) - - def construct_python_long(self, node): - return self.construct_yaml_int(node) - - def construct_python_complex(self, node): - return complex(self.construct_scalar(node)) - - def construct_python_tuple(self, node): - return tuple(self.construct_sequence(node)) - - def find_python_module(self, name, mark, unsafe=False): - if not name: - raise ConstructorError("while constructing a Python module", mark, - "expected non-empty name appended to the tag", mark) - if unsafe: - try: - __import__(name) - except ImportError as exc: - raise ConstructorError("while constructing a Python module", mark, - "cannot find module %r (%s)" % (name, exc), mark) - if not name in sys.modules: - raise ConstructorError("while constructing a Python module", mark, - "module %r is not imported" % name, mark) - return sys.modules[name] - - def find_python_name(self, name, mark, unsafe=False): - if not name: - raise ConstructorError("while constructing a Python object", mark, - "expected non-empty name appended to the tag", mark) - if '.' in name: - module_name, object_name = name.rsplit('.', 1) - else: - module_name = 'builtins' - object_name = name - if unsafe: - try: - __import__(module_name) - except ImportError as exc: - raise ConstructorError("while constructing a Python object", mark, - "cannot find module %r (%s)" % (module_name, exc), mark) - if not module_name in sys.modules: - raise ConstructorError("while constructing a Python object", mark, - "module %r is not imported" % module_name, mark) - module = sys.modules[module_name] - if not hasattr(module, object_name): - raise ConstructorError("while constructing a Python object", mark, - "cannot find %r in the module %r" - % (object_name, module.__name__), mark) - return getattr(module, object_name) - - def construct_python_name(self, suffix, node): - value = self.construct_scalar(node) - if value: - raise ConstructorError("while constructing a Python name", node.start_mark, - "expected the empty value, but found %r" % value, node.start_mark) - return self.find_python_name(suffix, node.start_mark) - - def construct_python_module(self, suffix, node): - value = self.construct_scalar(node) - if value: - raise ConstructorError("while constructing a Python module", node.start_mark, - "expected the empty value, but found %r" % value, node.start_mark) - return self.find_python_module(suffix, node.start_mark) - - def make_python_instance(self, suffix, node, - args=None, kwds=None, newobj=False, unsafe=False): - if not args: - args = [] - if not kwds: - kwds = {} - cls = self.find_python_name(suffix, node.start_mark) - if not (unsafe or isinstance(cls, type)): - raise ConstructorError("while constructing a Python instance", node.start_mark, - "expected a class, but found %r" % type(cls), - node.start_mark) - if newobj and isinstance(cls, type): - return cls.__new__(cls, *args, **kwds) - else: - return cls(*args, **kwds) - - def set_python_instance_state(self, instance, state): - if hasattr(instance, '__setstate__'): - instance.__setstate__(state) - else: - slotstate = {} - if isinstance(state, tuple) and len(state) == 2: - state, slotstate = state - if hasattr(instance, '__dict__'): - instance.__dict__.update(state) - elif state: - slotstate.update(state) - for key, value in slotstate.items(): - setattr(object, key, value) - - def construct_python_object(self, suffix, node): - # Format: - # !!python/object:module.name { ... state ... } - instance = self.make_python_instance(suffix, node, newobj=True) - yield instance - deep = hasattr(instance, '__setstate__') - state = self.construct_mapping(node, deep=deep) - self.set_python_instance_state(instance, state) - - def construct_python_object_apply(self, suffix, node, newobj=False): - # Format: - # !!python/object/apply # (or !!python/object/new) - # args: [ ... arguments ... ] - # kwds: { ... keywords ... } - # state: ... state ... - # listitems: [ ... listitems ... ] - # dictitems: { ... dictitems ... } - # or short format: - # !!python/object/apply [ ... arguments ... ] - # The difference between !!python/object/apply and !!python/object/new - # is how an object is created, check make_python_instance for details. - if isinstance(node, SequenceNode): - args = self.construct_sequence(node, deep=True) - kwds = {} - state = {} - listitems = [] - dictitems = {} - else: - value = self.construct_mapping(node, deep=True) - args = value.get('args', []) - kwds = value.get('kwds', {}) - state = value.get('state', {}) - listitems = value.get('listitems', []) - dictitems = value.get('dictitems', {}) - instance = self.make_python_instance(suffix, node, args, kwds, newobj) - if state: - self.set_python_instance_state(instance, state) - if listitems: - instance.extend(listitems) - if dictitems: - for key in dictitems: - instance[key] = dictitems[key] - return instance - - def construct_python_object_new(self, suffix, node): - return self.construct_python_object_apply(suffix, node, newobj=True) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/none', - FullConstructor.construct_yaml_null) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/bool', - FullConstructor.construct_yaml_bool) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/str', - FullConstructor.construct_python_str) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/unicode', - FullConstructor.construct_python_unicode) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/bytes', - FullConstructor.construct_python_bytes) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/int', - FullConstructor.construct_yaml_int) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/long', - FullConstructor.construct_python_long) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/float', - FullConstructor.construct_yaml_float) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/complex', - FullConstructor.construct_python_complex) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/list', - FullConstructor.construct_yaml_seq) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/tuple', - FullConstructor.construct_python_tuple) - -FullConstructor.add_constructor( - 'tag:yaml.org,2002:python/dict', - FullConstructor.construct_yaml_map) - -FullConstructor.add_multi_constructor( - 'tag:yaml.org,2002:python/name:', - FullConstructor.construct_python_name) - -FullConstructor.add_multi_constructor( - 'tag:yaml.org,2002:python/module:', - FullConstructor.construct_python_module) - -FullConstructor.add_multi_constructor( - 'tag:yaml.org,2002:python/object:', - FullConstructor.construct_python_object) - -FullConstructor.add_multi_constructor( - 'tag:yaml.org,2002:python/object/apply:', - FullConstructor.construct_python_object_apply) - -FullConstructor.add_multi_constructor( - 'tag:yaml.org,2002:python/object/new:', - FullConstructor.construct_python_object_new) - -class UnsafeConstructor(FullConstructor): - - def find_python_module(self, name, mark): - return super(UnsafeConstructor, self).find_python_module(name, mark, unsafe=True) - - def find_python_name(self, name, mark): - return super(UnsafeConstructor, self).find_python_name(name, mark, unsafe=True) - - def make_python_instance(self, suffix, node, args=None, kwds=None, newobj=False): - return super(UnsafeConstructor, self).make_python_instance( - suffix, node, args, kwds, newobj, unsafe=True) - -# Constructor is same as UnsafeConstructor. Need to leave this in place in case -# people have extended it directly. -class Constructor(UnsafeConstructor): - pass diff --git a/libs/yaml/cyaml.py b/libs/yaml/cyaml.py deleted file mode 100644 index 1e606c74b..000000000 --- a/libs/yaml/cyaml.py +++ /dev/null @@ -1,101 +0,0 @@ - -__all__ = [ - 'CBaseLoader', 'CSafeLoader', 'CFullLoader', 'CUnsafeLoader', 'CLoader', - 'CBaseDumper', 'CSafeDumper', 'CDumper' -] - -from _yaml import CParser, CEmitter - -from .constructor import * - -from .serializer import * -from .representer import * - -from .resolver import * - -class CBaseLoader(CParser, BaseConstructor, BaseResolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - BaseConstructor.__init__(self) - BaseResolver.__init__(self) - -class CSafeLoader(CParser, SafeConstructor, Resolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - SafeConstructor.__init__(self) - Resolver.__init__(self) - -class CFullLoader(CParser, FullConstructor, Resolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - FullConstructor.__init__(self) - Resolver.__init__(self) - -class CUnsafeLoader(CParser, UnsafeConstructor, Resolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - UnsafeConstructor.__init__(self) - Resolver.__init__(self) - -class CLoader(CParser, Constructor, Resolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - Constructor.__init__(self) - Resolver.__init__(self) - -class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - CEmitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, encoding=encoding, - allow_unicode=allow_unicode, line_break=line_break, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - -class CSafeDumper(CEmitter, SafeRepresenter, Resolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - CEmitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, encoding=encoding, - allow_unicode=allow_unicode, line_break=line_break, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - SafeRepresenter.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - -class CDumper(CEmitter, Serializer, Representer, Resolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - CEmitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, encoding=encoding, - allow_unicode=allow_unicode, line_break=line_break, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - diff --git a/libs/yaml/dumper.py b/libs/yaml/dumper.py deleted file mode 100644 index 6aadba551..000000000 --- a/libs/yaml/dumper.py +++ /dev/null @@ -1,62 +0,0 @@ - -__all__ = ['BaseDumper', 'SafeDumper', 'Dumper'] - -from .emitter import * -from .serializer import * -from .representer import * -from .resolver import * - -class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - Emitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - Serializer.__init__(self, encoding=encoding, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - -class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - Emitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - Serializer.__init__(self, encoding=encoding, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - SafeRepresenter.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - -class Dumper(Emitter, Serializer, Representer, Resolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - Emitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - Serializer.__init__(self, encoding=encoding, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - diff --git a/libs/yaml/emitter.py b/libs/yaml/emitter.py deleted file mode 100644 index a664d0111..000000000 --- a/libs/yaml/emitter.py +++ /dev/null @@ -1,1137 +0,0 @@ - -# Emitter expects events obeying the following grammar: -# stream ::= STREAM-START document* STREAM-END -# document ::= DOCUMENT-START node DOCUMENT-END -# node ::= SCALAR | sequence | mapping -# sequence ::= SEQUENCE-START node* SEQUENCE-END -# mapping ::= MAPPING-START (node node)* MAPPING-END - -__all__ = ['Emitter', 'EmitterError'] - -from .error import YAMLError -from .events import * - -class EmitterError(YAMLError): - pass - -class ScalarAnalysis: - def __init__(self, scalar, empty, multiline, - allow_flow_plain, allow_block_plain, - allow_single_quoted, allow_double_quoted, - allow_block): - self.scalar = scalar - self.empty = empty - self.multiline = multiline - self.allow_flow_plain = allow_flow_plain - self.allow_block_plain = allow_block_plain - self.allow_single_quoted = allow_single_quoted - self.allow_double_quoted = allow_double_quoted - self.allow_block = allow_block - -class Emitter: - - DEFAULT_TAG_PREFIXES = { - '!' : '!', - 'tag:yaml.org,2002:' : '!!', - } - - def __init__(self, stream, canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None): - - # The stream should have the methods `write` and possibly `flush`. - self.stream = stream - - # Encoding can be overridden by STREAM-START. - self.encoding = None - - # Emitter is a state machine with a stack of states to handle nested - # structures. - self.states = [] - self.state = self.expect_stream_start - - # Current event and the event queue. - self.events = [] - self.event = None - - # The current indentation level and the stack of previous indents. - self.indents = [] - self.indent = None - - # Flow level. - self.flow_level = 0 - - # Contexts. - self.root_context = False - self.sequence_context = False - self.mapping_context = False - self.simple_key_context = False - - # Characteristics of the last emitted character: - # - current position. - # - is it a whitespace? - # - is it an indention character - # (indentation space, '-', '?', or ':')? - self.line = 0 - self.column = 0 - self.whitespace = True - self.indention = True - - # Whether the document requires an explicit document indicator - self.open_ended = False - - # Formatting details. - self.canonical = canonical - self.allow_unicode = allow_unicode - self.best_indent = 2 - if indent and 1 < indent < 10: - self.best_indent = indent - self.best_width = 80 - if width and width > self.best_indent*2: - self.best_width = width - self.best_line_break = '\n' - if line_break in ['\r', '\n', '\r\n']: - self.best_line_break = line_break - - # Tag prefixes. - self.tag_prefixes = None - - # Prepared anchor and tag. - self.prepared_anchor = None - self.prepared_tag = None - - # Scalar analysis and style. - self.analysis = None - self.style = None - - def dispose(self): - # Reset the state attributes (to clear self-references) - self.states = [] - self.state = None - - def emit(self, event): - self.events.append(event) - while not self.need_more_events(): - self.event = self.events.pop(0) - self.state() - self.event = None - - # In some cases, we wait for a few next events before emitting. - - def need_more_events(self): - if not self.events: - return True - event = self.events[0] - if isinstance(event, DocumentStartEvent): - return self.need_events(1) - elif isinstance(event, SequenceStartEvent): - return self.need_events(2) - elif isinstance(event, MappingStartEvent): - return self.need_events(3) - else: - return False - - def need_events(self, count): - level = 0 - for event in self.events[1:]: - if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): - level += 1 - elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): - level -= 1 - elif isinstance(event, StreamEndEvent): - level = -1 - if level < 0: - return False - return (len(self.events) < count+1) - - def increase_indent(self, flow=False, indentless=False): - self.indents.append(self.indent) - if self.indent is None: - if flow: - self.indent = self.best_indent - else: - self.indent = 0 - elif not indentless: - self.indent += self.best_indent - - # States. - - # Stream handlers. - - def expect_stream_start(self): - if isinstance(self.event, StreamStartEvent): - if self.event.encoding and not hasattr(self.stream, 'encoding'): - self.encoding = self.event.encoding - self.write_stream_start() - self.state = self.expect_first_document_start - else: - raise EmitterError("expected StreamStartEvent, but got %s" - % self.event) - - def expect_nothing(self): - raise EmitterError("expected nothing, but got %s" % self.event) - - # Document handlers. - - def expect_first_document_start(self): - return self.expect_document_start(first=True) - - def expect_document_start(self, first=False): - if isinstance(self.event, DocumentStartEvent): - if (self.event.version or self.event.tags) and self.open_ended: - self.write_indicator('...', True) - self.write_indent() - if self.event.version: - version_text = self.prepare_version(self.event.version) - self.write_version_directive(version_text) - self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() - if self.event.tags: - handles = sorted(self.event.tags.keys()) - for handle in handles: - prefix = self.event.tags[handle] - self.tag_prefixes[prefix] = handle - handle_text = self.prepare_tag_handle(handle) - prefix_text = self.prepare_tag_prefix(prefix) - self.write_tag_directive(handle_text, prefix_text) - implicit = (first and not self.event.explicit and not self.canonical - and not self.event.version and not self.event.tags - and not self.check_empty_document()) - if not implicit: - self.write_indent() - self.write_indicator('---', True) - if self.canonical: - self.write_indent() - self.state = self.expect_document_root - elif isinstance(self.event, StreamEndEvent): - if self.open_ended: - self.write_indicator('...', True) - self.write_indent() - self.write_stream_end() - self.state = self.expect_nothing - else: - raise EmitterError("expected DocumentStartEvent, but got %s" - % self.event) - - def expect_document_end(self): - if isinstance(self.event, DocumentEndEvent): - self.write_indent() - if self.event.explicit: - self.write_indicator('...', True) - self.write_indent() - self.flush_stream() - self.state = self.expect_document_start - else: - raise EmitterError("expected DocumentEndEvent, but got %s" - % self.event) - - def expect_document_root(self): - self.states.append(self.expect_document_end) - self.expect_node(root=True) - - # Node handlers. - - def expect_node(self, root=False, sequence=False, mapping=False, - simple_key=False): - self.root_context = root - self.sequence_context = sequence - self.mapping_context = mapping - self.simple_key_context = simple_key - if isinstance(self.event, AliasEvent): - self.expect_alias() - elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): - self.process_anchor('&') - self.process_tag() - if isinstance(self.event, ScalarEvent): - self.expect_scalar() - elif isinstance(self.event, SequenceStartEvent): - if self.flow_level or self.canonical or self.event.flow_style \ - or self.check_empty_sequence(): - self.expect_flow_sequence() - else: - self.expect_block_sequence() - elif isinstance(self.event, MappingStartEvent): - if self.flow_level or self.canonical or self.event.flow_style \ - or self.check_empty_mapping(): - self.expect_flow_mapping() - else: - self.expect_block_mapping() - else: - raise EmitterError("expected NodeEvent, but got %s" % self.event) - - def expect_alias(self): - if self.event.anchor is None: - raise EmitterError("anchor is not specified for alias") - self.process_anchor('*') - self.state = self.states.pop() - - def expect_scalar(self): - self.increase_indent(flow=True) - self.process_scalar() - self.indent = self.indents.pop() - self.state = self.states.pop() - - # Flow sequence handlers. - - def expect_flow_sequence(self): - self.write_indicator('[', True, whitespace=True) - self.flow_level += 1 - self.increase_indent(flow=True) - self.state = self.expect_first_flow_sequence_item - - def expect_first_flow_sequence_item(self): - if isinstance(self.event, SequenceEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - self.write_indicator(']', False) - self.state = self.states.pop() - else: - if self.canonical or self.column > self.best_width: - self.write_indent() - self.states.append(self.expect_flow_sequence_item) - self.expect_node(sequence=True) - - def expect_flow_sequence_item(self): - if isinstance(self.event, SequenceEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - if self.canonical: - self.write_indicator(',', False) - self.write_indent() - self.write_indicator(']', False) - self.state = self.states.pop() - else: - self.write_indicator(',', False) - if self.canonical or self.column > self.best_width: - self.write_indent() - self.states.append(self.expect_flow_sequence_item) - self.expect_node(sequence=True) - - # Flow mapping handlers. - - def expect_flow_mapping(self): - self.write_indicator('{', True, whitespace=True) - self.flow_level += 1 - self.increase_indent(flow=True) - self.state = self.expect_first_flow_mapping_key - - def expect_first_flow_mapping_key(self): - if isinstance(self.event, MappingEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - self.write_indicator('}', False) - self.state = self.states.pop() - else: - if self.canonical or self.column > self.best_width: - self.write_indent() - if not self.canonical and self.check_simple_key(): - self.states.append(self.expect_flow_mapping_simple_value) - self.expect_node(mapping=True, simple_key=True) - else: - self.write_indicator('?', True) - self.states.append(self.expect_flow_mapping_value) - self.expect_node(mapping=True) - - def expect_flow_mapping_key(self): - if isinstance(self.event, MappingEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - if self.canonical: - self.write_indicator(',', False) - self.write_indent() - self.write_indicator('}', False) - self.state = self.states.pop() - else: - self.write_indicator(',', False) - if self.canonical or self.column > self.best_width: - self.write_indent() - if not self.canonical and self.check_simple_key(): - self.states.append(self.expect_flow_mapping_simple_value) - self.expect_node(mapping=True, simple_key=True) - else: - self.write_indicator('?', True) - self.states.append(self.expect_flow_mapping_value) - self.expect_node(mapping=True) - - def expect_flow_mapping_simple_value(self): - self.write_indicator(':', False) - self.states.append(self.expect_flow_mapping_key) - self.expect_node(mapping=True) - - def expect_flow_mapping_value(self): - if self.canonical or self.column > self.best_width: - self.write_indent() - self.write_indicator(':', True) - self.states.append(self.expect_flow_mapping_key) - self.expect_node(mapping=True) - - # Block sequence handlers. - - def expect_block_sequence(self): - indentless = (self.mapping_context and not self.indention) - self.increase_indent(flow=False, indentless=indentless) - self.state = self.expect_first_block_sequence_item - - def expect_first_block_sequence_item(self): - return self.expect_block_sequence_item(first=True) - - def expect_block_sequence_item(self, first=False): - if not first and isinstance(self.event, SequenceEndEvent): - self.indent = self.indents.pop() - self.state = self.states.pop() - else: - self.write_indent() - self.write_indicator('-', True, indention=True) - self.states.append(self.expect_block_sequence_item) - self.expect_node(sequence=True) - - # Block mapping handlers. - - def expect_block_mapping(self): - self.increase_indent(flow=False) - self.state = self.expect_first_block_mapping_key - - def expect_first_block_mapping_key(self): - return self.expect_block_mapping_key(first=True) - - def expect_block_mapping_key(self, first=False): - if not first and isinstance(self.event, MappingEndEvent): - self.indent = self.indents.pop() - self.state = self.states.pop() - else: - self.write_indent() - if self.check_simple_key(): - self.states.append(self.expect_block_mapping_simple_value) - self.expect_node(mapping=True, simple_key=True) - else: - self.write_indicator('?', True, indention=True) - self.states.append(self.expect_block_mapping_value) - self.expect_node(mapping=True) - - def expect_block_mapping_simple_value(self): - self.write_indicator(':', False) - self.states.append(self.expect_block_mapping_key) - self.expect_node(mapping=True) - - def expect_block_mapping_value(self): - self.write_indent() - self.write_indicator(':', True, indention=True) - self.states.append(self.expect_block_mapping_key) - self.expect_node(mapping=True) - - # Checkers. - - def check_empty_sequence(self): - return (isinstance(self.event, SequenceStartEvent) and self.events - and isinstance(self.events[0], SequenceEndEvent)) - - def check_empty_mapping(self): - return (isinstance(self.event, MappingStartEvent) and self.events - and isinstance(self.events[0], MappingEndEvent)) - - def check_empty_document(self): - if not isinstance(self.event, DocumentStartEvent) or not self.events: - return False - event = self.events[0] - return (isinstance(event, ScalarEvent) and event.anchor is None - and event.tag is None and event.implicit and event.value == '') - - def check_simple_key(self): - length = 0 - if isinstance(self.event, NodeEvent) and self.event.anchor is not None: - if self.prepared_anchor is None: - self.prepared_anchor = self.prepare_anchor(self.event.anchor) - length += len(self.prepared_anchor) - if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \ - and self.event.tag is not None: - if self.prepared_tag is None: - self.prepared_tag = self.prepare_tag(self.event.tag) - length += len(self.prepared_tag) - if isinstance(self.event, ScalarEvent): - if self.analysis is None: - self.analysis = self.analyze_scalar(self.event.value) - length += len(self.analysis.scalar) - return (length < 128 and (isinstance(self.event, AliasEvent) - or (isinstance(self.event, ScalarEvent) - and not self.analysis.empty and not self.analysis.multiline) - or self.check_empty_sequence() or self.check_empty_mapping())) - - # Anchor, Tag, and Scalar processors. - - def process_anchor(self, indicator): - if self.event.anchor is None: - self.prepared_anchor = None - return - if self.prepared_anchor is None: - self.prepared_anchor = self.prepare_anchor(self.event.anchor) - if self.prepared_anchor: - self.write_indicator(indicator+self.prepared_anchor, True) - self.prepared_anchor = None - - def process_tag(self): - tag = self.event.tag - if isinstance(self.event, ScalarEvent): - if self.style is None: - self.style = self.choose_scalar_style() - if ((not self.canonical or tag is None) and - ((self.style == '' and self.event.implicit[0]) - or (self.style != '' and self.event.implicit[1]))): - self.prepared_tag = None - return - if self.event.implicit[0] and tag is None: - tag = '!' - self.prepared_tag = None - else: - if (not self.canonical or tag is None) and self.event.implicit: - self.prepared_tag = None - return - if tag is None: - raise EmitterError("tag is not specified") - if self.prepared_tag is None: - self.prepared_tag = self.prepare_tag(tag) - if self.prepared_tag: - self.write_indicator(self.prepared_tag, True) - self.prepared_tag = None - - def choose_scalar_style(self): - if self.analysis is None: - self.analysis = self.analyze_scalar(self.event.value) - if self.event.style == '"' or self.canonical: - return '"' - if not self.event.style and self.event.implicit[0]: - if (not (self.simple_key_context and - (self.analysis.empty or self.analysis.multiline)) - and (self.flow_level and self.analysis.allow_flow_plain - or (not self.flow_level and self.analysis.allow_block_plain))): - return '' - if self.event.style and self.event.style in '|>': - if (not self.flow_level and not self.simple_key_context - and self.analysis.allow_block): - return self.event.style - if not self.event.style or self.event.style == '\'': - if (self.analysis.allow_single_quoted and - not (self.simple_key_context and self.analysis.multiline)): - return '\'' - return '"' - - def process_scalar(self): - if self.analysis is None: - self.analysis = self.analyze_scalar(self.event.value) - if self.style is None: - self.style = self.choose_scalar_style() - split = (not self.simple_key_context) - #if self.analysis.multiline and split \ - # and (not self.style or self.style in '\'\"'): - # self.write_indent() - if self.style == '"': - self.write_double_quoted(self.analysis.scalar, split) - elif self.style == '\'': - self.write_single_quoted(self.analysis.scalar, split) - elif self.style == '>': - self.write_folded(self.analysis.scalar) - elif self.style == '|': - self.write_literal(self.analysis.scalar) - else: - self.write_plain(self.analysis.scalar, split) - self.analysis = None - self.style = None - - # Analyzers. - - def prepare_version(self, version): - major, minor = version - if major != 1: - raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) - return '%d.%d' % (major, minor) - - def prepare_tag_handle(self, handle): - if not handle: - raise EmitterError("tag handle must not be empty") - if handle[0] != '!' or handle[-1] != '!': - raise EmitterError("tag handle must start and end with '!': %r" % handle) - for ch in handle[1:-1]: - if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ - or ch in '-_'): - raise EmitterError("invalid character %r in the tag handle: %r" - % (ch, handle)) - return handle - - def prepare_tag_prefix(self, prefix): - if not prefix: - raise EmitterError("tag prefix must not be empty") - chunks = [] - start = end = 0 - if prefix[0] == '!': - end = 1 - while end < len(prefix): - ch = prefix[end] - if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ - or ch in '-;/?!:@&=+$,_.~*\'()[]': - end += 1 - else: - if start < end: - chunks.append(prefix[start:end]) - start = end = end+1 - data = ch.encode('utf-8') - for ch in data: - chunks.append('%%%02X' % ord(ch)) - if start < end: - chunks.append(prefix[start:end]) - return ''.join(chunks) - - def prepare_tag(self, tag): - if not tag: - raise EmitterError("tag must not be empty") - if tag == '!': - return tag - handle = None - suffix = tag - prefixes = sorted(self.tag_prefixes.keys()) - for prefix in prefixes: - if tag.startswith(prefix) \ - and (prefix == '!' or len(prefix) < len(tag)): - handle = self.tag_prefixes[prefix] - suffix = tag[len(prefix):] - chunks = [] - start = end = 0 - while end < len(suffix): - ch = suffix[end] - if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ - or ch in '-;/?:@&=+$,_.~*\'()[]' \ - or (ch == '!' and handle != '!'): - end += 1 - else: - if start < end: - chunks.append(suffix[start:end]) - start = end = end+1 - data = ch.encode('utf-8') - for ch in data: - chunks.append('%%%02X' % ch) - if start < end: - chunks.append(suffix[start:end]) - suffix_text = ''.join(chunks) - if handle: - return '%s%s' % (handle, suffix_text) - else: - return '!<%s>' % suffix_text - - def prepare_anchor(self, anchor): - if not anchor: - raise EmitterError("anchor must not be empty") - for ch in anchor: - if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ - or ch in '-_'): - raise EmitterError("invalid character %r in the anchor: %r" - % (ch, anchor)) - return anchor - - def analyze_scalar(self, scalar): - - # Empty scalar is a special case. - if not scalar: - return ScalarAnalysis(scalar=scalar, empty=True, multiline=False, - allow_flow_plain=False, allow_block_plain=True, - allow_single_quoted=True, allow_double_quoted=True, - allow_block=False) - - # Indicators and special characters. - block_indicators = False - flow_indicators = False - line_breaks = False - special_characters = False - - # Important whitespace combinations. - leading_space = False - leading_break = False - trailing_space = False - trailing_break = False - break_space = False - space_break = False - - # Check document indicators. - if scalar.startswith('---') or scalar.startswith('...'): - block_indicators = True - flow_indicators = True - - # First character or preceded by a whitespace. - preceded_by_whitespace = True - - # Last character or followed by a whitespace. - followed_by_whitespace = (len(scalar) == 1 or - scalar[1] in '\0 \t\r\n\x85\u2028\u2029') - - # The previous character is a space. - previous_space = False - - # The previous character is a break. - previous_break = False - - index = 0 - while index < len(scalar): - ch = scalar[index] - - # Check for indicators. - if index == 0: - # Leading indicators are special characters. - if ch in '#,[]{}&*!|>\'\"%@`': - flow_indicators = True - block_indicators = True - if ch in '?:': - flow_indicators = True - if followed_by_whitespace: - block_indicators = True - if ch == '-' and followed_by_whitespace: - flow_indicators = True - block_indicators = True - else: - # Some indicators cannot appear within a scalar as well. - if ch in ',?[]{}': - flow_indicators = True - if ch == ':': - flow_indicators = True - if followed_by_whitespace: - block_indicators = True - if ch == '#' and preceded_by_whitespace: - flow_indicators = True - block_indicators = True - - # Check for line breaks, special, and unicode characters. - if ch in '\n\x85\u2028\u2029': - line_breaks = True - if not (ch == '\n' or '\x20' <= ch <= '\x7E'): - if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF' - or '\uE000' <= ch <= '\uFFFD' - or '\U00010000' <= ch < '\U0010ffff') and ch != '\uFEFF': - unicode_characters = True - if not self.allow_unicode: - special_characters = True - else: - special_characters = True - - # Detect important whitespace combinations. - if ch == ' ': - if index == 0: - leading_space = True - if index == len(scalar)-1: - trailing_space = True - if previous_break: - break_space = True - previous_space = True - previous_break = False - elif ch in '\n\x85\u2028\u2029': - if index == 0: - leading_break = True - if index == len(scalar)-1: - trailing_break = True - if previous_space: - space_break = True - previous_space = False - previous_break = True - else: - previous_space = False - previous_break = False - - # Prepare for the next character. - index += 1 - preceded_by_whitespace = (ch in '\0 \t\r\n\x85\u2028\u2029') - followed_by_whitespace = (index+1 >= len(scalar) or - scalar[index+1] in '\0 \t\r\n\x85\u2028\u2029') - - # Let's decide what styles are allowed. - allow_flow_plain = True - allow_block_plain = True - allow_single_quoted = True - allow_double_quoted = True - allow_block = True - - # Leading and trailing whitespaces are bad for plain scalars. - if (leading_space or leading_break - or trailing_space or trailing_break): - allow_flow_plain = allow_block_plain = False - - # We do not permit trailing spaces for block scalars. - if trailing_space: - allow_block = False - - # Spaces at the beginning of a new line are only acceptable for block - # scalars. - if break_space: - allow_flow_plain = allow_block_plain = allow_single_quoted = False - - # Spaces followed by breaks, as well as special character are only - # allowed for double quoted scalars. - if space_break or special_characters: - allow_flow_plain = allow_block_plain = \ - allow_single_quoted = allow_block = False - - # Although the plain scalar writer supports breaks, we never emit - # multiline plain scalars. - if line_breaks: - allow_flow_plain = allow_block_plain = False - - # Flow indicators are forbidden for flow plain scalars. - if flow_indicators: - allow_flow_plain = False - - # Block indicators are forbidden for block plain scalars. - if block_indicators: - allow_block_plain = False - - return ScalarAnalysis(scalar=scalar, - empty=False, multiline=line_breaks, - allow_flow_plain=allow_flow_plain, - allow_block_plain=allow_block_plain, - allow_single_quoted=allow_single_quoted, - allow_double_quoted=allow_double_quoted, - allow_block=allow_block) - - # Writers. - - def flush_stream(self): - if hasattr(self.stream, 'flush'): - self.stream.flush() - - def write_stream_start(self): - # Write BOM if needed. - if self.encoding and self.encoding.startswith('utf-16'): - self.stream.write('\uFEFF'.encode(self.encoding)) - - def write_stream_end(self): - self.flush_stream() - - def write_indicator(self, indicator, need_whitespace, - whitespace=False, indention=False): - if self.whitespace or not need_whitespace: - data = indicator - else: - data = ' '+indicator - self.whitespace = whitespace - self.indention = self.indention and indention - self.column += len(data) - self.open_ended = False - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - - def write_indent(self): - indent = self.indent or 0 - if not self.indention or self.column > indent \ - or (self.column == indent and not self.whitespace): - self.write_line_break() - if self.column < indent: - self.whitespace = True - data = ' '*(indent-self.column) - self.column = indent - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - - def write_line_break(self, data=None): - if data is None: - data = self.best_line_break - self.whitespace = True - self.indention = True - self.line += 1 - self.column = 0 - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - - def write_version_directive(self, version_text): - data = '%%YAML %s' % version_text - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.write_line_break() - - def write_tag_directive(self, handle_text, prefix_text): - data = '%%TAG %s %s' % (handle_text, prefix_text) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.write_line_break() - - # Scalar streams. - - def write_single_quoted(self, text, split=True): - self.write_indicator('\'', True) - spaces = False - breaks = False - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if spaces: - if ch is None or ch != ' ': - if start+1 == end and self.column > self.best_width and split \ - and start != 0 and end != len(text): - self.write_indent() - else: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - elif breaks: - if ch is None or ch not in '\n\x85\u2028\u2029': - if text[start] == '\n': - self.write_line_break() - for br in text[start:end]: - if br == '\n': - self.write_line_break() - else: - self.write_line_break(br) - self.write_indent() - start = end - else: - if ch is None or ch in ' \n\x85\u2028\u2029' or ch == '\'': - if start < end: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - if ch == '\'': - data = '\'\'' - self.column += 2 - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end + 1 - if ch is not None: - spaces = (ch == ' ') - breaks = (ch in '\n\x85\u2028\u2029') - end += 1 - self.write_indicator('\'', False) - - ESCAPE_REPLACEMENTS = { - '\0': '0', - '\x07': 'a', - '\x08': 'b', - '\x09': 't', - '\x0A': 'n', - '\x0B': 'v', - '\x0C': 'f', - '\x0D': 'r', - '\x1B': 'e', - '\"': '\"', - '\\': '\\', - '\x85': 'N', - '\xA0': '_', - '\u2028': 'L', - '\u2029': 'P', - } - - def write_double_quoted(self, text, split=True): - self.write_indicator('"', True) - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if ch is None or ch in '"\\\x85\u2028\u2029\uFEFF' \ - or not ('\x20' <= ch <= '\x7E' - or (self.allow_unicode - and ('\xA0' <= ch <= '\uD7FF' - or '\uE000' <= ch <= '\uFFFD'))): - if start < end: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - if ch is not None: - if ch in self.ESCAPE_REPLACEMENTS: - data = '\\'+self.ESCAPE_REPLACEMENTS[ch] - elif ch <= '\xFF': - data = '\\x%02X' % ord(ch) - elif ch <= '\uFFFF': - data = '\\u%04X' % ord(ch) - else: - data = '\\U%08X' % ord(ch) - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end+1 - if 0 < end < len(text)-1 and (ch == ' ' or start >= end) \ - and self.column+(end-start) > self.best_width and split: - data = text[start:end]+'\\' - if start < end: - start = end - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.write_indent() - self.whitespace = False - self.indention = False - if text[start] == ' ': - data = '\\' - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - end += 1 - self.write_indicator('"', False) - - def determine_block_hints(self, text): - hints = '' - if text: - if text[0] in ' \n\x85\u2028\u2029': - hints += str(self.best_indent) - if text[-1] not in '\n\x85\u2028\u2029': - hints += '-' - elif len(text) == 1 or text[-2] in '\n\x85\u2028\u2029': - hints += '+' - return hints - - def write_folded(self, text): - hints = self.determine_block_hints(text) - self.write_indicator('>'+hints, True) - if hints[-1:] == '+': - self.open_ended = True - self.write_line_break() - leading_space = True - spaces = False - breaks = True - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if breaks: - if ch is None or ch not in '\n\x85\u2028\u2029': - if not leading_space and ch is not None and ch != ' ' \ - and text[start] == '\n': - self.write_line_break() - leading_space = (ch == ' ') - for br in text[start:end]: - if br == '\n': - self.write_line_break() - else: - self.write_line_break(br) - if ch is not None: - self.write_indent() - start = end - elif spaces: - if ch != ' ': - if start+1 == end and self.column > self.best_width: - self.write_indent() - else: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - else: - if ch is None or ch in ' \n\x85\u2028\u2029': - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - if ch is None: - self.write_line_break() - start = end - if ch is not None: - breaks = (ch in '\n\x85\u2028\u2029') - spaces = (ch == ' ') - end += 1 - - def write_literal(self, text): - hints = self.determine_block_hints(text) - self.write_indicator('|'+hints, True) - if hints[-1:] == '+': - self.open_ended = True - self.write_line_break() - breaks = True - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if breaks: - if ch is None or ch not in '\n\x85\u2028\u2029': - for br in text[start:end]: - if br == '\n': - self.write_line_break() - else: - self.write_line_break(br) - if ch is not None: - self.write_indent() - start = end - else: - if ch is None or ch in '\n\x85\u2028\u2029': - data = text[start:end] - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - if ch is None: - self.write_line_break() - start = end - if ch is not None: - breaks = (ch in '\n\x85\u2028\u2029') - end += 1 - - def write_plain(self, text, split=True): - if self.root_context: - self.open_ended = True - if not text: - return - if not self.whitespace: - data = ' ' - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.whitespace = False - self.indention = False - spaces = False - breaks = False - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if spaces: - if ch != ' ': - if start+1 == end and self.column > self.best_width and split: - self.write_indent() - self.whitespace = False - self.indention = False - else: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - elif breaks: - if ch not in '\n\x85\u2028\u2029': - if text[start] == '\n': - self.write_line_break() - for br in text[start:end]: - if br == '\n': - self.write_line_break() - else: - self.write_line_break(br) - self.write_indent() - self.whitespace = False - self.indention = False - start = end - else: - if ch is None or ch in ' \n\x85\u2028\u2029': - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - if ch is not None: - spaces = (ch == ' ') - breaks = (ch in '\n\x85\u2028\u2029') - end += 1 diff --git a/libs/yaml/error.py b/libs/yaml/error.py deleted file mode 100644 index b796b4dc5..000000000 --- a/libs/yaml/error.py +++ /dev/null @@ -1,75 +0,0 @@ - -__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError'] - -class Mark: - - def __init__(self, name, index, line, column, buffer, pointer): - self.name = name - self.index = index - self.line = line - self.column = column - self.buffer = buffer - self.pointer = pointer - - def get_snippet(self, indent=4, max_length=75): - if self.buffer is None: - return None - head = '' - start = self.pointer - while start > 0 and self.buffer[start-1] not in '\0\r\n\x85\u2028\u2029': - start -= 1 - if self.pointer-start > max_length/2-1: - head = ' ... ' - start += 5 - break - tail = '' - end = self.pointer - while end < len(self.buffer) and self.buffer[end] not in '\0\r\n\x85\u2028\u2029': - end += 1 - if end-self.pointer > max_length/2-1: - tail = ' ... ' - end -= 5 - break - snippet = self.buffer[start:end] - return ' '*indent + head + snippet + tail + '\n' \ - + ' '*(indent+self.pointer-start+len(head)) + '^' - - def __str__(self): - snippet = self.get_snippet() - where = " in \"%s\", line %d, column %d" \ - % (self.name, self.line+1, self.column+1) - if snippet is not None: - where += ":\n"+snippet - return where - -class YAMLError(Exception): - pass - -class MarkedYAMLError(YAMLError): - - def __init__(self, context=None, context_mark=None, - problem=None, problem_mark=None, note=None): - self.context = context - self.context_mark = context_mark - self.problem = problem - self.problem_mark = problem_mark - self.note = note - - def __str__(self): - lines = [] - if self.context is not None: - lines.append(self.context) - if self.context_mark is not None \ - and (self.problem is None or self.problem_mark is None - or self.context_mark.name != self.problem_mark.name - or self.context_mark.line != self.problem_mark.line - or self.context_mark.column != self.problem_mark.column): - lines.append(str(self.context_mark)) - if self.problem is not None: - lines.append(self.problem) - if self.problem_mark is not None: - lines.append(str(self.problem_mark)) - if self.note is not None: - lines.append(self.note) - return '\n'.join(lines) - diff --git a/libs/yaml/events.py b/libs/yaml/events.py deleted file mode 100644 index f79ad389c..000000000 --- a/libs/yaml/events.py +++ /dev/null @@ -1,86 +0,0 @@ - -# Abstract classes. - -class Event(object): - def __init__(self, start_mark=None, end_mark=None): - self.start_mark = start_mark - self.end_mark = end_mark - def __repr__(self): - attributes = [key for key in ['anchor', 'tag', 'implicit', 'value'] - if hasattr(self, key)] - arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) - for key in attributes]) - return '%s(%s)' % (self.__class__.__name__, arguments) - -class NodeEvent(Event): - def __init__(self, anchor, start_mark=None, end_mark=None): - self.anchor = anchor - self.start_mark = start_mark - self.end_mark = end_mark - -class CollectionStartEvent(NodeEvent): - def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None, - flow_style=None): - self.anchor = anchor - self.tag = tag - self.implicit = implicit - self.start_mark = start_mark - self.end_mark = end_mark - self.flow_style = flow_style - -class CollectionEndEvent(Event): - pass - -# Implementations. - -class StreamStartEvent(Event): - def __init__(self, start_mark=None, end_mark=None, encoding=None): - self.start_mark = start_mark - self.end_mark = end_mark - self.encoding = encoding - -class StreamEndEvent(Event): - pass - -class DocumentStartEvent(Event): - def __init__(self, start_mark=None, end_mark=None, - explicit=None, version=None, tags=None): - self.start_mark = start_mark - self.end_mark = end_mark - self.explicit = explicit - self.version = version - self.tags = tags - -class DocumentEndEvent(Event): - def __init__(self, start_mark=None, end_mark=None, - explicit=None): - self.start_mark = start_mark - self.end_mark = end_mark - self.explicit = explicit - -class AliasEvent(NodeEvent): - pass - -class ScalarEvent(NodeEvent): - def __init__(self, anchor, tag, implicit, value, - start_mark=None, end_mark=None, style=None): - self.anchor = anchor - self.tag = tag - self.implicit = implicit - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - self.style = style - -class SequenceStartEvent(CollectionStartEvent): - pass - -class SequenceEndEvent(CollectionEndEvent): - pass - -class MappingStartEvent(CollectionStartEvent): - pass - -class MappingEndEvent(CollectionEndEvent): - pass - diff --git a/libs/yaml/loader.py b/libs/yaml/loader.py deleted file mode 100644 index 414cb2c15..000000000 --- a/libs/yaml/loader.py +++ /dev/null @@ -1,63 +0,0 @@ - -__all__ = ['BaseLoader', 'FullLoader', 'SafeLoader', 'Loader', 'UnsafeLoader'] - -from .reader import * -from .scanner import * -from .parser import * -from .composer import * -from .constructor import * -from .resolver import * - -class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - BaseConstructor.__init__(self) - BaseResolver.__init__(self) - -class FullLoader(Reader, Scanner, Parser, Composer, FullConstructor, Resolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - FullConstructor.__init__(self) - Resolver.__init__(self) - -class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - SafeConstructor.__init__(self) - Resolver.__init__(self) - -class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - Constructor.__init__(self) - Resolver.__init__(self) - -# UnsafeLoader is the same as Loader (which is and was always unsafe on -# untrusted input). Use of either Loader or UnsafeLoader should be rare, since -# FullLoad should be able to load almost all YAML safely. Loader is left intact -# to ensure backwards compatability. -class UnsafeLoader(Reader, Scanner, Parser, Composer, Constructor, Resolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - Constructor.__init__(self) - Resolver.__init__(self) diff --git a/libs/yaml/nodes.py b/libs/yaml/nodes.py deleted file mode 100644 index c4f070c41..000000000 --- a/libs/yaml/nodes.py +++ /dev/null @@ -1,49 +0,0 @@ - -class Node(object): - def __init__(self, tag, value, start_mark, end_mark): - self.tag = tag - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - def __repr__(self): - value = self.value - #if isinstance(value, list): - # if len(value) == 0: - # value = '<empty>' - # elif len(value) == 1: - # value = '<1 item>' - # else: - # value = '<%d items>' % len(value) - #else: - # if len(value) > 75: - # value = repr(value[:70]+u' ... ') - # else: - # value = repr(value) - value = repr(value) - return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value) - -class ScalarNode(Node): - id = 'scalar' - def __init__(self, tag, value, - start_mark=None, end_mark=None, style=None): - self.tag = tag - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - self.style = style - -class CollectionNode(Node): - def __init__(self, tag, value, - start_mark=None, end_mark=None, flow_style=None): - self.tag = tag - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - self.flow_style = flow_style - -class SequenceNode(CollectionNode): - id = 'sequence' - -class MappingNode(CollectionNode): - id = 'mapping' - diff --git a/libs/yaml/parser.py b/libs/yaml/parser.py deleted file mode 100644 index 13a5995d2..000000000 --- a/libs/yaml/parser.py +++ /dev/null @@ -1,589 +0,0 @@ - -# The following YAML grammar is LL(1) and is parsed by a recursive descent -# parser. -# -# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END -# implicit_document ::= block_node DOCUMENT-END* -# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* -# block_node_or_indentless_sequence ::= -# ALIAS -# | properties (block_content | indentless_block_sequence)? -# | block_content -# | indentless_block_sequence -# block_node ::= ALIAS -# | properties block_content? -# | block_content -# flow_node ::= ALIAS -# | properties flow_content? -# | flow_content -# properties ::= TAG ANCHOR? | ANCHOR TAG? -# block_content ::= block_collection | flow_collection | SCALAR -# flow_content ::= flow_collection | SCALAR -# block_collection ::= block_sequence | block_mapping -# flow_collection ::= flow_sequence | flow_mapping -# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END -# indentless_sequence ::= (BLOCK-ENTRY block_node?)+ -# block_mapping ::= BLOCK-MAPPING_START -# ((KEY block_node_or_indentless_sequence?)? -# (VALUE block_node_or_indentless_sequence?)?)* -# BLOCK-END -# flow_sequence ::= FLOW-SEQUENCE-START -# (flow_sequence_entry FLOW-ENTRY)* -# flow_sequence_entry? -# FLOW-SEQUENCE-END -# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? -# flow_mapping ::= FLOW-MAPPING-START -# (flow_mapping_entry FLOW-ENTRY)* -# flow_mapping_entry? -# FLOW-MAPPING-END -# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? -# -# FIRST sets: -# -# stream: { STREAM-START } -# explicit_document: { DIRECTIVE DOCUMENT-START } -# implicit_document: FIRST(block_node) -# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } -# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } -# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } -# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } -# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } -# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } -# block_sequence: { BLOCK-SEQUENCE-START } -# block_mapping: { BLOCK-MAPPING-START } -# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } -# indentless_sequence: { ENTRY } -# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } -# flow_sequence: { FLOW-SEQUENCE-START } -# flow_mapping: { FLOW-MAPPING-START } -# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } -# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } - -__all__ = ['Parser', 'ParserError'] - -from .error import MarkedYAMLError -from .tokens import * -from .events import * -from .scanner import * - -class ParserError(MarkedYAMLError): - pass - -class Parser: - # Since writing a recursive-descendant parser is a straightforward task, we - # do not give many comments here. - - DEFAULT_TAGS = { - '!': '!', - '!!': 'tag:yaml.org,2002:', - } - - def __init__(self): - self.current_event = None - self.yaml_version = None - self.tag_handles = {} - self.states = [] - self.marks = [] - self.state = self.parse_stream_start - - def dispose(self): - # Reset the state attributes (to clear self-references) - self.states = [] - self.state = None - - def check_event(self, *choices): - # Check the type of the next event. - if self.current_event is None: - if self.state: - self.current_event = self.state() - if self.current_event is not None: - if not choices: - return True - for choice in choices: - if isinstance(self.current_event, choice): - return True - return False - - def peek_event(self): - # Get the next event. - if self.current_event is None: - if self.state: - self.current_event = self.state() - return self.current_event - - def get_event(self): - # Get the next event and proceed further. - if self.current_event is None: - if self.state: - self.current_event = self.state() - value = self.current_event - self.current_event = None - return value - - # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END - # implicit_document ::= block_node DOCUMENT-END* - # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* - - def parse_stream_start(self): - - # Parse the stream start. - token = self.get_token() - event = StreamStartEvent(token.start_mark, token.end_mark, - encoding=token.encoding) - - # Prepare the next state. - self.state = self.parse_implicit_document_start - - return event - - def parse_implicit_document_start(self): - - # Parse an implicit document. - if not self.check_token(DirectiveToken, DocumentStartToken, - StreamEndToken): - self.tag_handles = self.DEFAULT_TAGS - token = self.peek_token() - start_mark = end_mark = token.start_mark - event = DocumentStartEvent(start_mark, end_mark, - explicit=False) - - # Prepare the next state. - self.states.append(self.parse_document_end) - self.state = self.parse_block_node - - return event - - else: - return self.parse_document_start() - - def parse_document_start(self): - - # Parse any extra document end indicators. - while self.check_token(DocumentEndToken): - self.get_token() - - # Parse an explicit document. - if not self.check_token(StreamEndToken): - token = self.peek_token() - start_mark = token.start_mark - version, tags = self.process_directives() - if not self.check_token(DocumentStartToken): - raise ParserError(None, None, - "expected '<document start>', but found %r" - % self.peek_token().id, - self.peek_token().start_mark) - token = self.get_token() - end_mark = token.end_mark - event = DocumentStartEvent(start_mark, end_mark, - explicit=True, version=version, tags=tags) - self.states.append(self.parse_document_end) - self.state = self.parse_document_content - else: - # Parse the end of the stream. - token = self.get_token() - event = StreamEndEvent(token.start_mark, token.end_mark) - assert not self.states - assert not self.marks - self.state = None - return event - - def parse_document_end(self): - - # Parse the document end. - token = self.peek_token() - start_mark = end_mark = token.start_mark - explicit = False - if self.check_token(DocumentEndToken): - token = self.get_token() - end_mark = token.end_mark - explicit = True - event = DocumentEndEvent(start_mark, end_mark, - explicit=explicit) - - # Prepare the next state. - self.state = self.parse_document_start - - return event - - def parse_document_content(self): - if self.check_token(DirectiveToken, - DocumentStartToken, DocumentEndToken, StreamEndToken): - event = self.process_empty_scalar(self.peek_token().start_mark) - self.state = self.states.pop() - return event - else: - return self.parse_block_node() - - def process_directives(self): - self.yaml_version = None - self.tag_handles = {} - while self.check_token(DirectiveToken): - token = self.get_token() - if token.name == 'YAML': - if self.yaml_version is not None: - raise ParserError(None, None, - "found duplicate YAML directive", token.start_mark) - major, minor = token.value - if major != 1: - raise ParserError(None, None, - "found incompatible YAML document (version 1.* is required)", - token.start_mark) - self.yaml_version = token.value - elif token.name == 'TAG': - handle, prefix = token.value - if handle in self.tag_handles: - raise ParserError(None, None, - "duplicate tag handle %r" % handle, - token.start_mark) - self.tag_handles[handle] = prefix - if self.tag_handles: - value = self.yaml_version, self.tag_handles.copy() - else: - value = self.yaml_version, None - for key in self.DEFAULT_TAGS: - if key not in self.tag_handles: - self.tag_handles[key] = self.DEFAULT_TAGS[key] - return value - - # block_node_or_indentless_sequence ::= ALIAS - # | properties (block_content | indentless_block_sequence)? - # | block_content - # | indentless_block_sequence - # block_node ::= ALIAS - # | properties block_content? - # | block_content - # flow_node ::= ALIAS - # | properties flow_content? - # | flow_content - # properties ::= TAG ANCHOR? | ANCHOR TAG? - # block_content ::= block_collection | flow_collection | SCALAR - # flow_content ::= flow_collection | SCALAR - # block_collection ::= block_sequence | block_mapping - # flow_collection ::= flow_sequence | flow_mapping - - def parse_block_node(self): - return self.parse_node(block=True) - - def parse_flow_node(self): - return self.parse_node() - - def parse_block_node_or_indentless_sequence(self): - return self.parse_node(block=True, indentless_sequence=True) - - def parse_node(self, block=False, indentless_sequence=False): - if self.check_token(AliasToken): - token = self.get_token() - event = AliasEvent(token.value, token.start_mark, token.end_mark) - self.state = self.states.pop() - else: - anchor = None - tag = None - start_mark = end_mark = tag_mark = None - if self.check_token(AnchorToken): - token = self.get_token() - start_mark = token.start_mark - end_mark = token.end_mark - anchor = token.value - if self.check_token(TagToken): - token = self.get_token() - tag_mark = token.start_mark - end_mark = token.end_mark - tag = token.value - elif self.check_token(TagToken): - token = self.get_token() - start_mark = tag_mark = token.start_mark - end_mark = token.end_mark - tag = token.value - if self.check_token(AnchorToken): - token = self.get_token() - end_mark = token.end_mark - anchor = token.value - if tag is not None: - handle, suffix = tag - if handle is not None: - if handle not in self.tag_handles: - raise ParserError("while parsing a node", start_mark, - "found undefined tag handle %r" % handle, - tag_mark) - tag = self.tag_handles[handle]+suffix - else: - tag = suffix - #if tag == '!': - # raise ParserError("while parsing a node", start_mark, - # "found non-specific tag '!'", tag_mark, - # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.") - if start_mark is None: - start_mark = end_mark = self.peek_token().start_mark - event = None - implicit = (tag is None or tag == '!') - if indentless_sequence and self.check_token(BlockEntryToken): - end_mark = self.peek_token().end_mark - event = SequenceStartEvent(anchor, tag, implicit, - start_mark, end_mark) - self.state = self.parse_indentless_sequence_entry - else: - if self.check_token(ScalarToken): - token = self.get_token() - end_mark = token.end_mark - if (token.plain and tag is None) or tag == '!': - implicit = (True, False) - elif tag is None: - implicit = (False, True) - else: - implicit = (False, False) - event = ScalarEvent(anchor, tag, implicit, token.value, - start_mark, end_mark, style=token.style) - self.state = self.states.pop() - elif self.check_token(FlowSequenceStartToken): - end_mark = self.peek_token().end_mark - event = SequenceStartEvent(anchor, tag, implicit, - start_mark, end_mark, flow_style=True) - self.state = self.parse_flow_sequence_first_entry - elif self.check_token(FlowMappingStartToken): - end_mark = self.peek_token().end_mark - event = MappingStartEvent(anchor, tag, implicit, - start_mark, end_mark, flow_style=True) - self.state = self.parse_flow_mapping_first_key - elif block and self.check_token(BlockSequenceStartToken): - end_mark = self.peek_token().start_mark - event = SequenceStartEvent(anchor, tag, implicit, - start_mark, end_mark, flow_style=False) - self.state = self.parse_block_sequence_first_entry - elif block and self.check_token(BlockMappingStartToken): - end_mark = self.peek_token().start_mark - event = MappingStartEvent(anchor, tag, implicit, - start_mark, end_mark, flow_style=False) - self.state = self.parse_block_mapping_first_key - elif anchor is not None or tag is not None: - # Empty scalars are allowed even if a tag or an anchor is - # specified. - event = ScalarEvent(anchor, tag, (implicit, False), '', - start_mark, end_mark) - self.state = self.states.pop() - else: - if block: - node = 'block' - else: - node = 'flow' - token = self.peek_token() - raise ParserError("while parsing a %s node" % node, start_mark, - "expected the node content, but found %r" % token.id, - token.start_mark) - return event - - # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END - - def parse_block_sequence_first_entry(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_block_sequence_entry() - - def parse_block_sequence_entry(self): - if self.check_token(BlockEntryToken): - token = self.get_token() - if not self.check_token(BlockEntryToken, BlockEndToken): - self.states.append(self.parse_block_sequence_entry) - return self.parse_block_node() - else: - self.state = self.parse_block_sequence_entry - return self.process_empty_scalar(token.end_mark) - if not self.check_token(BlockEndToken): - token = self.peek_token() - raise ParserError("while parsing a block collection", self.marks[-1], - "expected <block end>, but found %r" % token.id, token.start_mark) - token = self.get_token() - event = SequenceEndEvent(token.start_mark, token.end_mark) - self.state = self.states.pop() - self.marks.pop() - return event - - # indentless_sequence ::= (BLOCK-ENTRY block_node?)+ - - def parse_indentless_sequence_entry(self): - if self.check_token(BlockEntryToken): - token = self.get_token() - if not self.check_token(BlockEntryToken, - KeyToken, ValueToken, BlockEndToken): - self.states.append(self.parse_indentless_sequence_entry) - return self.parse_block_node() - else: - self.state = self.parse_indentless_sequence_entry - return self.process_empty_scalar(token.end_mark) - token = self.peek_token() - event = SequenceEndEvent(token.start_mark, token.start_mark) - self.state = self.states.pop() - return event - - # block_mapping ::= BLOCK-MAPPING_START - # ((KEY block_node_or_indentless_sequence?)? - # (VALUE block_node_or_indentless_sequence?)?)* - # BLOCK-END - - def parse_block_mapping_first_key(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_block_mapping_key() - - def parse_block_mapping_key(self): - if self.check_token(KeyToken): - token = self.get_token() - if not self.check_token(KeyToken, ValueToken, BlockEndToken): - self.states.append(self.parse_block_mapping_value) - return self.parse_block_node_or_indentless_sequence() - else: - self.state = self.parse_block_mapping_value - return self.process_empty_scalar(token.end_mark) - if not self.check_token(BlockEndToken): - token = self.peek_token() - raise ParserError("while parsing a block mapping", self.marks[-1], - "expected <block end>, but found %r" % token.id, token.start_mark) - token = self.get_token() - event = MappingEndEvent(token.start_mark, token.end_mark) - self.state = self.states.pop() - self.marks.pop() - return event - - def parse_block_mapping_value(self): - if self.check_token(ValueToken): - token = self.get_token() - if not self.check_token(KeyToken, ValueToken, BlockEndToken): - self.states.append(self.parse_block_mapping_key) - return self.parse_block_node_or_indentless_sequence() - else: - self.state = self.parse_block_mapping_key - return self.process_empty_scalar(token.end_mark) - else: - self.state = self.parse_block_mapping_key - token = self.peek_token() - return self.process_empty_scalar(token.start_mark) - - # flow_sequence ::= FLOW-SEQUENCE-START - # (flow_sequence_entry FLOW-ENTRY)* - # flow_sequence_entry? - # FLOW-SEQUENCE-END - # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? - # - # Note that while production rules for both flow_sequence_entry and - # flow_mapping_entry are equal, their interpretations are different. - # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` - # generate an inline mapping (set syntax). - - def parse_flow_sequence_first_entry(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_flow_sequence_entry(first=True) - - def parse_flow_sequence_entry(self, first=False): - if not self.check_token(FlowSequenceEndToken): - if not first: - if self.check_token(FlowEntryToken): - self.get_token() - else: - token = self.peek_token() - raise ParserError("while parsing a flow sequence", self.marks[-1], - "expected ',' or ']', but got %r" % token.id, token.start_mark) - - if self.check_token(KeyToken): - token = self.peek_token() - event = MappingStartEvent(None, None, True, - token.start_mark, token.end_mark, - flow_style=True) - self.state = self.parse_flow_sequence_entry_mapping_key - return event - elif not self.check_token(FlowSequenceEndToken): - self.states.append(self.parse_flow_sequence_entry) - return self.parse_flow_node() - token = self.get_token() - event = SequenceEndEvent(token.start_mark, token.end_mark) - self.state = self.states.pop() - self.marks.pop() - return event - - def parse_flow_sequence_entry_mapping_key(self): - token = self.get_token() - if not self.check_token(ValueToken, - FlowEntryToken, FlowSequenceEndToken): - self.states.append(self.parse_flow_sequence_entry_mapping_value) - return self.parse_flow_node() - else: - self.state = self.parse_flow_sequence_entry_mapping_value - return self.process_empty_scalar(token.end_mark) - - def parse_flow_sequence_entry_mapping_value(self): - if self.check_token(ValueToken): - token = self.get_token() - if not self.check_token(FlowEntryToken, FlowSequenceEndToken): - self.states.append(self.parse_flow_sequence_entry_mapping_end) - return self.parse_flow_node() - else: - self.state = self.parse_flow_sequence_entry_mapping_end - return self.process_empty_scalar(token.end_mark) - else: - self.state = self.parse_flow_sequence_entry_mapping_end - token = self.peek_token() - return self.process_empty_scalar(token.start_mark) - - def parse_flow_sequence_entry_mapping_end(self): - self.state = self.parse_flow_sequence_entry - token = self.peek_token() - return MappingEndEvent(token.start_mark, token.start_mark) - - # flow_mapping ::= FLOW-MAPPING-START - # (flow_mapping_entry FLOW-ENTRY)* - # flow_mapping_entry? - # FLOW-MAPPING-END - # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? - - def parse_flow_mapping_first_key(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_flow_mapping_key(first=True) - - def parse_flow_mapping_key(self, first=False): - if not self.check_token(FlowMappingEndToken): - if not first: - if self.check_token(FlowEntryToken): - self.get_token() - else: - token = self.peek_token() - raise ParserError("while parsing a flow mapping", self.marks[-1], - "expected ',' or '}', but got %r" % token.id, token.start_mark) - if self.check_token(KeyToken): - token = self.get_token() - if not self.check_token(ValueToken, - FlowEntryToken, FlowMappingEndToken): - self.states.append(self.parse_flow_mapping_value) - return self.parse_flow_node() - else: - self.state = self.parse_flow_mapping_value - return self.process_empty_scalar(token.end_mark) - elif not self.check_token(FlowMappingEndToken): - self.states.append(self.parse_flow_mapping_empty_value) - return self.parse_flow_node() - token = self.get_token() - event = MappingEndEvent(token.start_mark, token.end_mark) - self.state = self.states.pop() - self.marks.pop() - return event - - def parse_flow_mapping_value(self): - if self.check_token(ValueToken): - token = self.get_token() - if not self.check_token(FlowEntryToken, FlowMappingEndToken): - self.states.append(self.parse_flow_mapping_key) - return self.parse_flow_node() - else: - self.state = self.parse_flow_mapping_key - return self.process_empty_scalar(token.end_mark) - else: - self.state = self.parse_flow_mapping_key - token = self.peek_token() - return self.process_empty_scalar(token.start_mark) - - def parse_flow_mapping_empty_value(self): - self.state = self.parse_flow_mapping_key - return self.process_empty_scalar(self.peek_token().start_mark) - - def process_empty_scalar(self, mark): - return ScalarEvent(None, None, (True, False), '', mark, mark) - diff --git a/libs/yaml/reader.py b/libs/yaml/reader.py deleted file mode 100644 index 774b0219b..000000000 --- a/libs/yaml/reader.py +++ /dev/null @@ -1,185 +0,0 @@ -# This module contains abstractions for the input stream. You don't have to -# looks further, there are no pretty code. -# -# We define two classes here. -# -# Mark(source, line, column) -# It's just a record and its only use is producing nice error messages. -# Parser does not use it for any other purposes. -# -# Reader(source, data) -# Reader determines the encoding of `data` and converts it to unicode. -# Reader provides the following methods and attributes: -# reader.peek(length=1) - return the next `length` characters -# reader.forward(length=1) - move the current position to `length` characters. -# reader.index - the number of the current character. -# reader.line, stream.column - the line and the column of the current character. - -__all__ = ['Reader', 'ReaderError'] - -from .error import YAMLError, Mark - -import codecs, re - -class ReaderError(YAMLError): - - def __init__(self, name, position, character, encoding, reason): - self.name = name - self.character = character - self.position = position - self.encoding = encoding - self.reason = reason - - def __str__(self): - if isinstance(self.character, bytes): - return "'%s' codec can't decode byte #x%02x: %s\n" \ - " in \"%s\", position %d" \ - % (self.encoding, ord(self.character), self.reason, - self.name, self.position) - else: - return "unacceptable character #x%04x: %s\n" \ - " in \"%s\", position %d" \ - % (self.character, self.reason, - self.name, self.position) - -class Reader(object): - # Reader: - # - determines the data encoding and converts it to a unicode string, - # - checks if characters are in allowed range, - # - adds '\0' to the end. - - # Reader accepts - # - a `bytes` object, - # - a `str` object, - # - a file-like object with its `read` method returning `str`, - # - a file-like object with its `read` method returning `unicode`. - - # Yeah, it's ugly and slow. - - def __init__(self, stream): - self.name = None - self.stream = None - self.stream_pointer = 0 - self.eof = True - self.buffer = '' - self.pointer = 0 - self.raw_buffer = None - self.raw_decode = None - self.encoding = None - self.index = 0 - self.line = 0 - self.column = 0 - if isinstance(stream, str): - self.name = "<unicode string>" - self.check_printable(stream) - self.buffer = stream+'\0' - elif isinstance(stream, bytes): - self.name = "<byte string>" - self.raw_buffer = stream - self.determine_encoding() - else: - self.stream = stream - self.name = getattr(stream, 'name', "<file>") - self.eof = False - self.raw_buffer = None - self.determine_encoding() - - def peek(self, index=0): - try: - return self.buffer[self.pointer+index] - except IndexError: - self.update(index+1) - return self.buffer[self.pointer+index] - - def prefix(self, length=1): - if self.pointer+length >= len(self.buffer): - self.update(length) - return self.buffer[self.pointer:self.pointer+length] - - def forward(self, length=1): - if self.pointer+length+1 >= len(self.buffer): - self.update(length+1) - while length: - ch = self.buffer[self.pointer] - self.pointer += 1 - self.index += 1 - if ch in '\n\x85\u2028\u2029' \ - or (ch == '\r' and self.buffer[self.pointer] != '\n'): - self.line += 1 - self.column = 0 - elif ch != '\uFEFF': - self.column += 1 - length -= 1 - - def get_mark(self): - if self.stream is None: - return Mark(self.name, self.index, self.line, self.column, - self.buffer, self.pointer) - else: - return Mark(self.name, self.index, self.line, self.column, - None, None) - - def determine_encoding(self): - while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2): - self.update_raw() - if isinstance(self.raw_buffer, bytes): - if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): - self.raw_decode = codecs.utf_16_le_decode - self.encoding = 'utf-16-le' - elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): - self.raw_decode = codecs.utf_16_be_decode - self.encoding = 'utf-16-be' - else: - self.raw_decode = codecs.utf_8_decode - self.encoding = 'utf-8' - self.update(1) - - NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]') - def check_printable(self, data): - match = self.NON_PRINTABLE.search(data) - if match: - character = match.group() - position = self.index+(len(self.buffer)-self.pointer)+match.start() - raise ReaderError(self.name, position, ord(character), - 'unicode', "special characters are not allowed") - - def update(self, length): - if self.raw_buffer is None: - return - self.buffer = self.buffer[self.pointer:] - self.pointer = 0 - while len(self.buffer) < length: - if not self.eof: - self.update_raw() - if self.raw_decode is not None: - try: - data, converted = self.raw_decode(self.raw_buffer, - 'strict', self.eof) - except UnicodeDecodeError as exc: - character = self.raw_buffer[exc.start] - if self.stream is not None: - position = self.stream_pointer-len(self.raw_buffer)+exc.start - else: - position = exc.start - raise ReaderError(self.name, position, character, - exc.encoding, exc.reason) - else: - data = self.raw_buffer - converted = len(data) - self.check_printable(data) - self.buffer += data - self.raw_buffer = self.raw_buffer[converted:] - if self.eof: - self.buffer += '\0' - self.raw_buffer = None - break - - def update_raw(self, size=4096): - data = self.stream.read(size) - if self.raw_buffer is None: - self.raw_buffer = data - else: - self.raw_buffer += data - self.stream_pointer += len(data) - if not data: - self.eof = True diff --git a/libs/yaml/representer.py b/libs/yaml/representer.py deleted file mode 100644 index dd144017b..000000000 --- a/libs/yaml/representer.py +++ /dev/null @@ -1,389 +0,0 @@ - -__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', - 'RepresenterError'] - -from .error import * -from .nodes import * - -import datetime, sys, copyreg, types, base64, collections - -class RepresenterError(YAMLError): - pass - -class BaseRepresenter: - - yaml_representers = {} - yaml_multi_representers = {} - - def __init__(self, default_style=None, default_flow_style=False, sort_keys=True): - self.default_style = default_style - self.sort_keys = sort_keys - self.default_flow_style = default_flow_style - self.represented_objects = {} - self.object_keeper = [] - self.alias_key = None - - def represent(self, data): - node = self.represent_data(data) - self.serialize(node) - self.represented_objects = {} - self.object_keeper = [] - self.alias_key = None - - def represent_data(self, data): - if self.ignore_aliases(data): - self.alias_key = None - else: - self.alias_key = id(data) - if self.alias_key is not None: - if self.alias_key in self.represented_objects: - node = self.represented_objects[self.alias_key] - #if node is None: - # raise RepresenterError("recursive objects are not allowed: %r" % data) - return node - #self.represented_objects[alias_key] = None - self.object_keeper.append(data) - data_types = type(data).__mro__ - if data_types[0] in self.yaml_representers: - node = self.yaml_representers[data_types[0]](self, data) - else: - for data_type in data_types: - if data_type in self.yaml_multi_representers: - node = self.yaml_multi_representers[data_type](self, data) - break - else: - if None in self.yaml_multi_representers: - node = self.yaml_multi_representers[None](self, data) - elif None in self.yaml_representers: - node = self.yaml_representers[None](self, data) - else: - node = ScalarNode(None, str(data)) - #if alias_key is not None: - # self.represented_objects[alias_key] = node - return node - - @classmethod - def add_representer(cls, data_type, representer): - if not 'yaml_representers' in cls.__dict__: - cls.yaml_representers = cls.yaml_representers.copy() - cls.yaml_representers[data_type] = representer - - @classmethod - def add_multi_representer(cls, data_type, representer): - if not 'yaml_multi_representers' in cls.__dict__: - cls.yaml_multi_representers = cls.yaml_multi_representers.copy() - cls.yaml_multi_representers[data_type] = representer - - def represent_scalar(self, tag, value, style=None): - if style is None: - style = self.default_style - node = ScalarNode(tag, value, style=style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - return node - - def represent_sequence(self, tag, sequence, flow_style=None): - value = [] - node = SequenceNode(tag, value, flow_style=flow_style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - best_style = True - for item in sequence: - node_item = self.represent_data(item) - if not (isinstance(node_item, ScalarNode) and not node_item.style): - best_style = False - value.append(node_item) - if flow_style is None: - if self.default_flow_style is not None: - node.flow_style = self.default_flow_style - else: - node.flow_style = best_style - return node - - def represent_mapping(self, tag, mapping, flow_style=None): - value = [] - node = MappingNode(tag, value, flow_style=flow_style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - best_style = True - if hasattr(mapping, 'items'): - mapping = list(mapping.items()) - if self.sort_keys: - try: - mapping = sorted(mapping) - except TypeError: - pass - for item_key, item_value in mapping: - node_key = self.represent_data(item_key) - node_value = self.represent_data(item_value) - if not (isinstance(node_key, ScalarNode) and not node_key.style): - best_style = False - if not (isinstance(node_value, ScalarNode) and not node_value.style): - best_style = False - value.append((node_key, node_value)) - if flow_style is None: - if self.default_flow_style is not None: - node.flow_style = self.default_flow_style - else: - node.flow_style = best_style - return node - - def ignore_aliases(self, data): - return False - -class SafeRepresenter(BaseRepresenter): - - def ignore_aliases(self, data): - if data is None: - return True - if isinstance(data, tuple) and data == (): - return True - if isinstance(data, (str, bytes, bool, int, float)): - return True - - def represent_none(self, data): - return self.represent_scalar('tag:yaml.org,2002:null', 'null') - - def represent_str(self, data): - return self.represent_scalar('tag:yaml.org,2002:str', data) - - def represent_binary(self, data): - if hasattr(base64, 'encodebytes'): - data = base64.encodebytes(data).decode('ascii') - else: - data = base64.encodestring(data).decode('ascii') - return self.represent_scalar('tag:yaml.org,2002:binary', data, style='|') - - def represent_bool(self, data): - if data: - value = 'true' - else: - value = 'false' - return self.represent_scalar('tag:yaml.org,2002:bool', value) - - def represent_int(self, data): - return self.represent_scalar('tag:yaml.org,2002:int', str(data)) - - inf_value = 1e300 - while repr(inf_value) != repr(inf_value*inf_value): - inf_value *= inf_value - - def represent_float(self, data): - if data != data or (data == 0.0 and data == 1.0): - value = '.nan' - elif data == self.inf_value: - value = '.inf' - elif data == -self.inf_value: - value = '-.inf' - else: - value = repr(data).lower() - # Note that in some cases `repr(data)` represents a float number - # without the decimal parts. For instance: - # >>> repr(1e17) - # '1e17' - # Unfortunately, this is not a valid float representation according - # to the definition of the `!!float` tag. We fix this by adding - # '.0' before the 'e' symbol. - if '.' not in value and 'e' in value: - value = value.replace('e', '.0e', 1) - return self.represent_scalar('tag:yaml.org,2002:float', value) - - def represent_list(self, data): - #pairs = (len(data) > 0 and isinstance(data, list)) - #if pairs: - # for item in data: - # if not isinstance(item, tuple) or len(item) != 2: - # pairs = False - # break - #if not pairs: - return self.represent_sequence('tag:yaml.org,2002:seq', data) - #value = [] - #for item_key, item_value in data: - # value.append(self.represent_mapping(u'tag:yaml.org,2002:map', - # [(item_key, item_value)])) - #return SequenceNode(u'tag:yaml.org,2002:pairs', value) - - def represent_dict(self, data): - return self.represent_mapping('tag:yaml.org,2002:map', data) - - def represent_set(self, data): - value = {} - for key in data: - value[key] = None - return self.represent_mapping('tag:yaml.org,2002:set', value) - - def represent_date(self, data): - value = data.isoformat() - return self.represent_scalar('tag:yaml.org,2002:timestamp', value) - - def represent_datetime(self, data): - value = data.isoformat(' ') - return self.represent_scalar('tag:yaml.org,2002:timestamp', value) - - def represent_yaml_object(self, tag, data, cls, flow_style=None): - if hasattr(data, '__getstate__'): - state = data.__getstate__() - else: - state = data.__dict__.copy() - return self.represent_mapping(tag, state, flow_style=flow_style) - - def represent_undefined(self, data): - raise RepresenterError("cannot represent an object", data) - -SafeRepresenter.add_representer(type(None), - SafeRepresenter.represent_none) - -SafeRepresenter.add_representer(str, - SafeRepresenter.represent_str) - -SafeRepresenter.add_representer(bytes, - SafeRepresenter.represent_binary) - -SafeRepresenter.add_representer(bool, - SafeRepresenter.represent_bool) - -SafeRepresenter.add_representer(int, - SafeRepresenter.represent_int) - -SafeRepresenter.add_representer(float, - SafeRepresenter.represent_float) - -SafeRepresenter.add_representer(list, - SafeRepresenter.represent_list) - -SafeRepresenter.add_representer(tuple, - SafeRepresenter.represent_list) - -SafeRepresenter.add_representer(dict, - SafeRepresenter.represent_dict) - -SafeRepresenter.add_representer(set, - SafeRepresenter.represent_set) - -SafeRepresenter.add_representer(datetime.date, - SafeRepresenter.represent_date) - -SafeRepresenter.add_representer(datetime.datetime, - SafeRepresenter.represent_datetime) - -SafeRepresenter.add_representer(None, - SafeRepresenter.represent_undefined) - -class Representer(SafeRepresenter): - - def represent_complex(self, data): - if data.imag == 0.0: - data = '%r' % data.real - elif data.real == 0.0: - data = '%rj' % data.imag - elif data.imag > 0: - data = '%r+%rj' % (data.real, data.imag) - else: - data = '%r%rj' % (data.real, data.imag) - return self.represent_scalar('tag:yaml.org,2002:python/complex', data) - - def represent_tuple(self, data): - return self.represent_sequence('tag:yaml.org,2002:python/tuple', data) - - def represent_name(self, data): - name = '%s.%s' % (data.__module__, data.__name__) - return self.represent_scalar('tag:yaml.org,2002:python/name:'+name, '') - - def represent_module(self, data): - return self.represent_scalar( - 'tag:yaml.org,2002:python/module:'+data.__name__, '') - - def represent_object(self, data): - # We use __reduce__ API to save the data. data.__reduce__ returns - # a tuple of length 2-5: - # (function, args, state, listitems, dictitems) - - # For reconstructing, we calls function(*args), then set its state, - # listitems, and dictitems if they are not None. - - # A special case is when function.__name__ == '__newobj__'. In this - # case we create the object with args[0].__new__(*args). - - # Another special case is when __reduce__ returns a string - we don't - # support it. - - # We produce a !!python/object, !!python/object/new or - # !!python/object/apply node. - - cls = type(data) - if cls in copyreg.dispatch_table: - reduce = copyreg.dispatch_table[cls](data) - elif hasattr(data, '__reduce_ex__'): - reduce = data.__reduce_ex__(2) - elif hasattr(data, '__reduce__'): - reduce = data.__reduce__() - else: - raise RepresenterError("cannot represent an object", data) - reduce = (list(reduce)+[None]*5)[:5] - function, args, state, listitems, dictitems = reduce - args = list(args) - if state is None: - state = {} - if listitems is not None: - listitems = list(listitems) - if dictitems is not None: - dictitems = dict(dictitems) - if function.__name__ == '__newobj__': - function = args[0] - args = args[1:] - tag = 'tag:yaml.org,2002:python/object/new:' - newobj = True - else: - tag = 'tag:yaml.org,2002:python/object/apply:' - newobj = False - function_name = '%s.%s' % (function.__module__, function.__name__) - if not args and not listitems and not dictitems \ - and isinstance(state, dict) and newobj: - return self.represent_mapping( - 'tag:yaml.org,2002:python/object:'+function_name, state) - if not listitems and not dictitems \ - and isinstance(state, dict) and not state: - return self.represent_sequence(tag+function_name, args) - value = {} - if args: - value['args'] = args - if state or not isinstance(state, dict): - value['state'] = state - if listitems: - value['listitems'] = listitems - if dictitems: - value['dictitems'] = dictitems - return self.represent_mapping(tag+function_name, value) - - def represent_ordered_dict(self, data): - # Provide uniform representation across different Python versions. - data_type = type(data) - tag = 'tag:yaml.org,2002:python/object/apply:%s.%s' \ - % (data_type.__module__, data_type.__name__) - items = [[key, value] for key, value in data.items()] - return self.represent_sequence(tag, [items]) - -Representer.add_representer(complex, - Representer.represent_complex) - -Representer.add_representer(tuple, - Representer.represent_tuple) - -Representer.add_representer(type, - Representer.represent_name) - -Representer.add_representer(collections.OrderedDict, - Representer.represent_ordered_dict) - -Representer.add_representer(types.FunctionType, - Representer.represent_name) - -Representer.add_representer(types.BuiltinFunctionType, - Representer.represent_name) - -Representer.add_representer(types.ModuleType, - Representer.represent_module) - -Representer.add_multi_representer(object, - Representer.represent_object) - diff --git a/libs/yaml/resolver.py b/libs/yaml/resolver.py deleted file mode 100644 index 02b82e73e..000000000 --- a/libs/yaml/resolver.py +++ /dev/null @@ -1,227 +0,0 @@ - -__all__ = ['BaseResolver', 'Resolver'] - -from .error import * -from .nodes import * - -import re - -class ResolverError(YAMLError): - pass - -class BaseResolver: - - DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str' - DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq' - DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map' - - yaml_implicit_resolvers = {} - yaml_path_resolvers = {} - - def __init__(self): - self.resolver_exact_paths = [] - self.resolver_prefix_paths = [] - - @classmethod - def add_implicit_resolver(cls, tag, regexp, first): - if not 'yaml_implicit_resolvers' in cls.__dict__: - implicit_resolvers = {} - for key in cls.yaml_implicit_resolvers: - implicit_resolvers[key] = cls.yaml_implicit_resolvers[key][:] - cls.yaml_implicit_resolvers = implicit_resolvers - if first is None: - first = [None] - for ch in first: - cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp)) - - @classmethod - def add_path_resolver(cls, tag, path, kind=None): - # Note: `add_path_resolver` is experimental. The API could be changed. - # `new_path` is a pattern that is matched against the path from the - # root to the node that is being considered. `node_path` elements are - # tuples `(node_check, index_check)`. `node_check` is a node class: - # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None` - # matches any kind of a node. `index_check` could be `None`, a boolean - # value, a string value, or a number. `None` and `False` match against - # any _value_ of sequence and mapping nodes. `True` matches against - # any _key_ of a mapping node. A string `index_check` matches against - # a mapping value that corresponds to a scalar key which content is - # equal to the `index_check` value. An integer `index_check` matches - # against a sequence value with the index equal to `index_check`. - if not 'yaml_path_resolvers' in cls.__dict__: - cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy() - new_path = [] - for element in path: - if isinstance(element, (list, tuple)): - if len(element) == 2: - node_check, index_check = element - elif len(element) == 1: - node_check = element[0] - index_check = True - else: - raise ResolverError("Invalid path element: %s" % element) - else: - node_check = None - index_check = element - if node_check is str: - node_check = ScalarNode - elif node_check is list: - node_check = SequenceNode - elif node_check is dict: - node_check = MappingNode - elif node_check not in [ScalarNode, SequenceNode, MappingNode] \ - and not isinstance(node_check, str) \ - and node_check is not None: - raise ResolverError("Invalid node checker: %s" % node_check) - if not isinstance(index_check, (str, int)) \ - and index_check is not None: - raise ResolverError("Invalid index checker: %s" % index_check) - new_path.append((node_check, index_check)) - if kind is str: - kind = ScalarNode - elif kind is list: - kind = SequenceNode - elif kind is dict: - kind = MappingNode - elif kind not in [ScalarNode, SequenceNode, MappingNode] \ - and kind is not None: - raise ResolverError("Invalid node kind: %s" % kind) - cls.yaml_path_resolvers[tuple(new_path), kind] = tag - - def descend_resolver(self, current_node, current_index): - if not self.yaml_path_resolvers: - return - exact_paths = {} - prefix_paths = [] - if current_node: - depth = len(self.resolver_prefix_paths) - for path, kind in self.resolver_prefix_paths[-1]: - if self.check_resolver_prefix(depth, path, kind, - current_node, current_index): - if len(path) > depth: - prefix_paths.append((path, kind)) - else: - exact_paths[kind] = self.yaml_path_resolvers[path, kind] - else: - for path, kind in self.yaml_path_resolvers: - if not path: - exact_paths[kind] = self.yaml_path_resolvers[path, kind] - else: - prefix_paths.append((path, kind)) - self.resolver_exact_paths.append(exact_paths) - self.resolver_prefix_paths.append(prefix_paths) - - def ascend_resolver(self): - if not self.yaml_path_resolvers: - return - self.resolver_exact_paths.pop() - self.resolver_prefix_paths.pop() - - def check_resolver_prefix(self, depth, path, kind, - current_node, current_index): - node_check, index_check = path[depth-1] - if isinstance(node_check, str): - if current_node.tag != node_check: - return - elif node_check is not None: - if not isinstance(current_node, node_check): - return - if index_check is True and current_index is not None: - return - if (index_check is False or index_check is None) \ - and current_index is None: - return - if isinstance(index_check, str): - if not (isinstance(current_index, ScalarNode) - and index_check == current_index.value): - return - elif isinstance(index_check, int) and not isinstance(index_check, bool): - if index_check != current_index: - return - return True - - def resolve(self, kind, value, implicit): - if kind is ScalarNode and implicit[0]: - if value == '': - resolvers = self.yaml_implicit_resolvers.get('', []) - else: - resolvers = self.yaml_implicit_resolvers.get(value[0], []) - resolvers += self.yaml_implicit_resolvers.get(None, []) - for tag, regexp in resolvers: - if regexp.match(value): - return tag - implicit = implicit[1] - if self.yaml_path_resolvers: - exact_paths = self.resolver_exact_paths[-1] - if kind in exact_paths: - return exact_paths[kind] - if None in exact_paths: - return exact_paths[None] - if kind is ScalarNode: - return self.DEFAULT_SCALAR_TAG - elif kind is SequenceNode: - return self.DEFAULT_SEQUENCE_TAG - elif kind is MappingNode: - return self.DEFAULT_MAPPING_TAG - -class Resolver(BaseResolver): - pass - -Resolver.add_implicit_resolver( - 'tag:yaml.org,2002:bool', - re.compile(r'''^(?:yes|Yes|YES|no|No|NO - |true|True|TRUE|false|False|FALSE - |on|On|ON|off|Off|OFF)$''', re.X), - list('yYnNtTfFoO')) - -Resolver.add_implicit_resolver( - 'tag:yaml.org,2002:float', - re.compile(r'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)? - |\.[0-9_]+(?:[eE][-+][0-9]+)? - |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]* - |[-+]?\.(?:inf|Inf|INF) - |\.(?:nan|NaN|NAN))$''', re.X), - list('-+0123456789.')) - -Resolver.add_implicit_resolver( - 'tag:yaml.org,2002:int', - re.compile(r'''^(?:[-+]?0b[0-1_]+ - |[-+]?0[0-7_]+ - |[-+]?(?:0|[1-9][0-9_]*) - |[-+]?0x[0-9a-fA-F_]+ - |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X), - list('-+0123456789')) - -Resolver.add_implicit_resolver( - 'tag:yaml.org,2002:merge', - re.compile(r'^(?:<<)$'), - ['<']) - -Resolver.add_implicit_resolver( - 'tag:yaml.org,2002:null', - re.compile(r'''^(?: ~ - |null|Null|NULL - | )$''', re.X), - ['~', 'n', 'N', '']) - -Resolver.add_implicit_resolver( - 'tag:yaml.org,2002:timestamp', - re.compile(r'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] - |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? - (?:[Tt]|[ \t]+)[0-9][0-9]? - :[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)? - (?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X), - list('0123456789')) - -Resolver.add_implicit_resolver( - 'tag:yaml.org,2002:value', - re.compile(r'^(?:=)$'), - ['=']) - -# The following resolver is only for documentation purposes. It cannot work -# because plain scalars cannot start with '!', '&', or '*'. -Resolver.add_implicit_resolver( - 'tag:yaml.org,2002:yaml', - re.compile(r'^(?:!|&|\*)$'), - list('!&*')) - diff --git a/libs/yaml/scanner.py b/libs/yaml/scanner.py deleted file mode 100644 index 775dbcc6d..000000000 --- a/libs/yaml/scanner.py +++ /dev/null @@ -1,1435 +0,0 @@ - -# Scanner produces tokens of the following types: -# STREAM-START -# STREAM-END -# DIRECTIVE(name, value) -# DOCUMENT-START -# DOCUMENT-END -# BLOCK-SEQUENCE-START -# BLOCK-MAPPING-START -# BLOCK-END -# FLOW-SEQUENCE-START -# FLOW-MAPPING-START -# FLOW-SEQUENCE-END -# FLOW-MAPPING-END -# BLOCK-ENTRY -# FLOW-ENTRY -# KEY -# VALUE -# ALIAS(value) -# ANCHOR(value) -# TAG(value) -# SCALAR(value, plain, style) -# -# Read comments in the Scanner code for more details. -# - -__all__ = ['Scanner', 'ScannerError'] - -from .error import MarkedYAMLError -from .tokens import * - -class ScannerError(MarkedYAMLError): - pass - -class SimpleKey: - # See below simple keys treatment. - - def __init__(self, token_number, required, index, line, column, mark): - self.token_number = token_number - self.required = required - self.index = index - self.line = line - self.column = column - self.mark = mark - -class Scanner: - - def __init__(self): - """Initialize the scanner.""" - # It is assumed that Scanner and Reader will have a common descendant. - # Reader do the dirty work of checking for BOM and converting the - # input data to Unicode. It also adds NUL to the end. - # - # Reader supports the following methods - # self.peek(i=0) # peek the next i-th character - # self.prefix(l=1) # peek the next l characters - # self.forward(l=1) # read the next l characters and move the pointer. - - # Had we reached the end of the stream? - self.done = False - - # The number of unclosed '{' and '['. `flow_level == 0` means block - # context. - self.flow_level = 0 - - # List of processed tokens that are not yet emitted. - self.tokens = [] - - # Add the STREAM-START token. - self.fetch_stream_start() - - # Number of tokens that were emitted through the `get_token` method. - self.tokens_taken = 0 - - # The current indentation level. - self.indent = -1 - - # Past indentation levels. - self.indents = [] - - # Variables related to simple keys treatment. - - # A simple key is a key that is not denoted by the '?' indicator. - # Example of simple keys: - # --- - # block simple key: value - # ? not a simple key: - # : { flow simple key: value } - # We emit the KEY token before all keys, so when we find a potential - # simple key, we try to locate the corresponding ':' indicator. - # Simple keys should be limited to a single line and 1024 characters. - - # Can a simple key start at the current position? A simple key may - # start: - # - at the beginning of the line, not counting indentation spaces - # (in block context), - # - after '{', '[', ',' (in the flow context), - # - after '?', ':', '-' (in the block context). - # In the block context, this flag also signifies if a block collection - # may start at the current position. - self.allow_simple_key = True - - # Keep track of possible simple keys. This is a dictionary. The key - # is `flow_level`; there can be no more that one possible simple key - # for each level. The value is a SimpleKey record: - # (token_number, required, index, line, column, mark) - # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), - # '[', or '{' tokens. - self.possible_simple_keys = {} - - # Public methods. - - def check_token(self, *choices): - # Check if the next token is one of the given types. - while self.need_more_tokens(): - self.fetch_more_tokens() - if self.tokens: - if not choices: - return True - for choice in choices: - if isinstance(self.tokens[0], choice): - return True - return False - - def peek_token(self): - # Return the next token, but do not delete if from the queue. - # Return None if no more tokens. - while self.need_more_tokens(): - self.fetch_more_tokens() - if self.tokens: - return self.tokens[0] - else: - return None - - def get_token(self): - # Return the next token. - while self.need_more_tokens(): - self.fetch_more_tokens() - if self.tokens: - self.tokens_taken += 1 - return self.tokens.pop(0) - - # Private methods. - - def need_more_tokens(self): - if self.done: - return False - if not self.tokens: - return True - # The current token may be a potential simple key, so we - # need to look further. - self.stale_possible_simple_keys() - if self.next_possible_simple_key() == self.tokens_taken: - return True - - def fetch_more_tokens(self): - - # Eat whitespaces and comments until we reach the next token. - self.scan_to_next_token() - - # Remove obsolete possible simple keys. - self.stale_possible_simple_keys() - - # Compare the current indentation and column. It may add some tokens - # and decrease the current indentation level. - self.unwind_indent(self.column) - - # Peek the next character. - ch = self.peek() - - # Is it the end of stream? - if ch == '\0': - return self.fetch_stream_end() - - # Is it a directive? - if ch == '%' and self.check_directive(): - return self.fetch_directive() - - # Is it the document start? - if ch == '-' and self.check_document_start(): - return self.fetch_document_start() - - # Is it the document end? - if ch == '.' and self.check_document_end(): - return self.fetch_document_end() - - # TODO: support for BOM within a stream. - #if ch == '\uFEFF': - # return self.fetch_bom() <-- issue BOMToken - - # Note: the order of the following checks is NOT significant. - - # Is it the flow sequence start indicator? - if ch == '[': - return self.fetch_flow_sequence_start() - - # Is it the flow mapping start indicator? - if ch == '{': - return self.fetch_flow_mapping_start() - - # Is it the flow sequence end indicator? - if ch == ']': - return self.fetch_flow_sequence_end() - - # Is it the flow mapping end indicator? - if ch == '}': - return self.fetch_flow_mapping_end() - - # Is it the flow entry indicator? - if ch == ',': - return self.fetch_flow_entry() - - # Is it the block entry indicator? - if ch == '-' and self.check_block_entry(): - return self.fetch_block_entry() - - # Is it the key indicator? - if ch == '?' and self.check_key(): - return self.fetch_key() - - # Is it the value indicator? - if ch == ':' and self.check_value(): - return self.fetch_value() - - # Is it an alias? - if ch == '*': - return self.fetch_alias() - - # Is it an anchor? - if ch == '&': - return self.fetch_anchor() - - # Is it a tag? - if ch == '!': - return self.fetch_tag() - - # Is it a literal scalar? - if ch == '|' and not self.flow_level: - return self.fetch_literal() - - # Is it a folded scalar? - if ch == '>' and not self.flow_level: - return self.fetch_folded() - - # Is it a single quoted scalar? - if ch == '\'': - return self.fetch_single() - - # Is it a double quoted scalar? - if ch == '\"': - return self.fetch_double() - - # It must be a plain scalar then. - if self.check_plain(): - return self.fetch_plain() - - # No? It's an error. Let's produce a nice error message. - raise ScannerError("while scanning for the next token", None, - "found character %r that cannot start any token" % ch, - self.get_mark()) - - # Simple keys treatment. - - def next_possible_simple_key(self): - # Return the number of the nearest possible simple key. Actually we - # don't need to loop through the whole dictionary. We may replace it - # with the following code: - # if not self.possible_simple_keys: - # return None - # return self.possible_simple_keys[ - # min(self.possible_simple_keys.keys())].token_number - min_token_number = None - for level in self.possible_simple_keys: - key = self.possible_simple_keys[level] - if min_token_number is None or key.token_number < min_token_number: - min_token_number = key.token_number - return min_token_number - - def stale_possible_simple_keys(self): - # Remove entries that are no longer possible simple keys. According to - # the YAML specification, simple keys - # - should be limited to a single line, - # - should be no longer than 1024 characters. - # Disabling this procedure will allow simple keys of any length and - # height (may cause problems if indentation is broken though). - for level in list(self.possible_simple_keys): - key = self.possible_simple_keys[level] - if key.line != self.line \ - or self.index-key.index > 1024: - if key.required: - raise ScannerError("while scanning a simple key", key.mark, - "could not find expected ':'", self.get_mark()) - del self.possible_simple_keys[level] - - def save_possible_simple_key(self): - # The next token may start a simple key. We check if it's possible - # and save its position. This function is called for - # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. - - # Check if a simple key is required at the current position. - required = not self.flow_level and self.indent == self.column - - # The next token might be a simple key. Let's save it's number and - # position. - if self.allow_simple_key: - self.remove_possible_simple_key() - token_number = self.tokens_taken+len(self.tokens) - key = SimpleKey(token_number, required, - self.index, self.line, self.column, self.get_mark()) - self.possible_simple_keys[self.flow_level] = key - - def remove_possible_simple_key(self): - # Remove the saved possible key position at the current flow level. - if self.flow_level in self.possible_simple_keys: - key = self.possible_simple_keys[self.flow_level] - - if key.required: - raise ScannerError("while scanning a simple key", key.mark, - "could not find expected ':'", self.get_mark()) - - del self.possible_simple_keys[self.flow_level] - - # Indentation functions. - - def unwind_indent(self, column): - - ## In flow context, tokens should respect indentation. - ## Actually the condition should be `self.indent >= column` according to - ## the spec. But this condition will prohibit intuitively correct - ## constructions such as - ## key : { - ## } - #if self.flow_level and self.indent > column: - # raise ScannerError(None, None, - # "invalid intendation or unclosed '[' or '{'", - # self.get_mark()) - - # In the flow context, indentation is ignored. We make the scanner less - # restrictive then specification requires. - if self.flow_level: - return - - # In block context, we may need to issue the BLOCK-END tokens. - while self.indent > column: - mark = self.get_mark() - self.indent = self.indents.pop() - self.tokens.append(BlockEndToken(mark, mark)) - - def add_indent(self, column): - # Check if we need to increase indentation. - if self.indent < column: - self.indents.append(self.indent) - self.indent = column - return True - return False - - # Fetchers. - - def fetch_stream_start(self): - # We always add STREAM-START as the first token and STREAM-END as the - # last token. - - # Read the token. - mark = self.get_mark() - - # Add STREAM-START. - self.tokens.append(StreamStartToken(mark, mark, - encoding=self.encoding)) - - - def fetch_stream_end(self): - - # Set the current intendation to -1. - self.unwind_indent(-1) - - # Reset simple keys. - self.remove_possible_simple_key() - self.allow_simple_key = False - self.possible_simple_keys = {} - - # Read the token. - mark = self.get_mark() - - # Add STREAM-END. - self.tokens.append(StreamEndToken(mark, mark)) - - # The steam is finished. - self.done = True - - def fetch_directive(self): - - # Set the current intendation to -1. - self.unwind_indent(-1) - - # Reset simple keys. - self.remove_possible_simple_key() - self.allow_simple_key = False - - # Scan and add DIRECTIVE. - self.tokens.append(self.scan_directive()) - - def fetch_document_start(self): - self.fetch_document_indicator(DocumentStartToken) - - def fetch_document_end(self): - self.fetch_document_indicator(DocumentEndToken) - - def fetch_document_indicator(self, TokenClass): - - # Set the current intendation to -1. - self.unwind_indent(-1) - - # Reset simple keys. Note that there could not be a block collection - # after '---'. - self.remove_possible_simple_key() - self.allow_simple_key = False - - # Add DOCUMENT-START or DOCUMENT-END. - start_mark = self.get_mark() - self.forward(3) - end_mark = self.get_mark() - self.tokens.append(TokenClass(start_mark, end_mark)) - - def fetch_flow_sequence_start(self): - self.fetch_flow_collection_start(FlowSequenceStartToken) - - def fetch_flow_mapping_start(self): - self.fetch_flow_collection_start(FlowMappingStartToken) - - def fetch_flow_collection_start(self, TokenClass): - - # '[' and '{' may start a simple key. - self.save_possible_simple_key() - - # Increase the flow level. - self.flow_level += 1 - - # Simple keys are allowed after '[' and '{'. - self.allow_simple_key = True - - # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(TokenClass(start_mark, end_mark)) - - def fetch_flow_sequence_end(self): - self.fetch_flow_collection_end(FlowSequenceEndToken) - - def fetch_flow_mapping_end(self): - self.fetch_flow_collection_end(FlowMappingEndToken) - - def fetch_flow_collection_end(self, TokenClass): - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Decrease the flow level. - self.flow_level -= 1 - - # No simple keys after ']' or '}'. - self.allow_simple_key = False - - # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(TokenClass(start_mark, end_mark)) - - def fetch_flow_entry(self): - - # Simple keys are allowed after ','. - self.allow_simple_key = True - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add FLOW-ENTRY. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(FlowEntryToken(start_mark, end_mark)) - - def fetch_block_entry(self): - - # Block context needs additional checks. - if not self.flow_level: - - # Are we allowed to start a new entry? - if not self.allow_simple_key: - raise ScannerError(None, None, - "sequence entries are not allowed here", - self.get_mark()) - - # We may need to add BLOCK-SEQUENCE-START. - if self.add_indent(self.column): - mark = self.get_mark() - self.tokens.append(BlockSequenceStartToken(mark, mark)) - - # It's an error for the block entry to occur in the flow context, - # but we let the parser detect this. - else: - pass - - # Simple keys are allowed after '-'. - self.allow_simple_key = True - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add BLOCK-ENTRY. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(BlockEntryToken(start_mark, end_mark)) - - def fetch_key(self): - - # Block context needs additional checks. - if not self.flow_level: - - # Are we allowed to start a key (not necessary a simple)? - if not self.allow_simple_key: - raise ScannerError(None, None, - "mapping keys are not allowed here", - self.get_mark()) - - # We may need to add BLOCK-MAPPING-START. - if self.add_indent(self.column): - mark = self.get_mark() - self.tokens.append(BlockMappingStartToken(mark, mark)) - - # Simple keys are allowed after '?' in the block context. - self.allow_simple_key = not self.flow_level - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add KEY. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(KeyToken(start_mark, end_mark)) - - def fetch_value(self): - - # Do we determine a simple key? - if self.flow_level in self.possible_simple_keys: - - # Add KEY. - key = self.possible_simple_keys[self.flow_level] - del self.possible_simple_keys[self.flow_level] - self.tokens.insert(key.token_number-self.tokens_taken, - KeyToken(key.mark, key.mark)) - - # If this key starts a new block mapping, we need to add - # BLOCK-MAPPING-START. - if not self.flow_level: - if self.add_indent(key.column): - self.tokens.insert(key.token_number-self.tokens_taken, - BlockMappingStartToken(key.mark, key.mark)) - - # There cannot be two simple keys one after another. - self.allow_simple_key = False - - # It must be a part of a complex key. - else: - - # Block context needs additional checks. - # (Do we really need them? They will be caught by the parser - # anyway.) - if not self.flow_level: - - # We are allowed to start a complex value if and only if - # we can start a simple key. - if not self.allow_simple_key: - raise ScannerError(None, None, - "mapping values are not allowed here", - self.get_mark()) - - # If this value starts a new block mapping, we need to add - # BLOCK-MAPPING-START. It will be detected as an error later by - # the parser. - if not self.flow_level: - if self.add_indent(self.column): - mark = self.get_mark() - self.tokens.append(BlockMappingStartToken(mark, mark)) - - # Simple keys are allowed after ':' in the block context. - self.allow_simple_key = not self.flow_level - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add VALUE. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(ValueToken(start_mark, end_mark)) - - def fetch_alias(self): - - # ALIAS could be a simple key. - self.save_possible_simple_key() - - # No simple keys after ALIAS. - self.allow_simple_key = False - - # Scan and add ALIAS. - self.tokens.append(self.scan_anchor(AliasToken)) - - def fetch_anchor(self): - - # ANCHOR could start a simple key. - self.save_possible_simple_key() - - # No simple keys after ANCHOR. - self.allow_simple_key = False - - # Scan and add ANCHOR. - self.tokens.append(self.scan_anchor(AnchorToken)) - - def fetch_tag(self): - - # TAG could start a simple key. - self.save_possible_simple_key() - - # No simple keys after TAG. - self.allow_simple_key = False - - # Scan and add TAG. - self.tokens.append(self.scan_tag()) - - def fetch_literal(self): - self.fetch_block_scalar(style='|') - - def fetch_folded(self): - self.fetch_block_scalar(style='>') - - def fetch_block_scalar(self, style): - - # A simple key may follow a block scalar. - self.allow_simple_key = True - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Scan and add SCALAR. - self.tokens.append(self.scan_block_scalar(style)) - - def fetch_single(self): - self.fetch_flow_scalar(style='\'') - - def fetch_double(self): - self.fetch_flow_scalar(style='"') - - def fetch_flow_scalar(self, style): - - # A flow scalar could be a simple key. - self.save_possible_simple_key() - - # No simple keys after flow scalars. - self.allow_simple_key = False - - # Scan and add SCALAR. - self.tokens.append(self.scan_flow_scalar(style)) - - def fetch_plain(self): - - # A plain scalar could be a simple key. - self.save_possible_simple_key() - - # No simple keys after plain scalars. But note that `scan_plain` will - # change this flag if the scan is finished at the beginning of the - # line. - self.allow_simple_key = False - - # Scan and add SCALAR. May change `allow_simple_key`. - self.tokens.append(self.scan_plain()) - - # Checkers. - - def check_directive(self): - - # DIRECTIVE: ^ '%' ... - # The '%' indicator is already checked. - if self.column == 0: - return True - - def check_document_start(self): - - # DOCUMENT-START: ^ '---' (' '|'\n') - if self.column == 0: - if self.prefix(3) == '---' \ - and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': - return True - - def check_document_end(self): - - # DOCUMENT-END: ^ '...' (' '|'\n') - if self.column == 0: - if self.prefix(3) == '...' \ - and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': - return True - - def check_block_entry(self): - - # BLOCK-ENTRY: '-' (' '|'\n') - return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' - - def check_key(self): - - # KEY(flow context): '?' - if self.flow_level: - return True - - # KEY(block context): '?' (' '|'\n') - else: - return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' - - def check_value(self): - - # VALUE(flow context): ':' - if self.flow_level: - return True - - # VALUE(block context): ':' (' '|'\n') - else: - return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029' - - def check_plain(self): - - # A plain scalar may start with any non-space character except: - # '-', '?', ':', ',', '[', ']', '{', '}', - # '#', '&', '*', '!', '|', '>', '\'', '\"', - # '%', '@', '`'. - # - # It may also start with - # '-', '?', ':' - # if it is followed by a non-space character. - # - # Note that we limit the last rule to the block context (except the - # '-' character) because we want the flow context to be space - # independent. - ch = self.peek() - return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ - or (self.peek(1) not in '\0 \t\r\n\x85\u2028\u2029' - and (ch == '-' or (not self.flow_level and ch in '?:'))) - - # Scanners. - - def scan_to_next_token(self): - # We ignore spaces, line breaks and comments. - # If we find a line break in the block context, we set the flag - # `allow_simple_key` on. - # The byte order mark is stripped if it's the first character in the - # stream. We do not yet support BOM inside the stream as the - # specification requires. Any such mark will be considered as a part - # of the document. - # - # TODO: We need to make tab handling rules more sane. A good rule is - # Tabs cannot precede tokens - # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, - # KEY(block), VALUE(block), BLOCK-ENTRY - # So the checking code is - # if <TAB>: - # self.allow_simple_keys = False - # We also need to add the check for `allow_simple_keys == True` to - # `unwind_indent` before issuing BLOCK-END. - # Scanners for block, flow, and plain scalars need to be modified. - - if self.index == 0 and self.peek() == '\uFEFF': - self.forward() - found = False - while not found: - while self.peek() == ' ': - self.forward() - if self.peek() == '#': - while self.peek() not in '\0\r\n\x85\u2028\u2029': - self.forward() - if self.scan_line_break(): - if not self.flow_level: - self.allow_simple_key = True - else: - found = True - - def scan_directive(self): - # See the specification for details. - start_mark = self.get_mark() - self.forward() - name = self.scan_directive_name(start_mark) - value = None - if name == 'YAML': - value = self.scan_yaml_directive_value(start_mark) - end_mark = self.get_mark() - elif name == 'TAG': - value = self.scan_tag_directive_value(start_mark) - end_mark = self.get_mark() - else: - end_mark = self.get_mark() - while self.peek() not in '\0\r\n\x85\u2028\u2029': - self.forward() - self.scan_directive_ignored_line(start_mark) - return DirectiveToken(name, value, start_mark, end_mark) - - def scan_directive_name(self, start_mark): - # See the specification for details. - length = 0 - ch = self.peek(length) - while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ - or ch in '-_': - length += 1 - ch = self.peek(length) - if not length: - raise ScannerError("while scanning a directive", start_mark, - "expected alphabetic or numeric character, but found %r" - % ch, self.get_mark()) - value = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch not in '\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected alphabetic or numeric character, but found %r" - % ch, self.get_mark()) - return value - - def scan_yaml_directive_value(self, start_mark): - # See the specification for details. - while self.peek() == ' ': - self.forward() - major = self.scan_yaml_directive_number(start_mark) - if self.peek() != '.': - raise ScannerError("while scanning a directive", start_mark, - "expected a digit or '.', but found %r" % self.peek(), - self.get_mark()) - self.forward() - minor = self.scan_yaml_directive_number(start_mark) - if self.peek() not in '\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected a digit or ' ', but found %r" % self.peek(), - self.get_mark()) - return (major, minor) - - def scan_yaml_directive_number(self, start_mark): - # See the specification for details. - ch = self.peek() - if not ('0' <= ch <= '9'): - raise ScannerError("while scanning a directive", start_mark, - "expected a digit, but found %r" % ch, self.get_mark()) - length = 0 - while '0' <= self.peek(length) <= '9': - length += 1 - value = int(self.prefix(length)) - self.forward(length) - return value - - def scan_tag_directive_value(self, start_mark): - # See the specification for details. - while self.peek() == ' ': - self.forward() - handle = self.scan_tag_directive_handle(start_mark) - while self.peek() == ' ': - self.forward() - prefix = self.scan_tag_directive_prefix(start_mark) - return (handle, prefix) - - def scan_tag_directive_handle(self, start_mark): - # See the specification for details. - value = self.scan_tag_handle('directive', start_mark) - ch = self.peek() - if ch != ' ': - raise ScannerError("while scanning a directive", start_mark, - "expected ' ', but found %r" % ch, self.get_mark()) - return value - - def scan_tag_directive_prefix(self, start_mark): - # See the specification for details. - value = self.scan_tag_uri('directive', start_mark) - ch = self.peek() - if ch not in '\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected ' ', but found %r" % ch, self.get_mark()) - return value - - def scan_directive_ignored_line(self, start_mark): - # See the specification for details. - while self.peek() == ' ': - self.forward() - if self.peek() == '#': - while self.peek() not in '\0\r\n\x85\u2028\u2029': - self.forward() - ch = self.peek() - if ch not in '\0\r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected a comment or a line break, but found %r" - % ch, self.get_mark()) - self.scan_line_break() - - def scan_anchor(self, TokenClass): - # The specification does not restrict characters for anchors and - # aliases. This may lead to problems, for instance, the document: - # [ *alias, value ] - # can be interpreted in two ways, as - # [ "value" ] - # and - # [ *alias , "value" ] - # Therefore we restrict aliases to numbers and ASCII letters. - start_mark = self.get_mark() - indicator = self.peek() - if indicator == '*': - name = 'alias' - else: - name = 'anchor' - self.forward() - length = 0 - ch = self.peek(length) - while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ - or ch in '-_': - length += 1 - ch = self.peek(length) - if not length: - raise ScannerError("while scanning an %s" % name, start_mark, - "expected alphabetic or numeric character, but found %r" - % ch, self.get_mark()) - value = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch not in '\0 \t\r\n\x85\u2028\u2029?:,]}%@`': - raise ScannerError("while scanning an %s" % name, start_mark, - "expected alphabetic or numeric character, but found %r" - % ch, self.get_mark()) - end_mark = self.get_mark() - return TokenClass(value, start_mark, end_mark) - - def scan_tag(self): - # See the specification for details. - start_mark = self.get_mark() - ch = self.peek(1) - if ch == '<': - handle = None - self.forward(2) - suffix = self.scan_tag_uri('tag', start_mark) - if self.peek() != '>': - raise ScannerError("while parsing a tag", start_mark, - "expected '>', but found %r" % self.peek(), - self.get_mark()) - self.forward() - elif ch in '\0 \t\r\n\x85\u2028\u2029': - handle = None - suffix = '!' - self.forward() - else: - length = 1 - use_handle = False - while ch not in '\0 \r\n\x85\u2028\u2029': - if ch == '!': - use_handle = True - break - length += 1 - ch = self.peek(length) - handle = '!' - if use_handle: - handle = self.scan_tag_handle('tag', start_mark) - else: - handle = '!' - self.forward() - suffix = self.scan_tag_uri('tag', start_mark) - ch = self.peek() - if ch not in '\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a tag", start_mark, - "expected ' ', but found %r" % ch, self.get_mark()) - value = (handle, suffix) - end_mark = self.get_mark() - return TagToken(value, start_mark, end_mark) - - def scan_block_scalar(self, style): - # See the specification for details. - - if style == '>': - folded = True - else: - folded = False - - chunks = [] - start_mark = self.get_mark() - - # Scan the header. - self.forward() - chomping, increment = self.scan_block_scalar_indicators(start_mark) - self.scan_block_scalar_ignored_line(start_mark) - - # Determine the indentation level and go to the first non-empty line. - min_indent = self.indent+1 - if min_indent < 1: - min_indent = 1 - if increment is None: - breaks, max_indent, end_mark = self.scan_block_scalar_indentation() - indent = max(min_indent, max_indent) - else: - indent = min_indent+increment-1 - breaks, end_mark = self.scan_block_scalar_breaks(indent) - line_break = '' - - # Scan the inner part of the block scalar. - while self.column == indent and self.peek() != '\0': - chunks.extend(breaks) - leading_non_space = self.peek() not in ' \t' - length = 0 - while self.peek(length) not in '\0\r\n\x85\u2028\u2029': - length += 1 - chunks.append(self.prefix(length)) - self.forward(length) - line_break = self.scan_line_break() - breaks, end_mark = self.scan_block_scalar_breaks(indent) - if self.column == indent and self.peek() != '\0': - - # Unfortunately, folding rules are ambiguous. - # - # This is the folding according to the specification: - - if folded and line_break == '\n' \ - and leading_non_space and self.peek() not in ' \t': - if not breaks: - chunks.append(' ') - else: - chunks.append(line_break) - - # This is Clark Evans's interpretation (also in the spec - # examples): - # - #if folded and line_break == '\n': - # if not breaks: - # if self.peek() not in ' \t': - # chunks.append(' ') - # else: - # chunks.append(line_break) - #else: - # chunks.append(line_break) - else: - break - - # Chomp the tail. - if chomping is not False: - chunks.append(line_break) - if chomping is True: - chunks.extend(breaks) - - # We are done. - return ScalarToken(''.join(chunks), False, start_mark, end_mark, - style) - - def scan_block_scalar_indicators(self, start_mark): - # See the specification for details. - chomping = None - increment = None - ch = self.peek() - if ch in '+-': - if ch == '+': - chomping = True - else: - chomping = False - self.forward() - ch = self.peek() - if ch in '0123456789': - increment = int(ch) - if increment == 0: - raise ScannerError("while scanning a block scalar", start_mark, - "expected indentation indicator in the range 1-9, but found 0", - self.get_mark()) - self.forward() - elif ch in '0123456789': - increment = int(ch) - if increment == 0: - raise ScannerError("while scanning a block scalar", start_mark, - "expected indentation indicator in the range 1-9, but found 0", - self.get_mark()) - self.forward() - ch = self.peek() - if ch in '+-': - if ch == '+': - chomping = True - else: - chomping = False - self.forward() - ch = self.peek() - if ch not in '\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a block scalar", start_mark, - "expected chomping or indentation indicators, but found %r" - % ch, self.get_mark()) - return chomping, increment - - def scan_block_scalar_ignored_line(self, start_mark): - # See the specification for details. - while self.peek() == ' ': - self.forward() - if self.peek() == '#': - while self.peek() not in '\0\r\n\x85\u2028\u2029': - self.forward() - ch = self.peek() - if ch not in '\0\r\n\x85\u2028\u2029': - raise ScannerError("while scanning a block scalar", start_mark, - "expected a comment or a line break, but found %r" % ch, - self.get_mark()) - self.scan_line_break() - - def scan_block_scalar_indentation(self): - # See the specification for details. - chunks = [] - max_indent = 0 - end_mark = self.get_mark() - while self.peek() in ' \r\n\x85\u2028\u2029': - if self.peek() != ' ': - chunks.append(self.scan_line_break()) - end_mark = self.get_mark() - else: - self.forward() - if self.column > max_indent: - max_indent = self.column - return chunks, max_indent, end_mark - - def scan_block_scalar_breaks(self, indent): - # See the specification for details. - chunks = [] - end_mark = self.get_mark() - while self.column < indent and self.peek() == ' ': - self.forward() - while self.peek() in '\r\n\x85\u2028\u2029': - chunks.append(self.scan_line_break()) - end_mark = self.get_mark() - while self.column < indent and self.peek() == ' ': - self.forward() - return chunks, end_mark - - def scan_flow_scalar(self, style): - # See the specification for details. - # Note that we loose indentation rules for quoted scalars. Quoted - # scalars don't need to adhere indentation because " and ' clearly - # mark the beginning and the end of them. Therefore we are less - # restrictive then the specification requires. We only need to check - # that document separators are not included in scalars. - if style == '"': - double = True - else: - double = False - chunks = [] - start_mark = self.get_mark() - quote = self.peek() - self.forward() - chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) - while self.peek() != quote: - chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) - chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) - self.forward() - end_mark = self.get_mark() - return ScalarToken(''.join(chunks), False, start_mark, end_mark, - style) - - ESCAPE_REPLACEMENTS = { - '0': '\0', - 'a': '\x07', - 'b': '\x08', - 't': '\x09', - '\t': '\x09', - 'n': '\x0A', - 'v': '\x0B', - 'f': '\x0C', - 'r': '\x0D', - 'e': '\x1B', - ' ': '\x20', - '\"': '\"', - '\\': '\\', - '/': '/', - 'N': '\x85', - '_': '\xA0', - 'L': '\u2028', - 'P': '\u2029', - } - - ESCAPE_CODES = { - 'x': 2, - 'u': 4, - 'U': 8, - } - - def scan_flow_scalar_non_spaces(self, double, start_mark): - # See the specification for details. - chunks = [] - while True: - length = 0 - while self.peek(length) not in '\'\"\\\0 \t\r\n\x85\u2028\u2029': - length += 1 - if length: - chunks.append(self.prefix(length)) - self.forward(length) - ch = self.peek() - if not double and ch == '\'' and self.peek(1) == '\'': - chunks.append('\'') - self.forward(2) - elif (double and ch == '\'') or (not double and ch in '\"\\'): - chunks.append(ch) - self.forward() - elif double and ch == '\\': - self.forward() - ch = self.peek() - if ch in self.ESCAPE_REPLACEMENTS: - chunks.append(self.ESCAPE_REPLACEMENTS[ch]) - self.forward() - elif ch in self.ESCAPE_CODES: - length = self.ESCAPE_CODES[ch] - self.forward() - for k in range(length): - if self.peek(k) not in '0123456789ABCDEFabcdef': - raise ScannerError("while scanning a double-quoted scalar", start_mark, - "expected escape sequence of %d hexdecimal numbers, but found %r" % - (length, self.peek(k)), self.get_mark()) - code = int(self.prefix(length), 16) - chunks.append(chr(code)) - self.forward(length) - elif ch in '\r\n\x85\u2028\u2029': - self.scan_line_break() - chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) - else: - raise ScannerError("while scanning a double-quoted scalar", start_mark, - "found unknown escape character %r" % ch, self.get_mark()) - else: - return chunks - - def scan_flow_scalar_spaces(self, double, start_mark): - # See the specification for details. - chunks = [] - length = 0 - while self.peek(length) in ' \t': - length += 1 - whitespaces = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch == '\0': - raise ScannerError("while scanning a quoted scalar", start_mark, - "found unexpected end of stream", self.get_mark()) - elif ch in '\r\n\x85\u2028\u2029': - line_break = self.scan_line_break() - breaks = self.scan_flow_scalar_breaks(double, start_mark) - if line_break != '\n': - chunks.append(line_break) - elif not breaks: - chunks.append(' ') - chunks.extend(breaks) - else: - chunks.append(whitespaces) - return chunks - - def scan_flow_scalar_breaks(self, double, start_mark): - # See the specification for details. - chunks = [] - while True: - # Instead of checking indentation, we check for document - # separators. - prefix = self.prefix(3) - if (prefix == '---' or prefix == '...') \ - and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': - raise ScannerError("while scanning a quoted scalar", start_mark, - "found unexpected document separator", self.get_mark()) - while self.peek() in ' \t': - self.forward() - if self.peek() in '\r\n\x85\u2028\u2029': - chunks.append(self.scan_line_break()) - else: - return chunks - - def scan_plain(self): - # See the specification for details. - # We add an additional restriction for the flow context: - # plain scalars in the flow context cannot contain ',' or '?'. - # We also keep track of the `allow_simple_key` flag here. - # Indentation rules are loosed for the flow context. - chunks = [] - start_mark = self.get_mark() - end_mark = start_mark - indent = self.indent+1 - # We allow zero indentation for scalars, but then we need to check for - # document separators at the beginning of the line. - #if indent == 0: - # indent = 1 - spaces = [] - while True: - length = 0 - if self.peek() == '#': - break - while True: - ch = self.peek(length) - if ch in '\0 \t\r\n\x85\u2028\u2029' \ - or (ch == ':' and - self.peek(length+1) in '\0 \t\r\n\x85\u2028\u2029' - + (u',[]{}' if self.flow_level else u''))\ - or (self.flow_level and ch in ',?[]{}'): - break - length += 1 - if length == 0: - break - self.allow_simple_key = False - chunks.extend(spaces) - chunks.append(self.prefix(length)) - self.forward(length) - end_mark = self.get_mark() - spaces = self.scan_plain_spaces(indent, start_mark) - if not spaces or self.peek() == '#' \ - or (not self.flow_level and self.column < indent): - break - return ScalarToken(''.join(chunks), True, start_mark, end_mark) - - def scan_plain_spaces(self, indent, start_mark): - # See the specification for details. - # The specification is really confusing about tabs in plain scalars. - # We just forbid them completely. Do not use tabs in YAML! - chunks = [] - length = 0 - while self.peek(length) in ' ': - length += 1 - whitespaces = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch in '\r\n\x85\u2028\u2029': - line_break = self.scan_line_break() - self.allow_simple_key = True - prefix = self.prefix(3) - if (prefix == '---' or prefix == '...') \ - and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': - return - breaks = [] - while self.peek() in ' \r\n\x85\u2028\u2029': - if self.peek() == ' ': - self.forward() - else: - breaks.append(self.scan_line_break()) - prefix = self.prefix(3) - if (prefix == '---' or prefix == '...') \ - and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029': - return - if line_break != '\n': - chunks.append(line_break) - elif not breaks: - chunks.append(' ') - chunks.extend(breaks) - elif whitespaces: - chunks.append(whitespaces) - return chunks - - def scan_tag_handle(self, name, start_mark): - # See the specification for details. - # For some strange reasons, the specification does not allow '_' in - # tag handles. I have allowed it anyway. - ch = self.peek() - if ch != '!': - raise ScannerError("while scanning a %s" % name, start_mark, - "expected '!', but found %r" % ch, self.get_mark()) - length = 1 - ch = self.peek(length) - if ch != ' ': - while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ - or ch in '-_': - length += 1 - ch = self.peek(length) - if ch != '!': - self.forward(length) - raise ScannerError("while scanning a %s" % name, start_mark, - "expected '!', but found %r" % ch, self.get_mark()) - length += 1 - value = self.prefix(length) - self.forward(length) - return value - - def scan_tag_uri(self, name, start_mark): - # See the specification for details. - # Note: we do not check if URI is well-formed. - chunks = [] - length = 0 - ch = self.peek(length) - while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \ - or ch in '-;/?:@&=+$,_.!~*\'()[]%': - if ch == '%': - chunks.append(self.prefix(length)) - self.forward(length) - length = 0 - chunks.append(self.scan_uri_escapes(name, start_mark)) - else: - length += 1 - ch = self.peek(length) - if length: - chunks.append(self.prefix(length)) - self.forward(length) - length = 0 - if not chunks: - raise ScannerError("while parsing a %s" % name, start_mark, - "expected URI, but found %r" % ch, self.get_mark()) - return ''.join(chunks) - - def scan_uri_escapes(self, name, start_mark): - # See the specification for details. - codes = [] - mark = self.get_mark() - while self.peek() == '%': - self.forward() - for k in range(2): - if self.peek(k) not in '0123456789ABCDEFabcdef': - raise ScannerError("while scanning a %s" % name, start_mark, - "expected URI escape sequence of 2 hexdecimal numbers, but found %r" - % self.peek(k), self.get_mark()) - codes.append(int(self.prefix(2), 16)) - self.forward(2) - try: - value = bytes(codes).decode('utf-8') - except UnicodeDecodeError as exc: - raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark) - return value - - def scan_line_break(self): - # Transforms: - # '\r\n' : '\n' - # '\r' : '\n' - # '\n' : '\n' - # '\x85' : '\n' - # '\u2028' : '\u2028' - # '\u2029 : '\u2029' - # default : '' - ch = self.peek() - if ch in '\r\n\x85': - if self.prefix(2) == '\r\n': - self.forward(2) - else: - self.forward() - return '\n' - elif ch in '\u2028\u2029': - self.forward() - return ch - return '' diff --git a/libs/yaml/serializer.py b/libs/yaml/serializer.py deleted file mode 100644 index fe911e67a..000000000 --- a/libs/yaml/serializer.py +++ /dev/null @@ -1,111 +0,0 @@ - -__all__ = ['Serializer', 'SerializerError'] - -from .error import YAMLError -from .events import * -from .nodes import * - -class SerializerError(YAMLError): - pass - -class Serializer: - - ANCHOR_TEMPLATE = 'id%03d' - - def __init__(self, encoding=None, - explicit_start=None, explicit_end=None, version=None, tags=None): - self.use_encoding = encoding - self.use_explicit_start = explicit_start - self.use_explicit_end = explicit_end - self.use_version = version - self.use_tags = tags - self.serialized_nodes = {} - self.anchors = {} - self.last_anchor_id = 0 - self.closed = None - - def open(self): - if self.closed is None: - self.emit(StreamStartEvent(encoding=self.use_encoding)) - self.closed = False - elif self.closed: - raise SerializerError("serializer is closed") - else: - raise SerializerError("serializer is already opened") - - def close(self): - if self.closed is None: - raise SerializerError("serializer is not opened") - elif not self.closed: - self.emit(StreamEndEvent()) - self.closed = True - - #def __del__(self): - # self.close() - - def serialize(self, node): - if self.closed is None: - raise SerializerError("serializer is not opened") - elif self.closed: - raise SerializerError("serializer is closed") - self.emit(DocumentStartEvent(explicit=self.use_explicit_start, - version=self.use_version, tags=self.use_tags)) - self.anchor_node(node) - self.serialize_node(node, None, None) - self.emit(DocumentEndEvent(explicit=self.use_explicit_end)) - self.serialized_nodes = {} - self.anchors = {} - self.last_anchor_id = 0 - - def anchor_node(self, node): - if node in self.anchors: - if self.anchors[node] is None: - self.anchors[node] = self.generate_anchor(node) - else: - self.anchors[node] = None - if isinstance(node, SequenceNode): - for item in node.value: - self.anchor_node(item) - elif isinstance(node, MappingNode): - for key, value in node.value: - self.anchor_node(key) - self.anchor_node(value) - - def generate_anchor(self, node): - self.last_anchor_id += 1 - return self.ANCHOR_TEMPLATE % self.last_anchor_id - - def serialize_node(self, node, parent, index): - alias = self.anchors[node] - if node in self.serialized_nodes: - self.emit(AliasEvent(alias)) - else: - self.serialized_nodes[node] = True - self.descend_resolver(parent, index) - if isinstance(node, ScalarNode): - detected_tag = self.resolve(ScalarNode, node.value, (True, False)) - default_tag = self.resolve(ScalarNode, node.value, (False, True)) - implicit = (node.tag == detected_tag), (node.tag == default_tag) - self.emit(ScalarEvent(alias, node.tag, implicit, node.value, - style=node.style)) - elif isinstance(node, SequenceNode): - implicit = (node.tag - == self.resolve(SequenceNode, node.value, True)) - self.emit(SequenceStartEvent(alias, node.tag, implicit, - flow_style=node.flow_style)) - index = 0 - for item in node.value: - self.serialize_node(item, node, index) - index += 1 - self.emit(SequenceEndEvent()) - elif isinstance(node, MappingNode): - implicit = (node.tag - == self.resolve(MappingNode, node.value, True)) - self.emit(MappingStartEvent(alias, node.tag, implicit, - flow_style=node.flow_style)) - for key, value in node.value: - self.serialize_node(key, node, None) - self.serialize_node(value, node, key) - self.emit(MappingEndEvent()) - self.ascend_resolver() - diff --git a/libs/yaml/tokens.py b/libs/yaml/tokens.py deleted file mode 100644 index 4d0b48a39..000000000 --- a/libs/yaml/tokens.py +++ /dev/null @@ -1,104 +0,0 @@ - -class Token(object): - def __init__(self, start_mark, end_mark): - self.start_mark = start_mark - self.end_mark = end_mark - def __repr__(self): - attributes = [key for key in self.__dict__ - if not key.endswith('_mark')] - attributes.sort() - arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) - for key in attributes]) - return '%s(%s)' % (self.__class__.__name__, arguments) - -#class BOMToken(Token): -# id = '<byte order mark>' - -class DirectiveToken(Token): - id = '<directive>' - def __init__(self, name, value, start_mark, end_mark): - self.name = name - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - -class DocumentStartToken(Token): - id = '<document start>' - -class DocumentEndToken(Token): - id = '<document end>' - -class StreamStartToken(Token): - id = '<stream start>' - def __init__(self, start_mark=None, end_mark=None, - encoding=None): - self.start_mark = start_mark - self.end_mark = end_mark - self.encoding = encoding - -class StreamEndToken(Token): - id = '<stream end>' - -class BlockSequenceStartToken(Token): - id = '<block sequence start>' - -class BlockMappingStartToken(Token): - id = '<block mapping start>' - -class BlockEndToken(Token): - id = '<block end>' - -class FlowSequenceStartToken(Token): - id = '[' - -class FlowMappingStartToken(Token): - id = '{' - -class FlowSequenceEndToken(Token): - id = ']' - -class FlowMappingEndToken(Token): - id = '}' - -class KeyToken(Token): - id = '?' - -class ValueToken(Token): - id = ':' - -class BlockEntryToken(Token): - id = '-' - -class FlowEntryToken(Token): - id = ',' - -class AliasToken(Token): - id = '<alias>' - def __init__(self, value, start_mark, end_mark): - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - -class AnchorToken(Token): - id = '<anchor>' - def __init__(self, value, start_mark, end_mark): - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - -class TagToken(Token): - id = '<tag>' - def __init__(self, value, start_mark, end_mark): - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - -class ScalarToken(Token): - id = '<scalar>' - def __init__(self, value, plain, start_mark, end_mark, style=None): - self.value = value - self.plain = plain - self.start_mark = start_mark - self.end_mark = end_mark - self.style = style - diff --git a/libs/yaml2.7/__init__.py b/libs/yaml2.7/__init__.py deleted file mode 100644 index e7a419dd2..000000000 --- a/libs/yaml2.7/__init__.py +++ /dev/null @@ -1,406 +0,0 @@ - -from error import * - -from tokens import * -from events import * -from nodes import * - -from loader import * -from dumper import * - -__version__ = '5.1' - -try: - from cyaml import * - __with_libyaml__ = True -except ImportError: - __with_libyaml__ = False - - -#------------------------------------------------------------------------------ -# Warnings control -#------------------------------------------------------------------------------ - -# 'Global' warnings state: -_warnings_enabled = { - 'YAMLLoadWarning': True, -} - -# Get or set global warnings' state -def warnings(settings=None): - if settings is None: - return _warnings_enabled - - if type(settings) is dict: - for key in settings: - if key in _warnings_enabled: - _warnings_enabled[key] = settings[key] - -# Warn when load() is called without Loader=... -class YAMLLoadWarning(RuntimeWarning): - pass - -def load_warning(method): - if _warnings_enabled['YAMLLoadWarning'] is False: - return - - import warnings - - message = ( - "calling yaml.%s() without Loader=... is deprecated, as the " - "default Loader is unsafe. Please read " - "https://msg.pyyaml.org/load for full details." - ) % method - - warnings.warn(message, YAMLLoadWarning, stacklevel=3) - -#------------------------------------------------------------------------------ -def scan(stream, Loader=Loader): - """ - Scan a YAML stream and produce scanning tokens. - """ - loader = Loader(stream) - try: - while loader.check_token(): - yield loader.get_token() - finally: - loader.dispose() - -def parse(stream, Loader=Loader): - """ - Parse a YAML stream and produce parsing events. - """ - loader = Loader(stream) - try: - while loader.check_event(): - yield loader.get_event() - finally: - loader.dispose() - -def compose(stream, Loader=Loader): - """ - Parse the first YAML document in a stream - and produce the corresponding representation tree. - """ - loader = Loader(stream) - try: - return loader.get_single_node() - finally: - loader.dispose() - -def compose_all(stream, Loader=Loader): - """ - Parse all YAML documents in a stream - and produce corresponding representation trees. - """ - loader = Loader(stream) - try: - while loader.check_node(): - yield loader.get_node() - finally: - loader.dispose() - -def load(stream, Loader=None): - """ - Parse the first YAML document in a stream - and produce the corresponding Python object. - """ - if Loader is None: - load_warning('load') - Loader = FullLoader - - loader = Loader(stream) - try: - return loader.get_single_data() - finally: - loader.dispose() - -def load_all(stream, Loader=None): - """ - Parse all YAML documents in a stream - and produce corresponding Python objects. - """ - if Loader is None: - load_warning('load_all') - Loader = FullLoader - - loader = Loader(stream) - try: - while loader.check_data(): - yield loader.get_data() - finally: - loader.dispose() - -def full_load(stream): - """ - Parse the first YAML document in a stream - and produce the corresponding Python object. - - Resolve all tags except those known to be - unsafe on untrusted input. - """ - return load(stream, FullLoader) - -def full_load_all(stream): - """ - Parse all YAML documents in a stream - and produce corresponding Python objects. - - Resolve all tags except those known to be - unsafe on untrusted input. - """ - return load_all(stream, FullLoader) - -def safe_load(stream): - """ - Parse the first YAML document in a stream - and produce the corresponding Python object. - - Resolve only basic YAML tags. This is known - to be safe for untrusted input. - """ - return load(stream, SafeLoader) - -def safe_load_all(stream): - """ - Parse all YAML documents in a stream - and produce corresponding Python objects. - - Resolve only basic YAML tags. This is known - to be safe for untrusted input. - """ - return load_all(stream, SafeLoader) - -def unsafe_load(stream): - """ - Parse the first YAML document in a stream - and produce the corresponding Python object. - - Resolve all tags, even those known to be - unsafe on untrusted input. - """ - return load(stream, UnsafeLoader) - -def unsafe_load_all(stream): - """ - Parse all YAML documents in a stream - and produce corresponding Python objects. - - Resolve all tags, even those known to be - unsafe on untrusted input. - """ - return load_all(stream, UnsafeLoader) - -def emit(events, stream=None, Dumper=Dumper, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None): - """ - Emit YAML parsing events into a stream. - If stream is None, return the produced string instead. - """ - getvalue = None - if stream is None: - from StringIO import StringIO - stream = StringIO() - getvalue = stream.getvalue - dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - try: - for event in events: - dumper.emit(event) - finally: - dumper.dispose() - if getvalue: - return getvalue() - -def serialize_all(nodes, stream=None, Dumper=Dumper, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding='utf-8', explicit_start=None, explicit_end=None, - version=None, tags=None): - """ - Serialize a sequence of representation trees into a YAML stream. - If stream is None, return the produced string instead. - """ - getvalue = None - if stream is None: - if encoding is None: - from StringIO import StringIO - else: - from cStringIO import StringIO - stream = StringIO() - getvalue = stream.getvalue - dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break, - encoding=encoding, version=version, tags=tags, - explicit_start=explicit_start, explicit_end=explicit_end) - try: - dumper.open() - for node in nodes: - dumper.serialize(node) - dumper.close() - finally: - dumper.dispose() - if getvalue: - return getvalue() - -def serialize(node, stream=None, Dumper=Dumper, **kwds): - """ - Serialize a representation tree into a YAML stream. - If stream is None, return the produced string instead. - """ - return serialize_all([node], stream, Dumper=Dumper, **kwds) - -def dump_all(documents, stream=None, Dumper=Dumper, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding='utf-8', explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - """ - Serialize a sequence of Python objects into a YAML stream. - If stream is None, return the produced string instead. - """ - getvalue = None - if stream is None: - if encoding is None: - from StringIO import StringIO - else: - from cStringIO import StringIO - stream = StringIO() - getvalue = stream.getvalue - dumper = Dumper(stream, default_style=default_style, - default_flow_style=default_flow_style, - canonical=canonical, indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break, - encoding=encoding, version=version, tags=tags, - explicit_start=explicit_start, explicit_end=explicit_end, sort_keys=sort_keys) - try: - dumper.open() - for data in documents: - dumper.represent(data) - dumper.close() - finally: - dumper.dispose() - if getvalue: - return getvalue() - -def dump(data, stream=None, Dumper=Dumper, **kwds): - """ - Serialize a Python object into a YAML stream. - If stream is None, return the produced string instead. - """ - return dump_all([data], stream, Dumper=Dumper, **kwds) - -def safe_dump_all(documents, stream=None, **kwds): - """ - Serialize a sequence of Python objects into a YAML stream. - Produce only basic YAML tags. - If stream is None, return the produced string instead. - """ - return dump_all(documents, stream, Dumper=SafeDumper, **kwds) - -def safe_dump(data, stream=None, **kwds): - """ - Serialize a Python object into a YAML stream. - Produce only basic YAML tags. - If stream is None, return the produced string instead. - """ - return dump_all([data], stream, Dumper=SafeDumper, **kwds) - -def add_implicit_resolver(tag, regexp, first=None, - Loader=Loader, Dumper=Dumper): - """ - Add an implicit scalar detector. - If an implicit scalar value matches the given regexp, - the corresponding tag is assigned to the scalar. - first is a sequence of possible initial characters or None. - """ - Loader.add_implicit_resolver(tag, regexp, first) - Dumper.add_implicit_resolver(tag, regexp, first) - -def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper): - """ - Add a path based resolver for the given tag. - A path is a list of keys that forms a path - to a node in the representation tree. - Keys can be string values, integers, or None. - """ - Loader.add_path_resolver(tag, path, kind) - Dumper.add_path_resolver(tag, path, kind) - -def add_constructor(tag, constructor, Loader=Loader): - """ - Add a constructor for the given tag. - Constructor is a function that accepts a Loader instance - and a node object and produces the corresponding Python object. - """ - Loader.add_constructor(tag, constructor) - -def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader): - """ - Add a multi-constructor for the given tag prefix. - Multi-constructor is called for a node if its tag starts with tag_prefix. - Multi-constructor accepts a Loader instance, a tag suffix, - and a node object and produces the corresponding Python object. - """ - Loader.add_multi_constructor(tag_prefix, multi_constructor) - -def add_representer(data_type, representer, Dumper=Dumper): - """ - Add a representer for the given type. - Representer is a function accepting a Dumper instance - and an instance of the given data type - and producing the corresponding representation node. - """ - Dumper.add_representer(data_type, representer) - -def add_multi_representer(data_type, multi_representer, Dumper=Dumper): - """ - Add a representer for the given type. - Multi-representer is a function accepting a Dumper instance - and an instance of the given data type or subtype - and producing the corresponding representation node. - """ - Dumper.add_multi_representer(data_type, multi_representer) - -class YAMLObjectMetaclass(type): - """ - The metaclass for YAMLObject. - """ - def __init__(cls, name, bases, kwds): - super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) - if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: - cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) - cls.yaml_dumper.add_representer(cls, cls.to_yaml) - -class YAMLObject(object): - """ - An object that can dump itself to a YAML stream - and load itself from a YAML stream. - """ - - __metaclass__ = YAMLObjectMetaclass - __slots__ = () # no direct instantiation, so allow immutable subclasses - - yaml_loader = Loader - yaml_dumper = Dumper - - yaml_tag = None - yaml_flow_style = None - - def from_yaml(cls, loader, node): - """ - Convert a representation node to a Python object. - """ - return loader.construct_yaml_object(node, cls) - from_yaml = classmethod(from_yaml) - - def to_yaml(cls, dumper, data): - """ - Convert a Python object to a representation node. - """ - return dumper.represent_yaml_object(cls.yaml_tag, data, cls, - flow_style=cls.yaml_flow_style) - to_yaml = classmethod(to_yaml) - diff --git a/libs/yaml2.7/composer.py b/libs/yaml2.7/composer.py deleted file mode 100644 index df85ef653..000000000 --- a/libs/yaml2.7/composer.py +++ /dev/null @@ -1,139 +0,0 @@ - -__all__ = ['Composer', 'ComposerError'] - -from error import MarkedYAMLError -from events import * -from nodes import * - -class ComposerError(MarkedYAMLError): - pass - -class Composer(object): - - def __init__(self): - self.anchors = {} - - def check_node(self): - # Drop the STREAM-START event. - if self.check_event(StreamStartEvent): - self.get_event() - - # If there are more documents available? - return not self.check_event(StreamEndEvent) - - def get_node(self): - # Get the root node of the next document. - if not self.check_event(StreamEndEvent): - return self.compose_document() - - def get_single_node(self): - # Drop the STREAM-START event. - self.get_event() - - # Compose a document if the stream is not empty. - document = None - if not self.check_event(StreamEndEvent): - document = self.compose_document() - - # Ensure that the stream contains no more documents. - if not self.check_event(StreamEndEvent): - event = self.get_event() - raise ComposerError("expected a single document in the stream", - document.start_mark, "but found another document", - event.start_mark) - - # Drop the STREAM-END event. - self.get_event() - - return document - - def compose_document(self): - # Drop the DOCUMENT-START event. - self.get_event() - - # Compose the root node. - node = self.compose_node(None, None) - - # Drop the DOCUMENT-END event. - self.get_event() - - self.anchors = {} - return node - - def compose_node(self, parent, index): - if self.check_event(AliasEvent): - event = self.get_event() - anchor = event.anchor - if anchor not in self.anchors: - raise ComposerError(None, None, "found undefined alias %r" - % anchor.encode('utf-8'), event.start_mark) - return self.anchors[anchor] - event = self.peek_event() - anchor = event.anchor - if anchor is not None: - if anchor in self.anchors: - raise ComposerError("found duplicate anchor %r; first occurrence" - % anchor.encode('utf-8'), self.anchors[anchor].start_mark, - "second occurrence", event.start_mark) - self.descend_resolver(parent, index) - if self.check_event(ScalarEvent): - node = self.compose_scalar_node(anchor) - elif self.check_event(SequenceStartEvent): - node = self.compose_sequence_node(anchor) - elif self.check_event(MappingStartEvent): - node = self.compose_mapping_node(anchor) - self.ascend_resolver() - return node - - def compose_scalar_node(self, anchor): - event = self.get_event() - tag = event.tag - if tag is None or tag == u'!': - tag = self.resolve(ScalarNode, event.value, event.implicit) - node = ScalarNode(tag, event.value, - event.start_mark, event.end_mark, style=event.style) - if anchor is not None: - self.anchors[anchor] = node - return node - - def compose_sequence_node(self, anchor): - start_event = self.get_event() - tag = start_event.tag - if tag is None or tag == u'!': - tag = self.resolve(SequenceNode, None, start_event.implicit) - node = SequenceNode(tag, [], - start_event.start_mark, None, - flow_style=start_event.flow_style) - if anchor is not None: - self.anchors[anchor] = node - index = 0 - while not self.check_event(SequenceEndEvent): - node.value.append(self.compose_node(node, index)) - index += 1 - end_event = self.get_event() - node.end_mark = end_event.end_mark - return node - - def compose_mapping_node(self, anchor): - start_event = self.get_event() - tag = start_event.tag - if tag is None or tag == u'!': - tag = self.resolve(MappingNode, None, start_event.implicit) - node = MappingNode(tag, [], - start_event.start_mark, None, - flow_style=start_event.flow_style) - if anchor is not None: - self.anchors[anchor] = node - while not self.check_event(MappingEndEvent): - #key_event = self.peek_event() - item_key = self.compose_node(node, None) - #if item_key in node.value: - # raise ComposerError("while composing a mapping", start_event.start_mark, - # "found duplicate key", key_event.start_mark) - item_value = self.compose_node(node, item_key) - #node.value[item_key] = item_value - node.value.append((item_key, item_value)) - end_event = self.get_event() - node.end_mark = end_event.end_mark - return node - diff --git a/libs/yaml2.7/constructor.py b/libs/yaml2.7/constructor.py deleted file mode 100644 index 516dad1ce..000000000 --- a/libs/yaml2.7/constructor.py +++ /dev/null @@ -1,709 +0,0 @@ - -__all__ = [ - 'BaseConstructor', - 'SafeConstructor', - 'FullConstructor', - 'UnsafeConstructor', - 'Constructor', - 'ConstructorError' -] - -from error import * -from nodes import * - -import datetime - -import binascii, re, sys, types - -class ConstructorError(MarkedYAMLError): - pass - -class BaseConstructor(object): - - yaml_constructors = {} - yaml_multi_constructors = {} - - def __init__(self): - self.constructed_objects = {} - self.recursive_objects = {} - self.state_generators = [] - self.deep_construct = False - - def check_data(self): - # If there are more documents available? - return self.check_node() - - def get_data(self): - # Construct and return the next document. - if self.check_node(): - return self.construct_document(self.get_node()) - - def get_single_data(self): - # Ensure that the stream contains a single document and construct it. - node = self.get_single_node() - if node is not None: - return self.construct_document(node) - return None - - def construct_document(self, node): - data = self.construct_object(node) - while self.state_generators: - state_generators = self.state_generators - self.state_generators = [] - for generator in state_generators: - for dummy in generator: - pass - self.constructed_objects = {} - self.recursive_objects = {} - self.deep_construct = False - return data - - def construct_object(self, node, deep=False): - if node in self.constructed_objects: - return self.constructed_objects[node] - if deep: - old_deep = self.deep_construct - self.deep_construct = True - if node in self.recursive_objects: - raise ConstructorError(None, None, - "found unconstructable recursive node", node.start_mark) - self.recursive_objects[node] = None - constructor = None - tag_suffix = None - if node.tag in self.yaml_constructors: - constructor = self.yaml_constructors[node.tag] - else: - for tag_prefix in self.yaml_multi_constructors: - if node.tag.startswith(tag_prefix): - tag_suffix = node.tag[len(tag_prefix):] - constructor = self.yaml_multi_constructors[tag_prefix] - break - else: - if None in self.yaml_multi_constructors: - tag_suffix = node.tag - constructor = self.yaml_multi_constructors[None] - elif None in self.yaml_constructors: - constructor = self.yaml_constructors[None] - elif isinstance(node, ScalarNode): - constructor = self.__class__.construct_scalar - elif isinstance(node, SequenceNode): - constructor = self.__class__.construct_sequence - elif isinstance(node, MappingNode): - constructor = self.__class__.construct_mapping - if tag_suffix is None: - data = constructor(self, node) - else: - data = constructor(self, tag_suffix, node) - if isinstance(data, types.GeneratorType): - generator = data - data = generator.next() - if self.deep_construct: - for dummy in generator: - pass - else: - self.state_generators.append(generator) - self.constructed_objects[node] = data - del self.recursive_objects[node] - if deep: - self.deep_construct = old_deep - return data - - def construct_scalar(self, node): - if not isinstance(node, ScalarNode): - raise ConstructorError(None, None, - "expected a scalar node, but found %s" % node.id, - node.start_mark) - return node.value - - def construct_sequence(self, node, deep=False): - if not isinstance(node, SequenceNode): - raise ConstructorError(None, None, - "expected a sequence node, but found %s" % node.id, - node.start_mark) - return [self.construct_object(child, deep=deep) - for child in node.value] - - def construct_mapping(self, node, deep=False): - if not isinstance(node, MappingNode): - raise ConstructorError(None, None, - "expected a mapping node, but found %s" % node.id, - node.start_mark) - mapping = {} - for key_node, value_node in node.value: - key = self.construct_object(key_node, deep=deep) - try: - hash(key) - except TypeError, exc: - raise ConstructorError("while constructing a mapping", node.start_mark, - "found unacceptable key (%s)" % exc, key_node.start_mark) - value = self.construct_object(value_node, deep=deep) - mapping[key] = value - return mapping - - def construct_pairs(self, node, deep=False): - if not isinstance(node, MappingNode): - raise ConstructorError(None, None, - "expected a mapping node, but found %s" % node.id, - node.start_mark) - pairs = [] - for key_node, value_node in node.value: - key = self.construct_object(key_node, deep=deep) - value = self.construct_object(value_node, deep=deep) - pairs.append((key, value)) - return pairs - - def add_constructor(cls, tag, constructor): - if not 'yaml_constructors' in cls.__dict__: - cls.yaml_constructors = cls.yaml_constructors.copy() - cls.yaml_constructors[tag] = constructor - add_constructor = classmethod(add_constructor) - - def add_multi_constructor(cls, tag_prefix, multi_constructor): - if not 'yaml_multi_constructors' in cls.__dict__: - cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() - cls.yaml_multi_constructors[tag_prefix] = multi_constructor - add_multi_constructor = classmethod(add_multi_constructor) - -class SafeConstructor(BaseConstructor): - - def construct_scalar(self, node): - if isinstance(node, MappingNode): - for key_node, value_node in node.value: - if key_node.tag == u'tag:yaml.org,2002:value': - return self.construct_scalar(value_node) - return BaseConstructor.construct_scalar(self, node) - - def flatten_mapping(self, node): - merge = [] - index = 0 - while index < len(node.value): - key_node, value_node = node.value[index] - if key_node.tag == u'tag:yaml.org,2002:merge': - del node.value[index] - if isinstance(value_node, MappingNode): - self.flatten_mapping(value_node) - merge.extend(value_node.value) - elif isinstance(value_node, SequenceNode): - submerge = [] - for subnode in value_node.value: - if not isinstance(subnode, MappingNode): - raise ConstructorError("while constructing a mapping", - node.start_mark, - "expected a mapping for merging, but found %s" - % subnode.id, subnode.start_mark) - self.flatten_mapping(subnode) - submerge.append(subnode.value) - submerge.reverse() - for value in submerge: - merge.extend(value) - else: - raise ConstructorError("while constructing a mapping", node.start_mark, - "expected a mapping or list of mappings for merging, but found %s" - % value_node.id, value_node.start_mark) - elif key_node.tag == u'tag:yaml.org,2002:value': - key_node.tag = u'tag:yaml.org,2002:str' - index += 1 - else: - index += 1 - if merge: - node.value = merge + node.value - - def construct_mapping(self, node, deep=False): - if isinstance(node, MappingNode): - self.flatten_mapping(node) - return BaseConstructor.construct_mapping(self, node, deep=deep) - - def construct_yaml_null(self, node): - self.construct_scalar(node) - return None - - bool_values = { - u'yes': True, - u'no': False, - u'true': True, - u'false': False, - u'on': True, - u'off': False, - } - - def construct_yaml_bool(self, node): - value = self.construct_scalar(node) - return self.bool_values[value.lower()] - - def construct_yaml_int(self, node): - value = str(self.construct_scalar(node)) - value = value.replace('_', '') - sign = +1 - if value[0] == '-': - sign = -1 - if value[0] in '+-': - value = value[1:] - if value == '0': - return 0 - elif value.startswith('0b'): - return sign*int(value[2:], 2) - elif value.startswith('0x'): - return sign*int(value[2:], 16) - elif value[0] == '0': - return sign*int(value, 8) - elif ':' in value: - digits = [int(part) for part in value.split(':')] - digits.reverse() - base = 1 - value = 0 - for digit in digits: - value += digit*base - base *= 60 - return sign*value - else: - return sign*int(value) - - inf_value = 1e300 - while inf_value != inf_value*inf_value: - inf_value *= inf_value - nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99). - - def construct_yaml_float(self, node): - value = str(self.construct_scalar(node)) - value = value.replace('_', '').lower() - sign = +1 - if value[0] == '-': - sign = -1 - if value[0] in '+-': - value = value[1:] - if value == '.inf': - return sign*self.inf_value - elif value == '.nan': - return self.nan_value - elif ':' in value: - digits = [float(part) for part in value.split(':')] - digits.reverse() - base = 1 - value = 0.0 - for digit in digits: - value += digit*base - base *= 60 - return sign*value - else: - return sign*float(value) - - def construct_yaml_binary(self, node): - value = self.construct_scalar(node) - try: - return str(value).decode('base64') - except (binascii.Error, UnicodeEncodeError), exc: - raise ConstructorError(None, None, - "failed to decode base64 data: %s" % exc, node.start_mark) - - timestamp_regexp = re.compile( - ur'''^(?P<year>[0-9][0-9][0-9][0-9]) - -(?P<month>[0-9][0-9]?) - -(?P<day>[0-9][0-9]?) - (?:(?:[Tt]|[ \t]+) - (?P<hour>[0-9][0-9]?) - :(?P<minute>[0-9][0-9]) - :(?P<second>[0-9][0-9]) - (?:\.(?P<fraction>[0-9]*))? - (?:[ \t]*(?P<tz>Z|(?P<tz_sign>[-+])(?P<tz_hour>[0-9][0-9]?) - (?::(?P<tz_minute>[0-9][0-9]))?))?)?$''', re.X) - - def construct_yaml_timestamp(self, node): - value = self.construct_scalar(node) - match = self.timestamp_regexp.match(node.value) - values = match.groupdict() - year = int(values['year']) - month = int(values['month']) - day = int(values['day']) - if not values['hour']: - return datetime.date(year, month, day) - hour = int(values['hour']) - minute = int(values['minute']) - second = int(values['second']) - fraction = 0 - if values['fraction']: - fraction = values['fraction'][:6] - while len(fraction) < 6: - fraction += '0' - fraction = int(fraction) - delta = None - if values['tz_sign']: - tz_hour = int(values['tz_hour']) - tz_minute = int(values['tz_minute'] or 0) - delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute) - if values['tz_sign'] == '-': - delta = -delta - data = datetime.datetime(year, month, day, hour, minute, second, fraction) - if delta: - data -= delta - return data - - def construct_yaml_omap(self, node): - # Note: we do not check for duplicate keys, because it's too - # CPU-expensive. - omap = [] - yield omap - if not isinstance(node, SequenceNode): - raise ConstructorError("while constructing an ordered map", node.start_mark, - "expected a sequence, but found %s" % node.id, node.start_mark) - for subnode in node.value: - if not isinstance(subnode, MappingNode): - raise ConstructorError("while constructing an ordered map", node.start_mark, - "expected a mapping of length 1, but found %s" % subnode.id, - subnode.start_mark) - if len(subnode.value) != 1: - raise ConstructorError("while constructing an ordered map", node.start_mark, - "expected a single mapping item, but found %d items" % len(subnode.value), - subnode.start_mark) - key_node, value_node = subnode.value[0] - key = self.construct_object(key_node) - value = self.construct_object(value_node) - omap.append((key, value)) - - def construct_yaml_pairs(self, node): - # Note: the same code as `construct_yaml_omap`. - pairs = [] - yield pairs - if not isinstance(node, SequenceNode): - raise ConstructorError("while constructing pairs", node.start_mark, - "expected a sequence, but found %s" % node.id, node.start_mark) - for subnode in node.value: - if not isinstance(subnode, MappingNode): - raise ConstructorError("while constructing pairs", node.start_mark, - "expected a mapping of length 1, but found %s" % subnode.id, - subnode.start_mark) - if len(subnode.value) != 1: - raise ConstructorError("while constructing pairs", node.start_mark, - "expected a single mapping item, but found %d items" % len(subnode.value), - subnode.start_mark) - key_node, value_node = subnode.value[0] - key = self.construct_object(key_node) - value = self.construct_object(value_node) - pairs.append((key, value)) - - def construct_yaml_set(self, node): - data = set() - yield data - value = self.construct_mapping(node) - data.update(value) - - def construct_yaml_str(self, node): - value = self.construct_scalar(node) - try: - return value.encode('ascii') - except UnicodeEncodeError: - return value - - def construct_yaml_seq(self, node): - data = [] - yield data - data.extend(self.construct_sequence(node)) - - def construct_yaml_map(self, node): - data = {} - yield data - value = self.construct_mapping(node) - data.update(value) - - def construct_yaml_object(self, node, cls): - data = cls.__new__(cls) - yield data - if hasattr(data, '__setstate__'): - state = self.construct_mapping(node, deep=True) - data.__setstate__(state) - else: - state = self.construct_mapping(node) - data.__dict__.update(state) - - def construct_undefined(self, node): - raise ConstructorError(None, None, - "could not determine a constructor for the tag %r" % node.tag.encode('utf-8'), - node.start_mark) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:null', - SafeConstructor.construct_yaml_null) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:bool', - SafeConstructor.construct_yaml_bool) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:int', - SafeConstructor.construct_yaml_int) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:float', - SafeConstructor.construct_yaml_float) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:binary', - SafeConstructor.construct_yaml_binary) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:timestamp', - SafeConstructor.construct_yaml_timestamp) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:omap', - SafeConstructor.construct_yaml_omap) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:pairs', - SafeConstructor.construct_yaml_pairs) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:set', - SafeConstructor.construct_yaml_set) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:str', - SafeConstructor.construct_yaml_str) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:seq', - SafeConstructor.construct_yaml_seq) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:map', - SafeConstructor.construct_yaml_map) - -SafeConstructor.add_constructor(None, - SafeConstructor.construct_undefined) - -class FullConstructor(SafeConstructor): - - def construct_python_str(self, node): - return self.construct_scalar(node).encode('utf-8') - - def construct_python_unicode(self, node): - return self.construct_scalar(node) - - def construct_python_long(self, node): - return long(self.construct_yaml_int(node)) - - def construct_python_complex(self, node): - return complex(self.construct_scalar(node)) - - def construct_python_tuple(self, node): - return tuple(self.construct_sequence(node)) - - def find_python_module(self, name, mark, unsafe=False): - if not name: - raise ConstructorError("while constructing a Python module", mark, - "expected non-empty name appended to the tag", mark) - if unsafe: - try: - __import__(name) - except ImportError, exc: - raise ConstructorError("while constructing a Python module", mark, - "cannot find module %r (%s)" % (name.encode('utf-8'), exc), mark) - if not name in sys.modules: - raise ConstructorError("while constructing a Python module", mark, - "module %r is not imported" % name.encode('utf-8'), mark) - return sys.modules[name] - - def find_python_name(self, name, mark, unsafe=False): - if not name: - raise ConstructorError("while constructing a Python object", mark, - "expected non-empty name appended to the tag", mark) - if u'.' in name: - module_name, object_name = name.rsplit('.', 1) - else: - module_name = '__builtin__' - object_name = name - if unsafe: - try: - __import__(module_name) - except ImportError, exc: - raise ConstructorError("while constructing a Python object", mark, - "cannot find module %r (%s)" % (module_name.encode('utf-8'), exc), mark) - if not module_name in sys.modules: - raise ConstructorError("while constructing a Python object", mark, - "module %r is not imported" % module_name.encode('utf-8'), mark) - module = sys.modules[module_name] - if not hasattr(module, object_name): - raise ConstructorError("while constructing a Python object", mark, - "cannot find %r in the module %r" % (object_name.encode('utf-8'), - module.__name__), mark) - return getattr(module, object_name) - - def construct_python_name(self, suffix, node): - value = self.construct_scalar(node) - if value: - raise ConstructorError("while constructing a Python name", node.start_mark, - "expected the empty value, but found %r" % value.encode('utf-8'), - node.start_mark) - return self.find_python_name(suffix, node.start_mark) - - def construct_python_module(self, suffix, node): - value = self.construct_scalar(node) - if value: - raise ConstructorError("while constructing a Python module", node.start_mark, - "expected the empty value, but found %r" % value.encode('utf-8'), - node.start_mark) - return self.find_python_module(suffix, node.start_mark) - - class classobj: pass - - def make_python_instance(self, suffix, node, - args=None, kwds=None, newobj=False, unsafe=False): - if not args: - args = [] - if not kwds: - kwds = {} - cls = self.find_python_name(suffix, node.start_mark) - if not (unsafe or isinstance(cls, type) or isinstance(cls, type(self.classobj))): - raise ConstructorError("while constructing a Python instance", node.start_mark, - "expected a class, but found %r" % type(cls), - node.start_mark) - if newobj and isinstance(cls, type(self.classobj)) \ - and not args and not kwds: - instance = self.classobj() - instance.__class__ = cls - return instance - elif newobj and isinstance(cls, type): - return cls.__new__(cls, *args, **kwds) - else: - return cls(*args, **kwds) - - def set_python_instance_state(self, instance, state): - if hasattr(instance, '__setstate__'): - instance.__setstate__(state) - else: - slotstate = {} - if isinstance(state, tuple) and len(state) == 2: - state, slotstate = state - if hasattr(instance, '__dict__'): - instance.__dict__.update(state) - elif state: - slotstate.update(state) - for key, value in slotstate.items(): - setattr(object, key, value) - - def construct_python_object(self, suffix, node): - # Format: - # !!python/object:module.name { ... state ... } - instance = self.make_python_instance(suffix, node, newobj=True) - yield instance - deep = hasattr(instance, '__setstate__') - state = self.construct_mapping(node, deep=deep) - self.set_python_instance_state(instance, state) - - def construct_python_object_apply(self, suffix, node, newobj=False): - # Format: - # !!python/object/apply # (or !!python/object/new) - # args: [ ... arguments ... ] - # kwds: { ... keywords ... } - # state: ... state ... - # listitems: [ ... listitems ... ] - # dictitems: { ... dictitems ... } - # or short format: - # !!python/object/apply [ ... arguments ... ] - # The difference between !!python/object/apply and !!python/object/new - # is how an object is created, check make_python_instance for details. - if isinstance(node, SequenceNode): - args = self.construct_sequence(node, deep=True) - kwds = {} - state = {} - listitems = [] - dictitems = {} - else: - value = self.construct_mapping(node, deep=True) - args = value.get('args', []) - kwds = value.get('kwds', {}) - state = value.get('state', {}) - listitems = value.get('listitems', []) - dictitems = value.get('dictitems', {}) - instance = self.make_python_instance(suffix, node, args, kwds, newobj) - if state: - self.set_python_instance_state(instance, state) - if listitems: - instance.extend(listitems) - if dictitems: - for key in dictitems: - instance[key] = dictitems[key] - return instance - - def construct_python_object_new(self, suffix, node): - return self.construct_python_object_apply(suffix, node, newobj=True) - -FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/none', - FullConstructor.construct_yaml_null) - -FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/bool', - FullConstructor.construct_yaml_bool) - -FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/str', - FullConstructor.construct_python_str) - -FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/unicode', - FullConstructor.construct_python_unicode) - -FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/int', - FullConstructor.construct_yaml_int) - -FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/long', - FullConstructor.construct_python_long) - -FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/float', - FullConstructor.construct_yaml_float) - -FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/complex', - FullConstructor.construct_python_complex) - -FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/list', - FullConstructor.construct_yaml_seq) - -FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/tuple', - FullConstructor.construct_python_tuple) - -FullConstructor.add_constructor( - u'tag:yaml.org,2002:python/dict', - FullConstructor.construct_yaml_map) - -FullConstructor.add_multi_constructor( - u'tag:yaml.org,2002:python/name:', - FullConstructor.construct_python_name) - -FullConstructor.add_multi_constructor( - u'tag:yaml.org,2002:python/module:', - FullConstructor.construct_python_module) - -FullConstructor.add_multi_constructor( - u'tag:yaml.org,2002:python/object:', - FullConstructor.construct_python_object) - -FullConstructor.add_multi_constructor( - u'tag:yaml.org,2002:python/object/apply:', - FullConstructor.construct_python_object_apply) - -FullConstructor.add_multi_constructor( - u'tag:yaml.org,2002:python/object/new:', - FullConstructor.construct_python_object_new) - -class UnsafeConstructor(FullConstructor): - - def find_python_module(self, name, mark): - return super(UnsafeConstructor, self).find_python_module(name, mark, unsafe=True) - - def find_python_name(self, name, mark): - return super(UnsafeConstructor, self).find_python_name(name, mark, unsafe=True) - - def make_python_instance(self, suffix, node, args=None, kwds=None, newobj=False): - return super(UnsafeConstructor, self).make_python_instance( - suffix, node, args, kwds, newobj, unsafe=True) - -# Constructor is same as UnsafeConstructor. Need to leave this in place in case -# people have extended it directly. -class Constructor(UnsafeConstructor): - pass diff --git a/libs/yaml2.7/cyaml.py b/libs/yaml2.7/cyaml.py deleted file mode 100644 index ebb895935..000000000 --- a/libs/yaml2.7/cyaml.py +++ /dev/null @@ -1,101 +0,0 @@ - -__all__ = [ - 'CBaseLoader', 'CSafeLoader', 'CFullLoader', 'CUnsafeLoader', 'CLoader', - 'CBaseDumper', 'CSafeDumper', 'CDumper' -] - -from _yaml import CParser, CEmitter - -from constructor import * - -from serializer import * -from representer import * - -from resolver import * - -class CBaseLoader(CParser, BaseConstructor, BaseResolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - BaseConstructor.__init__(self) - BaseResolver.__init__(self) - -class CSafeLoader(CParser, SafeConstructor, Resolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - SafeConstructor.__init__(self) - Resolver.__init__(self) - -class CFullLoader(CParser, FullConstructor, Resolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - FullConstructor.__init__(self) - Resolver.__init__(self) - -class CUnsafeLoader(CParser, UnsafeConstructor, Resolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - UnsafeConstructor.__init__(self) - Resolver.__init__(self) - -class CLoader(CParser, Constructor, Resolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - Constructor.__init__(self) - Resolver.__init__(self) - -class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - CEmitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, encoding=encoding, - allow_unicode=allow_unicode, line_break=line_break, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - -class CSafeDumper(CEmitter, SafeRepresenter, Resolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - CEmitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, encoding=encoding, - allow_unicode=allow_unicode, line_break=line_break, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - SafeRepresenter.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - -class CDumper(CEmitter, Serializer, Representer, Resolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - CEmitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, encoding=encoding, - allow_unicode=allow_unicode, line_break=line_break, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - diff --git a/libs/yaml2.7/dumper.py b/libs/yaml2.7/dumper.py deleted file mode 100644 index f9cd49fda..000000000 --- a/libs/yaml2.7/dumper.py +++ /dev/null @@ -1,62 +0,0 @@ - -__all__ = ['BaseDumper', 'SafeDumper', 'Dumper'] - -from emitter import * -from serializer import * -from representer import * -from resolver import * - -class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - Emitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - Serializer.__init__(self, encoding=encoding, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - -class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - Emitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - Serializer.__init__(self, encoding=encoding, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - SafeRepresenter.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - -class Dumper(Emitter, Serializer, Representer, Resolver): - - def __init__(self, stream, - default_style=None, default_flow_style=False, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None, sort_keys=True): - Emitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - Serializer.__init__(self, encoding=encoding, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style, sort_keys=sort_keys) - Resolver.__init__(self) - diff --git a/libs/yaml2.7/emitter.py b/libs/yaml2.7/emitter.py deleted file mode 100644 index 9561a8274..000000000 --- a/libs/yaml2.7/emitter.py +++ /dev/null @@ -1,1144 +0,0 @@ - -# Emitter expects events obeying the following grammar: -# stream ::= STREAM-START document* STREAM-END -# document ::= DOCUMENT-START node DOCUMENT-END -# node ::= SCALAR | sequence | mapping -# sequence ::= SEQUENCE-START node* SEQUENCE-END -# mapping ::= MAPPING-START (node node)* MAPPING-END - -__all__ = ['Emitter', 'EmitterError'] - -import sys - -from error import YAMLError -from events import * - -has_ucs4 = sys.maxunicode > 0xffff - -class EmitterError(YAMLError): - pass - -class ScalarAnalysis(object): - def __init__(self, scalar, empty, multiline, - allow_flow_plain, allow_block_plain, - allow_single_quoted, allow_double_quoted, - allow_block): - self.scalar = scalar - self.empty = empty - self.multiline = multiline - self.allow_flow_plain = allow_flow_plain - self.allow_block_plain = allow_block_plain - self.allow_single_quoted = allow_single_quoted - self.allow_double_quoted = allow_double_quoted - self.allow_block = allow_block - -class Emitter(object): - - DEFAULT_TAG_PREFIXES = { - u'!' : u'!', - u'tag:yaml.org,2002:' : u'!!', - } - - def __init__(self, stream, canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None): - - # The stream should have the methods `write` and possibly `flush`. - self.stream = stream - - # Encoding can be overridden by STREAM-START. - self.encoding = None - - # Emitter is a state machine with a stack of states to handle nested - # structures. - self.states = [] - self.state = self.expect_stream_start - - # Current event and the event queue. - self.events = [] - self.event = None - - # The current indentation level and the stack of previous indents. - self.indents = [] - self.indent = None - - # Flow level. - self.flow_level = 0 - - # Contexts. - self.root_context = False - self.sequence_context = False - self.mapping_context = False - self.simple_key_context = False - - # Characteristics of the last emitted character: - # - current position. - # - is it a whitespace? - # - is it an indention character - # (indentation space, '-', '?', or ':')? - self.line = 0 - self.column = 0 - self.whitespace = True - self.indention = True - - # Whether the document requires an explicit document indicator - self.open_ended = False - - # Formatting details. - self.canonical = canonical - self.allow_unicode = allow_unicode - self.best_indent = 2 - if indent and 1 < indent < 10: - self.best_indent = indent - self.best_width = 80 - if width and width > self.best_indent*2: - self.best_width = width - self.best_line_break = u'\n' - if line_break in [u'\r', u'\n', u'\r\n']: - self.best_line_break = line_break - - # Tag prefixes. - self.tag_prefixes = None - - # Prepared anchor and tag. - self.prepared_anchor = None - self.prepared_tag = None - - # Scalar analysis and style. - self.analysis = None - self.style = None - - def dispose(self): - # Reset the state attributes (to clear self-references) - self.states = [] - self.state = None - - def emit(self, event): - self.events.append(event) - while not self.need_more_events(): - self.event = self.events.pop(0) - self.state() - self.event = None - - # In some cases, we wait for a few next events before emitting. - - def need_more_events(self): - if not self.events: - return True - event = self.events[0] - if isinstance(event, DocumentStartEvent): - return self.need_events(1) - elif isinstance(event, SequenceStartEvent): - return self.need_events(2) - elif isinstance(event, MappingStartEvent): - return self.need_events(3) - else: - return False - - def need_events(self, count): - level = 0 - for event in self.events[1:]: - if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): - level += 1 - elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): - level -= 1 - elif isinstance(event, StreamEndEvent): - level = -1 - if level < 0: - return False - return (len(self.events) < count+1) - - def increase_indent(self, flow=False, indentless=False): - self.indents.append(self.indent) - if self.indent is None: - if flow: - self.indent = self.best_indent - else: - self.indent = 0 - elif not indentless: - self.indent += self.best_indent - - # States. - - # Stream handlers. - - def expect_stream_start(self): - if isinstance(self.event, StreamStartEvent): - if self.event.encoding and not getattr(self.stream, 'encoding', None): - self.encoding = self.event.encoding - self.write_stream_start() - self.state = self.expect_first_document_start - else: - raise EmitterError("expected StreamStartEvent, but got %s" - % self.event) - - def expect_nothing(self): - raise EmitterError("expected nothing, but got %s" % self.event) - - # Document handlers. - - def expect_first_document_start(self): - return self.expect_document_start(first=True) - - def expect_document_start(self, first=False): - if isinstance(self.event, DocumentStartEvent): - if (self.event.version or self.event.tags) and self.open_ended: - self.write_indicator(u'...', True) - self.write_indent() - if self.event.version: - version_text = self.prepare_version(self.event.version) - self.write_version_directive(version_text) - self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() - if self.event.tags: - handles = self.event.tags.keys() - handles.sort() - for handle in handles: - prefix = self.event.tags[handle] - self.tag_prefixes[prefix] = handle - handle_text = self.prepare_tag_handle(handle) - prefix_text = self.prepare_tag_prefix(prefix) - self.write_tag_directive(handle_text, prefix_text) - implicit = (first and not self.event.explicit and not self.canonical - and not self.event.version and not self.event.tags - and not self.check_empty_document()) - if not implicit: - self.write_indent() - self.write_indicator(u'---', True) - if self.canonical: - self.write_indent() - self.state = self.expect_document_root - elif isinstance(self.event, StreamEndEvent): - if self.open_ended: - self.write_indicator(u'...', True) - self.write_indent() - self.write_stream_end() - self.state = self.expect_nothing - else: - raise EmitterError("expected DocumentStartEvent, but got %s" - % self.event) - - def expect_document_end(self): - if isinstance(self.event, DocumentEndEvent): - self.write_indent() - if self.event.explicit: - self.write_indicator(u'...', True) - self.write_indent() - self.flush_stream() - self.state = self.expect_document_start - else: - raise EmitterError("expected DocumentEndEvent, but got %s" - % self.event) - - def expect_document_root(self): - self.states.append(self.expect_document_end) - self.expect_node(root=True) - - # Node handlers. - - def expect_node(self, root=False, sequence=False, mapping=False, - simple_key=False): - self.root_context = root - self.sequence_context = sequence - self.mapping_context = mapping - self.simple_key_context = simple_key - if isinstance(self.event, AliasEvent): - self.expect_alias() - elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): - self.process_anchor(u'&') - self.process_tag() - if isinstance(self.event, ScalarEvent): - self.expect_scalar() - elif isinstance(self.event, SequenceStartEvent): - if self.flow_level or self.canonical or self.event.flow_style \ - or self.check_empty_sequence(): - self.expect_flow_sequence() - else: - self.expect_block_sequence() - elif isinstance(self.event, MappingStartEvent): - if self.flow_level or self.canonical or self.event.flow_style \ - or self.check_empty_mapping(): - self.expect_flow_mapping() - else: - self.expect_block_mapping() - else: - raise EmitterError("expected NodeEvent, but got %s" % self.event) - - def expect_alias(self): - if self.event.anchor is None: - raise EmitterError("anchor is not specified for alias") - self.process_anchor(u'*') - self.state = self.states.pop() - - def expect_scalar(self): - self.increase_indent(flow=True) - self.process_scalar() - self.indent = self.indents.pop() - self.state = self.states.pop() - - # Flow sequence handlers. - - def expect_flow_sequence(self): - self.write_indicator(u'[', True, whitespace=True) - self.flow_level += 1 - self.increase_indent(flow=True) - self.state = self.expect_first_flow_sequence_item - - def expect_first_flow_sequence_item(self): - if isinstance(self.event, SequenceEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - self.write_indicator(u']', False) - self.state = self.states.pop() - else: - if self.canonical or self.column > self.best_width: - self.write_indent() - self.states.append(self.expect_flow_sequence_item) - self.expect_node(sequence=True) - - def expect_flow_sequence_item(self): - if isinstance(self.event, SequenceEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - if self.canonical: - self.write_indicator(u',', False) - self.write_indent() - self.write_indicator(u']', False) - self.state = self.states.pop() - else: - self.write_indicator(u',', False) - if self.canonical or self.column > self.best_width: - self.write_indent() - self.states.append(self.expect_flow_sequence_item) - self.expect_node(sequence=True) - - # Flow mapping handlers. - - def expect_flow_mapping(self): - self.write_indicator(u'{', True, whitespace=True) - self.flow_level += 1 - self.increase_indent(flow=True) - self.state = self.expect_first_flow_mapping_key - - def expect_first_flow_mapping_key(self): - if isinstance(self.event, MappingEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - self.write_indicator(u'}', False) - self.state = self.states.pop() - else: - if self.canonical or self.column > self.best_width: - self.write_indent() - if not self.canonical and self.check_simple_key(): - self.states.append(self.expect_flow_mapping_simple_value) - self.expect_node(mapping=True, simple_key=True) - else: - self.write_indicator(u'?', True) - self.states.append(self.expect_flow_mapping_value) - self.expect_node(mapping=True) - - def expect_flow_mapping_key(self): - if isinstance(self.event, MappingEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - if self.canonical: - self.write_indicator(u',', False) - self.write_indent() - self.write_indicator(u'}', False) - self.state = self.states.pop() - else: - self.write_indicator(u',', False) - if self.canonical or self.column > self.best_width: - self.write_indent() - if not self.canonical and self.check_simple_key(): - self.states.append(self.expect_flow_mapping_simple_value) - self.expect_node(mapping=True, simple_key=True) - else: - self.write_indicator(u'?', True) - self.states.append(self.expect_flow_mapping_value) - self.expect_node(mapping=True) - - def expect_flow_mapping_simple_value(self): - self.write_indicator(u':', False) - self.states.append(self.expect_flow_mapping_key) - self.expect_node(mapping=True) - - def expect_flow_mapping_value(self): - if self.canonical or self.column > self.best_width: - self.write_indent() - self.write_indicator(u':', True) - self.states.append(self.expect_flow_mapping_key) - self.expect_node(mapping=True) - - # Block sequence handlers. - - def expect_block_sequence(self): - indentless = (self.mapping_context and not self.indention) - self.increase_indent(flow=False, indentless=indentless) - self.state = self.expect_first_block_sequence_item - - def expect_first_block_sequence_item(self): - return self.expect_block_sequence_item(first=True) - - def expect_block_sequence_item(self, first=False): - if not first and isinstance(self.event, SequenceEndEvent): - self.indent = self.indents.pop() - self.state = self.states.pop() - else: - self.write_indent() - self.write_indicator(u'-', True, indention=True) - self.states.append(self.expect_block_sequence_item) - self.expect_node(sequence=True) - - # Block mapping handlers. - - def expect_block_mapping(self): - self.increase_indent(flow=False) - self.state = self.expect_first_block_mapping_key - - def expect_first_block_mapping_key(self): - return self.expect_block_mapping_key(first=True) - - def expect_block_mapping_key(self, first=False): - if not first and isinstance(self.event, MappingEndEvent): - self.indent = self.indents.pop() - self.state = self.states.pop() - else: - self.write_indent() - if self.check_simple_key(): - self.states.append(self.expect_block_mapping_simple_value) - self.expect_node(mapping=True, simple_key=True) - else: - self.write_indicator(u'?', True, indention=True) - self.states.append(self.expect_block_mapping_value) - self.expect_node(mapping=True) - - def expect_block_mapping_simple_value(self): - self.write_indicator(u':', False) - self.states.append(self.expect_block_mapping_key) - self.expect_node(mapping=True) - - def expect_block_mapping_value(self): - self.write_indent() - self.write_indicator(u':', True, indention=True) - self.states.append(self.expect_block_mapping_key) - self.expect_node(mapping=True) - - # Checkers. - - def check_empty_sequence(self): - return (isinstance(self.event, SequenceStartEvent) and self.events - and isinstance(self.events[0], SequenceEndEvent)) - - def check_empty_mapping(self): - return (isinstance(self.event, MappingStartEvent) and self.events - and isinstance(self.events[0], MappingEndEvent)) - - def check_empty_document(self): - if not isinstance(self.event, DocumentStartEvent) or not self.events: - return False - event = self.events[0] - return (isinstance(event, ScalarEvent) and event.anchor is None - and event.tag is None and event.implicit and event.value == u'') - - def check_simple_key(self): - length = 0 - if isinstance(self.event, NodeEvent) and self.event.anchor is not None: - if self.prepared_anchor is None: - self.prepared_anchor = self.prepare_anchor(self.event.anchor) - length += len(self.prepared_anchor) - if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \ - and self.event.tag is not None: - if self.prepared_tag is None: - self.prepared_tag = self.prepare_tag(self.event.tag) - length += len(self.prepared_tag) - if isinstance(self.event, ScalarEvent): - if self.analysis is None: - self.analysis = self.analyze_scalar(self.event.value) - length += len(self.analysis.scalar) - return (length < 128 and (isinstance(self.event, AliasEvent) - or (isinstance(self.event, ScalarEvent) - and not self.analysis.empty and not self.analysis.multiline) - or self.check_empty_sequence() or self.check_empty_mapping())) - - # Anchor, Tag, and Scalar processors. - - def process_anchor(self, indicator): - if self.event.anchor is None: - self.prepared_anchor = None - return - if self.prepared_anchor is None: - self.prepared_anchor = self.prepare_anchor(self.event.anchor) - if self.prepared_anchor: - self.write_indicator(indicator+self.prepared_anchor, True) - self.prepared_anchor = None - - def process_tag(self): - tag = self.event.tag - if isinstance(self.event, ScalarEvent): - if self.style is None: - self.style = self.choose_scalar_style() - if ((not self.canonical or tag is None) and - ((self.style == '' and self.event.implicit[0]) - or (self.style != '' and self.event.implicit[1]))): - self.prepared_tag = None - return - if self.event.implicit[0] and tag is None: - tag = u'!' - self.prepared_tag = None - else: - if (not self.canonical or tag is None) and self.event.implicit: - self.prepared_tag = None - return - if tag is None: - raise EmitterError("tag is not specified") - if self.prepared_tag is None: - self.prepared_tag = self.prepare_tag(tag) - if self.prepared_tag: - self.write_indicator(self.prepared_tag, True) - self.prepared_tag = None - - def choose_scalar_style(self): - if self.analysis is None: - self.analysis = self.analyze_scalar(self.event.value) - if self.event.style == '"' or self.canonical: - return '"' - if not self.event.style and self.event.implicit[0]: - if (not (self.simple_key_context and - (self.analysis.empty or self.analysis.multiline)) - and (self.flow_level and self.analysis.allow_flow_plain - or (not self.flow_level and self.analysis.allow_block_plain))): - return '' - if self.event.style and self.event.style in '|>': - if (not self.flow_level and not self.simple_key_context - and self.analysis.allow_block): - return self.event.style - if not self.event.style or self.event.style == '\'': - if (self.analysis.allow_single_quoted and - not (self.simple_key_context and self.analysis.multiline)): - return '\'' - return '"' - - def process_scalar(self): - if self.analysis is None: - self.analysis = self.analyze_scalar(self.event.value) - if self.style is None: - self.style = self.choose_scalar_style() - split = (not self.simple_key_context) - #if self.analysis.multiline and split \ - # and (not self.style or self.style in '\'\"'): - # self.write_indent() - if self.style == '"': - self.write_double_quoted(self.analysis.scalar, split) - elif self.style == '\'': - self.write_single_quoted(self.analysis.scalar, split) - elif self.style == '>': - self.write_folded(self.analysis.scalar) - elif self.style == '|': - self.write_literal(self.analysis.scalar) - else: - self.write_plain(self.analysis.scalar, split) - self.analysis = None - self.style = None - - # Analyzers. - - def prepare_version(self, version): - major, minor = version - if major != 1: - raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) - return u'%d.%d' % (major, minor) - - def prepare_tag_handle(self, handle): - if not handle: - raise EmitterError("tag handle must not be empty") - if handle[0] != u'!' or handle[-1] != u'!': - raise EmitterError("tag handle must start and end with '!': %r" - % (handle.encode('utf-8'))) - for ch in handle[1:-1]: - if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_'): - raise EmitterError("invalid character %r in the tag handle: %r" - % (ch.encode('utf-8'), handle.encode('utf-8'))) - return handle - - def prepare_tag_prefix(self, prefix): - if not prefix: - raise EmitterError("tag prefix must not be empty") - chunks = [] - start = end = 0 - if prefix[0] == u'!': - end = 1 - while end < len(prefix): - ch = prefix[end] - if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-;/?!:@&=+$,_.~*\'()[]': - end += 1 - else: - if start < end: - chunks.append(prefix[start:end]) - start = end = end+1 - data = ch.encode('utf-8') - for ch in data: - chunks.append(u'%%%02X' % ord(ch)) - if start < end: - chunks.append(prefix[start:end]) - return u''.join(chunks) - - def prepare_tag(self, tag): - if not tag: - raise EmitterError("tag must not be empty") - if tag == u'!': - return tag - handle = None - suffix = tag - prefixes = self.tag_prefixes.keys() - prefixes.sort() - for prefix in prefixes: - if tag.startswith(prefix) \ - and (prefix == u'!' or len(prefix) < len(tag)): - handle = self.tag_prefixes[prefix] - suffix = tag[len(prefix):] - chunks = [] - start = end = 0 - while end < len(suffix): - ch = suffix[end] - if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-;/?:@&=+$,_.~*\'()[]' \ - or (ch == u'!' and handle != u'!'): - end += 1 - else: - if start < end: - chunks.append(suffix[start:end]) - start = end = end+1 - data = ch.encode('utf-8') - for ch in data: - chunks.append(u'%%%02X' % ord(ch)) - if start < end: - chunks.append(suffix[start:end]) - suffix_text = u''.join(chunks) - if handle: - return u'%s%s' % (handle, suffix_text) - else: - return u'!<%s>' % suffix_text - - def prepare_anchor(self, anchor): - if not anchor: - raise EmitterError("anchor must not be empty") - for ch in anchor: - if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_'): - raise EmitterError("invalid character %r in the anchor: %r" - % (ch.encode('utf-8'), anchor.encode('utf-8'))) - return anchor - - def analyze_scalar(self, scalar): - - # Empty scalar is a special case. - if not scalar: - return ScalarAnalysis(scalar=scalar, empty=True, multiline=False, - allow_flow_plain=False, allow_block_plain=True, - allow_single_quoted=True, allow_double_quoted=True, - allow_block=False) - - # Indicators and special characters. - block_indicators = False - flow_indicators = False - line_breaks = False - special_characters = False - - # Important whitespace combinations. - leading_space = False - leading_break = False - trailing_space = False - trailing_break = False - break_space = False - space_break = False - - # Check document indicators. - if scalar.startswith(u'---') or scalar.startswith(u'...'): - block_indicators = True - flow_indicators = True - - # First character or preceded by a whitespace. - preceded_by_whitespace = True - - # Last character or followed by a whitespace. - followed_by_whitespace = (len(scalar) == 1 or - scalar[1] in u'\0 \t\r\n\x85\u2028\u2029') - - # The previous character is a space. - previous_space = False - - # The previous character is a break. - previous_break = False - - index = 0 - while index < len(scalar): - ch = scalar[index] - - # Check for indicators. - if index == 0: - # Leading indicators are special characters. - if ch in u'#,[]{}&*!|>\'\"%@`': - flow_indicators = True - block_indicators = True - if ch in u'?:': - flow_indicators = True - if followed_by_whitespace: - block_indicators = True - if ch == u'-' and followed_by_whitespace: - flow_indicators = True - block_indicators = True - else: - # Some indicators cannot appear within a scalar as well. - if ch in u',?[]{}': - flow_indicators = True - if ch == u':': - flow_indicators = True - if followed_by_whitespace: - block_indicators = True - if ch == u'#' and preceded_by_whitespace: - flow_indicators = True - block_indicators = True - - # Check for line breaks, special, and unicode characters. - if ch in u'\n\x85\u2028\u2029': - line_breaks = True - if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'): - if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF' - or u'\uE000' <= ch <= u'\uFFFD' - or ((not has_ucs4) or (u'\U00010000' <= ch < u'\U0010ffff'))) and ch != u'\uFEFF': - unicode_characters = True - if not self.allow_unicode: - special_characters = True - else: - special_characters = True - - # Detect important whitespace combinations. - if ch == u' ': - if index == 0: - leading_space = True - if index == len(scalar)-1: - trailing_space = True - if previous_break: - break_space = True - previous_space = True - previous_break = False - elif ch in u'\n\x85\u2028\u2029': - if index == 0: - leading_break = True - if index == len(scalar)-1: - trailing_break = True - if previous_space: - space_break = True - previous_space = False - previous_break = True - else: - previous_space = False - previous_break = False - - # Prepare for the next character. - index += 1 - preceded_by_whitespace = (ch in u'\0 \t\r\n\x85\u2028\u2029') - followed_by_whitespace = (index+1 >= len(scalar) or - scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029') - - # Let's decide what styles are allowed. - allow_flow_plain = True - allow_block_plain = True - allow_single_quoted = True - allow_double_quoted = True - allow_block = True - - # Leading and trailing whitespaces are bad for plain scalars. - if (leading_space or leading_break - or trailing_space or trailing_break): - allow_flow_plain = allow_block_plain = False - - # We do not permit trailing spaces for block scalars. - if trailing_space: - allow_block = False - - # Spaces at the beginning of a new line are only acceptable for block - # scalars. - if break_space: - allow_flow_plain = allow_block_plain = allow_single_quoted = False - - # Spaces followed by breaks, as well as special character are only - # allowed for double quoted scalars. - if space_break or special_characters: - allow_flow_plain = allow_block_plain = \ - allow_single_quoted = allow_block = False - - # Although the plain scalar writer supports breaks, we never emit - # multiline plain scalars. - if line_breaks: - allow_flow_plain = allow_block_plain = False - - # Flow indicators are forbidden for flow plain scalars. - if flow_indicators: - allow_flow_plain = False - - # Block indicators are forbidden for block plain scalars. - if block_indicators: - allow_block_plain = False - - return ScalarAnalysis(scalar=scalar, - empty=False, multiline=line_breaks, - allow_flow_plain=allow_flow_plain, - allow_block_plain=allow_block_plain, - allow_single_quoted=allow_single_quoted, - allow_double_quoted=allow_double_quoted, - allow_block=allow_block) - - # Writers. - - def flush_stream(self): - if hasattr(self.stream, 'flush'): - self.stream.flush() - - def write_stream_start(self): - # Write BOM if needed. - if self.encoding and self.encoding.startswith('utf-16'): - self.stream.write(u'\uFEFF'.encode(self.encoding)) - - def write_stream_end(self): - self.flush_stream() - - def write_indicator(self, indicator, need_whitespace, - whitespace=False, indention=False): - if self.whitespace or not need_whitespace: - data = indicator - else: - data = u' '+indicator - self.whitespace = whitespace - self.indention = self.indention and indention - self.column += len(data) - self.open_ended = False - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - - def write_indent(self): - indent = self.indent or 0 - if not self.indention or self.column > indent \ - or (self.column == indent and not self.whitespace): - self.write_line_break() - if self.column < indent: - self.whitespace = True - data = u' '*(indent-self.column) - self.column = indent - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - - def write_line_break(self, data=None): - if data is None: - data = self.best_line_break - self.whitespace = True - self.indention = True - self.line += 1 - self.column = 0 - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - - def write_version_directive(self, version_text): - data = u'%%YAML %s' % version_text - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.write_line_break() - - def write_tag_directive(self, handle_text, prefix_text): - data = u'%%TAG %s %s' % (handle_text, prefix_text) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.write_line_break() - - # Scalar streams. - - def write_single_quoted(self, text, split=True): - self.write_indicator(u'\'', True) - spaces = False - breaks = False - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if spaces: - if ch is None or ch != u' ': - if start+1 == end and self.column > self.best_width and split \ - and start != 0 and end != len(text): - self.write_indent() - else: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - elif breaks: - if ch is None or ch not in u'\n\x85\u2028\u2029': - if text[start] == u'\n': - self.write_line_break() - for br in text[start:end]: - if br == u'\n': - self.write_line_break() - else: - self.write_line_break(br) - self.write_indent() - start = end - else: - if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'': - if start < end: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - if ch == u'\'': - data = u'\'\'' - self.column += 2 - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end + 1 - if ch is not None: - spaces = (ch == u' ') - breaks = (ch in u'\n\x85\u2028\u2029') - end += 1 - self.write_indicator(u'\'', False) - - ESCAPE_REPLACEMENTS = { - u'\0': u'0', - u'\x07': u'a', - u'\x08': u'b', - u'\x09': u't', - u'\x0A': u'n', - u'\x0B': u'v', - u'\x0C': u'f', - u'\x0D': u'r', - u'\x1B': u'e', - u'\"': u'\"', - u'\\': u'\\', - u'\x85': u'N', - u'\xA0': u'_', - u'\u2028': u'L', - u'\u2029': u'P', - } - - def write_double_quoted(self, text, split=True): - self.write_indicator(u'"', True) - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \ - or not (u'\x20' <= ch <= u'\x7E' - or (self.allow_unicode - and (u'\xA0' <= ch <= u'\uD7FF' - or u'\uE000' <= ch <= u'\uFFFD'))): - if start < end: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - if ch is not None: - if ch in self.ESCAPE_REPLACEMENTS: - data = u'\\'+self.ESCAPE_REPLACEMENTS[ch] - elif ch <= u'\xFF': - data = u'\\x%02X' % ord(ch) - elif ch <= u'\uFFFF': - data = u'\\u%04X' % ord(ch) - else: - data = u'\\U%08X' % ord(ch) - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end+1 - if 0 < end < len(text)-1 and (ch == u' ' or start >= end) \ - and self.column+(end-start) > self.best_width and split: - data = text[start:end]+u'\\' - if start < end: - start = end - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.write_indent() - self.whitespace = False - self.indention = False - if text[start] == u' ': - data = u'\\' - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - end += 1 - self.write_indicator(u'"', False) - - def determine_block_hints(self, text): - hints = u'' - if text: - if text[0] in u' \n\x85\u2028\u2029': - hints += unicode(self.best_indent) - if text[-1] not in u'\n\x85\u2028\u2029': - hints += u'-' - elif len(text) == 1 or text[-2] in u'\n\x85\u2028\u2029': - hints += u'+' - return hints - - def write_folded(self, text): - hints = self.determine_block_hints(text) - self.write_indicator(u'>'+hints, True) - if hints[-1:] == u'+': - self.open_ended = True - self.write_line_break() - leading_space = True - spaces = False - breaks = True - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if breaks: - if ch is None or ch not in u'\n\x85\u2028\u2029': - if not leading_space and ch is not None and ch != u' ' \ - and text[start] == u'\n': - self.write_line_break() - leading_space = (ch == u' ') - for br in text[start:end]: - if br == u'\n': - self.write_line_break() - else: - self.write_line_break(br) - if ch is not None: - self.write_indent() - start = end - elif spaces: - if ch != u' ': - if start+1 == end and self.column > self.best_width: - self.write_indent() - else: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - else: - if ch is None or ch in u' \n\x85\u2028\u2029': - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - if ch is None: - self.write_line_break() - start = end - if ch is not None: - breaks = (ch in u'\n\x85\u2028\u2029') - spaces = (ch == u' ') - end += 1 - - def write_literal(self, text): - hints = self.determine_block_hints(text) - self.write_indicator(u'|'+hints, True) - if hints[-1:] == u'+': - self.open_ended = True - self.write_line_break() - breaks = True - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if breaks: - if ch is None or ch not in u'\n\x85\u2028\u2029': - for br in text[start:end]: - if br == u'\n': - self.write_line_break() - else: - self.write_line_break(br) - if ch is not None: - self.write_indent() - start = end - else: - if ch is None or ch in u'\n\x85\u2028\u2029': - data = text[start:end] - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - if ch is None: - self.write_line_break() - start = end - if ch is not None: - breaks = (ch in u'\n\x85\u2028\u2029') - end += 1 - - def write_plain(self, text, split=True): - if self.root_context: - self.open_ended = True - if not text: - return - if not self.whitespace: - data = u' ' - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.whitespace = False - self.indention = False - spaces = False - breaks = False - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if spaces: - if ch != u' ': - if start+1 == end and self.column > self.best_width and split: - self.write_indent() - self.whitespace = False - self.indention = False - else: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - elif breaks: - if ch not in u'\n\x85\u2028\u2029': - if text[start] == u'\n': - self.write_line_break() - for br in text[start:end]: - if br == u'\n': - self.write_line_break() - else: - self.write_line_break(br) - self.write_indent() - self.whitespace = False - self.indention = False - start = end - else: - if ch is None or ch in u' \n\x85\u2028\u2029': - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - if ch is not None: - spaces = (ch == u' ') - breaks = (ch in u'\n\x85\u2028\u2029') - end += 1 diff --git a/libs/yaml2.7/error.py b/libs/yaml2.7/error.py deleted file mode 100644 index 577686db5..000000000 --- a/libs/yaml2.7/error.py +++ /dev/null @@ -1,75 +0,0 @@ - -__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError'] - -class Mark(object): - - def __init__(self, name, index, line, column, buffer, pointer): - self.name = name - self.index = index - self.line = line - self.column = column - self.buffer = buffer - self.pointer = pointer - - def get_snippet(self, indent=4, max_length=75): - if self.buffer is None: - return None - head = '' - start = self.pointer - while start > 0 and self.buffer[start-1] not in u'\0\r\n\x85\u2028\u2029': - start -= 1 - if self.pointer-start > max_length/2-1: - head = ' ... ' - start += 5 - break - tail = '' - end = self.pointer - while end < len(self.buffer) and self.buffer[end] not in u'\0\r\n\x85\u2028\u2029': - end += 1 - if end-self.pointer > max_length/2-1: - tail = ' ... ' - end -= 5 - break - snippet = self.buffer[start:end].encode('utf-8') - return ' '*indent + head + snippet + tail + '\n' \ - + ' '*(indent+self.pointer-start+len(head)) + '^' - - def __str__(self): - snippet = self.get_snippet() - where = " in \"%s\", line %d, column %d" \ - % (self.name, self.line+1, self.column+1) - if snippet is not None: - where += ":\n"+snippet - return where - -class YAMLError(Exception): - pass - -class MarkedYAMLError(YAMLError): - - def __init__(self, context=None, context_mark=None, - problem=None, problem_mark=None, note=None): - self.context = context - self.context_mark = context_mark - self.problem = problem - self.problem_mark = problem_mark - self.note = note - - def __str__(self): - lines = [] - if self.context is not None: - lines.append(self.context) - if self.context_mark is not None \ - and (self.problem is None or self.problem_mark is None - or self.context_mark.name != self.problem_mark.name - or self.context_mark.line != self.problem_mark.line - or self.context_mark.column != self.problem_mark.column): - lines.append(str(self.context_mark)) - if self.problem is not None: - lines.append(self.problem) - if self.problem_mark is not None: - lines.append(str(self.problem_mark)) - if self.note is not None: - lines.append(self.note) - return '\n'.join(lines) - diff --git a/libs/yaml2.7/events.py b/libs/yaml2.7/events.py deleted file mode 100644 index f79ad389c..000000000 --- a/libs/yaml2.7/events.py +++ /dev/null @@ -1,86 +0,0 @@ - -# Abstract classes. - -class Event(object): - def __init__(self, start_mark=None, end_mark=None): - self.start_mark = start_mark - self.end_mark = end_mark - def __repr__(self): - attributes = [key for key in ['anchor', 'tag', 'implicit', 'value'] - if hasattr(self, key)] - arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) - for key in attributes]) - return '%s(%s)' % (self.__class__.__name__, arguments) - -class NodeEvent(Event): - def __init__(self, anchor, start_mark=None, end_mark=None): - self.anchor = anchor - self.start_mark = start_mark - self.end_mark = end_mark - -class CollectionStartEvent(NodeEvent): - def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None, - flow_style=None): - self.anchor = anchor - self.tag = tag - self.implicit = implicit - self.start_mark = start_mark - self.end_mark = end_mark - self.flow_style = flow_style - -class CollectionEndEvent(Event): - pass - -# Implementations. - -class StreamStartEvent(Event): - def __init__(self, start_mark=None, end_mark=None, encoding=None): - self.start_mark = start_mark - self.end_mark = end_mark - self.encoding = encoding - -class StreamEndEvent(Event): - pass - -class DocumentStartEvent(Event): - def __init__(self, start_mark=None, end_mark=None, - explicit=None, version=None, tags=None): - self.start_mark = start_mark - self.end_mark = end_mark - self.explicit = explicit - self.version = version - self.tags = tags - -class DocumentEndEvent(Event): - def __init__(self, start_mark=None, end_mark=None, - explicit=None): - self.start_mark = start_mark - self.end_mark = end_mark - self.explicit = explicit - -class AliasEvent(NodeEvent): - pass - -class ScalarEvent(NodeEvent): - def __init__(self, anchor, tag, implicit, value, - start_mark=None, end_mark=None, style=None): - self.anchor = anchor - self.tag = tag - self.implicit = implicit - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - self.style = style - -class SequenceStartEvent(CollectionStartEvent): - pass - -class SequenceEndEvent(CollectionEndEvent): - pass - -class MappingStartEvent(CollectionStartEvent): - pass - -class MappingEndEvent(CollectionEndEvent): - pass - diff --git a/libs/yaml2.7/loader.py b/libs/yaml2.7/loader.py deleted file mode 100644 index a79182eaf..000000000 --- a/libs/yaml2.7/loader.py +++ /dev/null @@ -1,63 +0,0 @@ - -__all__ = ['BaseLoader', 'FullLoader', 'SafeLoader', 'Loader', 'UnsafeLoader'] - -from reader import * -from scanner import * -from parser import * -from composer import * -from constructor import * -from resolver import * - -class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - BaseConstructor.__init__(self) - BaseResolver.__init__(self) - -class FullLoader(Reader, Scanner, Parser, Composer, FullConstructor, Resolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - FullConstructor.__init__(self) - Resolver.__init__(self) - -class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - SafeConstructor.__init__(self) - Resolver.__init__(self) - -class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - Constructor.__init__(self) - Resolver.__init__(self) - -# UnsafeLoader is the same as Loader (which is and was always unsafe on -# untrusted input). Use of either Loader or UnsafeLoader should be rare, since -# FullLoad should be able to load almost all YAML safely. Loader is left intact -# to ensure backwards compatability. -class UnsafeLoader(Reader, Scanner, Parser, Composer, Constructor, Resolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - Constructor.__init__(self) - Resolver.__init__(self) diff --git a/libs/yaml2.7/nodes.py b/libs/yaml2.7/nodes.py deleted file mode 100644 index c4f070c41..000000000 --- a/libs/yaml2.7/nodes.py +++ /dev/null @@ -1,49 +0,0 @@ - -class Node(object): - def __init__(self, tag, value, start_mark, end_mark): - self.tag = tag - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - def __repr__(self): - value = self.value - #if isinstance(value, list): - # if len(value) == 0: - # value = '<empty>' - # elif len(value) == 1: - # value = '<1 item>' - # else: - # value = '<%d items>' % len(value) - #else: - # if len(value) > 75: - # value = repr(value[:70]+u' ... ') - # else: - # value = repr(value) - value = repr(value) - return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value) - -class ScalarNode(Node): - id = 'scalar' - def __init__(self, tag, value, - start_mark=None, end_mark=None, style=None): - self.tag = tag - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - self.style = style - -class CollectionNode(Node): - def __init__(self, tag, value, - start_mark=None, end_mark=None, flow_style=None): - self.tag = tag - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - self.flow_style = flow_style - -class SequenceNode(CollectionNode): - id = 'sequence' - -class MappingNode(CollectionNode): - id = 'mapping' - diff --git a/libs/yaml2.7/parser.py b/libs/yaml2.7/parser.py deleted file mode 100644 index f9e3057f3..000000000 --- a/libs/yaml2.7/parser.py +++ /dev/null @@ -1,589 +0,0 @@ - -# The following YAML grammar is LL(1) and is parsed by a recursive descent -# parser. -# -# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END -# implicit_document ::= block_node DOCUMENT-END* -# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* -# block_node_or_indentless_sequence ::= -# ALIAS -# | properties (block_content | indentless_block_sequence)? -# | block_content -# | indentless_block_sequence -# block_node ::= ALIAS -# | properties block_content? -# | block_content -# flow_node ::= ALIAS -# | properties flow_content? -# | flow_content -# properties ::= TAG ANCHOR? | ANCHOR TAG? -# block_content ::= block_collection | flow_collection | SCALAR -# flow_content ::= flow_collection | SCALAR -# block_collection ::= block_sequence | block_mapping -# flow_collection ::= flow_sequence | flow_mapping -# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END -# indentless_sequence ::= (BLOCK-ENTRY block_node?)+ -# block_mapping ::= BLOCK-MAPPING_START -# ((KEY block_node_or_indentless_sequence?)? -# (VALUE block_node_or_indentless_sequence?)?)* -# BLOCK-END -# flow_sequence ::= FLOW-SEQUENCE-START -# (flow_sequence_entry FLOW-ENTRY)* -# flow_sequence_entry? -# FLOW-SEQUENCE-END -# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? -# flow_mapping ::= FLOW-MAPPING-START -# (flow_mapping_entry FLOW-ENTRY)* -# flow_mapping_entry? -# FLOW-MAPPING-END -# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? -# -# FIRST sets: -# -# stream: { STREAM-START } -# explicit_document: { DIRECTIVE DOCUMENT-START } -# implicit_document: FIRST(block_node) -# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } -# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } -# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } -# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } -# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } -# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } -# block_sequence: { BLOCK-SEQUENCE-START } -# block_mapping: { BLOCK-MAPPING-START } -# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } -# indentless_sequence: { ENTRY } -# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } -# flow_sequence: { FLOW-SEQUENCE-START } -# flow_mapping: { FLOW-MAPPING-START } -# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } -# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } - -__all__ = ['Parser', 'ParserError'] - -from error import MarkedYAMLError -from tokens import * -from events import * -from scanner import * - -class ParserError(MarkedYAMLError): - pass - -class Parser(object): - # Since writing a recursive-descendant parser is a straightforward task, we - # do not give many comments here. - - DEFAULT_TAGS = { - u'!': u'!', - u'!!': u'tag:yaml.org,2002:', - } - - def __init__(self): - self.current_event = None - self.yaml_version = None - self.tag_handles = {} - self.states = [] - self.marks = [] - self.state = self.parse_stream_start - - def dispose(self): - # Reset the state attributes (to clear self-references) - self.states = [] - self.state = None - - def check_event(self, *choices): - # Check the type of the next event. - if self.current_event is None: - if self.state: - self.current_event = self.state() - if self.current_event is not None: - if not choices: - return True - for choice in choices: - if isinstance(self.current_event, choice): - return True - return False - - def peek_event(self): - # Get the next event. - if self.current_event is None: - if self.state: - self.current_event = self.state() - return self.current_event - - def get_event(self): - # Get the next event and proceed further. - if self.current_event is None: - if self.state: - self.current_event = self.state() - value = self.current_event - self.current_event = None - return value - - # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END - # implicit_document ::= block_node DOCUMENT-END* - # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* - - def parse_stream_start(self): - - # Parse the stream start. - token = self.get_token() - event = StreamStartEvent(token.start_mark, token.end_mark, - encoding=token.encoding) - - # Prepare the next state. - self.state = self.parse_implicit_document_start - - return event - - def parse_implicit_document_start(self): - - # Parse an implicit document. - if not self.check_token(DirectiveToken, DocumentStartToken, - StreamEndToken): - self.tag_handles = self.DEFAULT_TAGS - token = self.peek_token() - start_mark = end_mark = token.start_mark - event = DocumentStartEvent(start_mark, end_mark, - explicit=False) - - # Prepare the next state. - self.states.append(self.parse_document_end) - self.state = self.parse_block_node - - return event - - else: - return self.parse_document_start() - - def parse_document_start(self): - - # Parse any extra document end indicators. - while self.check_token(DocumentEndToken): - self.get_token() - - # Parse an explicit document. - if not self.check_token(StreamEndToken): - token = self.peek_token() - start_mark = token.start_mark - version, tags = self.process_directives() - if not self.check_token(DocumentStartToken): - raise ParserError(None, None, - "expected '<document start>', but found %r" - % self.peek_token().id, - self.peek_token().start_mark) - token = self.get_token() - end_mark = token.end_mark - event = DocumentStartEvent(start_mark, end_mark, - explicit=True, version=version, tags=tags) - self.states.append(self.parse_document_end) - self.state = self.parse_document_content - else: - # Parse the end of the stream. - token = self.get_token() - event = StreamEndEvent(token.start_mark, token.end_mark) - assert not self.states - assert not self.marks - self.state = None - return event - - def parse_document_end(self): - - # Parse the document end. - token = self.peek_token() - start_mark = end_mark = token.start_mark - explicit = False - if self.check_token(DocumentEndToken): - token = self.get_token() - end_mark = token.end_mark - explicit = True - event = DocumentEndEvent(start_mark, end_mark, - explicit=explicit) - - # Prepare the next state. - self.state = self.parse_document_start - - return event - - def parse_document_content(self): - if self.check_token(DirectiveToken, - DocumentStartToken, DocumentEndToken, StreamEndToken): - event = self.process_empty_scalar(self.peek_token().start_mark) - self.state = self.states.pop() - return event - else: - return self.parse_block_node() - - def process_directives(self): - self.yaml_version = None - self.tag_handles = {} - while self.check_token(DirectiveToken): - token = self.get_token() - if token.name == u'YAML': - if self.yaml_version is not None: - raise ParserError(None, None, - "found duplicate YAML directive", token.start_mark) - major, minor = token.value - if major != 1: - raise ParserError(None, None, - "found incompatible YAML document (version 1.* is required)", - token.start_mark) - self.yaml_version = token.value - elif token.name == u'TAG': - handle, prefix = token.value - if handle in self.tag_handles: - raise ParserError(None, None, - "duplicate tag handle %r" % handle.encode('utf-8'), - token.start_mark) - self.tag_handles[handle] = prefix - if self.tag_handles: - value = self.yaml_version, self.tag_handles.copy() - else: - value = self.yaml_version, None - for key in self.DEFAULT_TAGS: - if key not in self.tag_handles: - self.tag_handles[key] = self.DEFAULT_TAGS[key] - return value - - # block_node_or_indentless_sequence ::= ALIAS - # | properties (block_content | indentless_block_sequence)? - # | block_content - # | indentless_block_sequence - # block_node ::= ALIAS - # | properties block_content? - # | block_content - # flow_node ::= ALIAS - # | properties flow_content? - # | flow_content - # properties ::= TAG ANCHOR? | ANCHOR TAG? - # block_content ::= block_collection | flow_collection | SCALAR - # flow_content ::= flow_collection | SCALAR - # block_collection ::= block_sequence | block_mapping - # flow_collection ::= flow_sequence | flow_mapping - - def parse_block_node(self): - return self.parse_node(block=True) - - def parse_flow_node(self): - return self.parse_node() - - def parse_block_node_or_indentless_sequence(self): - return self.parse_node(block=True, indentless_sequence=True) - - def parse_node(self, block=False, indentless_sequence=False): - if self.check_token(AliasToken): - token = self.get_token() - event = AliasEvent(token.value, token.start_mark, token.end_mark) - self.state = self.states.pop() - else: - anchor = None - tag = None - start_mark = end_mark = tag_mark = None - if self.check_token(AnchorToken): - token = self.get_token() - start_mark = token.start_mark - end_mark = token.end_mark - anchor = token.value - if self.check_token(TagToken): - token = self.get_token() - tag_mark = token.start_mark - end_mark = token.end_mark - tag = token.value - elif self.check_token(TagToken): - token = self.get_token() - start_mark = tag_mark = token.start_mark - end_mark = token.end_mark - tag = token.value - if self.check_token(AnchorToken): - token = self.get_token() - end_mark = token.end_mark - anchor = token.value - if tag is not None: - handle, suffix = tag - if handle is not None: - if handle not in self.tag_handles: - raise ParserError("while parsing a node", start_mark, - "found undefined tag handle %r" % handle.encode('utf-8'), - tag_mark) - tag = self.tag_handles[handle]+suffix - else: - tag = suffix - #if tag == u'!': - # raise ParserError("while parsing a node", start_mark, - # "found non-specific tag '!'", tag_mark, - # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.") - if start_mark is None: - start_mark = end_mark = self.peek_token().start_mark - event = None - implicit = (tag is None or tag == u'!') - if indentless_sequence and self.check_token(BlockEntryToken): - end_mark = self.peek_token().end_mark - event = SequenceStartEvent(anchor, tag, implicit, - start_mark, end_mark) - self.state = self.parse_indentless_sequence_entry - else: - if self.check_token(ScalarToken): - token = self.get_token() - end_mark = token.end_mark - if (token.plain and tag is None) or tag == u'!': - implicit = (True, False) - elif tag is None: - implicit = (False, True) - else: - implicit = (False, False) - event = ScalarEvent(anchor, tag, implicit, token.value, - start_mark, end_mark, style=token.style) - self.state = self.states.pop() - elif self.check_token(FlowSequenceStartToken): - end_mark = self.peek_token().end_mark - event = SequenceStartEvent(anchor, tag, implicit, - start_mark, end_mark, flow_style=True) - self.state = self.parse_flow_sequence_first_entry - elif self.check_token(FlowMappingStartToken): - end_mark = self.peek_token().end_mark - event = MappingStartEvent(anchor, tag, implicit, - start_mark, end_mark, flow_style=True) - self.state = self.parse_flow_mapping_first_key - elif block and self.check_token(BlockSequenceStartToken): - end_mark = self.peek_token().start_mark - event = SequenceStartEvent(anchor, tag, implicit, - start_mark, end_mark, flow_style=False) - self.state = self.parse_block_sequence_first_entry - elif block and self.check_token(BlockMappingStartToken): - end_mark = self.peek_token().start_mark - event = MappingStartEvent(anchor, tag, implicit, - start_mark, end_mark, flow_style=False) - self.state = self.parse_block_mapping_first_key - elif anchor is not None or tag is not None: - # Empty scalars are allowed even if a tag or an anchor is - # specified. - event = ScalarEvent(anchor, tag, (implicit, False), u'', - start_mark, end_mark) - self.state = self.states.pop() - else: - if block: - node = 'block' - else: - node = 'flow' - token = self.peek_token() - raise ParserError("while parsing a %s node" % node, start_mark, - "expected the node content, but found %r" % token.id, - token.start_mark) - return event - - # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END - - def parse_block_sequence_first_entry(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_block_sequence_entry() - - def parse_block_sequence_entry(self): - if self.check_token(BlockEntryToken): - token = self.get_token() - if not self.check_token(BlockEntryToken, BlockEndToken): - self.states.append(self.parse_block_sequence_entry) - return self.parse_block_node() - else: - self.state = self.parse_block_sequence_entry - return self.process_empty_scalar(token.end_mark) - if not self.check_token(BlockEndToken): - token = self.peek_token() - raise ParserError("while parsing a block collection", self.marks[-1], - "expected <block end>, but found %r" % token.id, token.start_mark) - token = self.get_token() - event = SequenceEndEvent(token.start_mark, token.end_mark) - self.state = self.states.pop() - self.marks.pop() - return event - - # indentless_sequence ::= (BLOCK-ENTRY block_node?)+ - - def parse_indentless_sequence_entry(self): - if self.check_token(BlockEntryToken): - token = self.get_token() - if not self.check_token(BlockEntryToken, - KeyToken, ValueToken, BlockEndToken): - self.states.append(self.parse_indentless_sequence_entry) - return self.parse_block_node() - else: - self.state = self.parse_indentless_sequence_entry - return self.process_empty_scalar(token.end_mark) - token = self.peek_token() - event = SequenceEndEvent(token.start_mark, token.start_mark) - self.state = self.states.pop() - return event - - # block_mapping ::= BLOCK-MAPPING_START - # ((KEY block_node_or_indentless_sequence?)? - # (VALUE block_node_or_indentless_sequence?)?)* - # BLOCK-END - - def parse_block_mapping_first_key(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_block_mapping_key() - - def parse_block_mapping_key(self): - if self.check_token(KeyToken): - token = self.get_token() - if not self.check_token(KeyToken, ValueToken, BlockEndToken): - self.states.append(self.parse_block_mapping_value) - return self.parse_block_node_or_indentless_sequence() - else: - self.state = self.parse_block_mapping_value - return self.process_empty_scalar(token.end_mark) - if not self.check_token(BlockEndToken): - token = self.peek_token() - raise ParserError("while parsing a block mapping", self.marks[-1], - "expected <block end>, but found %r" % token.id, token.start_mark) - token = self.get_token() - event = MappingEndEvent(token.start_mark, token.end_mark) - self.state = self.states.pop() - self.marks.pop() - return event - - def parse_block_mapping_value(self): - if self.check_token(ValueToken): - token = self.get_token() - if not self.check_token(KeyToken, ValueToken, BlockEndToken): - self.states.append(self.parse_block_mapping_key) - return self.parse_block_node_or_indentless_sequence() - else: - self.state = self.parse_block_mapping_key - return self.process_empty_scalar(token.end_mark) - else: - self.state = self.parse_block_mapping_key - token = self.peek_token() - return self.process_empty_scalar(token.start_mark) - - # flow_sequence ::= FLOW-SEQUENCE-START - # (flow_sequence_entry FLOW-ENTRY)* - # flow_sequence_entry? - # FLOW-SEQUENCE-END - # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? - # - # Note that while production rules for both flow_sequence_entry and - # flow_mapping_entry are equal, their interpretations are different. - # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` - # generate an inline mapping (set syntax). - - def parse_flow_sequence_first_entry(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_flow_sequence_entry(first=True) - - def parse_flow_sequence_entry(self, first=False): - if not self.check_token(FlowSequenceEndToken): - if not first: - if self.check_token(FlowEntryToken): - self.get_token() - else: - token = self.peek_token() - raise ParserError("while parsing a flow sequence", self.marks[-1], - "expected ',' or ']', but got %r" % token.id, token.start_mark) - - if self.check_token(KeyToken): - token = self.peek_token() - event = MappingStartEvent(None, None, True, - token.start_mark, token.end_mark, - flow_style=True) - self.state = self.parse_flow_sequence_entry_mapping_key - return event - elif not self.check_token(FlowSequenceEndToken): - self.states.append(self.parse_flow_sequence_entry) - return self.parse_flow_node() - token = self.get_token() - event = SequenceEndEvent(token.start_mark, token.end_mark) - self.state = self.states.pop() - self.marks.pop() - return event - - def parse_flow_sequence_entry_mapping_key(self): - token = self.get_token() - if not self.check_token(ValueToken, - FlowEntryToken, FlowSequenceEndToken): - self.states.append(self.parse_flow_sequence_entry_mapping_value) - return self.parse_flow_node() - else: - self.state = self.parse_flow_sequence_entry_mapping_value - return self.process_empty_scalar(token.end_mark) - - def parse_flow_sequence_entry_mapping_value(self): - if self.check_token(ValueToken): - token = self.get_token() - if not self.check_token(FlowEntryToken, FlowSequenceEndToken): - self.states.append(self.parse_flow_sequence_entry_mapping_end) - return self.parse_flow_node() - else: - self.state = self.parse_flow_sequence_entry_mapping_end - return self.process_empty_scalar(token.end_mark) - else: - self.state = self.parse_flow_sequence_entry_mapping_end - token = self.peek_token() - return self.process_empty_scalar(token.start_mark) - - def parse_flow_sequence_entry_mapping_end(self): - self.state = self.parse_flow_sequence_entry - token = self.peek_token() - return MappingEndEvent(token.start_mark, token.start_mark) - - # flow_mapping ::= FLOW-MAPPING-START - # (flow_mapping_entry FLOW-ENTRY)* - # flow_mapping_entry? - # FLOW-MAPPING-END - # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? - - def parse_flow_mapping_first_key(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_flow_mapping_key(first=True) - - def parse_flow_mapping_key(self, first=False): - if not self.check_token(FlowMappingEndToken): - if not first: - if self.check_token(FlowEntryToken): - self.get_token() - else: - token = self.peek_token() - raise ParserError("while parsing a flow mapping", self.marks[-1], - "expected ',' or '}', but got %r" % token.id, token.start_mark) - if self.check_token(KeyToken): - token = self.get_token() - if not self.check_token(ValueToken, - FlowEntryToken, FlowMappingEndToken): - self.states.append(self.parse_flow_mapping_value) - return self.parse_flow_node() - else: - self.state = self.parse_flow_mapping_value - return self.process_empty_scalar(token.end_mark) - elif not self.check_token(FlowMappingEndToken): - self.states.append(self.parse_flow_mapping_empty_value) - return self.parse_flow_node() - token = self.get_token() - event = MappingEndEvent(token.start_mark, token.end_mark) - self.state = self.states.pop() - self.marks.pop() - return event - - def parse_flow_mapping_value(self): - if self.check_token(ValueToken): - token = self.get_token() - if not self.check_token(FlowEntryToken, FlowMappingEndToken): - self.states.append(self.parse_flow_mapping_key) - return self.parse_flow_node() - else: - self.state = self.parse_flow_mapping_key - return self.process_empty_scalar(token.end_mark) - else: - self.state = self.parse_flow_mapping_key - token = self.peek_token() - return self.process_empty_scalar(token.start_mark) - - def parse_flow_mapping_empty_value(self): - self.state = self.parse_flow_mapping_key - return self.process_empty_scalar(self.peek_token().start_mark) - - def process_empty_scalar(self, mark): - return ScalarEvent(None, None, (True, False), u'', mark, mark) - diff --git a/libs/yaml2.7/reader.py b/libs/yaml2.7/reader.py deleted file mode 100644 index b2f10b091..000000000 --- a/libs/yaml2.7/reader.py +++ /dev/null @@ -1,188 +0,0 @@ -# This module contains abstractions for the input stream. You don't have to -# looks further, there are no pretty code. -# -# We define two classes here. -# -# Mark(source, line, column) -# It's just a record and its only use is producing nice error messages. -# Parser does not use it for any other purposes. -# -# Reader(source, data) -# Reader determines the encoding of `data` and converts it to unicode. -# Reader provides the following methods and attributes: -# reader.peek(length=1) - return the next `length` characters -# reader.forward(length=1) - move the current position to `length` characters. -# reader.index - the number of the current character. -# reader.line, stream.column - the line and the column of the current character. - -__all__ = ['Reader', 'ReaderError'] - -from error import YAMLError, Mark - -import codecs, re, sys - -has_ucs4 = sys.maxunicode > 0xffff - -class ReaderError(YAMLError): - - def __init__(self, name, position, character, encoding, reason): - self.name = name - self.character = character - self.position = position - self.encoding = encoding - self.reason = reason - - def __str__(self): - if isinstance(self.character, str): - return "'%s' codec can't decode byte #x%02x: %s\n" \ - " in \"%s\", position %d" \ - % (self.encoding, ord(self.character), self.reason, - self.name, self.position) - else: - return "unacceptable character #x%04x: %s\n" \ - " in \"%s\", position %d" \ - % (self.character, self.reason, - self.name, self.position) - -class Reader(object): - # Reader: - # - determines the data encoding and converts it to unicode, - # - checks if characters are in allowed range, - # - adds '\0' to the end. - - # Reader accepts - # - a `str` object, - # - a `unicode` object, - # - a file-like object with its `read` method returning `str`, - # - a file-like object with its `read` method returning `unicode`. - - # Yeah, it's ugly and slow. - - def __init__(self, stream): - self.name = None - self.stream = None - self.stream_pointer = 0 - self.eof = True - self.buffer = u'' - self.pointer = 0 - self.raw_buffer = None - self.raw_decode = None - self.encoding = None - self.index = 0 - self.line = 0 - self.column = 0 - if isinstance(stream, unicode): - self.name = "<unicode string>" - self.check_printable(stream) - self.buffer = stream+u'\0' - elif isinstance(stream, str): - self.name = "<string>" - self.raw_buffer = stream - self.determine_encoding() - else: - self.stream = stream - self.name = getattr(stream, 'name', "<file>") - self.eof = False - self.raw_buffer = '' - self.determine_encoding() - - def peek(self, index=0): - try: - return self.buffer[self.pointer+index] - except IndexError: - self.update(index+1) - return self.buffer[self.pointer+index] - - def prefix(self, length=1): - if self.pointer+length >= len(self.buffer): - self.update(length) - return self.buffer[self.pointer:self.pointer+length] - - def forward(self, length=1): - if self.pointer+length+1 >= len(self.buffer): - self.update(length+1) - while length: - ch = self.buffer[self.pointer] - self.pointer += 1 - self.index += 1 - if ch in u'\n\x85\u2028\u2029' \ - or (ch == u'\r' and self.buffer[self.pointer] != u'\n'): - self.line += 1 - self.column = 0 - elif ch != u'\uFEFF': - self.column += 1 - length -= 1 - - def get_mark(self): - if self.stream is None: - return Mark(self.name, self.index, self.line, self.column, - self.buffer, self.pointer) - else: - return Mark(self.name, self.index, self.line, self.column, - None, None) - - def determine_encoding(self): - while not self.eof and len(self.raw_buffer) < 2: - self.update_raw() - if not isinstance(self.raw_buffer, unicode): - if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): - self.raw_decode = codecs.utf_16_le_decode - self.encoding = 'utf-16-le' - elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): - self.raw_decode = codecs.utf_16_be_decode - self.encoding = 'utf-16-be' - else: - self.raw_decode = codecs.utf_8_decode - self.encoding = 'utf-8' - self.update(1) - - if has_ucs4: - NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD\U00010000-\U0010ffff]') - else: - NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') - def check_printable(self, data): - match = self.NON_PRINTABLE.search(data) - if match: - character = match.group() - position = self.index+(len(self.buffer)-self.pointer)+match.start() - raise ReaderError(self.name, position, ord(character), - 'unicode', "special characters are not allowed") - - def update(self, length): - if self.raw_buffer is None: - return - self.buffer = self.buffer[self.pointer:] - self.pointer = 0 - while len(self.buffer) < length: - if not self.eof: - self.update_raw() - if self.raw_decode is not None: - try: - data, converted = self.raw_decode(self.raw_buffer, - 'strict', self.eof) - except UnicodeDecodeError, exc: - character = exc.object[exc.start] - if self.stream is not None: - position = self.stream_pointer-len(self.raw_buffer)+exc.start - else: - position = exc.start - raise ReaderError(self.name, position, character, - exc.encoding, exc.reason) - else: - data = self.raw_buffer - converted = len(data) - self.check_printable(data) - self.buffer += data - self.raw_buffer = self.raw_buffer[converted:] - if self.eof: - self.buffer += u'\0' - self.raw_buffer = None - break - - def update_raw(self, size=1024): - data = self.stream.read(size) - if data: - self.raw_buffer += data - self.stream_pointer += len(data) - else: - self.eof = True diff --git a/libs/yaml2.7/representer.py b/libs/yaml2.7/representer.py deleted file mode 100644 index 9dca41af7..000000000 --- a/libs/yaml2.7/representer.py +++ /dev/null @@ -1,488 +0,0 @@ - -__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', - 'RepresenterError'] - -from error import * -from nodes import * - -import datetime - -import sys, copy_reg, types - -class RepresenterError(YAMLError): - pass - -class BaseRepresenter(object): - - yaml_representers = {} - yaml_multi_representers = {} - - def __init__(self, default_style=None, default_flow_style=False, sort_keys=True): - self.default_style = default_style - self.default_flow_style = default_flow_style - self.sort_keys = sort_keys - self.represented_objects = {} - self.object_keeper = [] - self.alias_key = None - - def represent(self, data): - node = self.represent_data(data) - self.serialize(node) - self.represented_objects = {} - self.object_keeper = [] - self.alias_key = None - - def get_classobj_bases(self, cls): - bases = [cls] - for base in cls.__bases__: - bases.extend(self.get_classobj_bases(base)) - return bases - - def represent_data(self, data): - if self.ignore_aliases(data): - self.alias_key = None - else: - self.alias_key = id(data) - if self.alias_key is not None: - if self.alias_key in self.represented_objects: - node = self.represented_objects[self.alias_key] - #if node is None: - # raise RepresenterError("recursive objects are not allowed: %r" % data) - return node - #self.represented_objects[alias_key] = None - self.object_keeper.append(data) - data_types = type(data).__mro__ - if type(data) is types.InstanceType: - data_types = self.get_classobj_bases(data.__class__)+list(data_types) - if data_types[0] in self.yaml_representers: - node = self.yaml_representers[data_types[0]](self, data) - else: - for data_type in data_types: - if data_type in self.yaml_multi_representers: - node = self.yaml_multi_representers[data_type](self, data) - break - else: - if None in self.yaml_multi_representers: - node = self.yaml_multi_representers[None](self, data) - elif None in self.yaml_representers: - node = self.yaml_representers[None](self, data) - else: - node = ScalarNode(None, unicode(data)) - #if alias_key is not None: - # self.represented_objects[alias_key] = node - return node - - def add_representer(cls, data_type, representer): - if not 'yaml_representers' in cls.__dict__: - cls.yaml_representers = cls.yaml_representers.copy() - cls.yaml_representers[data_type] = representer - add_representer = classmethod(add_representer) - - def add_multi_representer(cls, data_type, representer): - if not 'yaml_multi_representers' in cls.__dict__: - cls.yaml_multi_representers = cls.yaml_multi_representers.copy() - cls.yaml_multi_representers[data_type] = representer - add_multi_representer = classmethod(add_multi_representer) - - def represent_scalar(self, tag, value, style=None): - if style is None: - style = self.default_style - node = ScalarNode(tag, value, style=style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - return node - - def represent_sequence(self, tag, sequence, flow_style=None): - value = [] - node = SequenceNode(tag, value, flow_style=flow_style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - best_style = True - for item in sequence: - node_item = self.represent_data(item) - if not (isinstance(node_item, ScalarNode) and not node_item.style): - best_style = False - value.append(node_item) - if flow_style is None: - if self.default_flow_style is not None: - node.flow_style = self.default_flow_style - else: - node.flow_style = best_style - return node - - def represent_mapping(self, tag, mapping, flow_style=None): - value = [] - node = MappingNode(tag, value, flow_style=flow_style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - best_style = True - if hasattr(mapping, 'items'): - mapping = mapping.items() - if self.sort_keys: - mapping.sort() - for item_key, item_value in mapping: - node_key = self.represent_data(item_key) - node_value = self.represent_data(item_value) - if not (isinstance(node_key, ScalarNode) and not node_key.style): - best_style = False - if not (isinstance(node_value, ScalarNode) and not node_value.style): - best_style = False - value.append((node_key, node_value)) - if flow_style is None: - if self.default_flow_style is not None: - node.flow_style = self.default_flow_style - else: - node.flow_style = best_style - return node - - def ignore_aliases(self, data): - return False - -class SafeRepresenter(BaseRepresenter): - - def ignore_aliases(self, data): - if data is None: - return True - if isinstance(data, tuple) and data == (): - return True - if isinstance(data, (str, unicode, bool, int, float)): - return True - - def represent_none(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:null', - u'null') - - def represent_str(self, data): - tag = None - style = None - try: - data = unicode(data, 'ascii') - tag = u'tag:yaml.org,2002:str' - except UnicodeDecodeError: - try: - data = unicode(data, 'utf-8') - tag = u'tag:yaml.org,2002:str' - except UnicodeDecodeError: - data = data.encode('base64') - tag = u'tag:yaml.org,2002:binary' - style = '|' - return self.represent_scalar(tag, data, style=style) - - def represent_unicode(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:str', data) - - def represent_bool(self, data): - if data: - value = u'true' - else: - value = u'false' - return self.represent_scalar(u'tag:yaml.org,2002:bool', value) - - def represent_int(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data)) - - def represent_long(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data)) - - inf_value = 1e300 - while repr(inf_value) != repr(inf_value*inf_value): - inf_value *= inf_value - - def represent_float(self, data): - if data != data or (data == 0.0 and data == 1.0): - value = u'.nan' - elif data == self.inf_value: - value = u'.inf' - elif data == -self.inf_value: - value = u'-.inf' - else: - value = unicode(repr(data)).lower() - # Note that in some cases `repr(data)` represents a float number - # without the decimal parts. For instance: - # >>> repr(1e17) - # '1e17' - # Unfortunately, this is not a valid float representation according - # to the definition of the `!!float` tag. We fix this by adding - # '.0' before the 'e' symbol. - if u'.' not in value and u'e' in value: - value = value.replace(u'e', u'.0e', 1) - return self.represent_scalar(u'tag:yaml.org,2002:float', value) - - def represent_list(self, data): - #pairs = (len(data) > 0 and isinstance(data, list)) - #if pairs: - # for item in data: - # if not isinstance(item, tuple) or len(item) != 2: - # pairs = False - # break - #if not pairs: - return self.represent_sequence(u'tag:yaml.org,2002:seq', data) - #value = [] - #for item_key, item_value in data: - # value.append(self.represent_mapping(u'tag:yaml.org,2002:map', - # [(item_key, item_value)])) - #return SequenceNode(u'tag:yaml.org,2002:pairs', value) - - def represent_dict(self, data): - return self.represent_mapping(u'tag:yaml.org,2002:map', data) - - def represent_set(self, data): - value = {} - for key in data: - value[key] = None - return self.represent_mapping(u'tag:yaml.org,2002:set', value) - - def represent_date(self, data): - value = unicode(data.isoformat()) - return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) - - def represent_datetime(self, data): - value = unicode(data.isoformat(' ')) - return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) - - def represent_yaml_object(self, tag, data, cls, flow_style=None): - if hasattr(data, '__getstate__'): - state = data.__getstate__() - else: - state = data.__dict__.copy() - return self.represent_mapping(tag, state, flow_style=flow_style) - - def represent_undefined(self, data): - raise RepresenterError("cannot represent an object", data) - -SafeRepresenter.add_representer(type(None), - SafeRepresenter.represent_none) - -SafeRepresenter.add_representer(str, - SafeRepresenter.represent_str) - -SafeRepresenter.add_representer(unicode, - SafeRepresenter.represent_unicode) - -SafeRepresenter.add_representer(bool, - SafeRepresenter.represent_bool) - -SafeRepresenter.add_representer(int, - SafeRepresenter.represent_int) - -SafeRepresenter.add_representer(long, - SafeRepresenter.represent_long) - -SafeRepresenter.add_representer(float, - SafeRepresenter.represent_float) - -SafeRepresenter.add_representer(list, - SafeRepresenter.represent_list) - -SafeRepresenter.add_representer(tuple, - SafeRepresenter.represent_list) - -SafeRepresenter.add_representer(dict, - SafeRepresenter.represent_dict) - -SafeRepresenter.add_representer(set, - SafeRepresenter.represent_set) - -SafeRepresenter.add_representer(datetime.date, - SafeRepresenter.represent_date) - -SafeRepresenter.add_representer(datetime.datetime, - SafeRepresenter.represent_datetime) - -SafeRepresenter.add_representer(None, - SafeRepresenter.represent_undefined) - -class Representer(SafeRepresenter): - - def represent_str(self, data): - tag = None - style = None - try: - data = unicode(data, 'ascii') - tag = u'tag:yaml.org,2002:str' - except UnicodeDecodeError: - try: - data = unicode(data, 'utf-8') - tag = u'tag:yaml.org,2002:python/str' - except UnicodeDecodeError: - data = data.encode('base64') - tag = u'tag:yaml.org,2002:binary' - style = '|' - return self.represent_scalar(tag, data, style=style) - - def represent_unicode(self, data): - tag = None - try: - data.encode('ascii') - tag = u'tag:yaml.org,2002:python/unicode' - except UnicodeEncodeError: - tag = u'tag:yaml.org,2002:str' - return self.represent_scalar(tag, data) - - def represent_long(self, data): - tag = u'tag:yaml.org,2002:int' - if int(data) is not data: - tag = u'tag:yaml.org,2002:python/long' - return self.represent_scalar(tag, unicode(data)) - - def represent_complex(self, data): - if data.imag == 0.0: - data = u'%r' % data.real - elif data.real == 0.0: - data = u'%rj' % data.imag - elif data.imag > 0: - data = u'%r+%rj' % (data.real, data.imag) - else: - data = u'%r%rj' % (data.real, data.imag) - return self.represent_scalar(u'tag:yaml.org,2002:python/complex', data) - - def represent_tuple(self, data): - return self.represent_sequence(u'tag:yaml.org,2002:python/tuple', data) - - def represent_name(self, data): - name = u'%s.%s' % (data.__module__, data.__name__) - return self.represent_scalar(u'tag:yaml.org,2002:python/name:'+name, u'') - - def represent_module(self, data): - return self.represent_scalar( - u'tag:yaml.org,2002:python/module:'+data.__name__, u'') - - def represent_instance(self, data): - # For instances of classic classes, we use __getinitargs__ and - # __getstate__ to serialize the data. - - # If data.__getinitargs__ exists, the object must be reconstructed by - # calling cls(**args), where args is a tuple returned by - # __getinitargs__. Otherwise, the cls.__init__ method should never be - # called and the class instance is created by instantiating a trivial - # class and assigning to the instance's __class__ variable. - - # If data.__getstate__ exists, it returns the state of the object. - # Otherwise, the state of the object is data.__dict__. - - # We produce either a !!python/object or !!python/object/new node. - # If data.__getinitargs__ does not exist and state is a dictionary, we - # produce a !!python/object node . Otherwise we produce a - # !!python/object/new node. - - cls = data.__class__ - class_name = u'%s.%s' % (cls.__module__, cls.__name__) - args = None - state = None - if hasattr(data, '__getinitargs__'): - args = list(data.__getinitargs__()) - if hasattr(data, '__getstate__'): - state = data.__getstate__() - else: - state = data.__dict__ - if args is None and isinstance(state, dict): - return self.represent_mapping( - u'tag:yaml.org,2002:python/object:'+class_name, state) - if isinstance(state, dict) and not state: - return self.represent_sequence( - u'tag:yaml.org,2002:python/object/new:'+class_name, args) - value = {} - if args: - value['args'] = args - value['state'] = state - return self.represent_mapping( - u'tag:yaml.org,2002:python/object/new:'+class_name, value) - - def represent_object(self, data): - # We use __reduce__ API to save the data. data.__reduce__ returns - # a tuple of length 2-5: - # (function, args, state, listitems, dictitems) - - # For reconstructing, we calls function(*args), then set its state, - # listitems, and dictitems if they are not None. - - # A special case is when function.__name__ == '__newobj__'. In this - # case we create the object with args[0].__new__(*args). - - # Another special case is when __reduce__ returns a string - we don't - # support it. - - # We produce a !!python/object, !!python/object/new or - # !!python/object/apply node. - - cls = type(data) - if cls in copy_reg.dispatch_table: - reduce = copy_reg.dispatch_table[cls](data) - elif hasattr(data, '__reduce_ex__'): - reduce = data.__reduce_ex__(2) - elif hasattr(data, '__reduce__'): - reduce = data.__reduce__() - else: - raise RepresenterError("cannot represent an object", data) - reduce = (list(reduce)+[None]*5)[:5] - function, args, state, listitems, dictitems = reduce - args = list(args) - if state is None: - state = {} - if listitems is not None: - listitems = list(listitems) - if dictitems is not None: - dictitems = dict(dictitems) - if function.__name__ == '__newobj__': - function = args[0] - args = args[1:] - tag = u'tag:yaml.org,2002:python/object/new:' - newobj = True - else: - tag = u'tag:yaml.org,2002:python/object/apply:' - newobj = False - function_name = u'%s.%s' % (function.__module__, function.__name__) - if not args and not listitems and not dictitems \ - and isinstance(state, dict) and newobj: - return self.represent_mapping( - u'tag:yaml.org,2002:python/object:'+function_name, state) - if not listitems and not dictitems \ - and isinstance(state, dict) and not state: - return self.represent_sequence(tag+function_name, args) - value = {} - if args: - value['args'] = args - if state or not isinstance(state, dict): - value['state'] = state - if listitems: - value['listitems'] = listitems - if dictitems: - value['dictitems'] = dictitems - return self.represent_mapping(tag+function_name, value) - -Representer.add_representer(str, - Representer.represent_str) - -Representer.add_representer(unicode, - Representer.represent_unicode) - -Representer.add_representer(long, - Representer.represent_long) - -Representer.add_representer(complex, - Representer.represent_complex) - -Representer.add_representer(tuple, - Representer.represent_tuple) - -Representer.add_representer(type, - Representer.represent_name) - -Representer.add_representer(types.ClassType, - Representer.represent_name) - -Representer.add_representer(types.FunctionType, - Representer.represent_name) - -Representer.add_representer(types.BuiltinFunctionType, - Representer.represent_name) - -Representer.add_representer(types.ModuleType, - Representer.represent_module) - -Representer.add_multi_representer(types.InstanceType, - Representer.represent_instance) - -Representer.add_multi_representer(object, - Representer.represent_object) - diff --git a/libs/yaml2.7/resolver.py b/libs/yaml2.7/resolver.py deleted file mode 100644 index 528fbc0ea..000000000 --- a/libs/yaml2.7/resolver.py +++ /dev/null @@ -1,227 +0,0 @@ - -__all__ = ['BaseResolver', 'Resolver'] - -from error import * -from nodes import * - -import re - -class ResolverError(YAMLError): - pass - -class BaseResolver(object): - - DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str' - DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq' - DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map' - - yaml_implicit_resolvers = {} - yaml_path_resolvers = {} - - def __init__(self): - self.resolver_exact_paths = [] - self.resolver_prefix_paths = [] - - def add_implicit_resolver(cls, tag, regexp, first): - if not 'yaml_implicit_resolvers' in cls.__dict__: - implicit_resolvers = {} - for key in cls.yaml_implicit_resolvers: - implicit_resolvers[key] = cls.yaml_implicit_resolvers[key][:] - cls.yaml_implicit_resolvers = implicit_resolvers - if first is None: - first = [None] - for ch in first: - cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp)) - add_implicit_resolver = classmethod(add_implicit_resolver) - - def add_path_resolver(cls, tag, path, kind=None): - # Note: `add_path_resolver` is experimental. The API could be changed. - # `new_path` is a pattern that is matched against the path from the - # root to the node that is being considered. `node_path` elements are - # tuples `(node_check, index_check)`. `node_check` is a node class: - # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None` - # matches any kind of a node. `index_check` could be `None`, a boolean - # value, a string value, or a number. `None` and `False` match against - # any _value_ of sequence and mapping nodes. `True` matches against - # any _key_ of a mapping node. A string `index_check` matches against - # a mapping value that corresponds to a scalar key which content is - # equal to the `index_check` value. An integer `index_check` matches - # against a sequence value with the index equal to `index_check`. - if not 'yaml_path_resolvers' in cls.__dict__: - cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy() - new_path = [] - for element in path: - if isinstance(element, (list, tuple)): - if len(element) == 2: - node_check, index_check = element - elif len(element) == 1: - node_check = element[0] - index_check = True - else: - raise ResolverError("Invalid path element: %s" % element) - else: - node_check = None - index_check = element - if node_check is str: - node_check = ScalarNode - elif node_check is list: - node_check = SequenceNode - elif node_check is dict: - node_check = MappingNode - elif node_check not in [ScalarNode, SequenceNode, MappingNode] \ - and not isinstance(node_check, basestring) \ - and node_check is not None: - raise ResolverError("Invalid node checker: %s" % node_check) - if not isinstance(index_check, (basestring, int)) \ - and index_check is not None: - raise ResolverError("Invalid index checker: %s" % index_check) - new_path.append((node_check, index_check)) - if kind is str: - kind = ScalarNode - elif kind is list: - kind = SequenceNode - elif kind is dict: - kind = MappingNode - elif kind not in [ScalarNode, SequenceNode, MappingNode] \ - and kind is not None: - raise ResolverError("Invalid node kind: %s" % kind) - cls.yaml_path_resolvers[tuple(new_path), kind] = tag - add_path_resolver = classmethod(add_path_resolver) - - def descend_resolver(self, current_node, current_index): - if not self.yaml_path_resolvers: - return - exact_paths = {} - prefix_paths = [] - if current_node: - depth = len(self.resolver_prefix_paths) - for path, kind in self.resolver_prefix_paths[-1]: - if self.check_resolver_prefix(depth, path, kind, - current_node, current_index): - if len(path) > depth: - prefix_paths.append((path, kind)) - else: - exact_paths[kind] = self.yaml_path_resolvers[path, kind] - else: - for path, kind in self.yaml_path_resolvers: - if not path: - exact_paths[kind] = self.yaml_path_resolvers[path, kind] - else: - prefix_paths.append((path, kind)) - self.resolver_exact_paths.append(exact_paths) - self.resolver_prefix_paths.append(prefix_paths) - - def ascend_resolver(self): - if not self.yaml_path_resolvers: - return - self.resolver_exact_paths.pop() - self.resolver_prefix_paths.pop() - - def check_resolver_prefix(self, depth, path, kind, - current_node, current_index): - node_check, index_check = path[depth-1] - if isinstance(node_check, basestring): - if current_node.tag != node_check: - return - elif node_check is not None: - if not isinstance(current_node, node_check): - return - if index_check is True and current_index is not None: - return - if (index_check is False or index_check is None) \ - and current_index is None: - return - if isinstance(index_check, basestring): - if not (isinstance(current_index, ScalarNode) - and index_check == current_index.value): - return - elif isinstance(index_check, int) and not isinstance(index_check, bool): - if index_check != current_index: - return - return True - - def resolve(self, kind, value, implicit): - if kind is ScalarNode and implicit[0]: - if value == u'': - resolvers = self.yaml_implicit_resolvers.get(u'', []) - else: - resolvers = self.yaml_implicit_resolvers.get(value[0], []) - resolvers += self.yaml_implicit_resolvers.get(None, []) - for tag, regexp in resolvers: - if regexp.match(value): - return tag - implicit = implicit[1] - if self.yaml_path_resolvers: - exact_paths = self.resolver_exact_paths[-1] - if kind in exact_paths: - return exact_paths[kind] - if None in exact_paths: - return exact_paths[None] - if kind is ScalarNode: - return self.DEFAULT_SCALAR_TAG - elif kind is SequenceNode: - return self.DEFAULT_SEQUENCE_TAG - elif kind is MappingNode: - return self.DEFAULT_MAPPING_TAG - -class Resolver(BaseResolver): - pass - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:bool', - re.compile(ur'''^(?:yes|Yes|YES|no|No|NO - |true|True|TRUE|false|False|FALSE - |on|On|ON|off|Off|OFF)$''', re.X), - list(u'yYnNtTfFoO')) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:float', - re.compile(ur'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)? - |\.[0-9_]+(?:[eE][-+][0-9]+)? - |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]* - |[-+]?\.(?:inf|Inf|INF) - |\.(?:nan|NaN|NAN))$''', re.X), - list(u'-+0123456789.')) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:int', - re.compile(ur'''^(?:[-+]?0b[0-1_]+ - |[-+]?0[0-7_]+ - |[-+]?(?:0|[1-9][0-9_]*) - |[-+]?0x[0-9a-fA-F_]+ - |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X), - list(u'-+0123456789')) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:merge', - re.compile(ur'^(?:<<)$'), - [u'<']) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:null', - re.compile(ur'''^(?: ~ - |null|Null|NULL - | )$''', re.X), - [u'~', u'n', u'N', u'']) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:timestamp', - re.compile(ur'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] - |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? - (?:[Tt]|[ \t]+)[0-9][0-9]? - :[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)? - (?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X), - list(u'0123456789')) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:value', - re.compile(ur'^(?:=)$'), - [u'=']) - -# The following resolver is only for documentation purposes. It cannot work -# because plain scalars cannot start with '!', '&', or '*'. -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:yaml', - re.compile(ur'^(?:!|&|\*)$'), - list(u'!&*')) - diff --git a/libs/yaml2.7/scanner.py b/libs/yaml2.7/scanner.py deleted file mode 100644 index 5126cf07b..000000000 --- a/libs/yaml2.7/scanner.py +++ /dev/null @@ -1,1444 +0,0 @@ - -# Scanner produces tokens of the following types: -# STREAM-START -# STREAM-END -# DIRECTIVE(name, value) -# DOCUMENT-START -# DOCUMENT-END -# BLOCK-SEQUENCE-START -# BLOCK-MAPPING-START -# BLOCK-END -# FLOW-SEQUENCE-START -# FLOW-MAPPING-START -# FLOW-SEQUENCE-END -# FLOW-MAPPING-END -# BLOCK-ENTRY -# FLOW-ENTRY -# KEY -# VALUE -# ALIAS(value) -# ANCHOR(value) -# TAG(value) -# SCALAR(value, plain, style) -# -# Read comments in the Scanner code for more details. -# - -__all__ = ['Scanner', 'ScannerError'] - -from error import MarkedYAMLError -from tokens import * - -class ScannerError(MarkedYAMLError): - pass - -class SimpleKey(object): - # See below simple keys treatment. - - def __init__(self, token_number, required, index, line, column, mark): - self.token_number = token_number - self.required = required - self.index = index - self.line = line - self.column = column - self.mark = mark - -class Scanner(object): - - def __init__(self): - """Initialize the scanner.""" - # It is assumed that Scanner and Reader will have a common descendant. - # Reader do the dirty work of checking for BOM and converting the - # input data to Unicode. It also adds NUL to the end. - # - # Reader supports the following methods - # self.peek(i=0) # peek the next i-th character - # self.prefix(l=1) # peek the next l characters - # self.forward(l=1) # read the next l characters and move the pointer. - - # Had we reached the end of the stream? - self.done = False - - # The number of unclosed '{' and '['. `flow_level == 0` means block - # context. - self.flow_level = 0 - - # List of processed tokens that are not yet emitted. - self.tokens = [] - - # Add the STREAM-START token. - self.fetch_stream_start() - - # Number of tokens that were emitted through the `get_token` method. - self.tokens_taken = 0 - - # The current indentation level. - self.indent = -1 - - # Past indentation levels. - self.indents = [] - - # Variables related to simple keys treatment. - - # A simple key is a key that is not denoted by the '?' indicator. - # Example of simple keys: - # --- - # block simple key: value - # ? not a simple key: - # : { flow simple key: value } - # We emit the KEY token before all keys, so when we find a potential - # simple key, we try to locate the corresponding ':' indicator. - # Simple keys should be limited to a single line and 1024 characters. - - # Can a simple key start at the current position? A simple key may - # start: - # - at the beginning of the line, not counting indentation spaces - # (in block context), - # - after '{', '[', ',' (in the flow context), - # - after '?', ':', '-' (in the block context). - # In the block context, this flag also signifies if a block collection - # may start at the current position. - self.allow_simple_key = True - - # Keep track of possible simple keys. This is a dictionary. The key - # is `flow_level`; there can be no more that one possible simple key - # for each level. The value is a SimpleKey record: - # (token_number, required, index, line, column, mark) - # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), - # '[', or '{' tokens. - self.possible_simple_keys = {} - - # Public methods. - - def check_token(self, *choices): - # Check if the next token is one of the given types. - while self.need_more_tokens(): - self.fetch_more_tokens() - if self.tokens: - if not choices: - return True - for choice in choices: - if isinstance(self.tokens[0], choice): - return True - return False - - def peek_token(self): - # Return the next token, but do not delete if from the queue. - # Return None if no more tokens. - while self.need_more_tokens(): - self.fetch_more_tokens() - if self.tokens: - return self.tokens[0] - else: - return None - - def get_token(self): - # Return the next token. - while self.need_more_tokens(): - self.fetch_more_tokens() - if self.tokens: - self.tokens_taken += 1 - return self.tokens.pop(0) - - # Private methods. - - def need_more_tokens(self): - if self.done: - return False - if not self.tokens: - return True - # The current token may be a potential simple key, so we - # need to look further. - self.stale_possible_simple_keys() - if self.next_possible_simple_key() == self.tokens_taken: - return True - - def fetch_more_tokens(self): - - # Eat whitespaces and comments until we reach the next token. - self.scan_to_next_token() - - # Remove obsolete possible simple keys. - self.stale_possible_simple_keys() - - # Compare the current indentation and column. It may add some tokens - # and decrease the current indentation level. - self.unwind_indent(self.column) - - # Peek the next character. - ch = self.peek() - - # Is it the end of stream? - if ch == u'\0': - return self.fetch_stream_end() - - # Is it a directive? - if ch == u'%' and self.check_directive(): - return self.fetch_directive() - - # Is it the document start? - if ch == u'-' and self.check_document_start(): - return self.fetch_document_start() - - # Is it the document end? - if ch == u'.' and self.check_document_end(): - return self.fetch_document_end() - - # TODO: support for BOM within a stream. - #if ch == u'\uFEFF': - # return self.fetch_bom() <-- issue BOMToken - - # Note: the order of the following checks is NOT significant. - - # Is it the flow sequence start indicator? - if ch == u'[': - return self.fetch_flow_sequence_start() - - # Is it the flow mapping start indicator? - if ch == u'{': - return self.fetch_flow_mapping_start() - - # Is it the flow sequence end indicator? - if ch == u']': - return self.fetch_flow_sequence_end() - - # Is it the flow mapping end indicator? - if ch == u'}': - return self.fetch_flow_mapping_end() - - # Is it the flow entry indicator? - if ch == u',': - return self.fetch_flow_entry() - - # Is it the block entry indicator? - if ch == u'-' and self.check_block_entry(): - return self.fetch_block_entry() - - # Is it the key indicator? - if ch == u'?' and self.check_key(): - return self.fetch_key() - - # Is it the value indicator? - if ch == u':' and self.check_value(): - return self.fetch_value() - - # Is it an alias? - if ch == u'*': - return self.fetch_alias() - - # Is it an anchor? - if ch == u'&': - return self.fetch_anchor() - - # Is it a tag? - if ch == u'!': - return self.fetch_tag() - - # Is it a literal scalar? - if ch == u'|' and not self.flow_level: - return self.fetch_literal() - - # Is it a folded scalar? - if ch == u'>' and not self.flow_level: - return self.fetch_folded() - - # Is it a single quoted scalar? - if ch == u'\'': - return self.fetch_single() - - # Is it a double quoted scalar? - if ch == u'\"': - return self.fetch_double() - - # It must be a plain scalar then. - if self.check_plain(): - return self.fetch_plain() - - # No? It's an error. Let's produce a nice error message. - raise ScannerError("while scanning for the next token", None, - "found character %r that cannot start any token" - % ch.encode('utf-8'), self.get_mark()) - - # Simple keys treatment. - - def next_possible_simple_key(self): - # Return the number of the nearest possible simple key. Actually we - # don't need to loop through the whole dictionary. We may replace it - # with the following code: - # if not self.possible_simple_keys: - # return None - # return self.possible_simple_keys[ - # min(self.possible_simple_keys.keys())].token_number - min_token_number = None - for level in self.possible_simple_keys: - key = self.possible_simple_keys[level] - if min_token_number is None or key.token_number < min_token_number: - min_token_number = key.token_number - return min_token_number - - def stale_possible_simple_keys(self): - # Remove entries that are no longer possible simple keys. According to - # the YAML specification, simple keys - # - should be limited to a single line, - # - should be no longer than 1024 characters. - # Disabling this procedure will allow simple keys of any length and - # height (may cause problems if indentation is broken though). - for level in self.possible_simple_keys.keys(): - key = self.possible_simple_keys[level] - if key.line != self.line \ - or self.index-key.index > 1024: - if key.required: - raise ScannerError("while scanning a simple key", key.mark, - "could not find expected ':'", self.get_mark()) - del self.possible_simple_keys[level] - - def save_possible_simple_key(self): - # The next token may start a simple key. We check if it's possible - # and save its position. This function is called for - # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. - - # Check if a simple key is required at the current position. - required = not self.flow_level and self.indent == self.column - - # The next token might be a simple key. Let's save it's number and - # position. - if self.allow_simple_key: - self.remove_possible_simple_key() - token_number = self.tokens_taken+len(self.tokens) - key = SimpleKey(token_number, required, - self.index, self.line, self.column, self.get_mark()) - self.possible_simple_keys[self.flow_level] = key - - def remove_possible_simple_key(self): - # Remove the saved possible key position at the current flow level. - if self.flow_level in self.possible_simple_keys: - key = self.possible_simple_keys[self.flow_level] - - if key.required: - raise ScannerError("while scanning a simple key", key.mark, - "could not find expected ':'", self.get_mark()) - - del self.possible_simple_keys[self.flow_level] - - # Indentation functions. - - def unwind_indent(self, column): - - ## In flow context, tokens should respect indentation. - ## Actually the condition should be `self.indent >= column` according to - ## the spec. But this condition will prohibit intuitively correct - ## constructions such as - ## key : { - ## } - #if self.flow_level and self.indent > column: - # raise ScannerError(None, None, - # "invalid intendation or unclosed '[' or '{'", - # self.get_mark()) - - # In the flow context, indentation is ignored. We make the scanner less - # restrictive then specification requires. - if self.flow_level: - return - - # In block context, we may need to issue the BLOCK-END tokens. - while self.indent > column: - mark = self.get_mark() - self.indent = self.indents.pop() - self.tokens.append(BlockEndToken(mark, mark)) - - def add_indent(self, column): - # Check if we need to increase indentation. - if self.indent < column: - self.indents.append(self.indent) - self.indent = column - return True - return False - - # Fetchers. - - def fetch_stream_start(self): - # We always add STREAM-START as the first token and STREAM-END as the - # last token. - - # Read the token. - mark = self.get_mark() - - # Add STREAM-START. - self.tokens.append(StreamStartToken(mark, mark, - encoding=self.encoding)) - - - def fetch_stream_end(self): - - # Set the current intendation to -1. - self.unwind_indent(-1) - - # Reset simple keys. - self.remove_possible_simple_key() - self.allow_simple_key = False - self.possible_simple_keys = {} - - # Read the token. - mark = self.get_mark() - - # Add STREAM-END. - self.tokens.append(StreamEndToken(mark, mark)) - - # The steam is finished. - self.done = True - - def fetch_directive(self): - - # Set the current intendation to -1. - self.unwind_indent(-1) - - # Reset simple keys. - self.remove_possible_simple_key() - self.allow_simple_key = False - - # Scan and add DIRECTIVE. - self.tokens.append(self.scan_directive()) - - def fetch_document_start(self): - self.fetch_document_indicator(DocumentStartToken) - - def fetch_document_end(self): - self.fetch_document_indicator(DocumentEndToken) - - def fetch_document_indicator(self, TokenClass): - - # Set the current intendation to -1. - self.unwind_indent(-1) - - # Reset simple keys. Note that there could not be a block collection - # after '---'. - self.remove_possible_simple_key() - self.allow_simple_key = False - - # Add DOCUMENT-START or DOCUMENT-END. - start_mark = self.get_mark() - self.forward(3) - end_mark = self.get_mark() - self.tokens.append(TokenClass(start_mark, end_mark)) - - def fetch_flow_sequence_start(self): - self.fetch_flow_collection_start(FlowSequenceStartToken) - - def fetch_flow_mapping_start(self): - self.fetch_flow_collection_start(FlowMappingStartToken) - - def fetch_flow_collection_start(self, TokenClass): - - # '[' and '{' may start a simple key. - self.save_possible_simple_key() - - # Increase the flow level. - self.flow_level += 1 - - # Simple keys are allowed after '[' and '{'. - self.allow_simple_key = True - - # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(TokenClass(start_mark, end_mark)) - - def fetch_flow_sequence_end(self): - self.fetch_flow_collection_end(FlowSequenceEndToken) - - def fetch_flow_mapping_end(self): - self.fetch_flow_collection_end(FlowMappingEndToken) - - def fetch_flow_collection_end(self, TokenClass): - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Decrease the flow level. - self.flow_level -= 1 - - # No simple keys after ']' or '}'. - self.allow_simple_key = False - - # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(TokenClass(start_mark, end_mark)) - - def fetch_flow_entry(self): - - # Simple keys are allowed after ','. - self.allow_simple_key = True - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add FLOW-ENTRY. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(FlowEntryToken(start_mark, end_mark)) - - def fetch_block_entry(self): - - # Block context needs additional checks. - if not self.flow_level: - - # Are we allowed to start a new entry? - if not self.allow_simple_key: - raise ScannerError(None, None, - "sequence entries are not allowed here", - self.get_mark()) - - # We may need to add BLOCK-SEQUENCE-START. - if self.add_indent(self.column): - mark = self.get_mark() - self.tokens.append(BlockSequenceStartToken(mark, mark)) - - # It's an error for the block entry to occur in the flow context, - # but we let the parser detect this. - else: - pass - - # Simple keys are allowed after '-'. - self.allow_simple_key = True - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add BLOCK-ENTRY. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(BlockEntryToken(start_mark, end_mark)) - - def fetch_key(self): - - # Block context needs additional checks. - if not self.flow_level: - - # Are we allowed to start a key (not necessary a simple)? - if not self.allow_simple_key: - raise ScannerError(None, None, - "mapping keys are not allowed here", - self.get_mark()) - - # We may need to add BLOCK-MAPPING-START. - if self.add_indent(self.column): - mark = self.get_mark() - self.tokens.append(BlockMappingStartToken(mark, mark)) - - # Simple keys are allowed after '?' in the block context. - self.allow_simple_key = not self.flow_level - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add KEY. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(KeyToken(start_mark, end_mark)) - - def fetch_value(self): - - # Do we determine a simple key? - if self.flow_level in self.possible_simple_keys: - - # Add KEY. - key = self.possible_simple_keys[self.flow_level] - del self.possible_simple_keys[self.flow_level] - self.tokens.insert(key.token_number-self.tokens_taken, - KeyToken(key.mark, key.mark)) - - # If this key starts a new block mapping, we need to add - # BLOCK-MAPPING-START. - if not self.flow_level: - if self.add_indent(key.column): - self.tokens.insert(key.token_number-self.tokens_taken, - BlockMappingStartToken(key.mark, key.mark)) - - # There cannot be two simple keys one after another. - self.allow_simple_key = False - - # It must be a part of a complex key. - else: - - # Block context needs additional checks. - # (Do we really need them? They will be caught by the parser - # anyway.) - if not self.flow_level: - - # We are allowed to start a complex value if and only if - # we can start a simple key. - if not self.allow_simple_key: - raise ScannerError(None, None, - "mapping values are not allowed here", - self.get_mark()) - - # If this value starts a new block mapping, we need to add - # BLOCK-MAPPING-START. It will be detected as an error later by - # the parser. - if not self.flow_level: - if self.add_indent(self.column): - mark = self.get_mark() - self.tokens.append(BlockMappingStartToken(mark, mark)) - - # Simple keys are allowed after ':' in the block context. - self.allow_simple_key = not self.flow_level - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add VALUE. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(ValueToken(start_mark, end_mark)) - - def fetch_alias(self): - - # ALIAS could be a simple key. - self.save_possible_simple_key() - - # No simple keys after ALIAS. - self.allow_simple_key = False - - # Scan and add ALIAS. - self.tokens.append(self.scan_anchor(AliasToken)) - - def fetch_anchor(self): - - # ANCHOR could start a simple key. - self.save_possible_simple_key() - - # No simple keys after ANCHOR. - self.allow_simple_key = False - - # Scan and add ANCHOR. - self.tokens.append(self.scan_anchor(AnchorToken)) - - def fetch_tag(self): - - # TAG could start a simple key. - self.save_possible_simple_key() - - # No simple keys after TAG. - self.allow_simple_key = False - - # Scan and add TAG. - self.tokens.append(self.scan_tag()) - - def fetch_literal(self): - self.fetch_block_scalar(style='|') - - def fetch_folded(self): - self.fetch_block_scalar(style='>') - - def fetch_block_scalar(self, style): - - # A simple key may follow a block scalar. - self.allow_simple_key = True - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Scan and add SCALAR. - self.tokens.append(self.scan_block_scalar(style)) - - def fetch_single(self): - self.fetch_flow_scalar(style='\'') - - def fetch_double(self): - self.fetch_flow_scalar(style='"') - - def fetch_flow_scalar(self, style): - - # A flow scalar could be a simple key. - self.save_possible_simple_key() - - # No simple keys after flow scalars. - self.allow_simple_key = False - - # Scan and add SCALAR. - self.tokens.append(self.scan_flow_scalar(style)) - - def fetch_plain(self): - - # A plain scalar could be a simple key. - self.save_possible_simple_key() - - # No simple keys after plain scalars. But note that `scan_plain` will - # change this flag if the scan is finished at the beginning of the - # line. - self.allow_simple_key = False - - # Scan and add SCALAR. May change `allow_simple_key`. - self.tokens.append(self.scan_plain()) - - # Checkers. - - def check_directive(self): - - # DIRECTIVE: ^ '%' ... - # The '%' indicator is already checked. - if self.column == 0: - return True - - def check_document_start(self): - - # DOCUMENT-START: ^ '---' (' '|'\n') - if self.column == 0: - if self.prefix(3) == u'---' \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': - return True - - def check_document_end(self): - - # DOCUMENT-END: ^ '...' (' '|'\n') - if self.column == 0: - if self.prefix(3) == u'...' \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': - return True - - def check_block_entry(self): - - # BLOCK-ENTRY: '-' (' '|'\n') - return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' - - def check_key(self): - - # KEY(flow context): '?' - if self.flow_level: - return True - - # KEY(block context): '?' (' '|'\n') - else: - return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' - - def check_value(self): - - # VALUE(flow context): ':' - if self.flow_level: - return True - - # VALUE(block context): ':' (' '|'\n') - else: - return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' - - def check_plain(self): - - # A plain scalar may start with any non-space character except: - # '-', '?', ':', ',', '[', ']', '{', '}', - # '#', '&', '*', '!', '|', '>', '\'', '\"', - # '%', '@', '`'. - # - # It may also start with - # '-', '?', ':' - # if it is followed by a non-space character. - # - # Note that we limit the last rule to the block context (except the - # '-' character) because we want the flow context to be space - # independent. - ch = self.peek() - return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ - or (self.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029' - and (ch == u'-' or (not self.flow_level and ch in u'?:'))) - - # Scanners. - - def scan_to_next_token(self): - # We ignore spaces, line breaks and comments. - # If we find a line break in the block context, we set the flag - # `allow_simple_key` on. - # The byte order mark is stripped if it's the first character in the - # stream. We do not yet support BOM inside the stream as the - # specification requires. Any such mark will be considered as a part - # of the document. - # - # TODO: We need to make tab handling rules more sane. A good rule is - # Tabs cannot precede tokens - # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, - # KEY(block), VALUE(block), BLOCK-ENTRY - # So the checking code is - # if <TAB>: - # self.allow_simple_keys = False - # We also need to add the check for `allow_simple_keys == True` to - # `unwind_indent` before issuing BLOCK-END. - # Scanners for block, flow, and plain scalars need to be modified. - - if self.index == 0 and self.peek() == u'\uFEFF': - self.forward() - found = False - while not found: - while self.peek() == u' ': - self.forward() - if self.peek() == u'#': - while self.peek() not in u'\0\r\n\x85\u2028\u2029': - self.forward() - if self.scan_line_break(): - if not self.flow_level: - self.allow_simple_key = True - else: - found = True - - def scan_directive(self): - # See the specification for details. - start_mark = self.get_mark() - self.forward() - name = self.scan_directive_name(start_mark) - value = None - if name == u'YAML': - value = self.scan_yaml_directive_value(start_mark) - end_mark = self.get_mark() - elif name == u'TAG': - value = self.scan_tag_directive_value(start_mark) - end_mark = self.get_mark() - else: - end_mark = self.get_mark() - while self.peek() not in u'\0\r\n\x85\u2028\u2029': - self.forward() - self.scan_directive_ignored_line(start_mark) - return DirectiveToken(name, value, start_mark, end_mark) - - def scan_directive_name(self, start_mark): - # See the specification for details. - length = 0 - ch = self.peek(length) - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_': - length += 1 - ch = self.peek(length) - if not length: - raise ScannerError("while scanning a directive", start_mark, - "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.get_mark()) - value = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.get_mark()) - return value - - def scan_yaml_directive_value(self, start_mark): - # See the specification for details. - while self.peek() == u' ': - self.forward() - major = self.scan_yaml_directive_number(start_mark) - if self.peek() != '.': - raise ScannerError("while scanning a directive", start_mark, - "expected a digit or '.', but found %r" - % self.peek().encode('utf-8'), - self.get_mark()) - self.forward() - minor = self.scan_yaml_directive_number(start_mark) - if self.peek() not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected a digit or ' ', but found %r" - % self.peek().encode('utf-8'), - self.get_mark()) - return (major, minor) - - def scan_yaml_directive_number(self, start_mark): - # See the specification for details. - ch = self.peek() - if not (u'0' <= ch <= u'9'): - raise ScannerError("while scanning a directive", start_mark, - "expected a digit, but found %r" % ch.encode('utf-8'), - self.get_mark()) - length = 0 - while u'0' <= self.peek(length) <= u'9': - length += 1 - value = int(self.prefix(length)) - self.forward(length) - return value - - def scan_tag_directive_value(self, start_mark): - # See the specification for details. - while self.peek() == u' ': - self.forward() - handle = self.scan_tag_directive_handle(start_mark) - while self.peek() == u' ': - self.forward() - prefix = self.scan_tag_directive_prefix(start_mark) - return (handle, prefix) - - def scan_tag_directive_handle(self, start_mark): - # See the specification for details. - value = self.scan_tag_handle('directive', start_mark) - ch = self.peek() - if ch != u' ': - raise ScannerError("while scanning a directive", start_mark, - "expected ' ', but found %r" % ch.encode('utf-8'), - self.get_mark()) - return value - - def scan_tag_directive_prefix(self, start_mark): - # See the specification for details. - value = self.scan_tag_uri('directive', start_mark) - ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected ' ', but found %r" % ch.encode('utf-8'), - self.get_mark()) - return value - - def scan_directive_ignored_line(self, start_mark): - # See the specification for details. - while self.peek() == u' ': - self.forward() - if self.peek() == u'#': - while self.peek() not in u'\0\r\n\x85\u2028\u2029': - self.forward() - ch = self.peek() - if ch not in u'\0\r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected a comment or a line break, but found %r" - % ch.encode('utf-8'), self.get_mark()) - self.scan_line_break() - - def scan_anchor(self, TokenClass): - # The specification does not restrict characters for anchors and - # aliases. This may lead to problems, for instance, the document: - # [ *alias, value ] - # can be interpreted in two ways, as - # [ "value" ] - # and - # [ *alias , "value" ] - # Therefore we restrict aliases to numbers and ASCII letters. - start_mark = self.get_mark() - indicator = self.peek() - if indicator == u'*': - name = 'alias' - else: - name = 'anchor' - self.forward() - length = 0 - ch = self.peek(length) - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_': - length += 1 - ch = self.peek(length) - if not length: - raise ScannerError("while scanning an %s" % name, start_mark, - "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.get_mark()) - value = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`': - raise ScannerError("while scanning an %s" % name, start_mark, - "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.get_mark()) - end_mark = self.get_mark() - return TokenClass(value, start_mark, end_mark) - - def scan_tag(self): - # See the specification for details. - start_mark = self.get_mark() - ch = self.peek(1) - if ch == u'<': - handle = None - self.forward(2) - suffix = self.scan_tag_uri('tag', start_mark) - if self.peek() != u'>': - raise ScannerError("while parsing a tag", start_mark, - "expected '>', but found %r" % self.peek().encode('utf-8'), - self.get_mark()) - self.forward() - elif ch in u'\0 \t\r\n\x85\u2028\u2029': - handle = None - suffix = u'!' - self.forward() - else: - length = 1 - use_handle = False - while ch not in u'\0 \r\n\x85\u2028\u2029': - if ch == u'!': - use_handle = True - break - length += 1 - ch = self.peek(length) - handle = u'!' - if use_handle: - handle = self.scan_tag_handle('tag', start_mark) - else: - handle = u'!' - self.forward() - suffix = self.scan_tag_uri('tag', start_mark) - ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a tag", start_mark, - "expected ' ', but found %r" % ch.encode('utf-8'), - self.get_mark()) - value = (handle, suffix) - end_mark = self.get_mark() - return TagToken(value, start_mark, end_mark) - - def scan_block_scalar(self, style): - # See the specification for details. - - if style == '>': - folded = True - else: - folded = False - - chunks = [] - start_mark = self.get_mark() - - # Scan the header. - self.forward() - chomping, increment = self.scan_block_scalar_indicators(start_mark) - self.scan_block_scalar_ignored_line(start_mark) - - # Determine the indentation level and go to the first non-empty line. - min_indent = self.indent+1 - if min_indent < 1: - min_indent = 1 - if increment is None: - breaks, max_indent, end_mark = self.scan_block_scalar_indentation() - indent = max(min_indent, max_indent) - else: - indent = min_indent+increment-1 - breaks, end_mark = self.scan_block_scalar_breaks(indent) - line_break = u'' - - # Scan the inner part of the block scalar. - while self.column == indent and self.peek() != u'\0': - chunks.extend(breaks) - leading_non_space = self.peek() not in u' \t' - length = 0 - while self.peek(length) not in u'\0\r\n\x85\u2028\u2029': - length += 1 - chunks.append(self.prefix(length)) - self.forward(length) - line_break = self.scan_line_break() - breaks, end_mark = self.scan_block_scalar_breaks(indent) - if self.column == indent and self.peek() != u'\0': - - # Unfortunately, folding rules are ambiguous. - # - # This is the folding according to the specification: - - if folded and line_break == u'\n' \ - and leading_non_space and self.peek() not in u' \t': - if not breaks: - chunks.append(u' ') - else: - chunks.append(line_break) - - # This is Clark Evans's interpretation (also in the spec - # examples): - # - #if folded and line_break == u'\n': - # if not breaks: - # if self.peek() not in ' \t': - # chunks.append(u' ') - # else: - # chunks.append(line_break) - #else: - # chunks.append(line_break) - else: - break - - # Chomp the tail. - if chomping is not False: - chunks.append(line_break) - if chomping is True: - chunks.extend(breaks) - - # We are done. - return ScalarToken(u''.join(chunks), False, start_mark, end_mark, - style) - - def scan_block_scalar_indicators(self, start_mark): - # See the specification for details. - chomping = None - increment = None - ch = self.peek() - if ch in u'+-': - if ch == '+': - chomping = True - else: - chomping = False - self.forward() - ch = self.peek() - if ch in u'0123456789': - increment = int(ch) - if increment == 0: - raise ScannerError("while scanning a block scalar", start_mark, - "expected indentation indicator in the range 1-9, but found 0", - self.get_mark()) - self.forward() - elif ch in u'0123456789': - increment = int(ch) - if increment == 0: - raise ScannerError("while scanning a block scalar", start_mark, - "expected indentation indicator in the range 1-9, but found 0", - self.get_mark()) - self.forward() - ch = self.peek() - if ch in u'+-': - if ch == '+': - chomping = True - else: - chomping = False - self.forward() - ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a block scalar", start_mark, - "expected chomping or indentation indicators, but found %r" - % ch.encode('utf-8'), self.get_mark()) - return chomping, increment - - def scan_block_scalar_ignored_line(self, start_mark): - # See the specification for details. - while self.peek() == u' ': - self.forward() - if self.peek() == u'#': - while self.peek() not in u'\0\r\n\x85\u2028\u2029': - self.forward() - ch = self.peek() - if ch not in u'\0\r\n\x85\u2028\u2029': - raise ScannerError("while scanning a block scalar", start_mark, - "expected a comment or a line break, but found %r" - % ch.encode('utf-8'), self.get_mark()) - self.scan_line_break() - - def scan_block_scalar_indentation(self): - # See the specification for details. - chunks = [] - max_indent = 0 - end_mark = self.get_mark() - while self.peek() in u' \r\n\x85\u2028\u2029': - if self.peek() != u' ': - chunks.append(self.scan_line_break()) - end_mark = self.get_mark() - else: - self.forward() - if self.column > max_indent: - max_indent = self.column - return chunks, max_indent, end_mark - - def scan_block_scalar_breaks(self, indent): - # See the specification for details. - chunks = [] - end_mark = self.get_mark() - while self.column < indent and self.peek() == u' ': - self.forward() - while self.peek() in u'\r\n\x85\u2028\u2029': - chunks.append(self.scan_line_break()) - end_mark = self.get_mark() - while self.column < indent and self.peek() == u' ': - self.forward() - return chunks, end_mark - - def scan_flow_scalar(self, style): - # See the specification for details. - # Note that we loose indentation rules for quoted scalars. Quoted - # scalars don't need to adhere indentation because " and ' clearly - # mark the beginning and the end of them. Therefore we are less - # restrictive then the specification requires. We only need to check - # that document separators are not included in scalars. - if style == '"': - double = True - else: - double = False - chunks = [] - start_mark = self.get_mark() - quote = self.peek() - self.forward() - chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) - while self.peek() != quote: - chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) - chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) - self.forward() - end_mark = self.get_mark() - return ScalarToken(u''.join(chunks), False, start_mark, end_mark, - style) - - ESCAPE_REPLACEMENTS = { - u'0': u'\0', - u'a': u'\x07', - u'b': u'\x08', - u't': u'\x09', - u'\t': u'\x09', - u'n': u'\x0A', - u'v': u'\x0B', - u'f': u'\x0C', - u'r': u'\x0D', - u'e': u'\x1B', - u' ': u'\x20', - u'\"': u'\"', - u'\\': u'\\', - u'/': u'/', - u'N': u'\x85', - u'_': u'\xA0', - u'L': u'\u2028', - u'P': u'\u2029', - } - - ESCAPE_CODES = { - u'x': 2, - u'u': 4, - u'U': 8, - } - - def scan_flow_scalar_non_spaces(self, double, start_mark): - # See the specification for details. - chunks = [] - while True: - length = 0 - while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029': - length += 1 - if length: - chunks.append(self.prefix(length)) - self.forward(length) - ch = self.peek() - if not double and ch == u'\'' and self.peek(1) == u'\'': - chunks.append(u'\'') - self.forward(2) - elif (double and ch == u'\'') or (not double and ch in u'\"\\'): - chunks.append(ch) - self.forward() - elif double and ch == u'\\': - self.forward() - ch = self.peek() - if ch in self.ESCAPE_REPLACEMENTS: - chunks.append(self.ESCAPE_REPLACEMENTS[ch]) - self.forward() - elif ch in self.ESCAPE_CODES: - length = self.ESCAPE_CODES[ch] - self.forward() - for k in range(length): - if self.peek(k) not in u'0123456789ABCDEFabcdef': - raise ScannerError("while scanning a double-quoted scalar", start_mark, - "expected escape sequence of %d hexdecimal numbers, but found %r" % - (length, self.peek(k).encode('utf-8')), self.get_mark()) - code = int(self.prefix(length), 16) - chunks.append(unichr(code)) - self.forward(length) - elif ch in u'\r\n\x85\u2028\u2029': - self.scan_line_break() - chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) - else: - raise ScannerError("while scanning a double-quoted scalar", start_mark, - "found unknown escape character %r" % ch.encode('utf-8'), self.get_mark()) - else: - return chunks - - def scan_flow_scalar_spaces(self, double, start_mark): - # See the specification for details. - chunks = [] - length = 0 - while self.peek(length) in u' \t': - length += 1 - whitespaces = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch == u'\0': - raise ScannerError("while scanning a quoted scalar", start_mark, - "found unexpected end of stream", self.get_mark()) - elif ch in u'\r\n\x85\u2028\u2029': - line_break = self.scan_line_break() - breaks = self.scan_flow_scalar_breaks(double, start_mark) - if line_break != u'\n': - chunks.append(line_break) - elif not breaks: - chunks.append(u' ') - chunks.extend(breaks) - else: - chunks.append(whitespaces) - return chunks - - def scan_flow_scalar_breaks(self, double, start_mark): - # See the specification for details. - chunks = [] - while True: - # Instead of checking indentation, we check for document - # separators. - prefix = self.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': - raise ScannerError("while scanning a quoted scalar", start_mark, - "found unexpected document separator", self.get_mark()) - while self.peek() in u' \t': - self.forward() - if self.peek() in u'\r\n\x85\u2028\u2029': - chunks.append(self.scan_line_break()) - else: - return chunks - - def scan_plain(self): - # See the specification for details. - # We add an additional restriction for the flow context: - # plain scalars in the flow context cannot contain ',' or '?'. - # We also keep track of the `allow_simple_key` flag here. - # Indentation rules are loosed for the flow context. - chunks = [] - start_mark = self.get_mark() - end_mark = start_mark - indent = self.indent+1 - # We allow zero indentation for scalars, but then we need to check for - # document separators at the beginning of the line. - #if indent == 0: - # indent = 1 - spaces = [] - while True: - length = 0 - if self.peek() == u'#': - break - while True: - ch = self.peek(length) - if ch in u'\0 \t\r\n\x85\u2028\u2029' \ - or (ch == u':' and - self.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029' - + (u',[]{}' if self.flow_level else u''))\ - or (self.flow_level and ch in u',?[]{}'): - break - length += 1 - if length == 0: - break - self.allow_simple_key = False - chunks.extend(spaces) - chunks.append(self.prefix(length)) - self.forward(length) - end_mark = self.get_mark() - spaces = self.scan_plain_spaces(indent, start_mark) - if not spaces or self.peek() == u'#' \ - or (not self.flow_level and self.column < indent): - break - return ScalarToken(u''.join(chunks), True, start_mark, end_mark) - - def scan_plain_spaces(self, indent, start_mark): - # See the specification for details. - # The specification is really confusing about tabs in plain scalars. - # We just forbid them completely. Do not use tabs in YAML! - chunks = [] - length = 0 - while self.peek(length) in u' ': - length += 1 - whitespaces = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch in u'\r\n\x85\u2028\u2029': - line_break = self.scan_line_break() - self.allow_simple_key = True - prefix = self.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': - return - breaks = [] - while self.peek() in u' \r\n\x85\u2028\u2029': - if self.peek() == ' ': - self.forward() - else: - breaks.append(self.scan_line_break()) - prefix = self.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': - return - if line_break != u'\n': - chunks.append(line_break) - elif not breaks: - chunks.append(u' ') - chunks.extend(breaks) - elif whitespaces: - chunks.append(whitespaces) - return chunks - - def scan_tag_handle(self, name, start_mark): - # See the specification for details. - # For some strange reasons, the specification does not allow '_' in - # tag handles. I have allowed it anyway. - ch = self.peek() - if ch != u'!': - raise ScannerError("while scanning a %s" % name, start_mark, - "expected '!', but found %r" % ch.encode('utf-8'), - self.get_mark()) - length = 1 - ch = self.peek(length) - if ch != u' ': - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_': - length += 1 - ch = self.peek(length) - if ch != u'!': - self.forward(length) - raise ScannerError("while scanning a %s" % name, start_mark, - "expected '!', but found %r" % ch.encode('utf-8'), - self.get_mark()) - length += 1 - value = self.prefix(length) - self.forward(length) - return value - - def scan_tag_uri(self, name, start_mark): - # See the specification for details. - # Note: we do not check if URI is well-formed. - chunks = [] - length = 0 - ch = self.peek(length) - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-;/?:@&=+$,_.!~*\'()[]%': - if ch == u'%': - chunks.append(self.prefix(length)) - self.forward(length) - length = 0 - chunks.append(self.scan_uri_escapes(name, start_mark)) - else: - length += 1 - ch = self.peek(length) - if length: - chunks.append(self.prefix(length)) - self.forward(length) - length = 0 - if not chunks: - raise ScannerError("while parsing a %s" % name, start_mark, - "expected URI, but found %r" % ch.encode('utf-8'), - self.get_mark()) - return u''.join(chunks) - - def scan_uri_escapes(self, name, start_mark): - # See the specification for details. - bytes = [] - mark = self.get_mark() - while self.peek() == u'%': - self.forward() - for k in range(2): - if self.peek(k) not in u'0123456789ABCDEFabcdef': - raise ScannerError("while scanning a %s" % name, start_mark, - "expected URI escape sequence of 2 hexdecimal numbers, but found %r" % - (self.peek(k).encode('utf-8')), self.get_mark()) - bytes.append(chr(int(self.prefix(2), 16))) - self.forward(2) - try: - value = unicode(''.join(bytes), 'utf-8') - except UnicodeDecodeError, exc: - raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark) - return value - - def scan_line_break(self): - # Transforms: - # '\r\n' : '\n' - # '\r' : '\n' - # '\n' : '\n' - # '\x85' : '\n' - # '\u2028' : '\u2028' - # '\u2029 : '\u2029' - # default : '' - ch = self.peek() - if ch in u'\r\n\x85': - if self.prefix(2) == u'\r\n': - self.forward(2) - else: - self.forward() - return u'\n' - elif ch in u'\u2028\u2029': - self.forward() - return ch - return u'' diff --git a/libs/yaml2.7/serializer.py b/libs/yaml2.7/serializer.py deleted file mode 100644 index 0bf1e96dc..000000000 --- a/libs/yaml2.7/serializer.py +++ /dev/null @@ -1,111 +0,0 @@ - -__all__ = ['Serializer', 'SerializerError'] - -from error import YAMLError -from events import * -from nodes import * - -class SerializerError(YAMLError): - pass - -class Serializer(object): - - ANCHOR_TEMPLATE = u'id%03d' - - def __init__(self, encoding=None, - explicit_start=None, explicit_end=None, version=None, tags=None): - self.use_encoding = encoding - self.use_explicit_start = explicit_start - self.use_explicit_end = explicit_end - self.use_version = version - self.use_tags = tags - self.serialized_nodes = {} - self.anchors = {} - self.last_anchor_id = 0 - self.closed = None - - def open(self): - if self.closed is None: - self.emit(StreamStartEvent(encoding=self.use_encoding)) - self.closed = False - elif self.closed: - raise SerializerError("serializer is closed") - else: - raise SerializerError("serializer is already opened") - - def close(self): - if self.closed is None: - raise SerializerError("serializer is not opened") - elif not self.closed: - self.emit(StreamEndEvent()) - self.closed = True - - #def __del__(self): - # self.close() - - def serialize(self, node): - if self.closed is None: - raise SerializerError("serializer is not opened") - elif self.closed: - raise SerializerError("serializer is closed") - self.emit(DocumentStartEvent(explicit=self.use_explicit_start, - version=self.use_version, tags=self.use_tags)) - self.anchor_node(node) - self.serialize_node(node, None, None) - self.emit(DocumentEndEvent(explicit=self.use_explicit_end)) - self.serialized_nodes = {} - self.anchors = {} - self.last_anchor_id = 0 - - def anchor_node(self, node): - if node in self.anchors: - if self.anchors[node] is None: - self.anchors[node] = self.generate_anchor(node) - else: - self.anchors[node] = None - if isinstance(node, SequenceNode): - for item in node.value: - self.anchor_node(item) - elif isinstance(node, MappingNode): - for key, value in node.value: - self.anchor_node(key) - self.anchor_node(value) - - def generate_anchor(self, node): - self.last_anchor_id += 1 - return self.ANCHOR_TEMPLATE % self.last_anchor_id - - def serialize_node(self, node, parent, index): - alias = self.anchors[node] - if node in self.serialized_nodes: - self.emit(AliasEvent(alias)) - else: - self.serialized_nodes[node] = True - self.descend_resolver(parent, index) - if isinstance(node, ScalarNode): - detected_tag = self.resolve(ScalarNode, node.value, (True, False)) - default_tag = self.resolve(ScalarNode, node.value, (False, True)) - implicit = (node.tag == detected_tag), (node.tag == default_tag) - self.emit(ScalarEvent(alias, node.tag, implicit, node.value, - style=node.style)) - elif isinstance(node, SequenceNode): - implicit = (node.tag - == self.resolve(SequenceNode, node.value, True)) - self.emit(SequenceStartEvent(alias, node.tag, implicit, - flow_style=node.flow_style)) - index = 0 - for item in node.value: - self.serialize_node(item, node, index) - index += 1 - self.emit(SequenceEndEvent()) - elif isinstance(node, MappingNode): - implicit = (node.tag - == self.resolve(MappingNode, node.value, True)) - self.emit(MappingStartEvent(alias, node.tag, implicit, - flow_style=node.flow_style)) - for key, value in node.value: - self.serialize_node(key, node, None) - self.serialize_node(value, node, key) - self.emit(MappingEndEvent()) - self.ascend_resolver() - diff --git a/libs/yaml2.7/tokens.py b/libs/yaml2.7/tokens.py deleted file mode 100644 index 4d0b48a39..000000000 --- a/libs/yaml2.7/tokens.py +++ /dev/null @@ -1,104 +0,0 @@ - -class Token(object): - def __init__(self, start_mark, end_mark): - self.start_mark = start_mark - self.end_mark = end_mark - def __repr__(self): - attributes = [key for key in self.__dict__ - if not key.endswith('_mark')] - attributes.sort() - arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) - for key in attributes]) - return '%s(%s)' % (self.__class__.__name__, arguments) - -#class BOMToken(Token): -# id = '<byte order mark>' - -class DirectiveToken(Token): - id = '<directive>' - def __init__(self, name, value, start_mark, end_mark): - self.name = name - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - -class DocumentStartToken(Token): - id = '<document start>' - -class DocumentEndToken(Token): - id = '<document end>' - -class StreamStartToken(Token): - id = '<stream start>' - def __init__(self, start_mark=None, end_mark=None, - encoding=None): - self.start_mark = start_mark - self.end_mark = end_mark - self.encoding = encoding - -class StreamEndToken(Token): - id = '<stream end>' - -class BlockSequenceStartToken(Token): - id = '<block sequence start>' - -class BlockMappingStartToken(Token): - id = '<block mapping start>' - -class BlockEndToken(Token): - id = '<block end>' - -class FlowSequenceStartToken(Token): - id = '[' - -class FlowMappingStartToken(Token): - id = '{' - -class FlowSequenceEndToken(Token): - id = ']' - -class FlowMappingEndToken(Token): - id = '}' - -class KeyToken(Token): - id = '?' - -class ValueToken(Token): - id = ':' - -class BlockEntryToken(Token): - id = '-' - -class FlowEntryToken(Token): - id = ',' - -class AliasToken(Token): - id = '<alias>' - def __init__(self, value, start_mark, end_mark): - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - -class AnchorToken(Token): - id = '<anchor>' - def __init__(self, value, start_mark, end_mark): - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - -class TagToken(Token): - id = '<tag>' - def __init__(self, value, start_mark, end_mark): - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - -class ScalarToken(Token): - id = '<scalar>' - def __init__(self, value, plain, start_mark, end_mark, style=None): - self.value = value - self.plain = plain - self.start_mark = start_mark - self.end_mark = end_mark - self.style = style - |