WIP

author: Louis Vézina <[email protected]> 2019-09-24 06:23:11 -0400
committer: Louis Vézina <[email protected]> 2019-09-24 06:23:11 -0400
commit: 8227df459a8a9286a4b5e8829b95abad337fefe8 (patch)
tree: d3a5cfe429e76149886e22f05b7cf363d2e98643 /libs/bs4/builder
parent: 2b2fd4e8d7d69db132be57a81ab3703147f9c5a5 (diff)
download: bazarr-8227df459a8a9286a4b5e8829b95abad337fefe8.tar.gz bazarr-8227df459a8a9286a4b5e8829b95abad337fefe8.zip
4 files changed, 28 insertions, 19 deletions
diff --git a/libs/bs4/builder/__init__.py b/libs/bs4/builder/__init__.py
index fdb3362fc..4b44ef05f 100644
--- a/libs/bs4/builder/__init__.py
+++ b/libs/bs4/builder/__init__.py
@@ -1,6 +1,7 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
+from __future__ import absolute_import
 from collections import defaultdict
 import itertools
 import sys
@@ -10,6 +11,7 @@ from bs4.element import (
     HTMLAwareEntitySubstitution,
     whitespace_re
     )
+import six
 
 __all__ = [
     'HTMLTreeBuilder',
@@ -166,7 +168,7 @@ class TreeBuilder(object):
                     # value is a whitespace-separated list of
                     # values. Split it into a list.
                     value = attrs[attr]
-                    if isinstance(value, basestring):
+                    if isinstance(value, six.string_types):
                         values = whitespace_re.split(value)
                     else:
                         # html5lib sometimes calls setAttributes twice
diff --git a/libs/bs4/builder/_html5lib.py b/libs/bs4/builder/_html5lib.py
index 5f5489358..cf6063b83 100644
--- a/libs/bs4/builder/_html5lib.py
+++ b/libs/bs4/builder/_html5lib.py
@@ -1,6 +1,8 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
+from __future__ import absolute_import
+import six
 __all__ = [
     'HTML5TreeBuilder',
     ]
@@ -33,7 +35,7 @@ try:
     # Pre-0.99999999
     from html5lib.treebuilders import _base as treebuilder_base
     new_html5lib = False
-except ImportError, e:
+except ImportError as e:
     # 0.99999999 and up
     from html5lib.treebuilders import base as treebuilder_base
     new_html5lib = True
@@ -64,7 +66,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
         parser = html5lib.HTMLParser(tree=self.create_treebuilder)
 
         extra_kwargs = dict()
-        if not isinstance(markup, unicode):
+        if not isinstance(markup, six.text_type):
             if new_html5lib:
                 extra_kwargs['override_encoding'] = self.user_specified_encoding
             else:
@@ -72,13 +74,13 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
         doc = parser.parse(markup, **extra_kwargs)
 
         # Set the character encoding detected by the tokenizer.
-        if isinstance(markup, unicode):
+        if isinstance(markup, six.text_type):
             # We need to special-case this because html5lib sets
             # charEncoding to UTF-8 if it gets Unicode input.
             doc.original_encoding = None
         else:
             original_encoding = parser.tokenizer.stream.charEncoding[0]
-            if not isinstance(original_encoding, basestring):
+            if not isinstance(original_encoding, six.string_types):
                 # In 0.99999999 and up, the encoding is an html5lib
                 # Encoding object. We want to use a string for compatibility
                 # with other tree builders.
@@ -229,7 +231,7 @@ class Element(treebuilder_base.Node):
 
     def appendChild(self, node):
         string_child = child = None
-        if isinstance(node, basestring):
+        if isinstance(node, six.string_types):
             # Some other piece of code decided to pass in a string
             # instead of creating a TextElement object to contain the
             # string.
@@ -246,7 +248,7 @@ class Element(treebuilder_base.Node):
             child = node.element
             node.parent = self
 
-        if not isinstance(child, basestring) and child.parent is not None:
+        if not isinstance(child, six.string_types) and child.parent is not None:
             node.element.extract()
 
         if (string_child and self.element.contents
@@ -259,7 +261,7 @@ class Element(treebuilder_base.Node):
             old_element.replace_with(new_element)
             self.soup._most_recent_element = new_element
         else:
-            if isinstance(node, basestring):
+            if isinstance(node, six.string_types):
                 # Create a brand new NavigableString from this string.
                 child = self.soup.new_string(node)
 
diff --git a/libs/bs4/builder/_htmlparser.py b/libs/bs4/builder/_htmlparser.py
index 67890b3a3..624028e3c 100644
--- a/libs/bs4/builder/_htmlparser.py
+++ b/libs/bs4/builder/_htmlparser.py
@@ -3,15 +3,18 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
+from __future__ import absolute_import
+from six import unichr
+import six
 __all__ = [
     'HTMLParserTreeBuilder',
     ]
 
-from HTMLParser import HTMLParser
+from six.moves.html_parser import HTMLParser
 
 try:
-    from HTMLParser import HTMLParseError
-except ImportError, e:
+    from six.moves.html_parser import HTMLParseError
+except ImportError as e:
     # HTMLParseError is removed in Python 3.5. Since it can never be
     # thrown in 3.5, we can just define our own class as a placeholder.
     class HTMLParseError(Exception):
@@ -131,7 +134,7 @@ class BeautifulSoupHTMLParser(HTMLParser):
 
         try:
             data = unichr(real_name)
-        except (ValueError, OverflowError), e:
+        except (ValueError, OverflowError) as e:
             data = u"\N{REPLACEMENT CHARACTER}"
 
         self.handle_data(data)
@@ -196,7 +199,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
         declared within markup, whether any characters had to be
         replaced with REPLACEMENT CHARACTER).
         """
-        if isinstance(markup, unicode):
+        if isinstance(markup, six.text_type):
             yield (markup, None, None, False)
             return
 
@@ -213,7 +216,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
         parser.soup = self.soup
         try:
             parser.feed(markup)
-        except HTMLParseError, e:
+        except HTMLParseError as e:
             warnings.warn(RuntimeWarning(
                 "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
             raise e
diff --git a/libs/bs4/builder/_lxml.py b/libs/bs4/builder/_lxml.py
index d2ca2872d..73f6e2b34 100644
--- a/libs/bs4/builder/_lxml.py
+++ b/libs/bs4/builder/_lxml.py
@@ -1,5 +1,7 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
+from __future__ import absolute_import
+import six
 __all__ = [
     'LXMLTreeBuilderForXML',
     'LXMLTreeBuilder',
@@ -101,12 +103,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         else:
             self.processing_instruction_class = XMLProcessingInstruction
 
-        if isinstance(markup, unicode):
+        if isinstance(markup, six.text_type):
             # We were given Unicode. Maybe lxml can parse Unicode on
             # this system?
             yield markup, None, document_declared_encoding, False
 
-        if isinstance(markup, unicode):
+        if isinstance(markup, six.text_type):
             # No, apparently not. Convert the Unicode to UTF-8 and
             # tell lxml to parse it as UTF-8.
             yield (markup.encode("utf8"), "utf8",
@@ -121,7 +123,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
     def feed(self, markup):
         if isinstance(markup, bytes):
             markup = BytesIO(markup)
-        elif isinstance(markup, unicode):
+        elif isinstance(markup, six.text_type):
             markup = StringIO(markup)
 
         # Call feed() at least once, even if the markup is empty,
@@ -136,7 +138,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
                 if len(data) != 0:
                     self.parser.feed(data)
             self.parser.close()
-        except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
             raise ParserRejectedMarkup(str(e))
 
     def close(self):
@@ -249,7 +251,7 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
             self.parser = self.parser_for(encoding)
             self.parser.feed(markup)
             self.parser.close()
-        except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
             raise ParserRejectedMarkup(str(e))
author	Louis Vézina <[email protected]>	2019-09-24 06:23:11 -0400
committer	Louis Vézina <[email protected]>	2019-09-24 06:23:11 -0400
commit	8227df459a8a9286a4b5e8829b95abad337fefe8 (patch)
tree	d3a5cfe429e76149886e22f05b7cf363d2e98643 /libs/bs4/builder
parent	2b2fd4e8d7d69db132be57a81ab3703147f9c5a5 (diff)
download	bazarr-8227df459a8a9286a4b5e8829b95abad337fefe8.tar.gz bazarr-8227df459a8a9286a4b5e8829b95abad337fefe8.zip