summary | refs | log | tree | commit | diff | homepage
path: root/libs/bs4/builder
diff options
context:
space:
mode:
author: Louis Vézina <[email protected]> 2019-09-24 06:23:11 -0400
committer: Louis Vézina <[email protected]> 2019-09-24 06:23:11 -0400
commit: 8227df459a8a9286a4b5e8829b95abad337fefe8 (patch)
tree: d3a5cfe429e76149886e22f05b7cf363d2e98643 /libs/bs4/builder
parent: 2b2fd4e8d7d69db132be57a81ab3703147f9c5a5 (diff)
download: bazarr-8227df459a8a9286a4b5e8829b95abad337fefe8.tar.gz
download: bazarr-8227df459a8a9286a4b5e8829b95abad337fefe8.zip
WIP
Diffstat (limited to 'libs/bs4/builder')
-rw-r--r-- libs/bs4/builder/__init__.py 4
-rw-r--r-- libs/bs4/builder/_html5lib.py 16
-rw-r--r-- libs/bs4/builder/_htmlparser.py 15
-rw-r--r-- libs/bs4/builder/_lxml.py 12
4 files changed, 28 insertions, 19 deletions
diff --git a/libs/bs4/builder/__init__.py b/libs/bs4/builder/__init__.py
index fdb3362fc..4b44ef05f 100644
--- a/libs/bs4/builder/__init__.py
+++ b/libs/bs4/builder/__init__.py
@@ -1,6 +1,7 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
+from __future__ import absolute_import
from collections import defaultdict
import itertools
import sys
@@ -10,6 +11,7 @@ from bs4.element import (
HTMLAwareEntitySubstitution,
whitespace_re
)
+import six
__all__ = [
'HTMLTreeBuilder',
@@ -166,7 +168,7 @@ class TreeBuilder(object):
# value is a whitespace-separated list of
# values. Split it into a list.
value = attrs[attr]
- if isinstance(value, basestring):
+ if isinstance(value, six.string_types):
values = whitespace_re.split(value)
else:
# html5lib sometimes calls setAttributes twice
diff --git a/libs/bs4/builder/_html5lib.py b/libs/bs4/builder/_html5lib.py
index 5f5489358..cf6063b83 100644
--- a/libs/bs4/builder/_html5lib.py
+++ b/libs/bs4/builder/_html5lib.py
@@ -1,6 +1,8 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
+from __future__ import absolute_import
+import six
__all__ = [
'HTML5TreeBuilder',
]
@@ -33,7 +35,7 @@ try:
# Pre-0.99999999
from html5lib.treebuilders import _base as treebuilder_base
new_html5lib = False
-except ImportError, e:
+except ImportError as e:
# 0.99999999 and up
from html5lib.treebuilders import base as treebuilder_base
new_html5lib = True
@@ -64,7 +66,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
parser = html5lib.HTMLParser(tree=self.create_treebuilder)
extra_kwargs = dict()
- if not isinstance(markup, unicode):
+ if not isinstance(markup, six.text_type):
if new_html5lib:
extra_kwargs['override_encoding'] = self.user_specified_encoding
else:
@@ -72,13 +74,13 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
doc = parser.parse(markup, **extra_kwargs)
# Set the character encoding detected by the tokenizer.
- if isinstance(markup, unicode):
+ if isinstance(markup, six.text_type):
# We need to special-case this because html5lib sets
# charEncoding to UTF-8 if it gets Unicode input.
doc.original_encoding = None
else:
original_encoding = parser.tokenizer.stream.charEncoding[0]
- if not isinstance(original_encoding, basestring):
+ if not isinstance(original_encoding, six.string_types):
# In 0.99999999 and up, the encoding is an html5lib
# Encoding object. We want to use a string for compatibility
# with other tree builders.
@@ -229,7 +231,7 @@ class Element(treebuilder_base.Node):
def appendChild(self, node):
string_child = child = None
- if isinstance(node, basestring):
+ if isinstance(node, six.string_types):
# Some other piece of code decided to pass in a string
# instead of creating a TextElement object to contain the
# string.
@@ -246,7 +248,7 @@ class Element(treebuilder_base.Node):
child = node.element
node.parent = self
- if not isinstance(child, basestring) and child.parent is not None:
+ if not isinstance(child, six.string_types) and child.parent is not None:
node.element.extract()
if (string_child and self.element.contents
@@ -259,7 +261,7 @@ class Element(treebuilder_base.Node):
old_element.replace_with(new_element)
self.soup._most_recent_element = new_element
else:
- if isinstance(node, basestring):
+ if isinstance(node, six.string_types):
# Create a brand new NavigableString from this string.
child = self.soup.new_string(node)
diff --git a/libs/bs4/builder/_htmlparser.py b/libs/bs4/builder/_htmlparser.py
index 67890b3a3..624028e3c 100644
--- a/libs/bs4/builder/_htmlparser.py
+++ b/libs/bs4/builder/_htmlparser.py
@@ -3,15 +3,18 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
+from __future__ import absolute_import
+from six import unichr
+import six
__all__ = [
'HTMLParserTreeBuilder',
]
-from HTMLParser import HTMLParser
+from six.moves.html_parser import HTMLParser
try:
- from HTMLParser import HTMLParseError
-except ImportError, e:
+ from six.moves.html_parser import HTMLParseError
+except ImportError as e:
# HTMLParseError is removed in Python 3.5. Since it can never be
# thrown in 3.5, we can just define our own class as a placeholder.
class HTMLParseError(Exception):
@@ -131,7 +134,7 @@ class BeautifulSoupHTMLParser(HTMLParser):
try:
data = unichr(real_name)
- except (ValueError, OverflowError), e:
+ except (ValueError, OverflowError) as e:
data = u"\N{REPLACEMENT CHARACTER}"
self.handle_data(data)
@@ -196,7 +199,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
declared within markup, whether any characters had to be
replaced with REPLACEMENT CHARACTER).
"""
- if isinstance(markup, unicode):
+ if isinstance(markup, six.text_type):
yield (markup, None, None, False)
return
@@ -213,7 +216,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
parser.soup = self.soup
try:
parser.feed(markup)
- except HTMLParseError, e:
+ except HTMLParseError as e:
warnings.warn(RuntimeWarning(
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
raise e
diff --git a/libs/bs4/builder/_lxml.py b/libs/bs4/builder/_lxml.py
index d2ca2872d..73f6e2b34 100644
--- a/libs/bs4/builder/_lxml.py
+++ b/libs/bs4/builder/_lxml.py
@@ -1,5 +1,7 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
+from __future__ import absolute_import
+import six
__all__ = [
'LXMLTreeBuilderForXML',
'LXMLTreeBuilder',
@@ -101,12 +103,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
else:
self.processing_instruction_class = XMLProcessingInstruction
- if isinstance(markup, unicode):
+ if isinstance(markup, six.text_type):
# We were given Unicode. Maybe lxml can parse Unicode on
# this system?
yield markup, None, document_declared_encoding, False
- if isinstance(markup, unicode):
+ if isinstance(markup, six.text_type):
# No, apparently not. Convert the Unicode to UTF-8 and
# tell lxml to parse it as UTF-8.
yield (markup.encode("utf8"), "utf8",
@@ -121,7 +123,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
def feed(self, markup):
if isinstance(markup, bytes):
markup = BytesIO(markup)
- elif isinstance(markup, unicode):
+ elif isinstance(markup, six.text_type):
markup = StringIO(markup)
# Call feed() at least once, even if the markup is empty,
@@ -136,7 +138,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
if len(data) != 0:
self.parser.feed(data)
self.parser.close()
- except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+ except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
raise ParserRejectedMarkup(str(e))
def close(self):
@@ -249,7 +251,7 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
self.parser = self.parser_for(encoding)
self.parser.feed(markup)
self.parser.close()
- except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+ except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
raise ParserRejectedMarkup(str(e))