summary | refs | log | tree | commit | diff | homepage
path: root/libs/bs4/dammit.py
diff options
context:
space:
mode:
author Louis Vézina <[email protected]> 2019-09-24 06:23:11 -0400
committer Louis Vézina <[email protected]> 2019-09-24 06:23:11 -0400
commit 8227df459a8a9286a4b5e8829b95abad337fefe8 (patch)
tree d3a5cfe429e76149886e22f05b7cf363d2e98643 /libs/bs4/dammit.py
parent 2b2fd4e8d7d69db132be57a81ab3703147f9c5a5 (diff)
download bazarr-8227df459a8a9286a4b5e8829b95abad337fefe8.tar.gz
bazarr-8227df459a8a9286a4b5e8829b95abad337fefe8.zip
WIP
Diffstat (limited to 'libs/bs4/dammit.py')
-rw-r--r-- libs/bs4/dammit.py 13
1 files changed, 8 insertions, 5 deletions
diff --git a/libs/bs4/dammit.py b/libs/bs4/dammit.py
index 7965565f5..fe10691d0 100644
--- a/libs/bs4/dammit.py
+++ b/libs/bs4/dammit.py
@@ -8,10 +8,13 @@ XML or HTML to reflect a new encoding; that's the tree builder's job.
"""
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
+from __future__ import absolute_import
+from six import unichr
+import six
__license__ = "MIT"
import codecs
-from htmlentitydefs import codepoint2name
+from six.moves.html_entities import codepoint2name
import re
import logging
import string
@@ -274,7 +277,7 @@ class EncodingDetector:
def strip_byte_order_mark(cls, data):
"""If a byte-order mark is present, strip it and return the encoding it implies."""
encoding = None
- if isinstance(data, unicode):
+ if isinstance(data, six.text_type):
# Unicode data cannot have a byte-order mark.
return data, encoding
if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
@@ -352,9 +355,9 @@ class UnicodeDammit:
markup, override_encodings, is_html, exclude_encodings)
# Short-circuit if the data is in Unicode to begin with.
- if isinstance(markup, unicode) or markup == '':
+ if isinstance(markup, six.text_type) or markup == '':
self.markup = markup
- self.unicode_markup = unicode(markup)
+ self.unicode_markup = six.text_type(markup)
self.original_encoding = None
return
@@ -438,7 +441,7 @@ class UnicodeDammit:
def _to_unicode(self, data, encoding, errors="strict"):
'''Given a string and its encoding, decodes the string into Unicode.
%encoding is a string recognized by encodings.aliases'''
- return unicode(data, encoding, errors)
+ return six.text_type(data, encoding, errors)
@property
def declared_html_encoding(self):