diff options
author | Philipp Hagemeister <[email protected]> | 2014-01-21 01:39:39 +0100 |
---|---|---|
committer | Philipp Hagemeister <[email protected]> | 2014-01-21 01:39:40 +0100 |
commit | b60016e83139ace517fc823cf2b22756e64c2e63 (patch) | |
tree | 74c5a65f6a3b34c0f862c2be0de6602a7ccf507e | |
parent | 5aafe895fce2a7be9595cb2e56b7bd73a748e6b6 (diff) | |
download | youtube-dl-b60016e83139ace517fc823cf2b22756e64c2e63.tar.gz youtube-dl-b60016e83139ace517fc823cf2b22756e64c2e63.zip |
Deal with implicitly UTF-16 decoded webpages
These webpages don't specify an encoding and rely on the BOM
-rw-r--r-- | youtube_dl/extractor/common.py | 2 |
1 files changed, 2 insertions, 0 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 692d828da..6c5d77e58 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -220,6 +220,8 @@ class InfoExtractor(object):
                           webpage_bytes[:1024])
             if m:
                 encoding = m.group(1).decode('ascii')
+            elif webpage_bytes.startswith(b'\xff\xfe'):
+                encoding = 'utf-16'
             else:
                 encoding = 'utf-8'
         if self._downloader.params.get('dump_intermediate_pages', False):