diff options
author | Yen Chi Hsuan <[email protected]> | 2016-03-23 22:24:52 +0800 |
---|---|---|
committer | Yen Chi Hsuan <[email protected]> | 2016-03-23 22:24:52 +0800 |
commit | efbed08dc20c530fe428256e4dcbea4dc4423d0d (patch) | |
tree | 836f73a2f8b9f8b2f528619ec13374e42decce32 | |
parent | 7da2c87119db8beda1bdc979fad38c08fc1252e9 (diff) | |
download | youtube-dl-efbed08dc20c530fe428256e4dcbea4dc4423d0d.tar.gz youtube-dl-efbed08dc20c530fe428256e4dcbea4dc4423d0d.zip |
[utils] Encode hostnames before passing to urllib
With IDN (Internationalized Domain Name) and a proxy, non-ASCII URLs
are passed down to urllib/urllib2, causing UnicodeEncodeError.
Fixes #8890
-rw-r--r-- | test/test_http.py | 10 | ||||
-rw-r--r-- | youtube_dl/utils.py | 1 |
2 files changed, 11 insertions, 0 deletions
```diff
diff --git a/test/test_http.py b/test/test_http.py
index fc59b1aed..15e0ad369 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# coding: utf-8
 from __future__ import unicode_literals
 
 # Allow direct execution
@@ -120,5 +121,14 @@ class TestProxy(unittest.TestCase):
         response = ydl.urlopen(req).read().decode('utf-8')
         self.assertEqual(response, 'cn: {0}'.format(url))
 
+    def test_proxy_with_idn(self):
+        ydl = YoutubeDL({
+            'proxy': 'localhost:{0}'.format(self.port),
+        })
+        url = 'http://中文.tw/'
+        response = ydl.urlopen(url).read().decode('utf-8')
+        # b'xn--fiq228c' is '中文'.encode('idna')
+        self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 067b8a184..03bb7782f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1746,6 +1746,7 @@ def escape_url(url):
     """Escape URL as suggested by RFC 3986"""
     url_parsed = compat_urllib_parse_urlparse(url)
     return url_parsed._replace(
+        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
         path=escape_rfc3986(url_parsed.path),
         params=escape_rfc3986(url_parsed.params),
         query=escape_rfc3986(url_parsed.query),
```