diff options
-rw-r--r-- | test/test_utils.py | 11 | ||||
-rw-r--r-- | youtube_dl/__init__.py | 13 | ||||
-rw-r--r-- | youtube_dl/extractor/podomatic.py | 21 | ||||
-rw-r--r-- | youtube_dl/utils.py | 17 |
4 files changed, 46 insertions, 16 deletions
diff --git a/test/test_utils.py b/test/test_utils.py index 84553b943..4e3c37fb4 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # Various small unit tests +import io import xml.etree.ElementTree #from youtube_dl.utils import htmlentity_transform @@ -21,6 +22,7 @@ from youtube_dl.utils import ( orderedSet, PagedList, parse_duration, + read_batch_urls, sanitize_filename, shell_quote, smuggle_url, @@ -250,5 +252,14 @@ class TestUtil(unittest.TestCase): def test_struct_unpack(self): self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,)) + def test_read_batch_urls(self): + f = io.StringIO(u'''\xef\xbb\xbf foo + bar\r + baz + # More after this line\r + ; or after this + bam''') + self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam']) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 84f29a1a5..2aaafd37a 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -71,6 +71,7 @@ from .utils import ( get_cachedir, MaxDownloadsReached, preferredencoding, + read_batch_urls, SameFileError, setproctitle, std_headers, @@ -552,21 +553,19 @@ def _real_main(argv=None): sys.exit(0) # Batch file verification - batchurls = [] + batch_urls = [] if opts.batchfile is not None: try: if opts.batchfile == '-': batchfd = sys.stdin else: - batchfd = open(opts.batchfile, 'r') - batchurls = batchfd.readlines() - batchurls = [x.strip() for x in batchurls] - batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] + batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore') + batch_urls = read_batch_urls(batchfd) if opts.verbose: - write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n') + write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n') except IOError: sys.exit(u'ERROR: batch file could not be read') - all_urls = batchurls + args + all_urls = batch_urls + args all_urls = [url.strip() for url in all_urls] _enc = preferredencoding() all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] diff --git a/youtube_dl/extractor/podomatic.py b/youtube_dl/extractor/podomatic.py index 58200971b..19ad45c98 100644 --- a/youtube_dl/extractor/podomatic.py +++ b/youtube_dl/extractor/podomatic.py @@ -1,7 +1,10 @@ +from __future__ import unicode_literals + import json import re from .common import InfoExtractor +from ..utils import int_or_none class PodomaticIE(InfoExtractor): @@ -9,14 +12,14 @@ class PodomaticIE(InfoExtractor): _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)' _TEST = { - u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00", - u"file": u"2009-01-02T16_03_35-08_00.mp3", - u"md5": u"84bb855fcf3429e6bf72460e1eed782d", - u"info_dict": { - u"uploader": u"Science Teaching Tips", - u"uploader_id": u"scienceteachingtips", - u"title": u"64. When the Moon Hits Your Eye", - u"duration": 446, + "url": "http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00", + "file": "2009-01-02T16_03_35-08_00.mp3", + "md5": "84bb855fcf3429e6bf72460e1eed782d", + "info_dict": { + "uploader": "Science Teaching Tips", + "uploader_id": "scienceteachingtips", + "title": "64. When the Moon Hits Your Eye", + "duration": 446, } } @@ -36,7 +39,7 @@ class PodomaticIE(InfoExtractor): uploader = data['podcast'] title = data['title'] thumbnail = data['imageLocation'] - duration = int(data['length'] / 1000.0) + duration = int_or_none(data.get('length'), 1000) return { 'id': video_id, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 25e40a837..0c482631a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import contextlib import ctypes import datetime import email.utils @@ -1245,3 +1246,19 @@ except TypeError: else: struct_pack = struct.pack struct_unpack = struct.unpack + + +def read_batch_urls(batch_fd): + def fixup(url): + if not isinstance(url, compat_str): + url = url.decode('utf-8', 'replace') + BOM_UTF8 = u'\xef\xbb\xbf' + if url.startswith(BOM_UTF8): + url = url[len(BOM_UTF8):] + url = url.strip() + if url.startswith(('#', ';', ']')): + return False + return url + + with contextlib.closing(batch_fd) as fd: + return [url for url in map(fixup, fd) if url] |