summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r-- test/test_utils.py 11
-rw-r--r-- youtube_dl/__init__.py 13
-rw-r--r-- youtube_dl/extractor/podomatic.py 21
-rw-r--r-- youtube_dl/utils.py 17
4 files changed, 46 insertions, 16 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index 84553b943..4e3c37fb4 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Various small unit tests
+import io
import xml.etree.ElementTree
#from youtube_dl.utils import htmlentity_transform
@@ -21,6 +22,7 @@ from youtube_dl.utils import (
orderedSet,
PagedList,
parse_duration,
+ read_batch_urls,
sanitize_filename,
shell_quote,
smuggle_url,
@@ -250,5 +252,14 @@ class TestUtil(unittest.TestCase):
def test_struct_unpack(self):
self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,))
+ def test_read_batch_urls(self):  # feeds a stream with a BOM-like prefix, "\r" line endings and "#"/";" lines
+ f = io.StringIO(u'''\xef\xbb\xbf foo
+ bar\r
+ baz
+ # More after this line\r
+ ; or after this
+ bam''')
+ self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])  # only the four plain entries remain, in input order
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 84f29a1a5..2aaafd37a 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -71,6 +71,7 @@ from .utils import (
get_cachedir,
MaxDownloadsReached,
preferredencoding,
+ read_batch_urls,
SameFileError,
setproctitle,
std_headers,
@@ -552,21 +553,19 @@ def _real_main(argv=None):
sys.exit(0)
# Batch file verification
- batchurls = []
+ batch_urls = []
if opts.batchfile is not None:
try:
if opts.batchfile == '-':
batchfd = sys.stdin
else:
- batchfd = open(opts.batchfile, 'r')
- batchurls = batchfd.readlines()
- batchurls = [x.strip() for x in batchurls]
- batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
+ batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
+ batch_urls = read_batch_urls(batchfd)
if opts.verbose:
- write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
+ write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')
except IOError:
sys.exit(u'ERROR: batch file could not be read')
- all_urls = batchurls + args
+ all_urls = batch_urls + args
all_urls = [url.strip() for url in all_urls]
_enc = preferredencoding()
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
diff --git a/youtube_dl/extractor/podomatic.py b/youtube_dl/extractor/podomatic.py
index 58200971b..19ad45c98 100644
--- a/youtube_dl/extractor/podomatic.py
+++ b/youtube_dl/extractor/podomatic.py
@@ -1,7 +1,10 @@
+from __future__ import unicode_literals
+
import json
import re
from .common import InfoExtractor
+from ..utils import int_or_none
class PodomaticIE(InfoExtractor):
@@ -9,14 +12,14 @@ class PodomaticIE(InfoExtractor):
_VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'
_TEST = {
- u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
- u"file": u"2009-01-02T16_03_35-08_00.mp3",
- u"md5": u"84bb855fcf3429e6bf72460e1eed782d",
- u"info_dict": {
- u"uploader": u"Science Teaching Tips",
- u"uploader_id": u"scienceteachingtips",
- u"title": u"64. When the Moon Hits Your Eye",
- u"duration": 446,
+ "url": "http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
+ "file": "2009-01-02T16_03_35-08_00.mp3",
+ "md5": "84bb855fcf3429e6bf72460e1eed782d",
+ "info_dict": {
+ "uploader": "Science Teaching Tips",
+ "uploader_id": "scienceteachingtips",
+ "title": "64. When the Moon Hits Your Eye",
+ "duration": 446,
}
}
@@ -36,7 +39,7 @@ class PodomaticIE(InfoExtractor):
uploader = data['podcast']
title = data['title']
thumbnail = data['imageLocation']
- duration = int(data['length'] / 1000.0)
+ duration = int_or_none(data.get('length'), 1000)
return {
'id': video_id,
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 25e40a837..0c482631a 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
+import contextlib
import ctypes
import datetime
import email.utils
@@ -1245,3 +1246,19 @@ except TypeError:
else:
struct_pack = struct.pack
struct_unpack = struct.unpack
+
+
+def read_batch_urls(batch_fd):
+    """Read the download URLs from an open batch file and close it.
+
+    batch_fd is iterated line by line; each line may be text or UTF-8
+    encoded bytes. A leading byte order mark and surrounding whitespace
+    (including a trailing carriage return) are removed from every line.
+    Blank lines and lines starting with "#", ";", "/" or "]" are skipped.
+
+    Returns the remaining lines as a list of text strings, in file order.
+    """
+    # A BOM that was decoded as UTF-8 arrives as U+FEFF; the three-character
+    # form is what the same bytes look like when each byte was decoded as
+    # one character. strip() removes neither, so both are cut off explicitly.
+    boms = (u'\ufeff', u'\xef\xbb\xbf')
+    # "/" was a comment prefix in the inline filter this function replaced
+    # (r'^[#/;]'); "]" is kept so that no line skipped so far becomes a URL.
+    comment_prefixes = ('#', ';', '/', ']')
+
+    def fixup(url):
+        if not isinstance(url, compat_str):
+            url = url.decode('utf-8', 'replace')
+        for bom in boms:
+            if url.startswith(bom):
+                url = url[len(bom):]
+                break
+        url = url.strip()
+        if url.startswith(comment_prefixes):
+            return False
+        return url
+
+    # closing() releases batch_fd even if reading or decoding raises.
+    with contextlib.closing(batch_fd) as fd:
+        return [url for url in map(fixup, fd) if url]