diff options
32 files changed, 846 insertions, 319 deletions
@@ -91,3 +91,4 @@ t0mm0 Tithen-Firion Zack Fernandes cryptonaut +Adrian Kretz @@ -1,7 +1,7 @@ all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json cleanall: clean rm -f youtube-dl youtube-dl.exe diff --git a/test/test_utils.py b/test/test_utils.py index aaa293ff8..d42df6d96 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -144,6 +144,9 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011') self.assertEqual(unified_strdate('1968-12-10'), '19681210') self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128') + self.assertEqual( + unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False), + '20141126') def test_find_xpath_attr(self): testxml = '''<root> diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 56dc3d461..31531855e 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -622,23 +622,17 @@ class YoutubeDL(object): ie_result['url'], ie_key=ie_result.get('ie_key'), extra_info=extra_info, download=False, process=False) - def make_result(embedded_info): - new_result = ie_result.copy() - for f in ('_type', 'url', 'ext', 'player_url', 'formats', - 'entries', 'ie_key', 'duration', - 'subtitles', 'annotations', 'format', - 'thumbnail', 'thumbnails'): - if f in new_result: - del new_result[f] - if f in embedded_info: - new_result[f] = embedded_info[f] - return new_result - new_result = make_result(info) + new_result = ie_result.copy() + for f in ('_type', 'id', 'url', 'ext', 'player_url', 'formats', + 'entries', 'ie_key', 'duration', + 'subtitles', 'annotations', 'format', + 'thumbnail', 'thumbnails'): + if f in new_result: + del new_result[f] + if f in info: + new_result[f] = info[f] assert new_result.get('_type') != 'url_transparent' - if new_result.get('_type') == 'compat_list': - new_result['entries'] = [ - make_result(e) for e in new_result['entries']] return self.process_ie_result( new_result, download=download, extra_info=extra_info) @@ -942,8 +936,12 @@ class YoutubeDL(object): if self.params.get('forceid', False): self.to_stdout(info_dict['id']) if self.params.get('forceurl', False): - # For RTMP URLs, also include the playpath - self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) + if info_dict.get('requested_formats') is not None: + for f in info_dict['requested_formats']: + self.to_stdout(f['url'] + f.get('play_path', '')) + else: + # For RTMP URLs, also include the playpath + self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None: self.to_stdout(info_dict['thumbnail']) if self.params.get('forcedescription', False) and info_dict.get('description') is not None: diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 27596687d..f4a85443e 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -247,7 +247,7 @@ else: userhome = compat_getenv('HOME') elif 'USERPROFILE' in os.environ: userhome = compat_getenv('USERPROFILE') - elif not 'HOMEPATH' in os.environ: + elif 'HOMEPATH' not in os.environ: return path else: try: diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 4c9c44b31..746ee69e4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -30,6 +30,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE from .bbccouk import BBCCoUkIE from .beeg import BeegIE from .behindkink import BehindKinkIE +from .bet import BetIE from .bild import BildIE from .bilibili import BiliBiliIE from .blinkx import BlinkxIE @@ -50,7 +51,7 @@ from .cbsnews import CBSNewsIE from .ceskatelevize import CeskaTelevizeIE from .channel9 import Channel9IE from .chilloutzone import ChilloutzoneIE -from .cinemassacre import CinemassacreIE +from .cinchcast import CinchcastIE from .clipfish import ClipfishIE from .cliphunter import CliphunterIE from .clipsyndicate import ClipsyndicateIE @@ -307,6 +308,7 @@ from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE from .pyvideo import PyvideoIE from .quickvid import QuickVidIE +from .radiode import RadioDeIE from .radiofrance import RadioFranceIE from .rai import RaiIE from .rbmaradio import RBMARadioIE @@ -334,6 +336,7 @@ from .savefrom import SaveFromIE from .sbs import SBSIE from .scivee import SciVeeIE from .screencast import ScreencastIE +from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE from .servingsys import ServingSysIE from .sexu import SexuIE from .sexykarma import SexyKarmaIE @@ -524,7 +527,7 @@ from .youtube import ( YoutubeUserIE, YoutubeWatchLaterIE, ) -from .zdf import ZDFIE +from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ( ZingMp3SongIE, ZingMp3AlbumIE, diff --git a/youtube_dl/extractor/behindkink.py b/youtube_dl/extractor/behindkink.py index 31fdc0dcc..1bdc25812 100644 --- a/youtube_dl/extractor/behindkink.py +++ b/youtube_dl/extractor/behindkink.py @@ -10,15 +10,15 @@ from ..utils import url_basename class BehindKinkIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)' _TEST = { - 'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/', - 'md5': '41ad01222b8442089a55528fec43ec01', + 'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/', + 'md5': '507b57d8fdcd75a41a9a7bdb7989c762', 'info_dict': { - 'id': '36370', + 'id': '37127', 'ext': 'mp4', - 'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!', - 'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...', - 'upload_date': '20140814', - 'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg', + 'title': 'What are you passionate about – Marley Blaze', + 'description': 'md5:aee8e9611b4ff70186f752975d9b94b4', + 'upload_date': '20141205', + 'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg', 'age_limit': 18, } } @@ -26,26 +26,19 @@ class BehindKinkIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) display_id = mobj.group('id') - year = mobj.group('year') - month = mobj.group('month') - day = mobj.group('day') - upload_date = year + month + day webpage = self._download_webpage(url, display_id) video_url = self._search_regex( - r"'file':\s*'([^']+)'", - webpage, 'URL base') - - video_id = url_basename(video_url) - video_id = video_id.split('_')[0] + r'<source src="([^"]+)"', webpage, 'video URL') + video_id = url_basename(video_url).split('_')[0] + upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day') return { 'id': video_id, + 'display_id': display_id, 'url': video_url, - 'ext': 'mp4', 'title': self._og_search_title(webpage), - 'display_id': display_id, 'thumbnail': self._og_search_thumbnail(webpage), 'description': self._og_search_description(webpage), 'upload_date': upload_date, diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py new file mode 100644 index 000000000..c1fc433f7 --- /dev/null +++ b/youtube_dl/extractor/bet.py @@ -0,0 +1,108 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, + xpath_text, + xpath_with_ns, + int_or_none, + parse_iso8601, +) + + +class BetIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html' + _TESTS = [ + { + 'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html', + 'info_dict': { + 'id': '417cd61c-c793-4e8e-b006-e445ecc45add', + 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism', + 'ext': 'flv', + 'title': 'BET News Presents: A Conversation With President Obama', + 'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6', + 'duration': 1534, + 'timestamp': 1418075340, + 'upload_date': '20141208', + 'uploader': 'admin', + 'thumbnail': 're:(?i)^https?://.*\.jpg$', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html', + 'info_dict': { + 'id': '4160e53b-ad41-43b1-980f-8d85f63121f4', + 'display_id': 'justice-for-ferguson-a-community-reacts', + 'ext': 'flv', + 'title': 'Justice for Ferguson: A Community Reacts', + 'description': 'A BET News special.', + 'duration': 1696, + 'timestamp': 1416942360, + 'upload_date': '20141125', + 'uploader': 'admin', + 'thumbnail': 're:(?i)^https?://.*\.jpg$', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + } + ] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + media_url = compat_urllib_parse.unquote(self._search_regex( + [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"], + webpage, 'media URL')) + + mrss = self._download_xml(media_url, display_id) + + item = mrss.find('./channel/item') + + NS_MAP = { + 'dc': 'http://purl.org/dc/elements/1.1/', + 'media': 'http://search.yahoo.com/mrss/', + 'ka': 'http://kickapps.com/karss', + } + + title = xpath_text(item, './title', 'title') + description = xpath_text( + item, './description', 'description', fatal=False) + + video_id = xpath_text(item, './guid', 'video id', fatal=False) + + timestamp = parse_iso8601(xpath_text( + item, xpath_with_ns('./dc:date', NS_MAP), + 'upload date', fatal=False)) + uploader = xpath_text( + item, xpath_with_ns('./dc:creator', NS_MAP), + 'uploader', fatal=False) + + media_content = item.find( + xpath_with_ns('./media:content', NS_MAP)) + duration = int_or_none(media_content.get('duration')) + smil_url = media_content.get('url') + + thumbnail = media_content.find( + xpath_with_ns('./media:thumbnail', NS_MAP)).get('url') + + formats = self._extract_smil_formats(smil_url, display_id) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'uploader': uploader, + 'duration': duration, + 'formats': formats, + } diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index da47f27bd..14b814120 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -4,13 +4,17 @@ import re from .common import InfoExtractor from .subtitles import SubtitlesInfoExtractor -from ..utils import ( + +from ..compat import ( + compat_str, compat_urllib_request, - unescapeHTML, - parse_iso8601, compat_urlparse, +) +from ..utils import ( clean_html, - compat_str, + int_or_none, + parse_iso8601, + unescapeHTML, ) @@ -78,7 +82,25 @@ class BlipTVIE(SubtitlesInfoExtractor): 'uploader': 'NostalgiaCritic', 'uploader_id': '246467', } - } + }, + { + # https://github.com/rg3/youtube-dl/pull/4404 + 'note': 'Audio only', + 'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982', + 'md5': '76c0a56f24e769ceaab21fbb6416a351', + 'info_dict': { + 'id': '7103299', + 'ext': 'flv', + 'title': 'Weekly Manga Recap: Kingdom', + 'description': 'And then Shin breaks the enemy line, and he's all like HWAH! And then he slices a guy and it's all like FWASHING! And... it's really hard to describe the best parts of this series without breaking down into sound effects, okay?', + 'timestamp': 1417660321, + 'upload_date': '20141204', + 'uploader': 'The Rollo T', + 'uploader_id': '407429', + 'duration': 7251, + 'vcodec': 'none', + } + }, ] def _real_extract(self, url): @@ -145,11 +167,11 @@ class BlipTVIE(SubtitlesInfoExtractor): 'url': real_url, 'format_id': role, 'format_note': media_type, - 'vcodec': media_content.get(blip('vcodec')), + 'vcodec': media_content.get(blip('vcodec')) or 'none', 'acodec': media_content.get(blip('acodec')), 'filesize': media_content.get('filesize'), - 'width': int(media_content.get('width')), - 'height': int(media_content.get('height')), + 'width': int_or_none(media_content.get('width')), + 'height': int_or_none(media_content.get('height')), }) self._sort_formats(formats) diff --git a/youtube_dl/extractor/cinchcast.py b/youtube_dl/extractor/cinchcast.py new file mode 100644 index 000000000..0c9a24bef --- /dev/null +++ b/youtube_dl/extractor/cinchcast.py @@ -0,0 +1,52 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + unified_strdate, + xpath_text, +) + + +class CinchcastIE(InfoExtractor): + _VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P<id>[0-9]+)' + _TEST = { + # Actual test is run in generic, look for undergroundwellness + 'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703', + 'only_matching': True, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + doc = self._download_xml( + 'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id, + video_id) + + item = doc.find('.//item') + title = xpath_text(item, './title', fatal=True) + date_str = xpath_text( + item, './{http://developer.longtailvideo.com/trac/}date') + upload_date = unified_strdate(date_str, day_first=False) + # duration is present but wrong + formats = [] + formats.append({ + 'format_id': 'main', + 'url': item.find( + './{http://search.yahoo.com/mrss/}content').attrib['url'], + }) + backup_url = xpath_text( + item, './{http://developer.longtailvideo.com/trac/}backupContent') + if backup_url: + formats.append({ + 'preference': 2, # seems to be more reliable + 'format_id': 'backup', + 'url': backup_url, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'upload_date': upload_date, + 'formats': formats, + } diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index cb6081dd0..d302fe45f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -118,6 +118,7 @@ class InfoExtractor(object): The following fields are optional: + alt_title: A secondary title of the video. display_id An alternative identifier for the video, not necessarily unique, but available before title. Typically, id is something like "4234987", title "Dancing naked mole rats", @@ -129,7 +130,7 @@ class InfoExtractor(object): * "resolution" (optional, string "{width}x{height"}, deprecated) thumbnail: Full URL to a video thumbnail image. - description: One-line video description. + description: Full video description. uploader: Full name of the video uploader. timestamp: UNIX timestamp of the moment the video became available. upload_date: Video upload date (YYYYMMDD). @@ -174,9 +175,10 @@ class InfoExtractor(object): _type "url" indicates that the video must be extracted from another location, possibly by a different extractor. Its only required key is: "url" - the next URL to extract. - - Additionally, it may have properties believed to be identical to the - resolved entity, for example "title" if the title of the referred video is + The key "ie_key" can be set to the class name (minus the trailing "IE", + e.g. "Youtube") if the extractor class is known in advance. + Additionally, the dictionary may have any properties of the resolved entity + known in advance, for example "title" if the title of the referred video is known ahead of time. @@ -390,6 +392,10 @@ class InfoExtractor(object): url_or_request, video_id, note, errnote, fatal=fatal) if (not fatal) and json_string is False: return None + return self._parse_json( + json_string, video_id, transform_source=transform_source, fatal=fatal) + + def _parse_json(self, json_string, video_id, transform_source=None, fatal=True): if transform_source: json_string = transform_source(json_string) try: @@ -439,7 +445,7 @@ class InfoExtractor(object): return video_info @staticmethod - def playlist_result(entries, playlist_id=None, playlist_title=None): + def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None): """Returns a playlist""" video_info = {'_type': 'playlist', 'entries': entries} @@ -447,6 +453,8 @@ class InfoExtractor(object): video_info['id'] = playlist_id if playlist_title: video_info['title'] = playlist_title + if playlist_description: + video_info['description'] = playlist_description return video_info def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): @@ -790,6 +798,49 @@ class InfoExtractor(object): self._sort_formats(formats) return formats + # TODO: improve extraction + def _extract_smil_formats(self, smil_url, video_id): + smil = self._download_xml( + smil_url, video_id, 'Downloading SMIL file', + 'Unable to download SMIL file') + + base = smil.find('./head/meta').get('base') + + formats = [] + rtmp_count = 0 + for video in smil.findall('./body/switch/video'): + src = video.get('src') + if not src: + continue + bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) + width = int_or_none(video.get('width')) + height = int_or_none(video.get('height')) + proto = video.get('proto') + if not proto: + if base: + if base.startswith('rtmp'): + proto = 'rtmp' + elif base.startswith('http'): + proto = 'http' + ext = video.get('ext') + if proto == 'm3u8': + formats.extend(self._extract_m3u8_formats(src, video_id, ext)) + elif proto == 'rtmp': + rtmp_count += 1 + streamer = video.get('streamer') or base + formats.append({ + 'url': streamer, + 'play_path': src, + 'ext': 'flv', + 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate), + 'tbr': bitrate, + 'width': width, + 'height': height, + }) + self._sort_formats(formats) + + return formats + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 2139f68aa..1ad4e77a8 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -13,9 +13,10 @@ from ..compat import ( compat_urllib_request, ) from ..utils import ( - urlencode_postdata, ExtractorError, + int_or_none, limit_length, + urlencode_postdata, ) @@ -36,7 +37,6 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '637842556329505', 'ext': 'mp4', - 'duration': 38, 'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam', } }, { @@ -107,9 +107,7 @@ class FacebookIE(InfoExtractor): self._login() def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) url = 'https://www.facebook.com/video/video.php?v=%s' % video_id webpage = self._download_webpage(url, video_id) @@ -149,6 +147,6 @@ class FacebookIE(InfoExtractor): 'id': video_id, 'title': video_title, 'url': video_url, - 'duration': int(video_data['video_duration']), - 'thumbnail': video_data['thumbnail_src'], + 'duration': int_or_none(video_data.get('video_duration')), + 'thumbnail': video_data.get('thumbnail_src'), } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 328301de3..2b4d8c62f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -467,8 +467,17 @@ class GenericIE(InfoExtractor): 'expected_warnings': [ 'URL could be a direct video link, returning it as such.' ] - } - + }, + # Cinchcast embed + { + 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/', + 'info_dict': { + 'id': '7141703', + 'ext': 'mp3', + 'upload_date': '20141126', + 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing', + } + }, ] def report_following_redirect(self, new_url): @@ -962,6 +971,13 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'SBS') + # Look for embedded Cinchcast player + mobj = re.search( + r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1', + webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'Cinchcast') + mobj = re.search( r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1', webpage) diff --git a/youtube_dl/extractor/howstuffworks.py b/youtube_dl/extractor/howstuffworks.py index fccc23884..e97339121 100644 --- a/youtube_dl/extractor/howstuffworks.py +++ b/youtube_dl/extractor/howstuffworks.py @@ -1,12 +1,12 @@ from __future__ import unicode_literals -import re -import json -import random -import string - from .common import InfoExtractor -from ..utils import find_xpath_attr +from ..utils import ( + find_xpath_attr, + int_or_none, + js_to_json, + unescapeHTML, +) class HowStuffWorksIE(InfoExtractor): @@ -16,98 +16,74 @@ class HowStuffWorksIE(InfoExtractor): 'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm', 'info_dict': { 'id': '450221', - 'display_id': 'cool-jobs-iditarod-musher', 'ext': 'flv', 'title': 'Cool Jobs - Iditarod Musher', - 'description': 'md5:82bb58438a88027b8186a1fccb365f90', + 'description': 'Cold sleds, freezing temps and warm dog breath... an Iditarod musher\'s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.', + 'display_id': 'cool-jobs-iditarod-musher', 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 161, }, - 'params': { - # md5 is not consistent - 'skip_download': True - } }, { 'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm', 'info_dict': { 'id': '453464', - 'display_id': 'survival-zone-food-and-water-in-the-savanna', 'ext': 'mp4', 'title': 'Survival Zone: Food and Water In the Savanna', - 'description': 'md5:7e1c89f6411434970c15fa094170c371', + 'description': 'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.', + 'display_id': 'survival-zone-food-and-water-in-the-savanna', 'thumbnail': 're:^https?://.*\.jpg$', }, - 'params': { - # md5 is not consistent - 'skip_download': True - } }, { 'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm', 'info_dict': { 'id': '440011', - 'display_id': 'sword-swallowing-1-by-dan-meyer', 'ext': 'flv', 'title': 'Sword Swallowing #1 by Dan Meyer', - 'description': 'md5:b2409e88172913e2e7d3d1159b0ef735', + 'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>', + 'display_id': 'sword-swallowing-1-by-dan-meyer', 'thumbnail': 're:^https?://.*\.jpg$', }, - 'params': { - # md5 is not consistent - 'skip_download': True - } }, ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('id') + display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) + clip_js = self._search_regex( + r'(?s)var clip = ({.*?});', webpage, 'clip info') + clip_info = self._parse_json( + clip_js, display_id, transform_source=js_to_json) - content_id = self._search_regex(r'var siteSectionId="(\d+)";', webpage, 'content id') - - mp4 = self._search_regex( - r'''(?xs)var\s+clip\s*=\s*{\s* - .+?\s* - content_id\s*:\s*%s\s*,\s* - .+?\s* - mp4\s*:\s*\[(.*?),?\]\s* - };\s* - videoData\.push\(clip\);''' % content_id, - webpage, 'mp4', fatal=False, default=None) - - smil = self._download_xml( - 'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % content_id, - content_id, 'Downloading video SMIL') - - http_base = find_xpath_attr( - smil, - './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'), - 'name', - 'httpBase').get('content') - - def random_string(str_len=0): - return ''.join([random.choice(string.ascii_uppercase) for _ in range(str_len)]) - - URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=%s&g=%s' % (random_string(5), random_string(12)) - + video_id = clip_info['content_id'] formats = [] + m3u8_url = clip_info.get('m3u8') + if m3u8_url: + formats += self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') + for video in clip_info.get('mp4', []): + formats.append({ + 'url': video['src'], + 'format_id': video['bitrate'], + 'vbr': int(video['bitrate'].rstrip('k')), + }) + + if not formats: + smil = self._download_xml( + 'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % video_id, + video_id, 'Downloading video SMIL') + + http_base = find_xpath_attr( + smil, + './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'), + 'name', + 'httpBase').get('content') + + URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=A&g=A' - if mp4: - for video in json.loads('[%s]' % mp4): - bitrate = video['bitrate'] - fmt = { - 'url': video['src'].replace('http://pmd.video.howstuffworks.com', http_base) + URL_SUFFIX, - 'format_id': bitrate, - } - m = re.search(r'(?P<vbr>\d+)[Kk]', bitrate) - if m: - fmt['vbr'] = int(m.group('vbr')) - formats.append(fmt) - else: for video in smil.findall( - './/{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')): - vbr = int(video.attrib['system-bitrate']) / 1000 + './{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')): + vbr = int_or_none(video.attrib['system-bitrate'], scale=1000) formats.append({ 'url': '%s/%s%s' % (http_base, video.attrib['src'], URL_SUFFIX), 'format_id': '%dk' % vbr, @@ -116,19 +92,12 @@ class HowStuffWorksIE(InfoExtractor): self._sort_formats(formats) - title = self._og_search_title(webpage) - TITLE_SUFFIX = ' : HowStuffWorks' - if title.endswith(TITLE_SUFFIX): - title = title[:-len(TITLE_SUFFIX)] - - description = self._og_search_description(webpage) - thumbnail = self._og_search_thumbnail(webpage) - return { - 'id': content_id, + 'id': '%s' % video_id, 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, + 'title': unescapeHTML(clip_info['clip_title']), + 'description': unescapeHTML(clip_info.get('caption')), + 'thumbnail': clip_info.get('video_still_url'), + 'duration': clip_info.get('duration'), 'formats': formats, } diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index bb8937c4d..55cc33a3e 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -70,7 +70,7 @@ class MixcloudIE(InfoExtractor): raise ExtractorError('Unable to extract track url') PREFIX = ( - r'<div class="cloudcast-play-button-container[^"]*?"' + r'<span class="play-button[^"]*?"' r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+') title = self._html_search_regex( PREFIX + r'm-title="([^"]+)"', webpage, 'title') diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index 0244368e9..b2f40344f 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re import json +import os from .common import InfoExtractor from ..compat import ( @@ -26,7 +27,8 @@ class NHLBaseInfoExtractor(InfoExtractor): initial_video_url = info['publishPoint'] if info['formats'] == '1': parsed_url = compat_urllib_parse_urlparse(initial_video_url) - path = parsed_url.path.replace('.', '_sd.', 1) + filename, ext = os.path.splitext(parsed_url.path) + path = '%s_sd%s' % (filename, ext) data = compat_urllib_parse.urlencode({ 'type': 'fvod', 'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:]) diff --git a/youtube_dl/extractor/ntv.py b/youtube_dl/extractor/ntv.py index 13c8d79cd..ee740cd9c 100644 --- a/youtube_dl/extractor/ntv.py +++ b/youtube_dl/extractor/ntv.py @@ -130,7 +130,7 @@ class NTVIE(InfoExtractor): 'rtmp_conn': 'B:1', 'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128', 'page_url': 'http://www.ntv.ru', - 'flash_ver': 'LNX 11,2,202,341', + 'flash_version': 'LNX 11,2,202,341', 'rtmp_live': True, 'ext': 'flv', 'filesize': int(size.text), diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index bac484c67..954dfccb7 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -8,7 +8,6 @@ from ..utils import ( int_or_none, js_to_json, qualities, - determine_ext, ) @@ -45,13 +44,18 @@ class PornHdIE(InfoExtractor): thumbnail = self._search_regex( r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False) - quality = qualities(['SD', 'HD']) - formats = [{ - 'url': source['file'], - 'format_id': '%s-%s' % (source['label'], determine_ext(source['file'])), - 'quality': quality(source['label']), - } for source in json.loads(js_to_json(self._search_regex( - r"(?s)'sources'\s*:\s*(\[.+?\])", webpage, 'sources')))] + quality = qualities(['sd', 'hd']) + sources = json.loads(js_to_json(self._search_regex( + r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources'))) + formats = [] + for container, s in sources.items(): + for qname, video_url in s.items(): + formats.append({ + 'url': video_url, + 'container': container, + 'format_id': '%s-%s' % (container, qname), + 'quality': quality(qname), + }) self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 32d747ede..1262793c8 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -85,7 +85,7 @@ class ProSiebenSat1IE(InfoExtractor): 'ext': 'mp4', 'title': 'Im Interview: Kai Wiesinger', 'description': 'md5:e4e5370652ec63b95023e914190b4eb9', - 'upload_date': '20140225', + 'upload_date': '20140203', 'duration': 522.56, }, 'params': { @@ -100,7 +100,7 @@ class ProSiebenSat1IE(InfoExtractor): 'ext': 'mp4', 'title': 'Jagd auf Fertigkost im Elsthal - Teil 2', 'description': 'md5:2669cde3febe9bce13904f701e774eb6', - 'upload_date': '20140225', + 'upload_date': '20141014', 'duration': 2410.44, }, 'params': { @@ -152,12 +152,22 @@ class ProSiebenSat1IE(InfoExtractor): 'skip_download': True, }, }, + { + 'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist', + 'info_dict': { + 'id': '439664', + 'title': 'Episode 8 - Ganze Folge - Playlist', + 'description': 'md5:63b8963e71f481782aeea877658dec84', + }, + 'playlist_count': 2, + }, ] _CLIPID_REGEXES = [ r'"clip_id"\s*:\s+"(\d+)"', r'clipid: "(\d+)"', r'clip[iI]d=(\d+)', + r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)", ] _TITLE_REGEXES = [ r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>', @@ -178,11 +188,19 @@ class ProSiebenSat1IE(InfoExtractor): r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>', r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>', ] + _PAGE_TYPE_REGEXES = [ + r'<meta name="page_type" content="([^"]+)">', + r"'itemType'\s*:\s*'([^']*)'", + ] + _PLAYLIST_ID_REGEXES = [ + r'content[iI]d=(\d+)', + r"'itemId'\s*:\s*'([^']*)'", + ] + _PLAYLIST_CLIP_REGEXES = [ + r'(?s)data-qvt=.+?<a href="([^"]+)"', + ] - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - + def _extract_clip(self, url, webpage): clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id') access_token = 'testclient' @@ -281,3 +299,31 @@ class ProSiebenSat1IE(InfoExtractor): 'duration': duration, 'formats': formats, } + + def _extract_playlist(self, url, webpage): + playlist_id = self._html_search_regex( + self._PLAYLIST_ID_REGEXES, webpage, 'playlist id') + for regex in self._PLAYLIST_CLIP_REGEXES: + playlist_clips = re.findall(regex, webpage) + if playlist_clips: + title = self._html_search_regex( + self._TITLE_REGEXES, webpage, 'title') + description = self._html_search_regex( + self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False) + entries = [ + self.url_result( + re.match('(.+?//.+?)/', url).group(1) + clip_path, + 'ProSiebenSat1') + for clip_path in playlist_clips] + return self.playlist_result(entries, playlist_id, title, description) + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + page_type = self._search_regex( + self._PAGE_TYPE_REGEXES, webpage, + 'page type', default='clip').lower() + if page_type == 'clip': + return self._extract_clip(url, webpage) + elif page_type == 'playlist': + return self._extract_playlist(url, webpage) diff --git a/youtube_dl/extractor/radiode.py b/youtube_dl/extractor/radiode.py new file mode 100644 index 000000000..f95bc9454 --- /dev/null +++ b/youtube_dl/extractor/radiode.py @@ -0,0 +1,55 @@ +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor + + +class RadioDeIE(InfoExtractor): + IE_NAME = 'radio.de' + _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)' + _TEST = { + 'url': 'http://ndr2.radio.de/', + 'md5': '3b4cdd011bc59174596b6145cda474a4', + 'info_dict': { + 'id': 'ndr2', + 'ext': 'mp3', + 'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:591c49c702db1a33751625ebfb67f273', + 'thumbnail': 're:^https?://.*\.png', + }, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, url): + radio_id = self._match_id(url) + + webpage = self._download_webpage(url, radio_id) + + broadcast = json.loads(self._search_regex( + r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}', + webpage, 'broadcast')) + + title = self._live_title(broadcast['name']) + description = broadcast.get('description') or broadcast.get('shortDescription') + thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') + + formats = [{ + 'url': stream['streamUrl'], + 'ext': stream['streamContentFormat'].lower(), + 'acodec': stream['streamContentFormat'], + 'abr': stream['bitRate'], + 'asr': stream['sampleRate'] + } for stream in broadcast['streamUrls']] + self._sort_formats(formats) + + return { + 'id': radio_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'is_live': True, + 'formats': formats, + } diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/screenwavemedia.py index b7fa73c3b..6c9fdb7c1 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/screenwavemedia.py @@ -5,61 +5,27 @@ import re from .common import InfoExtractor from ..utils import ( - ExtractorError, int_or_none, + unified_strdate, ) -class CinemassacreIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)' - _TESTS = [ - { - 'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', - 'md5': 'fde81fbafaee331785f58cd6c0d46190', - 'info_dict': { - 'id': '19911', - 'ext': 'mp4', - 'upload_date': '20121110', - 'title': '“Angry Video Game Nerd: The Movie” – Trailer', - 'description': 'md5:fb87405fcb42a331742a0dce2708560b', - }, - }, - { - 'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', - 'md5': 'd72f10cd39eac4215048f62ab477a511', - 'info_dict': { - 'id': '521be8ef82b16', - 'ext': 'mp4', - 'upload_date': '20131002', - 'title': 'The Mummy’s Hand (1940)', - }, - } - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') - - webpage = self._download_webpage(url, display_id) - video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') - mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<full_video_id>(?:Cinemassacre-)?(?P<video_id>.+?)))"', webpage) - if not mobj: - raise ExtractorError('Can\'t extract embed url and video id') - playerdata_url = mobj.group('embed_url') - video_id = mobj.group('video_id') - full_video_id = mobj.group('full_video_id') +class ScreenwaveMediaIE(InfoExtractor): + _VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)' - video_title = self._html_search_regex( - r'<title>(?P<title>.+?)\|', webpage, 'title') - video_description = self._html_search_regex( - r'<div class="entry-content">(?P<description>.+?)</div>', - webpage, 'description', flags=re.DOTALL, fatal=False) - video_thumbnail = self._og_search_thumbnail(webpage) + _TESTS = [{ + 'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911', + 'only_matching': True, + }] - playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage') + def _real_extract(self, url): + video_id = self._match_id(url) + playerdata = self._download_webpage(url, video_id, 'Downloading player webpage') + vidtitle = self._search_regex( + r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/') vidurl = self._search_regex( - r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/') + r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 'vidurl').replace('\\/', '/') videolist_url = None @@ -67,7 +33,7 @@ class CinemassacreIE(InfoExtractor): if mobj: videoserver = mobj.group('videoserver') mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata) - vidid = mobj.group('vidid') if mobj else full_video_id + vidid = mobj.group('vidid') if mobj else video_id videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid) else: mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata) @@ -85,34 +51,128 @@ class CinemassacreIE(InfoExtractor): file_ = src.partition(':')[-1] width = int_or_none(video.get('width')) height = int_or_none(video.get('height')) - bitrate = int_or_none(video.get('system-bitrate')) + bitrate = int_or_none(video.get('system-bitrate'), scale=1000) format = { 'url': baseurl + file_, 'format_id': src.rpartition('.')[0].rpartition('_')[-1], } if width or height: format.update({ - 'tbr': bitrate // 1000 if bitrate else None, + 'tbr': bitrate, 'width': width, 'height': height, }) else: format.update({ - 'abr': bitrate // 1000 if bitrate else None, + 'abr': bitrate, 'vcodec': 'none', }) formats.append(format) - self._sort_formats(formats) else: formats = [{ 'url': vidurl, }] + self._sort_formats(formats) return { 'id': video_id, - 'title': video_title, + 'title': vidtitle, 'formats': formats, + } + + +class CinemassacreIE(InfoExtractor): + _VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)' + _TESTS = [ + { + 'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', + 'md5': 'fde81fbafaee331785f58cd6c0d46190', + 'info_dict': { + 'id': 'Cinemassacre-19911', + 'ext': 'mp4', + 'upload_date': '20121110', + 'title': '“Angry Video Game Nerd: The Movie” – Trailer', + 'description': 'md5:fb87405fcb42a331742a0dce2708560b', + }, + }, + { + 'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', + 'md5': 'd72f10cd39eac4215048f62ab477a511', + 'info_dict': { + 'id': 'Cinemassacre-521be8ef82b16', + 'ext': 'mp4', + 'upload_date': '20131002', + 'title': 'The Mummy’s Hand (1940)', + }, + } + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d') + + webpage = self._download_webpage(url, display_id) + + playerdata_url = self._search_regex( + r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', + webpage, 'player data URL') + video_title = self._html_search_regex( + r'<title>(?P<title>.+?)\|', webpage, 'title') + video_description = self._html_search_regex( + r'<div class="entry-content">(?P<description>.+?)</div>', + webpage, 'description', flags=re.DOTALL, fatal=False) + video_thumbnail = self._og_search_thumbnail(webpage) + + return { + '_type': 'url_transparent', + 'display_id': display_id, + 'title': video_title, + 'description': video_description, + 'upload_date': video_date, + 'thumbnail': video_thumbnail, + 'url': playerdata_url, + } + + +class TeamFourIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?' + _TEST = { + 'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/', + 'info_dict': { + 'id': 'TeamFourStar-5292a02f20bfa', + 'ext': 'mp4', + 'upload_date': '20130401', + 'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar', + 'title': 'A Moment With TFS Episode 4', + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + playerdata_url = self._search_regex( + r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', + webpage, 'player data URL') + + video_title = self._html_search_regex( + r'<div class="heroheadingtitle">(?P<title>.+?)</div>', + webpage, 'title') + video_date = unified_strdate(self._html_search_regex( + r'<div class="heroheadingdate">(?P<date>.+?)</div>', + webpage, 'date', fatal=False)) + video_description = self._html_search_regex( + r'(?s)<div class="postcontent">(?P<description>.+?)</div>', + webpage, 'description', fatal=False) + video_thumbnail = self._og_search_thumbnail(webpage) + + return { + '_type': 'url_transparent', + 'display_id': display_id, + 'title': video_title, 'description': video_description, 'upload_date': video_date, 'thumbnail': video_thumbnail, + 'url': playerdata_url, } diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 0751efc61..646af3cc9 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -274,15 +274,18 @@ class SmotriBroadcastIE(InfoExtractor): broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page') if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None: - raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True) + raise ExtractorError( + 'Broadcast %s does not exist' % broadcast_id, expected=True) # Adult content if re.search('EroConfirmText">', broadcast_page) is not None: (username, password) = self._get_login_info() if username is None: - raise ExtractorError('Erotic broadcasts allowed only for registered users, ' - 'use --username and --password options to provide account credentials.', expected=True) + raise ExtractorError( + 'Erotic broadcasts allowed only for registered users, ' + 'use --username and --password options to provide account credentials.', + expected=True) login_form = { 'login-hint53': '1', @@ -291,9 +294,11 @@ class SmotriBroadcastIE(InfoExtractor): 'password': password, } - request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form)) + request = compat_urllib_request.Request( + broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') - broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age') + broadcast_page = self._download_webpage( + request, broadcast_id, 'Logging in and confirming age') if re.search('>Неверный логин или пароль<', broadcast_page) is not None: raise ExtractorError('Unable to log in: bad username or password', expected=True) @@ -303,7 +308,7 @@ class SmotriBroadcastIE(InfoExtractor): adult_content = False ticket = self._html_search_regex( - 'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);', + r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)", broadcast_page, 'broadcast ticket') url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket @@ -312,26 +317,31 @@ class SmotriBroadcastIE(InfoExtractor): if broadcast_password: url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest() - broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON') + broadcast_json_page = self._download_webpage( + url, broadcast_id, 'Downloading broadcast JSON') try: broadcast_json = json.loads(broadcast_json_page) protected_broadcast = broadcast_json['_pass_protected'] == 1 if protected_broadcast and not broadcast_password: - raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True) + raise ExtractorError( + 'This broadcast is protected by a password, use the --video-password option', + expected=True) broadcast_offline = broadcast_json['is_play'] == 0 if broadcast_offline: raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True) rtmp_url = broadcast_json['_server'] - if not rtmp_url.startswith('rtmp://'): + mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url) + if not mobj: raise ExtractorError('Unexpected broadcast rtmp URL') broadcast_playpath = broadcast_json['_streamName'] + broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL']) broadcast_thumbnail = broadcast_json['_imgURL'] - broadcast_title = broadcast_json['title'] + broadcast_title = self._live_title(broadcast_json['title']) broadcast_description = broadcast_json['description'] broadcaster_nick = broadcast_json['nick'] broadcaster_login = broadcast_json['login'] @@ -352,6 +362,9 @@ class SmotriBroadcastIE(InfoExtractor): 'age_limit': 18 if adult_content else 0, 'ext': 'flv', 'play_path': broadcast_playpath, + 'player_url': 'http://pics.smotri.com/broadcast_play.swf', + 'app': broadcast_app, 'rtmp_live': True, - 'rtmp_conn': rtmp_conn + 'rtmp_conn': rtmp_conn, + 'is_live': True, } diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py index d81d1d1a6..ba65996dc 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -1,32 +1,30 @@ # encoding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( float_or_none, - str_to_int, + parse_age_limit, ) class TvigleIE(InfoExtractor): IE_NAME = 'tvigle' IE_DESC = 'Интернет-телевидение Tvigle.ru' - _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$' + _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$' _TESTS = [ { - 'url': 'http://www.tvigle.ru/video/brat/', - 'md5': 'ff4344a4894b0524441fb6f8218dc716', + 'url': 'http://www.tvigle.ru/video/sokrat/', + 'md5': '36514aed3657d4f70b4b2cef8eb520cd', 'info_dict': { - 'id': '5118490', - 'display_id': 'brat', - 'ext': 'mp4', - 'title': 'Брат', - 'description': 'md5:d16ac7c0b47052ea51fddb92c4e413eb', - 'duration': 5722.6, - 'age_limit': 16, + 'id': '1848932', + 'display_id': 'sokrat', + 'ext': 'flv', + 'title': 'Сократ', + 'description': 'md5:a05bd01be310074d5833efc6743be95e', + 'duration': 6586, + 'age_limit': 0, }, }, { @@ -44,8 +42,7 @@ class TvigleIE(InfoExtractor): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') + display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) @@ -60,8 +57,8 @@ class TvigleIE(InfoExtractor): title = item['title'] description = item['description'] thumbnail = item['thumbnail'] - duration = float_or_none(item['durationMilliseconds'], 1000) - age_limit = str_to_int(item['ageRestrictions']) + duration = float_or_none(item.get('durationMilliseconds'), 1000) + age_limit = parse_age_limit(item.get('ageRestrictions')) formats = [] for vcodec, fmts in item['videos'].items(): diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index eb9473754..9a53a3c74 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -6,7 +6,6 @@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - ExtractorError, parse_iso8601, qualities, ) @@ -182,8 +181,8 @@ class TVPlayIE(InfoExtractor): 'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON') if video['is_geo_blocked']: - raise ExtractorError( - 'This content is not available in your country due to copyright reasons', expected=True) + self.report_warning( + 'This content might not be available in your country due to copyright reasons') streams = self._download_json( 'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON') diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 36aa1ad6e..397d167e8 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import itertools @@ -5,6 +6,8 @@ import re from .common import InfoExtractor from ..utils import ( + compat_urllib_parse, + compat_urllib_request, ExtractorError, parse_iso8601, ) @@ -24,6 +27,7 @@ class TwitchIE(InfoExtractor): """ _PAGE_LIMIT = 100 _API_BASE = 'https://api.twitch.tv' + _LOGIN_URL = 'https://secure.twitch.tv/user/login' _TESTS = [{ 'url': 'http://www.twitch.tv/riotgames/b/577357806', 'info_dict': { @@ -109,6 +113,44 @@ class TwitchIE(InfoExtractor): 'view_count': info['views'], } + def _real_initialize(self): + self._login() + + def _login(self): + (username, password) = self._get_login_info() + if username is None: + return + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + + authenticity_token = self._search_regex( + r'<input name="authenticity_token" type="hidden" value="([^"]+)"', + login_page, 'authenticity token') + + login_form = { + 'utf8': '✓'.encode('utf-8'), + 'authenticity_token': authenticity_token, + 'redirect_on_login': '', + 'embed_form': 'false', + 'mp_source_action': '', + 'follow': '', + 'user[login]': username, + 'user[password]': password, + } + + request = compat_urllib_request.Request( + self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) + request.add_header('Referer', self._LOGIN_URL) + response = self._download_webpage( + request, None, 'Logging in as %s' % username) + + m = re.search( + r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response) + if m: + raise ExtractorError( + 'Unable to login: %s' % m.group('msg').strip(), expected=True) + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj.group('chapterid'): diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 4970b2f23..0b58fe0fe 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -17,6 +17,7 @@ class VineIE(InfoExtractor): 'id': 'b9KOOWX7HUx', 'ext': 'mp4', 'title': 'Chicken.', + 'alt_title': 'Vine by Jack Dorsey', 'description': 'Chicken.', 'upload_date': '20130519', 'uploader': 'Jack Dorsey', @@ -25,30 +26,26 @@ class VineIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id) data = json.loads(self._html_search_regex( r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data')) - formats = [ - { - 'url': data['videoLowURL'], - 'ext': 'mp4', - 'format_id': 'low', - }, - { - 'url': data['videoUrl'], - 'ext': 'mp4', - 'format_id': 'standard', - } - ] + formats = [{ + 'url': data['videoLowURL'], + 'ext': 'mp4', + 'format_id': 'low', + }, { + 'url': data['videoUrl'], + 'ext': 'mp4', + 'format_id': 'standard', + }] return { 'id': video_id, 'title': self._og_search_title(webpage), + 'alt_title': self._og_search_description(webpage), 'description': data['description'], 'thumbnail': data['thumbnailUrl'], 'upload_date': unified_strdate(data['created']), diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8b6e591a4..7b6179a2a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -14,23 +14,24 @@ from .common import InfoExtractor, SearchInfoExtractor from .subtitles import SubtitlesInfoExtractor from ..jsinterp import JSInterpreter from ..swfinterp import SWFInterpreter -from ..utils import ( +from ..compat import ( compat_chr, compat_parse_qs, compat_urllib_parse, compat_urllib_request, compat_urlparse, compat_str, - +) +from ..utils import ( clean_html, - get_element_by_id, - get_element_by_attribute, ExtractorError, + get_element_by_attribute, + get_element_by_id, int_or_none, OnDemandPagedList, + orderedSet, unescapeHTML, unified_strdate, - orderedSet, uppercase_escape, ) @@ -417,6 +418,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'upload_date': '20140605', }, }, + # video_info is None (https://github.com/rg3/youtube-dl/issues/4421) + { + 'url': '__2ABJjxzNo', + 'info_dict': { + 'id': '__2ABJjxzNo', + 'ext': 'mp4', + 'upload_date': '20100430', + 'uploader_id': 'deadmau5', + 'description': 'md5:12c56784b8032162bb936a5f76d55360', + 'uploader': 'deadmau5', + 'title': 'Deadmau5 - Some Chords (HD)', + }, + 'expected_warnings': [ + 'DASH manifest missing', + ] + }, + # Olympics (https://github.com/rg3/youtube-dl/issues/4431) + { + 'url': 'lqQg6PlCWgI', + 'info_dict': { + 'id': 'lqQg6PlCWgI', + 'ext': 'mp4', + 'upload_date': '20120731', + 'uploader_id': 'olympic', + 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', + 'uploader': 'Olympics', + 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games', + }, + 'params': { + 'skip_download': 'requires avconv', + } + }, ] def __init__(self, *args, **kwargs): @@ -666,6 +699,46 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') + def _parse_dash_manifest( + self, video_id, dash_manifest_url, player_url, age_gate): + def decrypt_sig(mobj): + s = mobj.group(1) + dec_s = self._decrypt_signature(s, video_id, player_url, age_gate) + return '/signature/%s' % dec_s + dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url) + dash_doc = self._download_xml( + dash_manifest_url, video_id, + note='Downloading DASH manifest', + errnote='Could not download DASH manifest') + + formats = [] + for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): + url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') + if url_el is None: + continue + format_id = r.attrib['id'] + video_url = url_el.text + filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) + f = { + 'format_id': format_id, + 'url': video_url, + 'width': int_or_none(r.attrib.get('width')), + 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), + 'asr': int_or_none(r.attrib.get('audioSamplingRate')), + 'filesize': filesize, + 'fps': int_or_none(r.attrib.get('frameRate')), + } + try: + existing_format = next( + fo for fo in formats + if fo['format_id'] == format_id) + except StopIteration: + f.update(self._formats.get(format_id, {})) + formats.append(f) + else: + existing_format.update(f) + return formats + def _real_extract(self, url): proto = ( 'http' if self._downloader.params.get('prefer_insecure', False) @@ -800,7 +873,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): m_cat_container = self._search_regex( r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>', - video_webpage, 'categories', fatal=False) + video_webpage, 'categories', default=None) if m_cat_container: category = self._html_search_regex( r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category', @@ -878,7 +951,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'url': video_info['conn'][0], 'player_url': player_url, }] - elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1: + elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1: encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0] if 'rtmpe%3Dyes' in encoded_url_map: raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True) @@ -943,51 +1016,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Look for the DASH manifest if self._downloader.params.get('youtube_include_dash_manifest', True): - try: - # The DASH manifest used needs to be the one from the original video_webpage. - # The one found in get_video_info seems to be using different signatures. - # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage. - # Luckily, it seems, this case uses some kind of default signature (len == 86), so the - # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here. - dash_manifest_url = video_info.get('dashmpd')[0] - - def decrypt_sig(mobj): - s = mobj.group(1) - dec_s = self._decrypt_signature(s, video_id, player_url, age_gate) - return '/signature/%s' % dec_s - dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url) - dash_doc = self._download_xml( - dash_manifest_url, video_id, - note='Downloading DASH manifest', - errnote='Could not download DASH manifest') - for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): - url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') - if url_el is None: - continue - format_id = r.attrib['id'] - video_url = url_el.text - filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) - f = { - 'format_id': format_id, - 'url': video_url, - 'width': int_or_none(r.attrib.get('width')), - 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), - 'asr': int_or_none(r.attrib.get('audioSamplingRate')), - 'filesize': filesize, - 'fps': int_or_none(r.attrib.get('frameRate')), - } - try: - existing_format = next( - fo for fo in formats - if fo['format_id'] == format_id) - except StopIteration: - f.update(self._formats.get(format_id, {})) - formats.append(f) - else: - existing_format.update(f) - - except (ExtractorError, KeyError) as e: - self.report_warning('Skipping DASH manifest: %r' % e, video_id) + dash_mpd = video_info.get('dashmpd') + if dash_mpd: + dash_manifest_url = dash_mpd[0] + try: + dash_formats = self._parse_dash_manifest( + video_id, dash_manifest_url, player_url, age_gate) + except (ExtractorError, KeyError) as e: + self.report_warning( + 'Skipping DASH manifest: %r' % e, video_id) + else: + formats.extend(dash_formats) self._sort_formats(formats) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 9ff00e26c..74c76a9a0 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -1,12 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals +import functools import re from .common import InfoExtractor from ..utils import ( int_or_none, unified_strdate, + OnDemandPagedList, ) @@ -87,7 +89,7 @@ def extract_from_xml_url(ie, video_id, xml_url): class ZDFIE(InfoExtractor): - _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?' + _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?' _TEST = { 'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt', @@ -106,6 +108,52 @@ class ZDFIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id return extract_from_xml_url(self, video_id, xml_url) + + +class ZDFChannelIE(InfoExtractor): + _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic', + 'info_dict': { + 'id': '1586442', + }, + 'playlist_count': 4, + } + _PAGE_SIZE = 50 + + def _fetch_page(self, channel_id, page): + offset = page * self._PAGE_SIZE + xml_url = ( + 'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s' + % (offset, self._PAGE_SIZE, channel_id)) + doc = self._download_xml( + xml_url, channel_id, + note='Downloading channel info', + errnote='Failed to download channel info') + + title = doc.find('.//information/title').text + description = doc.find('.//information/detail').text + for asset in doc.findall('.//teasers/teaser'): + a_type = asset.find('./type').text + a_id = asset.find('./details/assetId').text + if a_type not in ('video', 'topic'): + continue + yield { + '_type': 'url', + 'playlist_title': title, + 'playlist_description': description, + 'url': 'zdf:%s:%s' % (a_type, a_id), + } + + def _real_extract(self, url): + channel_id = self._match_id(url) + entries = OnDemandPagedList( + functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE) + + return { + '_type': 'playlist', + 'id': channel_id, + 'entries': entries, + } diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py index 09db43611..75c0f7bbe 100644 --- a/youtube_dl/postprocessor/execafterdownload.py +++ b/youtube_dl/postprocessor/execafterdownload.py @@ -14,7 +14,7 @@ class ExecAfterDownloadPP(PostProcessor): def run(self, information): cmd = self.exec_cmd - if not '{}' in cmd: + if '{}' not in cmd: cmd += ' {}' cmd = cmd.replace('{}', shlex_quote(information['filepath'])) diff --git a/youtube_dl/update.py b/youtube_dl/update.py index 4c07a558e..2d2703368 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -79,7 +79,7 @@ def update_self(to_screen, verbose): to_screen(compat_str(traceback.format_exc())) to_screen('ERROR: can\'t obtain versions info. Please try again later.') return - if not 'signature' in versions_info: + if 'signature' not in versions_info: to_screen('ERROR: the versions file is not signed or corrupted. Aborting.') return signature = versions_info['signature'] diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 2b0f4e589..51a822e4f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -166,7 +166,7 @@ def xpath_text(node, xpath, name=None, fatal=False): xpath = xpath.encode('ascii') n = node.find(xpath) - if n is None: + if n is None or n.text is None: if fatal: name = xpath if name is None else name raise ExtractorError('Could not find XML element %s' % name) @@ -644,17 +644,19 @@ def parse_iso8601(date_str, delimiter='T'): return calendar.timegm(dt.timetuple()) -def unified_strdate(date_str): +def unified_strdate(date_str, day_first=True): """Return a string with the date in the format YYYYMMDD""" if date_str is None: return None - upload_date = None # Replace commas date_str = date_str.replace(',', ' ') # %z (UTC offset) is only supported in python>=3.2 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) + # Remove AM/PM + timezone + date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str) + format_expressions = [ '%d %B %Y', '%d %b %Y', @@ -669,7 +671,6 @@ def unified_strdate(date_str): '%d/%m/%Y', '%d/%m/%y', '%Y/%m/%d %H:%M:%S', - '%d/%m/%Y %H:%M:%S', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M:%S.%f', '%d.%m.%Y %H:%M', @@ -681,6 +682,14 @@ def unified_strdate(date_str): '%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M', ] + if day_first: + format_expressions.extend([ + '%d/%m/%Y %H:%M:%S', + ]) + else: + format_expressions.extend([ + '%m/%d/%Y %H:%M:%S', + ]) for expression in format_expressions: try: upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') @@ -712,8 +721,10 @@ def date_from_str(date_str): Return a datetime object from a string in the format YYYYMMDD or (now|today)[+-][0-9](day|week|month|year)(s)?""" today = datetime.date.today() - if date_str == 'now'or date_str == 'today': + if date_str in ('now', 'today'): return today + if date_str == 'yesterday': + return today - datetime.timedelta(days=1) match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str) if match is not None: sign = match.group('sign') @@ -1024,7 +1035,7 @@ def smuggle_url(url, data): def unsmuggle_url(smug_url, default=None): - if not '#__youtubedl_smuggle' in smug_url: + if '#__youtubedl_smuggle' not in smug_url: return smug_url, default url, _, sdata = smug_url.rpartition('#') jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0] diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3ac525557..7289ea4d2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2014.12.06.1' +__version__ = '2014.12.12.1' |