From 4d54ef20a291eabc523119ca60ff05f0962771a4 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 24 Aug 2014 04:47:18 +0200 Subject: [ministrygrid] Add extractor (Fixes #2900) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/generic.py | 13 +++++++- youtube_dl/extractor/ministrygrid.py | 57 ++++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/ministrygrid.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 52354e6e0..9dccbb8ed 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -184,6 +184,7 @@ from .malemotion import MalemotionIE from .mdr import MDRIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE +from .ministrygrid import MinistryGridIE from .mit import TechTVMITIE, MITIE, OCWMITIE from .mitele import MiTeleIE from .mixcloud import MixcloudIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8b2d1d033..7fce564c6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -22,6 +22,7 @@ from ..utils import ( smuggle_url, unescapeHTML, unified_strdate, + unsmuggle_url, url_basename, ) from .brightcove import BrightcoveIE @@ -487,7 +488,14 @@ class GenericIE(InfoExtractor): else: assert ':' in default_search return self.url_result(default_search + url) - video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0] + + url, smuggled_data = unsmuggle_url(url) + force_videoid = None + if smuggled_data and 'force_videoid' in smuggled_data: + force_videoid = smuggled_data['force_videoid'] + video_id = force_videoid + else: + video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0] self.to_screen('%s: Requesting header' % video_id) @@ -498,6 +506,9 @@ class GenericIE(InfoExtractor): new_url = response.geturl() if url != new_url: self.report_following_redirect(new_url) + if force_videoid: + new_url = 
smuggle_url( + new_url, {'force_videoid': force_videoid}) return self.url_result(new_url) # Check for direct link to a video diff --git a/youtube_dl/extractor/ministrygrid.py b/youtube_dl/extractor/ministrygrid.py new file mode 100644 index 000000000..949ad11db --- /dev/null +++ b/youtube_dl/extractor/ministrygrid.py @@ -0,0 +1,57 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + smuggle_url, +) + + +class MinistryGridIE(InfoExtractor): + _VALID_URL = r'https?://www\.ministrygrid.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])' + + _TEST = { + 'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers', + 'md5': '844be0d2a1340422759c2a9101bab017', + 'info_dict': { + 'id': '3453494717001', + 'ext': 'mp4', + 'title': 'The Gospel by Numbers', + 'description': 'Coming soon from T4G 2014!', + 'uploader': 'LifeWay Christian Resources (MG)', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + portlets_json = self._search_regex( + r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list') + portlets = json.loads(portlets_json) + pl_id = self._search_regex( + r'