summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author Philipp Hagemeister <[email protected]> 2014-03-11 16:51:36 +0100
committer Philipp Hagemeister <[email protected]> 2014-03-11 16:51:36 +0100
commit 60cc4dc4b49c6ebd4a86a4d7f998133474662eee (patch)
tree b244e93d9f9d2c2d65494361f0f0f1683d9a0e45
parent db95dc13a1a8e5951d09d3fd555f7a4be590821f (diff)
download youtube-dl-60cc4dc4b49c6ebd4a86a4d7f998133474662eee.tar.gz
youtube-dl-60cc4dc4b49c6ebd4a86a4d7f998133474662eee.zip
[generic/funnyordie] Add support for funnyordie embeds (Fixes #2546)
-rw-r--r-- youtube_dl/extractor/funnyordie.py 17
-rw-r--r-- youtube_dl/extractor/generic.py 19
2 files changed, 33 insertions, 3 deletions
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index 7c40e6753..5522e4954 100644
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -1,12 +1,13 @@
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
class FunnyOrDieIE(InfoExtractor):
- _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
+ _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])'
_TEST = {
'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
'file': '0732f586d7.mp4',
@@ -30,10 +31,20 @@ class FunnyOrDieIE(InfoExtractor):
[r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
webpage, 'video URL', flags=re.DOTALL)
+ if mobj.group('type') == 'embed':
+ post_json = self._search_regex(
+ r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
+ post = json.loads(post_json)['attachment']
+ title = post['name']
+ description = post.get('description')
+ else:
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
- 'title': self._og_search_title(webpage),
- 'description': self._og_search_description(webpage),
+ 'title': title,
+ 'description': description,
}
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 7666cf207..6e6324779 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -134,6 +134,17 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
},
+ # funnyordie embed
+ {
+ 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
+ 'md5': '7cf780be104d40fea7bae52eed4a470e',
+ 'info_dict': {
+ 'id': '18e820ec3f',
+ 'ext': 'mp4',
+ 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
+ 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
+ }
+ },
]
def report_download_webpage(self, video_id):
@@ -432,6 +443,14 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
+ # Look for funnyordie embed
+ matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
+ if matches:
+ urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie')
+ for eurl in matches]
+ return self.playlist_result(
+ urlrs, playlist_id=video_id, playlist_title=video_title)
+
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None: