diff options
author | Yen Chi Hsuan <[email protected]> | 2016-07-11 00:52:25 +0800 |
---|---|---|
committer | Yen Chi Hsuan <[email protected]> | 2016-07-11 00:52:25 +0800 |
commit | e2dbcaa1bf65aa502718005d5fbd00189618469f (patch) | |
tree | 49bfc28c85b142abf2cfd939f1cb988906c405ce | |
parent | ae0185016521e6fd284c87e2b138c0a8aca8a849 (diff) | |
download | youtube-dl-e2dbcaa1bf65aa502718005d5fbd00189618469f.tar.gz youtube-dl-e2dbcaa1bf65aa502718005d5fbd00189618469f.zip |
[vuclip] Fix extraction
-rw-r--r-- | youtube_dl/extractor/vuclip.py | 43 |
1 files changed, 15 insertions, 28 deletions
```diff
diff --git a/youtube_dl/extractor/vuclip.py b/youtube_dl/extractor/vuclip.py
index eaa888f00..b73da5cd0 100644
--- a/youtube_dl/extractor/vuclip.py
+++ b/youtube_dl/extractor/vuclip.py
@@ -9,7 +9,7 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     parse_duration,
-    qualities,
+    remove_end,
 )
 
 
@@ -22,7 +22,7 @@ class VuClipIE(InfoExtractor):
             'id': '922692425',
             'ext': '3gp',
             'title': 'The Toy Soldiers - Hollywood Movie Trailer',
-            'duration': 180,
+            'duration': 177,
         }
     }
 
@@ -46,34 +46,21 @@ class VuClipIE(InfoExtractor):
                 '%s said: %s' % (self.IE_NAME, error_msg), expected=True)
 
         # These clowns alternate between two page types
-        links_code = self._search_regex(
-            r'''(?xs)
-                (?:
-                    <img\s+src="[^"]*/play.gif".*?>|
-                    <!--\ player\ end\ -->\s*</div><!--\ thumb\ end-->
-                )
-                (.*?)
-                (?:
-                    <a\s+href="fblike|<div\s+class="social">
-                )
-            ''', webpage, 'links')
-        title = self._html_search_regex(
-            r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip()
+        video_url = self._search_regex(
+            r'<a[^>]+href="([^"]+)"[^>]*><img[^>]+src="[^"]*/play\.gif',
+            webpage, 'video URL', default=None)
+        if video_url:
+            formats = [{
+                'url': video_url,
+            }]
+        else:
+            formats = self._parse_html5_media_entries(url, webpage)[0]['formats']
 
-        quality_order = qualities(['Reg', 'Hi'])
-        formats = []
-        for url, q in re.findall(
-                r'<a\s+href="(?P<url>[^"]+)".*?>(?:<button[^>]*>)?(?P<q>[^<]+)(?:</button>)?</a>', links_code):
-            format_id = compat_urllib_parse_urlparse(url).scheme + '-' + q
-            formats.append({
-                'format_id': format_id,
-                'url': url,
-                'quality': quality_order(q),
-            })
-        self._sort_formats(formats)
+        title = remove_end(self._html_search_regex(
+            r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip(), ' - Video')
 
-        duration = parse_duration(self._search_regex(
-            r'\(([0-9:]+)\)</span>', webpage, 'duration', fatal=False))
+        duration = parse_duration(self._html_search_regex(
+            r'[(>]([0-9]+:[0-9]+)(?:<span|\))', webpage, 'duration', fatal=False))
 
         return {
             'id': video_id,
```
|