diff options
author | Filippo Valsorda <[email protected]> | 2018-01-07 15:03:28 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2018-01-07 15:03:28 +0100 |
commit | 97bc05116e15df3cf1b7b7216c60fad7ee0acc9f (patch) | |
tree | 453059413184d22a44241d251a82196c5d2461eb /youtube_dl/extractor/xfileshare.py | |
parent | 7608a91ee7b40c74a35c30b44731606382702592 (diff) | |
parent | 0a5b1295b7c1aa6395b65ee137087c540b37b32b (diff) | |
download | youtube-dl-97bc05116e15df3cf1b7b7216c60fad7ee0acc9f.tar.gz youtube-dl-97bc05116e15df3cf1b7b7216c60fad7ee0acc9f.zip |
Merge branch 'master' into totalwebcasting
Diffstat (limited to 'youtube_dl/extractor/xfileshare.py')
-rw-r--r-- | youtube_dl/extractor/xfileshare.py | 107 |
1 files changed, 71 insertions, 36 deletions
diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index e616adce3..ad747978d 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -6,31 +6,36 @@ import re from .common import InfoExtractor from ..utils import ( decode_packed_codes, + determine_ext, ExtractorError, int_or_none, NO_DEFAULT, - sanitized_Request, urlencode_postdata, ) class XFileShareIE(InfoExtractor): _SITES = ( - ('daclips.in', 'DaClips'), - ('filehoot.com', 'FileHoot'), - ('gorillavid.in', 'GorillaVid'), - ('movpod.in', 'MovPod'), - ('powerwatch.pw', 'PowerWatch'), - ('rapidvideo.ws', 'Rapidvideo.ws'), - ('thevideobee.to', 'TheVideoBee'), - ('vidto.me', 'Vidto'), - ('streamin.to', 'Streamin.To'), - ('xvidstage.com', 'XVIDSTAGE'), + (r'daclips\.(?:in|com)', 'DaClips'), + (r'filehoot\.com', 'FileHoot'), + (r'gorillavid\.(?:in|com)', 'GorillaVid'), + (r'movpod\.in', 'MovPod'), + (r'powerwatch\.pw', 'PowerWatch'), + (r'rapidvideo\.ws', 'Rapidvideo.ws'), + (r'thevideobee\.to', 'TheVideoBee'), + (r'vidto\.me', 'Vidto'), + (r'streamin\.to', 'Streamin.To'), + (r'xvidstage\.com', 'XVIDSTAGE'), + (r'vidabc\.com', 'Vid ABC'), + (r'vidbom\.com', 'VidBom'), + (r'vidlo\.us', 'vidlo'), + (r'rapidvideo\.(?:cool|org)', 'RapidVideo.TV'), + (r'fastvideo\.me', 'FastVideo.me'), ) IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1]) _VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' - % '|'.join(re.escape(site) for site in list(zip(*_SITES))[0])) + % '|'.join(site for site in list(zip(*_SITES))[0])) _FILE_NOT_FOUND_REGEXES = ( r'>(?:404 - )?File Not Found<', @@ -95,6 +100,22 @@ class XFileShareIE(InfoExtractor): # removed by administrator 'url': 'http://xvidstage.com/amfy7atlkx25', 'only_matching': True, + }, { + 'url': 'http://vidabc.com/i8ybqscrphfv', + 'info_dict': { + 'id': 'i8ybqscrphfv', + 'ext': 'mp4', + 'title': 're:Beauty and the Beast 2017', + }, + 'params': { + 'skip_download': True, 
+ }, + }, { + 'url': 'http://www.rapidvideo.cool/b667kprndr8w', + 'only_matching': True, + }, { + 'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html', + 'only_matching': True }] def _real_extract(self, url): @@ -116,12 +137,12 @@ class XFileShareIE(InfoExtractor): if countdown: self._sleep(countdown, video_id) - post = urlencode_postdata(fields) - - req = sanitized_Request(url, post) - req.add_header('Content-type', 'application/x-www-form-urlencoded') - - webpage = self._download_webpage(req, video_id, 'Downloading video page') + webpage = self._download_webpage( + url, video_id, 'Downloading video page', + data=urlencode_postdata(fields), headers={ + 'Referer': url, + 'Content-type': 'application/x-www-form-urlencoded', + }) title = (self._search_regex( (r'style="z-index: [0-9]+;">([^<]+)</span>', @@ -133,31 +154,45 @@ class XFileShareIE(InfoExtractor): webpage, 'title', default=None) or self._og_search_title( webpage, default=None) or video_id).strip() - def extract_video_url(default=NO_DEFAULT): - return self._search_regex( - (r'file\s*:\s*(["\'])(?P<url>http.+?)\1,', - r'file_link\s*=\s*(["\'])(?P<url>http.+?)\1', - r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http.+?)\2\)', - r'<embed[^>]+src=(["\'])(?P<url>http.+?)\1'), - webpage, 'file url', default=default, group='url') - - video_url = extract_video_url(default=None) - - if not video_url: + def extract_formats(default=NO_DEFAULT): + urls = [] + for regex in ( + r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1', + r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1', + r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)', + r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'): + for mobj in re.finditer(regex, webpage): + video_url = mobj.group('url') + if video_url not in urls: + urls.append(video_url) + formats = [] + for video_url in urls: + if 
determine_ext(video_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) + else: + formats.append({ + 'url': video_url, + 'format_id': 'sd', + }) + if not formats and default is not NO_DEFAULT: + return default + self._sort_formats(formats) + return formats + + formats = extract_formats(default=None) + + if not formats: webpage = decode_packed_codes(self._search_regex( r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))", webpage, 'packed code')) - video_url = extract_video_url() + formats = extract_formats() thumbnail = self._search_regex( r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None) - formats = [{ - 'format_id': 'sd', - 'url': video_url, - 'quality': 1, - }] - return { 'id': video_id, 'title': title, |