diff options
author | Sergey M․ <[email protected]> | 2018-09-03 02:53:26 +0700 |
---|---|---|
committer | Sergey M․ <[email protected]> | 2018-09-03 02:53:51 +0700 |
commit | 93284ff2ea4eb84c25b8d496012398a056ba89ac (patch) | |
tree | 67436e856e0c35e71cc3b46540d65225a017184e | |
parent | 0a9a8118ce834c206434df04c18187934acbf608 (diff) | |
download | youtube-dl-93284ff2ea4eb84c25b8d496012398a056ba89ac.tar.gz youtube-dl-93284ff2ea4eb84c25b8d496012398a056ba89ac.zip |
[radiojavan] Improve extraction (closes #17151)
-rw-r--r-- | youtube_dl/extractor/radiojavan.py | 30 |
1 files changed, 17 insertions, 13 deletions
diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py
index 4124bcd45..3f74f0c01 100644
--- a/youtube_dl/extractor/radiojavan.py
+++ b/youtube_dl/extractor/radiojavan.py
@@ -4,15 +4,16 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    unified_strdate,
+    parse_resolution,
     str_to_int,
+    unified_strdate,
     urlencode_postdata,
+    urljoin,
 )
 
 
 class RadioJavanIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
-    _HOST_TRACKER_URL = 'https://www.radiojavan.com/videos/video_host'
     _TEST = {
         'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
         'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
@@ -31,23 +32,26 @@ class RadioJavanIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id)
-
         download_host = self._download_json(
-            self._HOST_TRACKER_URL,
-            video_id,
+            'https://www.radiojavan.com/videos/video_host', video_id,
             data=urlencode_postdata({'id': video_id}),
             headers={
                 'Content-Type': 'application/x-www-form-urlencoded',
                 'Referer': url,
-            }
-        )['host']
+            }).get('host', 'https://host1.rjmusicmedia.com')
+
+        webpage = self._download_webpage(url, video_id)
 
-        formats = [{
-            'url': '%s/%s' % (download_host, video_path),
-            'format_id': '%sp' % height,
-            'height': int(height),
-        } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
+        formats = []
+        for format_id, _, video_path in re.findall(
+                r'RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
+                webpage):
+            f = parse_resolution(format_id)
+            f.update({
+                'url': urljoin(download_host, video_path),
+                'format_id': format_id,
+            })
+            formats.append(f)
         self._sort_formats(formats)
 
         title = self._og_search_title(webpage)