summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRemita Amine <[email protected]>2017-04-11 21:55:59 +0100
committerRemita Amine <[email protected]>2017-04-11 21:55:59 +0100
commit89beedd31f8a00a811787b651a506001833da0e1 (patch)
treebdb50c08f02ded350a201f312192ac1872d52ba2
parente109f1ff438f82305d91bb49944a616c53abc9bd (diff)
downloadyoutube-dl-89beedd31f8a00a811787b651a506001833da0e1.tar.gz
youtube-dl-89beedd31f8a00a811787b651a506001833da0e1.zip
[youtube] improve m3u8 format extraction
-rw-r--r--youtube_dl/extractor/youtube.py47
1 files changed, 15 insertions, 32 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 2506cd27c..9e2b9115c 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1253,21 +1253,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_id = mobj.group(2)
return video_id
- def _extract_from_m3u8(self, manifest_url, video_id):
- url_map = {}
-
- def _get_urls(_manifest):
- lines = _manifest.split('\n')
- urls = filter(lambda l: l and not l.startswith('#'),
- lines)
- return urls
- manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
- formats_urls = _get_urls(manifest)
- for format_url in formats_urls:
- itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
- url_map[itag] = format_url
- return url_map
-
def _extract_annotations(self, video_id):
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
@@ -1573,19 +1558,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if self._downloader.params.get('writeannotations', False):
video_annotations = self._extract_annotations(video_id)
- def _map_to_format_list(urlmap):
- formats = []
- for itag, video_real_url in urlmap.items():
- dct = {
- 'format_id': itag,
- 'url': video_real_url,
- 'player_url': player_url,
- }
- if itag in self._formats:
- dct.update(self._formats[itag])
- formats.append(dct)
- return formats
-
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
self.report_rtmp_download()
formats = [{
@@ -1718,11 +1690,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
formats.append(dct)
elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0]
- url_map = self._extract_from_m3u8(manifest_url, video_id)
- formats = _map_to_format_list(url_map)
- # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
- for a_format in formats:
+ formats = []
+ m3u8_formats = self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4', fatal=False)
+ for a_format in m3u8_formats:
+ itag = self._search_regex(
+ r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
+ if itag:
+ a_format['format_id'] = itag
+ if itag in self._formats:
+ dct = self._formats[itag].copy()
+ dct.update(a_format)
+ a_format = dct
+ a_format['player_url'] = player_url
+ # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
+ formats.append(a_format)
else:
unavailable_message = self._html_search_regex(
r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',