diff options
author | Sergey M․ <[email protected]> | 2020-05-01 00:40:38 +0700 |
---|---|---|
committer | Sergey M․ <[email protected]> | 2020-05-01 00:40:38 +0700 |
commit | 011e75e641ad2b7fa141a05fa35d3e5d6d0b736b (patch) | |
tree | 208bb3fe1fd7c5b374492310c13ae931ac2ba131 | |
parent | 2468a6fa6416fbe617aea00d8203abc74f481d87 (diff) | |
download | youtube-dl-011e75e641ad2b7fa141a05fa35d3e5d6d0b736b.tar.gz youtube-dl-011e75e641ad2b7fa141a05fa35d3e5d6d0b736b.zip |
[youtube] Use redirected video id if any (closes #25063)
-rw-r--r-- | youtube_dl/extractor/youtube.py | 25 |
1 file changed, 24 insertions, 1 deletion
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index afaa12b1b..28886cff2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1227,6 +1227,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q', 'only_matching': True, }, + { + # invalid -> valid video id redirection + 'url': 'DJztXj2GPfl', + 'info_dict': { + 'id': 'DJztXj2GPfk', + 'ext': 'mp4', + 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)', + 'description': 'md5:bf577a41da97918e94fa9798d9228825', + 'upload_date': '20090125', + 'uploader': 'Prochorowka', + 'uploader_id': 'Prochorowka', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka', + 'artist': 'Panjabi MC', + 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix', + 'album': 'Beware of the Boys (Mundian To Bach Ke)', + }, + 'params': { + 'skip_download': True, + }, + } ] def __init__(self, *args, **kwargs): @@ -1678,7 +1698,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Get video webpage url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id - video_webpage = self._download_webpage(url, video_id) + video_webpage, urlh = self._download_webpage_handle(url, video_id) + + qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query) + video_id = qs.get('v', [None])[0] or video_id # Attempt to extract SWF player URL mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage) |