[youtube] Rework extractors

WIP
author: Sergey M․ <[email protected]> 2020-11-12 06:16:37 +0700
committer: Sergey M․ <[email protected]> 2020-11-12 06:16:37 +0700
commit: 2de2ca6659a18b6f5ab76565e00491153ae47276 (patch)
tree: 979c14424e00ee0ce6c2afc21eea3cd7939959b4
parent: 051071203ccedcaf6d0de0f0d83af593c78673e6 (diff)
download: youtube-dl-2de2ca6659a18b6f5ab76565e00491153ae47276.tar.gz
youtube-dl-2de2ca6659a18b6f5ab76565e00491153ae47276.zip
3 files changed, 565 insertions, 569 deletions
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index 81056a999..348744028 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -34,13 +34,13 @@ class TestAllURLsMatching(unittest.TestCase):
         assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
         assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q')  # 585
         assertPlaylist('PL63F0C78739B09958')
-        assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
+        # assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
         assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
-        assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
+        # assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
         assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')  # 668
         self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
         # Top tracks
-        assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
+        # assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
 
     def test_youtube_matching(self):
         self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
@@ -51,26 +51,22 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
 
     def test_youtube_channel_matching(self):
-        assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
+        assertChannel = lambda url: self.assertMatch(url, ['youtube:tab'])
         assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
         assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
         assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
 
-    def test_youtube_user_matching(self):
-        self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
+    # def test_youtube_user_matching(self):
+    #     self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
 
     def test_youtube_feeds(self):
         self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
         self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
         self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
-        self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
 
-    def test_youtube_show_matching(self):
-        self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
-
-    def test_youtube_search_matching(self):
-        self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
-        self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
+    # def test_youtube_search_matching(self):
+    #     self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
+    #     self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
 
     def test_youtube_extract(self):
         assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index ae7079a6a..9d7fecfe8 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1474,21 +1474,18 @@ from .yourporn import YourPornIE
 from .yourupload import YourUploadIE
 from .youtube import (
     YoutubeIE,
-    YoutubeChannelIE,
-    YoutubeFavouritesIE,
     YoutubeHistoryIE,
     YoutubeLiveIE,
+    YoutubeTabIE,
     YoutubePlaylistIE,
-    YoutubePlaylistsIE,
     YoutubeRecommendedIE,
     YoutubeSearchDateIE,
     YoutubeSearchIE,
-    YoutubeSearchURLIE,
-    YoutubeShowIE,
+    #YoutubeSearchURLIE,
     YoutubeSubscriptionsIE,
     YoutubeTruncatedIDIE,
     YoutubeTruncatedURLIE,
-    YoutubeUserIE,
+    YoutubeYtUserIE,
     YoutubeWatchLaterIE,
 )
 from .zapiks import ZapiksIE
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index c31731ac0..696dec2c1 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -16,8 +16,6 @@ from ..jsinterp import JSInterpreter
 from ..swfinterp import SWFInterpreter
 from ..compat import (
     compat_chr,
-    compat_HTTPError,
-    compat_kwargs,
     compat_parse_qs,
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote_plus,
@@ -30,10 +28,8 @@ from ..utils import (
     bool_or_none,
     clean_html,
     error_to_compat_str,
-    extract_attributes,
     ExtractorError,
     float_or_none,
-    get_element_by_attribute,
     get_element_by_id,
     int_or_none,
     mimetype2ext,
@@ -52,6 +48,7 @@ from ..utils import (
     uppercase_escape,
     url_or_none,
     urlencode_postdata,
+    urljoin,
 )
 
 
@@ -269,13 +266,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 
         return True
 
-    def _download_webpage_handle(self, *args, **kwargs):
-        query = kwargs.get('query', {}).copy()
-        query['disable_polymer'] = 'true'
-        kwargs['query'] = query
-        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
-            *args, **compat_kwargs(kwargs))
-
     def _real_initialize(self):
         if self._downloader is None:
             return
@@ -283,93 +273,34 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         if not self._login():
             return
 
+    _DEFAULT_API_DATA = {
+        'context': {
+            'client': {
+                'clientName': 'WEB',
+                'clientVersion': '2.20201021.03.00',
+            }
+        },
+    }
 
-class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
-    # Extract entries from page with "Load more" button
-    def _entries(self, page, playlist_id):
-        more_widget_html = content_html = page
-        for page_num in itertools.count(1):
-            for entry in self._process_page(content_html):
-                yield entry
-
-            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
-            if not mobj:
-                break
-
-            count = 0
-            retries = 3
-            while count <= retries:
-                try:
-                    # Downloading page may result in intermittent 5xx HTTP error
-                    # that is usually worked around with a retry
-                    more = self._download_json(
-                        'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
-                        'Downloading page #%s%s'
-                        % (page_num, ' (retry #%d)' % count if count else ''),
-                        transform_source=uppercase_escape,
-                        headers=self._YOUTUBE_CLIENT_HEADERS)
-                    break
-                except ExtractorError as e:
-                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
-                        count += 1
-                        if count <= retries:
-                            continue
-                    raise
-
-            content_html = more['content_html']
-            if not content_html.strip():
-                # Some webpages show a "Load more" button but they don't
-                # have more videos
-                break
-            more_widget_html = more['load_more_widget_html']
-
+    def _call_api(self, ep, query, video_id):
+        data = self._DEFAULT_API_DATA.copy()
+        data.update(query)
 
-class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
-    def _process_page(self, content):
-        for video_id, video_title in self.extract_videos_from_page(content):
-            yield self.url_result(video_id, 'Youtube', video_id, video_title)
+        response = self._download_json(
+            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
+            note='Downloading API JSON', errnote='Unable to download API page',
+            data=json.dumps(data).encode('utf8'),
+            headers={'content-type': 'application/json'},
+            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
 
-    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
-        for mobj in re.finditer(video_re, page):
-            # The link with index 0 is not the first video of the playlist (not sure if still actual)
-            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
-                continue
-            video_id = mobj.group('id')
-            video_title = unescapeHTML(
-                mobj.group('title')) if 'title' in mobj.groupdict() else None
-            if video_title:
-                video_title = video_title.strip()
-            if video_title == '► Play all':
-                video_title = None
-            try:
-                idx = ids_in_page.index(video_id)
-                if video_title and not titles_in_page[idx]:
-                    titles_in_page[idx] = video_title
-            except ValueError:
-                ids_in_page.append(video_id)
-                titles_in_page.append(video_title)
-
-    def extract_videos_from_page(self, page):
-        ids_in_page = []
-        titles_in_page = []
-        self.extract_videos_from_page_impl(
-            self._VIDEO_RE, page, ids_in_page, titles_in_page)
-        return zip(ids_in_page, titles_in_page)
-
-
-class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
-    def _process_page(self, content):
-        for playlist_id in orderedSet(re.findall(
-                r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
-                content)):
-            yield self.url_result(
-                'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
+        return response
 
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-        webpage = self._download_webpage(url, playlist_id)
-        title = self._og_search_title(webpage, fatal=False)
-        return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
+    def _extract_yt_initial_data(self, video_id, webpage):
+        return self._parse_json(
+            self._search_regex(
+                r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;',
+                webpage, 'yt initial data'),
+            video_id)
 
 
 class YoutubeIE(YoutubeBaseInfoExtractor):
@@ -430,7 +361,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                          )
                      )?                                                       # all until now is optional -> you can pass the naked ID
-                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
+                     (?P<id>[0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                      (?!.*?\blist=
                         (?:
                             %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
@@ -1891,6 +1822,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
                 add_dash_mpd_pr(player_response)
 
+        if not video_info and not player_response:
+            player_response = extract_player_response(
+                self._search_regex(
+                    r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;', video_webpage,
+                    'initial player response', default='{}'),
+                video_id)
+
         def extract_unavailable_message():
             messages = []
             for tag, kind in (('h1', 'message'), ('div', 'submessage')):
@@ -2564,38 +2502,40 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         }
 
 
-class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
-    IE_DESC = 'YouTube.com playlists'
-    _VALID_URL = r"""(?x)(?:
-                        (?:https?://)?
-                        (?:\w+\.)?
-                        (?:
-                            (?:
-                                youtube(?:kids)?\.com|
-                                invidio\.us
-                            )
-                            /
-                            (?:
-                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
-                               \? (?:.*?[&;])*? (?:p|a|list)=
-                            |  p/
-                            )|
-                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
-                        )
-                        (
-                            (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
-                            # Top tracks, they can also include dots
-                            |(?:MC)[\w\.]*
-                        )
-                        .*
-                     |
-                        (%(playlist_id)s)
-                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
-    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
-    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
-    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
-    IE_NAME = 'youtube:playlist'
+class YoutubeTabIE(YoutubeBaseInfoExtractor):
+    IE_DESC = 'YouTube.com tab'
+    _VALID_URL = r'https?://(?:\w+\.)?(?:youtube(?:kids)?\.com|invidio\.us)/(?:(?:channel|c|user)/|playlist\?.*?\blist=)(?P<id>[^/?#&]+)'
+    IE_NAME = 'youtube:tab'
+
     _TESTS = [{
+        # playlists, multipage
+        'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
+        'playlist_mincount': 94,
+        'info_dict': {
+            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
+            'title': 'Игорь Клейнер - Playlists',
+        },
+    }, {
+        # playlists, multipage, different order
+        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
+        'playlist_mincount': 94,
+        'info_dict': {
+            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
+            'title': 'Игорь Клейнер - Playlists',
+        },
+    }, {
+        # playlists, singlepage
+        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
+        'playlist_mincount': 4,
+        'info_dict': {
+            'id': 'ThirstForScience',
+            'title': 'ThirstForScience',
+        }
+    }, {
+        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
+        'only_matching': True,
+    }, {
+        # basic, single video playlist
         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
         'info_dict': {
             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
@@ -2605,6 +2545,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
         },
         'playlist_count': 1,
     }, {
+        # empty playlist
         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
         'info_dict': {
             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
@@ -2614,6 +2555,63 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
         },
         'playlist_count': 0,
     }, {
+        # Home tab
+        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
+        'info_dict': {
+            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+            'title': 'lex will - Home',
+        },
+        'playlist_mincount': 2,
+    }, {
+        # Videos tab
+        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
+        'info_dict': {
+            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+            'title': 'lex will - Videos',
+        },
+        'playlist_mincount': 975,
+    }, {
+        # Videos tab, sorted by popular
+        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
+        'info_dict': {
+            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+            'title': 'lex will - Videos',
+        },
+        'playlist_mincount': 199,
+    }, {
+        # Playlists tab
+        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
+        'info_dict': {
+            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+            'title': 'lex will - Playlists',
+        },
+        'playlist_mincount': 17,
+    }, {
+        # Community tab
+        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
+        'info_dict': {
+            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+            'title': 'lex will - Community',
+        },
+        'playlist_mincount': 18,
+    }, {
+        # Channels tab
+        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
+        'info_dict': {
+            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+            'title': 'lex will - Channels',
+        },
+        'playlist_mincount': 138,
+    }, {
+        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
+        'only_matching': True,
+    }, {
+        'url': 'https://music.youtube.com/channel/UCT-K0qO8z6NzWrywqefBPBQ',
+        'only_matching': True,
+    }, {
         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
         'info_dict': {
@@ -2624,16 +2622,6 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
         },
         'playlist_count': 96,
     }, {
-        'note': 'issue #673',
-        'url': 'PLBB231211A4F62143',
-        'info_dict': {
-            'title': '[OLD]Team Fortress 2 (Class-based LP)',
-            'id': 'PLBB231211A4F62143',
-            'uploader': 'Wickydoo',
-            'uploader_id': 'Wickydoo',
-        },
-        'playlist_mincount': 26,
-    }, {
         'note': 'Large playlist',
         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
         'info_dict': {
@@ -2642,7 +2630,411 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
             'uploader': 'Cauchemar',
             'uploader_id': 'Cauchemar89',
         },
-        'playlist_mincount': 799,
+        'playlist_mincount': 1123,
+    }, {
+        # even larger playlist, 8832 videos
+        'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
+        'only_matching': True,
+    }, {
+        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
+        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
+        'info_dict': {
+            'title': 'Uploads from Interstellar Movie',
+            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
+            'uploader': 'Interstellar Movie',
+            'uploader_id': 'InterstellarMovie1',
+        },
+        'playlist_mincount': 21,
+    }, {
+        # https://github.com/ytdl-org/youtube-dl/issues/21844
+        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
+        'info_dict': {
+            'title': 'Data Analysis with Dr Mike Pound',
+            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
+            'uploader_id': 'Computerphile',
+            'uploader': 'Computerphile',
+        },
+        'playlist_mincount': 11,
+    }, {
+        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if YoutubeLiveIE.suitable(url) else super(
+            YoutubeTabIE, cls).suitable(url)
+
+    def _extract_channel_id(self, webpage):
+        channel_id = self._html_search_meta(
+            'channelId', webpage, 'channel id', default=None)
+        if channel_id:
+            return channel_id
+        channel_url = self._html_search_meta(
+            ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
+             'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
+             'twitter:app:url:googleplay'), webpage, 'channel url')
+        return self._search_regex(
+            r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
+            channel_url, 'channel id')
+
+    @staticmethod
+    def _extract_grid_item_renderer(item):
+        for item_kind in ('Playlist', 'Video', 'Channel'):
+            renderer = item.get('grid%sRenderer' % item_kind)
+            if renderer:
+                return renderer
+
+    def _extract_video(self, renderer):
+        video_id = renderer.get('videoId')
+        title = try_get(
+            renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
+        description = try_get(
+            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
+            compat_str)
+        duration = parse_duration(try_get(
+            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
+        view_count_text = try_get(
+            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
+        view_count = int_or_none(self._search_regex(
+            r'^(\d+)', re.sub(r'\s', '', view_count_text),
+            'view count', default=None))
+        uploader = try_get(
+            renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
+        return {
+            '_type': 'url_transparent',
+            'ie_key': YoutubeIE.ie_key(),
+            'id': video_id,
+            'url': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'view_count': view_count,
+            'uploader': uploader,
+        }
+
+    def _grid_entries(self, grid_renderer):
+        for item in grid_renderer['items']:
+            if not isinstance(item, dict):
+                continue
+            renderer = self._extract_grid_item_renderer(item)
+            if not isinstance(renderer, dict):
+                continue
+            title = try_get(
+                renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
+            # playlist
+            playlist_id = renderer.get('playlistId')
+            if playlist_id:
+                yield self.url_result(
+                    'https://www.youtube.com/playlist?list=%s' % playlist_id,
+                    ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
+                    video_title=title)
+            # video
+            video_id = renderer.get('videoId')
+            if video_id:
+                yield self._extract_video(renderer)
+            # channel
+            channel_id = renderer.get('channelId')
+            if channel_id:
+                title = try_get(
+                    renderer, lambda x: x['title']['simpleText'], compat_str)
+                yield self.url_result(
+                    'https://www.youtube.com/channel/%s' % channel_id,
+                    ie=YoutubeTabIE.ie_key(), video_title=title)
+
+    def _shelf_entries_trimmed(self, shelf_renderer):
+        renderer = try_get(
+            shelf_renderer, lambda x: x['content']['horizontalListRenderer'], dict)
+        if not renderer:
+            return
+        # TODO: add support for nested playlists so each shelf is processed
+        # as separate playlist
+        # TODO: this includes only first N items
+        for entry in self._grid_entries(renderer):
+            yield entry
+
+    def _shelf_entries(self, shelf_renderer):
+        ep = try_get(
+            shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
+            compat_str)
+        shelf_url = urljoin('https://www.youtube.com', ep)
+        if not shelf_url:
+            return
+        title = try_get(
+            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
+        yield self.url_result(shelf_url, video_title=title)
+
+    def _playlist_entries(self, video_list_renderer):
+        for content in video_list_renderer['contents']:
+            if not isinstance(content, dict):
+                continue
+            renderer = content.get('playlistVideoRenderer')
+            if not isinstance(renderer, dict):
+                continue
+            video_id = renderer.get('videoId')
+            if not video_id:
+                continue
+            yield self._extract_video(renderer)
+
+    def _video_entry(self, video_renderer):
+        video_id = video_renderer.get('videoId')
+        if video_id:
+            return self._extract_video(video_renderer)
+
+    def _post_thread_entries(self, post_thread_renderer):
+        post_renderer = try_get(
+            post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
+        if not post_renderer:
+            return
+        # video attachment
+        video_renderer = try_get(
+            post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict)
+        video_id = None
+        if video_renderer:
+            entry = self._video_entry(video_renderer)
+            if entry:
+                yield entry
+        # inline video links
+        runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
+        for run in runs:
+            if not isinstance(run, dict):
+                continue
+            ep_url = try_get(
+                run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
+            if not ep_url:
+                continue
+            if not YoutubeIE.suitable(ep_url):
+                continue
+            ep_video_id = YoutubeIE._match_id(ep_url)
+            if video_id == ep_video_id:
+                continue
+            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=video_id)
+
+    def _post_thread_continuation_entries(self, post_thread_continuation):
+        contents = post_thread_continuation.get('contents')
+        if not isinstance(contents, list):
+            return
+        for content in contents:
+            renderer = content.get('backstagePostThreadRenderer')
+            if not isinstance(renderer, dict):
+                continue
+            for entry in self._post_thread_entries(renderer):
+                yield entry
+
+    @staticmethod
+    def _extract_next_continuation_data(renderer):
+        next_continuation = try_get(
+            renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
+        if not next_continuation:
+            return
+        continuation = next_continuation.get('continuation')
+        if not continuation:
+            return
+        ctp = next_continuation.get('clickTrackingParams')
+        return {
+            'ctoken': continuation,
+            'continuation': continuation,
+            'itct': ctp,
+        }
+
+    @classmethod
+    def _extract_continuation(cls, renderer):
+        next_continuation = cls._extract_next_continuation_data(renderer)
+        if next_continuation:
+            return next_continuation
+        contents = renderer.get('contents')
+        if not isinstance(contents, list):
+            return
+        for content in contents:
+            if not isinstance(content, dict):
+                continue
+            continuation_ep = try_get(
+                content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
+                dict)
+            if not continuation_ep:
+                continue
+            continuation = try_get(
+                continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
+            if not continuation:
+                continue
+            ctp = continuation_ep.get('clickTrackingParams')
+            if not ctp:
+                continue
+            return {
+                'ctoken': continuation,
+                'continuation': continuation,
+                'itct': ctp,
+            }
+
+    def _entries(self, tab):
+        continuation = None
+        slr_contents = tab['sectionListRenderer']['contents']
+        for slr_content in slr_contents:
+            if not isinstance(slr_content, dict):
+                continue
+            is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
+            if not is_renderer:
+                continue
+            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
+            for isr_content in isr_contents:
+                if not isinstance(isr_content, dict):
+                    continue
+                renderer = isr_content.get('playlistVideoListRenderer')
+                if renderer:
+                    for entry in self._playlist_entries(renderer):
+                        yield entry
+                    continuation = self._extract_continuation(renderer)
+                    continue
+                renderer = isr_content.get('gridRenderer')
+                if renderer:
+                    for entry in self._grid_entries(renderer):
+                        yield entry
+                    continuation = self._extract_continuation(renderer)
+                    continue
+                renderer = isr_content.get('shelfRenderer')
+                if renderer:
+                    for entry in self._shelf_entries(renderer):
+                        yield entry
+                    continue
+                renderer = isr_content.get('backstagePostThreadRenderer')
+                if renderer:
+                    for entry in self._post_thread_entries(renderer):
+                        yield entry
+                    continuation = self._extract_continuation(renderer)
+                    continue
+                renderer = isr_content.get('videoRenderer')
+                if renderer:
+                    entry = self._video_entry(renderer)
+                    if entry:
+                        yield entry
+
+            if not continuation:
+                continuation = self._extract_continuation(is_renderer)
+
+        for page_num in itertools.count(1):
+            if not continuation:
+                break
+            browse = self._download_json(
+                'https://www.youtube.com/browse_ajax', None,
+                'Downloading page %d' % page_num,
+                headers={
+                    'x-youtube-client-name': '1',
+                    'x-youtube-client-version': '2.20201030.01.00',
+                }, query=continuation, fatal=False)
+            if not browse:
+                break
+            response = try_get(browse, lambda x: x[1]['response'], dict)
+            if not response:
+                break
+
+            continuation_contents = try_get(
+                response, lambda x: x['continuationContents'], dict)
+            if continuation_contents:
+                continuation_renderer = continuation_contents.get('playlistVideoListContinuation')
+                if continuation_renderer:
+                    for entry in self._playlist_entries(continuation_renderer):
+                        yield entry
+                    continuation = self._extract_continuation(continuation_renderer)
+                    continue
+                continuation_renderer = continuation_contents.get('gridContinuation')
+                if continuation_renderer:
+                    for entry in self._grid_entries(continuation_renderer):
+                        yield entry
+                    continuation = self._extract_continuation(continuation_renderer)
+                    continue
+                continuation_renderer = continuation_contents.get('itemSectionContinuation')
+                if continuation_renderer:
+                    for entry in self._post_thread_continuation_entries(continuation_renderer):
+                        yield entry
+                    continuation = self._extract_continuation(continuation_renderer)
+                    continue
+
+            continuation_items = try_get(
+                response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list)
+            if continuation_items:
+                continuation_item = continuation_items[0]
+                if not isinstance(continuation_item, dict):
+                    continue
+                renderer = continuation_item.get('playlistVideoRenderer')
+                if renderer:
+                    video_list_renderer = {'contents': continuation_items}
+                    for entry in self._playlist_entries(video_list_renderer):
+                        yield entry
+                    continuation = self._extract_continuation(video_list_renderer)
+                    continue
+
+            break
+
+    @staticmethod
+    def _extract_selected_tab(tabs):
+        for tab in tabs:
+            if try_get(tab, lambda x: x['tabRenderer']['selected'], bool):
+                return tab['tabRenderer']
+        else:
+            raise ExtractorError('Unable to find selected tab')
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+        url = compat_urlparse.urlunparse(
+            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
+        webpage = self._download_webpage(url, channel_id)
+        data = self._extract_yt_initial_data(channel_id, webpage)
+        tabs = data['contents']['twoColumnBrowseResultsRenderer']['tabs']
+        selected_tab = self._extract_selected_tab(tabs)
+        channel_title = try_get(
+            data, lambda x: x['metadata']['channelMetadataRenderer']['title'],
+            compat_str)
+        channel_external_id = try_get(
+            data, lambda x: x['metadata']['channelMetadataRenderer']['externalId'],
+            compat_str)
+        tab_title = selected_tab.get('title')
+        title = channel_title or channel_id
+        if tab_title:
+            title += ' - %s' % tab_title
+        return self.playlist_result(
+            self._entries(selected_tab['content']),
+            playlist_id=channel_external_id or channel_id,
+            playlist_title=title)
+
+
+class YoutubePlaylistIE(InfoExtractor):
+    IE_DESC = 'YouTube.com playlists'
+    _VALID_URL = r"""(?x)(?:
+                        (?:https?://)?
+                        (?:\w+\.)?
+                        (?:
+                            (?:
+                                youtube(?:kids)?\.com|
+                                invidio\.us
+                            )
+                            /
+                            (?:
+                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
+                               \? (?:.*?[&;])*? (?:p|a|list)=
+                            |  p/
+                            )|
+                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
+                        )
+                        (
+                            (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
+                            # Top tracks, they can also include dots
+                            |(?:MC)[\w\.]*
+                        )
+                        .*
+                     |
+                        (%(playlist_id)s)
+                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
+    IE_NAME = 'youtube:playlist'
+    _TESTS = [{
+        'note': 'issue #673',
+        'url': 'PLBB231211A4F62143',
+        'info_dict': {
+            'title': '[OLD]Team Fortress 2 (Class-based LP)',
+            'id': 'PLBB231211A4F62143',
+            'uploader': 'Wickydoo',
+            'uploader_id': 'Wickydoo',
+        },
+        'playlist_mincount': 29,
     }, {
         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
         'info_dict': {
@@ -2663,7 +3055,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
         }
     }, {
         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
-        'playlist_mincount': 485,
+        'playlist_mincount': 982,
         'info_dict': {
             'title': '2018 Chinese New Singles (11/6 updated)',
             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
@@ -2680,16 +3072,6 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
         },
         'skip': 'This playlist does not exist',
     }, {
-        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
-        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
-        'info_dict': {
-            'title': 'Uploads from Interstellar Movie',
-            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
-            'uploader': 'Interstellar Movie',
-            'uploader_id': 'InterstellarMovie1',
-        },
-        'playlist_mincount': 21,
-    }, {
         # Playlist URL that does not actually serve a playlist
         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
         'info_dict': {
@@ -2734,16 +3116,6 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
             'skip_download': True,
         },
     }, {
-        # https://github.com/ytdl-org/youtube-dl/issues/21844
-        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
-        'info_dict': {
-            'title': 'Data Analysis with Dr Mike Pound',
-            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
-            'uploader_id': 'Computerphile',
-            'uploader': 'Computerphile',
-        },
-        'playlist_mincount': 11,
-    }, {
         'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
         'only_matching': True,
     }, {
@@ -2754,152 +3126,14 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
         'only_matching': True,
     }, {
-        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
-        'only_matching': True,
-    }, {
         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
         'only_matching': True,
     }]
 
-    def _real_initialize(self):
-        self._login()
-
-    def extract_videos_from_page(self, page):
-        ids_in_page = []
-        titles_in_page = []
-
-        for item in re.findall(
-                r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
-            attrs = extract_attributes(item)
-            video_id = attrs['data-video-id']
-            video_title = unescapeHTML(attrs.get('data-title'))
-            if video_title:
-                video_title = video_title.strip()
-            ids_in_page.append(video_id)
-            titles_in_page.append(video_title)
-
-        # Fallback with old _VIDEO_RE
-        self.extract_videos_from_page_impl(
-            self._VIDEO_RE, page, ids_in_page, titles_in_page)
-
-        # Relaxed fallbacks
-        self.extract_videos_from_page_impl(
-            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
-            ids_in_page, titles_in_page)
-        self.extract_videos_from_page_impl(
-            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
-            ids_in_page, titles_in_page)
-
-        return zip(ids_in_page, titles_in_page)
-
-    def _extract_mix(self, playlist_id):
-        # The mixes are generated from a single video
-        # the id of the playlist is just 'RD' + video_id
-        ids = []
-        last_id = playlist_id[-11:]
-        for n in itertools.count(1):
-            url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
-            webpage = self._download_webpage(
-                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
-            new_ids = orderedSet(re.findall(
-                r'''(?xs)data-video-username=".*?".*?
-                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
-                webpage))
-            # Fetch new pages until all the videos are repeated, it seems that
-            # there are always 51 unique videos.
-            new_ids = [_id for _id in new_ids if _id not in ids]
-            if not new_ids:
-                break
-            ids.extend(new_ids)
-            last_id = ids[-1]
-
-        url_results = self._ids_to_results(ids)
-
-        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
-        title_span = (
-            search_title('playlist-title')
-            or search_title('title long-title')
-            or search_title('title'))
-        title = clean_html(title_span)
-
-        return self.playlist_result(url_results, playlist_id, title)
-
-    def _extract_playlist(self, playlist_id):
-        url = self._TEMPLATE_URL % playlist_id
-        page = self._download_webpage(url, playlist_id)
-
-        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
-        for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
-            match = match.strip()
-            # Check if the playlist exists or is private
-            mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
-            if mobj:
-                reason = mobj.group('reason')
-                message = 'This playlist %s' % reason
-                if 'private' in reason:
-                    message += ', use --username or --netrc to access it'
-                message += '.'
-                raise ExtractorError(message, expected=True)
-            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
-                raise ExtractorError(
-                    'Invalid parameters. Maybe URL is incorrect.',
-                    expected=True)
-            elif re.match(r'[^<]*Choose your language[^<]*', match):
-                continue
-            else:
-                self.report_warning('Youtube gives an alert message: ' + match)
-
-        playlist_title = self._html_search_regex(
-            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
-            page, 'title', default=None)
-
-        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
-        uploader = self._html_search_regex(
-            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
-            page, 'uploader', default=None)
-        mobj = re.search(
-            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
-            page)
-        if mobj:
-            uploader_id = mobj.group('uploader_id')
-            uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
-        else:
-            uploader_id = uploader_url = None
-
-        has_videos = True
-
-        if not playlist_title:
-            try:
-                # Some playlist URLs don't actually serve a playlist (e.g.
-                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
-                next(self._entries(page, playlist_id))
-            except StopIteration:
-                has_videos = False
-
-        playlist = self.playlist_result(
-            self._entries(page, playlist_id), playlist_id, playlist_title)
-        playlist.update({
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'uploader_url': uploader_url,
-        })
-
-        return has_videos, playlist
-
-    def _check_download_just_video(self, url, playlist_id):
-        # Check if it's a video-specific URL
-        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-        video_id = query_dict.get('v', [None])[0] or self._search_regex(
-            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
-            'video id', default=None)
-        if video_id:
-            if self._downloader.params.get('noplaylist'):
-                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
-                return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
-            else:
-                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
-                return video_id, None
-        return None, None
+    @classmethod
+    def suitable(cls, url):
+        return False if YoutubeTabIE.suitable(url) else super(
+            YoutubePlaylistIE, cls).suitable(url)
 
     def _real_extract(self, url):
         # Extract playlist id
@@ -2907,184 +3141,23 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
         if mobj is None:
             raise ExtractorError('Invalid URL: %s' % url)
         playlist_id = mobj.group(1) or mobj.group(2)
+        return self.url_result(
+            'https://www.youtube.com/playlist?list=%s' % playlist_id,
+            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
 
-        video_id, video = self._check_download_just_video(url, playlist_id)
-        if video:
-            return video
-
-        if playlist_id.startswith(('RD', 'UL', 'PU')):
-            # Mixes require a custom extraction process
-            return self._extract_mix(playlist_id)
-
-        has_videos, playlist = self._extract_playlist(playlist_id)
-        if has_videos or not video_id:
-            return playlist
-
-        # Some playlist URLs don't actually serve a playlist (see
-        # https://github.com/ytdl-org/youtube-dl/issues/10537).
-        # Fallback to plain video extraction if there is a video id
-        # along with playlist id.
-        return self.url_result(video_id, 'Youtube', video_id=video_id)
-
-
-class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
-    IE_DESC = 'YouTube.com channels'
-    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
-    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
-    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
-    IE_NAME = 'youtube:channel'
-    _TESTS = [{
-        'note': 'paginated channel',
-        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
-        'playlist_mincount': 91,
-        'info_dict': {
-            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
-            'title': 'Uploads from lex will',
-            'uploader': 'lex will',
-            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
-        }
-    }, {
-        'note': 'Age restricted channel',
-        # from https://www.youtube.com/user/DeusExOfficial
-        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
-        'playlist_mincount': 64,
-        'info_dict': {
-            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
-            'title': 'Uploads from Deus Ex',
-            'uploader': 'Deus Ex',
-            'uploader_id': 'DeusExOfficial',
-        },
-    }, {
-        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
-        'only_matching': True,
-    }]
-
-    @classmethod
-    def suitable(cls, url):
-        return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
-                else super(YoutubeChannelIE, cls).suitable(url))
-
-    def _build_template_url(self, url, channel_id):
-        return self._TEMPLATE_URL % channel_id
-
-    def _real_extract(self, url):
-        channel_id = self._match_id(url)
-
-        url = self._build_template_url(url, channel_id)
-
-        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
-        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
-        # otherwise fallback on channel by page extraction
-        channel_page = self._download_webpage(
-            url + '?view=57', channel_id,
-            'Downloading channel page', fatal=False)
-        if channel_page is False:
-            channel_playlist_id = False
-        else:
-            channel_playlist_id = self._html_search_meta(
-                'channelId', channel_page, 'channel id', default=None)
-            if not channel_playlist_id:
-                channel_url = self._html_search_meta(
-                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
-                    channel_page, 'channel url', default=None)
-                if channel_url:
-                    channel_playlist_id = self._search_regex(
-                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
-                        channel_url, 'channel id', default=None)
-        if channel_playlist_id and channel_playlist_id.startswith('UC'):
-            playlist_id = 'UU' + channel_playlist_id[2:]
-            return self.url_result(
-                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
-
-        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
-        autogenerated = re.search(r'''(?x)
-                class="[^"]*?(?:
-                    channel-header-autogenerated-label|
-                    yt-channel-title-autogenerated
-                )[^"]*"''', channel_page) is not None
-
-        if autogenerated:
-            # The videos are contained in a single page
-            # the ajax pages can't be used, they are empty
-            entries = [
-                self.url_result(
-                    video_id, 'Youtube', video_id=video_id,
-                    video_title=video_title)
-                for video_id, video_title in self.extract_videos_from_page(channel_page)]
-            return self.playlist_result(entries, channel_id)
-
-        try:
-            next(self._entries(channel_page, channel_id))
-        except StopIteration:
-            alert_message = self._html_search_regex(
-                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
-                channel_page, 'alert', default=None, group='alert')
-            if alert_message:
-                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
-
-        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
-
-
-class YoutubeUserIE(YoutubeChannelIE):
-    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
-    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
-    IE_NAME = 'youtube:user'
 
+class YoutubeYtUserIE(InfoExtractor):
+    _VALID_URL = r'ytuser:(?P<id>.+)'
     _TESTS = [{
-        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
-        'playlist_mincount': 320,
-        'info_dict': {
-            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
-            'title': 'Uploads from The Linux Foundation',
-            'uploader': 'The Linux Foundation',
-            'uploader_id': 'TheLinuxFoundation',
-        }
-    }, {
-        # Only available via https://www.youtube.com/c/12minuteathlete/videos
-        # but not https://www.youtube.com/user/12minuteathlete/videos
-        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
-        'playlist_mincount': 249,
-        'info_dict': {
-            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
-            'title': 'Uploads from 12 Minute Athlete',
-            'uploader': '12 Minute Athlete',
-            'uploader_id': 'the12minuteathlete',
-        }
-    }, {
         'url': 'ytuser:phihag',
         'only_matching': True,
-    }, {
-        'url': 'https://www.youtube.com/c/gametrailers',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.youtube.com/gametrailers',
-        'only_matching': True,
-    }, {
-        # This channel is not available, geo restricted to JP
-        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
-        'only_matching': True,
     }]
 
-    @classmethod
-    def suitable(cls, url):
-        # Don't return True if the url can be extracted with other youtube
-        # extractor, the regex would is too permissive and it would match.
-        other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
-        if any(ie.suitable(url) for ie in other_yt_ies):
-            return False
-        else:
-            return super(YoutubeUserIE, cls).suitable(url)
-
-    def _build_template_url(self, url, channel_id):
-        mobj = re.match(self._VALID_URL, url)
-        return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
+    def _real_extract(self, url):
+        user_id = self._match_id(url)
+        return self.url_result(
+            'https://www.youtube.com/user/%s' % user_id,
+            ie=YoutubeTabIE.ie_key(), video_id=user_id)
 
 
 class YoutubeLiveIE(YoutubeBaseInfoExtractor):
@@ -3139,45 +3212,7 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor):
         return self.url_result(base_url)
 
 
-class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
-    IE_DESC = 'YouTube.com user/channel playlists'
-    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'
-    IE_NAME = 'youtube:playlists'
-
-    _TESTS = [{
-        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
-        'playlist_mincount': 4,
-        'info_dict': {
-            'id': 'ThirstForScience',
-            'title': 'ThirstForScience',
-        },
-    }, {
-        # with "Load more" button
-        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
-        'playlist_mincount': 70,
-        'info_dict': {
-            'id': 'igorkle1',
-            'title': 'Игорь Клейнер',
-        },
-    }, {
-        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
-        'playlist_mincount': 17,
-        'info_dict': {
-            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
-            'title': 'Chem Player',
-        },
-        'skip': 'Blocked',
-    }, {
-        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
-        'only_matching': True,
-    }]
-
-
-class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
-    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
-
-
-class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
+class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
     IE_DESC = 'YouTube.com searches'
     # there doesn't appear to be a real limit, for example if you search for
     # 'python' you get more than 8.000.000 results
@@ -3274,7 +3309,8 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
     _SEARCH_PARAMS = 'CAI%3D'
 
 
-class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
+r"""
+class YoutubeSearchURLIE(YoutubeSearchIE):
     IE_DESC = 'YouTube.com search URLs'
     IE_NAME = 'youtube:search_url'
     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
@@ -3294,25 +3330,7 @@ class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
         query = compat_urllib_parse_unquote_plus(mobj.group('query'))
         webpage = self._download_webpage(url, query)
         return self.playlist_result(self._process_page(webpage), playlist_title=query)
-
-
-class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
-    IE_DESC = 'YouTube.com (multi-season) shows'
-    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
-    IE_NAME = 'youtube:show'
-    _TESTS = [{
-        'url': 'https://www.youtube.com/show/airdisasters',
-        'playlist_mincount': 5,
-        'info_dict': {
-            'id': 'airdisasters',
-            'title': 'Air Disasters',
-        }
-    }]
-
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-        return super(YoutubeShowIE, self)._real_extract(
-            'https://www.youtube.com/show/%s/playlists' % playlist_id)
+"""
 
 
 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
@@ -3369,37 +3387,22 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
             self._entries(page), playlist_title=self._PLAYLIST_TITLE)
 
 
-class YoutubeWatchLaterIE(YoutubePlaylistIE):
+class YoutubeWatchLaterIE(InfoExtractor):
     IE_NAME = 'youtube:watchlater'
     IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
     _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
 
     _TESTS = [{
-        'url': 'https://www.youtube.com/playlist?list=WL',
+        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
         'only_matching': True,
     }, {
-        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
+        'url': 'https://www.youtube.com/feed/watch_later',
         'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        _, video = self._check_download_just_video(url, 'WL')
-        if video:
-            return video
-        _, playlist = self._extract_playlist('WL')
-        return playlist
-
-
-class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
-    IE_NAME = 'youtube:favorites'
-    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
-    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
-    _LOGIN_REQUIRED = True
-
-    def _real_extract(self, url):
-        webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
-        playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
-        return self.url_result(playlist_id, 'YoutubePlaylist')
+        return self.url_result(
+            'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
 
 
 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
author	Sergey M․ <[email protected]>	2020-11-12 06:16:37 +0700
committer	Sergey M․ <[email protected]>	2020-11-12 06:16:37 +0700
commit	2de2ca6659a18b6f5ab76565e00491153ae47276 (patch)
tree	979c14424e00ee0ce6c2afc21eea3cd7939959b4
parent	051071203ccedcaf6d0de0f0d83af593c78673e6 (diff)
download	youtube-dl-2de2ca6659a18b6f5ab76565e00491153ae47276.tar.gz youtube-dl-2de2ca6659a18b6f5ab76565e00491153ae47276.zip