diff options
author | dirkf <[email protected]> | 2022-02-04 03:50:32 +0000 |
---|---|---|
committer | dirkf <[email protected]> | 2022-02-04 03:50:32 +0000 |
commit | 5add3f4373287e6346ca3551239edab549284db3 (patch) | |
tree | bcd2bcab580b40150fdfde8c4d04703291a0d6e9 | |
parent | 78ce962f4fe020994c216dd2671546fbe58a5c67 (diff) | |
parent | a3373da70c97d356bd4927eff403abd261dd8f9f (diff) | |
download | youtube-dl-5add3f4373287e6346ca3551239edab549284db3.tar.gz youtube-dl-5add3f4373287e6346ca3551239edab549284db3.zip |
Merge branch 'pukkandan-yt-searchurl' into yt-dl-master
Closes #27749
-rw-r--r-- | test/test_all_urls.py | 6 | ||||
-rw-r--r-- | youtube_dl/extractor/extractors.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 145 |
3 files changed, 74 insertions, 79 deletions
diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 365b66bad..26df356b4 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -66,9 +66,9 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab']) self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab']) - # def test_youtube_search_matching(self): - # self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) - # self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) + def test_youtube_search_matching(self): + self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) + self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) def test_facebook_matching(self): self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6e8fc3961..4e9954c6a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1606,7 +1606,7 @@ from .youtube import ( YoutubeRecommendedIE, YoutubeSearchDateIE, YoutubeSearchIE, - #YoutubeSearchURLIE, + YoutubeSearchURLIE, YoutubeSubscriptionsIE, YoutubeTruncatedIDIE, YoutubeTruncatedURLIE, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8e1254f19..3ab60960a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -342,6 +342,60 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'uploader': uploader, } + def _search_results(self, query, params): + data = { + 'context': { + 'client': { + 'clientName': 'WEB', + 'clientVersion': '2.20201021.03.00', + } + }, + 'query': query, + } + if params: + data['params'] = params + for page_num in itertools.count(1): + search = self._download_json( + 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', + video_id='query "%s"' % query, + note='Downloading page %s' % page_num, + errnote='Unable to download API page', fatal=False, + data=json.dumps(data).encode('utf8'), + headers={'content-type': 'application/json'}) + if not search: + break + slr_contents = try_get( + search, + (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'], + lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']), + list) + if not slr_contents: + break + for slr_content in slr_contents: + isr_contents = try_get( + slr_content, + lambda x: x['itemSectionRenderer']['contents'], + list) + if not isr_contents: + continue + for content in isr_contents: + if not isinstance(content, dict): + continue + video = content.get('videoRenderer') + if not isinstance(video, dict): + continue + video_id = video.get('videoId') + if not video_id: + continue + yield self._extract_video(video) + token = try_get( + slr_contents, + lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'], + compat_str) + if not token: + break + data['continuation'] = token + class YoutubeIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube.com' @@ -2135,7 +2189,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): (?: (?:channel|c|user|feed|hashtag)/| (?:playlist|watch)\?.*?\blist=| - (?!(?:watch|embed|v|e)\b) + (?!(?:watch|embed|v|e|results)\b) ) (?P<id>[^/?\#&]+) ''' @@ -3148,93 +3202,35 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor): IE_DESC = 'YouTube.com searches' - # there doesn't appear to be a real limit, for example if you search for - # 'python' you get more than 8.000.000 results - _MAX_RESULTS = float('inf') IE_NAME = 'youtube:search' _SEARCH_KEY = 'ytsearch' - _SEARCH_PARAMS = None + _SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only + _MAX_RESULTS = float('inf') _TESTS = [] - def _entries(self, query, n): - data = { - 'context': { - 'client': { - 'clientName': 'WEB', - 'clientVersion': '2.20201021.03.00', - } - }, - 'query': query, - } - if self._SEARCH_PARAMS: - data['params'] = self._SEARCH_PARAMS - total = 0 - for page_num in itertools.count(1): - search = self._download_json( - 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', - video_id='query "%s"' % query, - note='Downloading page %s' % page_num, - errnote='Unable to download API page', fatal=False, - data=json.dumps(data).encode('utf8'), - headers={'content-type': 'application/json'}) - if not search: - break - slr_contents = try_get( - search, - (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'], - lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']), - list) - if not slr_contents: - break - for slr_content in slr_contents: - isr_contents = try_get( - slr_content, - lambda x: x['itemSectionRenderer']['contents'], - list) - if not isr_contents: - continue - for content in isr_contents: - if not isinstance(content, dict): - continue - video = content.get('videoRenderer') - if not isinstance(video, dict): - continue - video_id = video.get('videoId') - if not video_id: - continue - yield self._extract_video(video) - total += 1 - if total == n: - return - token = try_get( - slr_contents, - lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'], - compat_str) - if not token: - break - data['continuation'] = token - def _get_n_results(self, query, n): """Get a specified number of results for a query""" - return self.playlist_result(self._entries(query, n), query) + entries = itertools.islice(self._search_results(query, self._SEARCH_PARAMS), 0, None if n == float('inf') else n) + return self.playlist_result(entries, query, query) class YoutubeSearchDateIE(YoutubeSearchIE): IE_NAME = YoutubeSearchIE.IE_NAME + ':date' _SEARCH_KEY = 'ytsearchdate' IE_DESC = 'YouTube.com searches, newest videos first' - _SEARCH_PARAMS = 'CAI%3D' + _SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date + _TESTS = [] -r""" -class YoutubeSearchURLIE(YoutubeSearchIE): - IE_DESC = 'YouTube.com search URLs' - IE_NAME = 'youtube:search_url' - _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)' +class YoutubeSearchURLIE(YoutubeBaseInfoExtractor): + IE_DESC = 'YouTube search URLs with sorting and filter support' + IE_NAME = YoutubeSearchIE.IE_NAME + '_url' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)' _TESTS = [{ 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', 'playlist_mincount': 5, 'info_dict': { + 'id': 'youtube-dl test video', 'title': 'youtube-dl test video', } }, { @@ -3243,11 +3239,10 @@ class YoutubeSearchURLIE(YoutubeSearchIE): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - query = compat_urllib_parse_unquote_plus(mobj.group('query')) - webpage = self._download_webpage(url, query) - return self.playlist_result(self._process_page(webpage), playlist_title=query) -""" + qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + query = (qs.get('search_query') or qs.get('q'))[0] + params = qs.get('sp', ('',))[0] + return self.playlist_result(self._search_results(query, params), query, query) class YoutubeFeedsInfoExtractor(YoutubeTabIE): |