diff options
author | df <[email protected]> | 2021-10-18 15:54:26 +0100 |
---|---|---|
committer | dirkf <[email protected]> | 2022-02-05 03:02:56 +0000 |
commit | ddc080a562cce984ac4a86969f511b1ae59421bf (patch) | |
tree | 0ce73abc4291e835ce7854e2e2ca37d42b50e920 | |
parent | 16a3fe2ba6b4c86e60bca930253c81c8efdd676b (diff) | |
download | youtube-dl-ddc080a562cce984ac4a86969f511b1ae59421bf.tar.gz youtube-dl-ddc080a562cce984ac4a86969f511b1ae59421bf.zip |
Add ArteTVCategoryIE to support category playlists
-rw-r--r-- | youtube_dl/extractor/arte.py | 47 | ||||
-rw-r--r-- | youtube_dl/extractor/extractors.py | 1 |
2 files changed, 48 insertions, 0 deletions
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 03abdbfaf..5bfe57b10 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -12,6 +12,7 @@ from ..utils import ( ExtractorError, int_or_none, qualities, + strip_or_none, try_get, unified_strdate, url_or_none, @@ -252,3 +253,49 @@ class ArteTVPlaylistIE(ArteTVBaseIE): title = collection.get('title') description = collection.get('shortDescription') or collection.get('teaserText') return self.playlist_result(entries, playlist_id, title, description) + + +class ArteTVCategoryIE(ArteTVBaseIE): + _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES + _TESTS = [{ + 'url': 'https://www.arte.tv/en/videos/politics-and-society/', + 'info_dict': { + 'id': 'politics-and-society', + 'title': 'Politics and society', + 'description': 'Investigative documentary series, geopolitical analysis, and international commentary', + }, + 'playlist_mincount': 13, + }, + ] + + @classmethod + def suitable(cls, url): + return ( + not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, )) + and super(ArteTVCategoryIE, cls).suitable(url)) + + def _real_extract(self, url): + lang, playlist_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, playlist_id) + + items = [] + for video in re.finditer( + r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang, + webpage): + video = video.group('url') + if video == url: + continue + if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )): + items.append(video) + + if items: + title = (self._og_search_title(webpage, default=None) + or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title>', default=None)) + title = strip_or_none(title.rsplit('|', 1)[0]) or self._generic_title(url) + + result = self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title) + if result: + description = self._og_search_description(webpage, default=None) + if description: + result['description'] = description + return result diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e70daf2b1..50b7cb4a0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -71,6 +71,7 @@ from .arte import ( ArteTVIE, ArteTVEmbedIE, ArteTVPlaylistIE, + ArteTVCategoryIE, ) from .arnes import ArnesIE from .asiancrush import ( |