diff options
author | Sergey M․ <[email protected]> | 2019-01-27 04:14:54 +0700 |
---|---|---|
committer | Sergey M․ <[email protected]> | 2019-01-27 04:14:54 +0700 |
commit | 845333acf6280761d19f91b3e018c418d922a0de (patch) | |
tree | 9c6336a5ea6a342605cc1b3697419df67331f8bf /youtube_dl/extractor/wakanim.py | |
parent | 252abb1e8b881aa9d3942c436711ac33235b37cd (diff) | |
download | youtube-dl-845333acf6280761d19f91b3e018c418d922a0de.tar.gz youtube-dl-845333acf6280761d19f91b3e018c418d922a0de.zip |
[wakanim] Add extractor (closes #14374)
Diffstat (limited to 'youtube_dl/extractor/wakanim.py')
-rw-r--r-- | youtube_dl/extractor/wakanim.py | 55 |
1 files changed, 55 insertions, 0 deletions
# coding: utf-8
# Contents of youtube_dl/extractor/wakanim.py, reflowed from the single-line
# "new file" diff (55 added lines) into regular Python source.
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    merge_dicts,
    urljoin,
)


class WakanimIE(InfoExtractor):
    """Extractor for wakanim.tv catalogue episode pages.

    ``_VALID_URL`` captures the numeric episode id that follows
    ``/v2/catalogue/episode/``; the path segment before ``/v2/`` may be
    anything that does not contain a slash.
    """

    _VALID_URL = r'https://(?:www\.)?wakanim\.tv/[^/]+/v2/catalogue/episode/(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/2997/the-asterisk-war-omu-staffel-1-episode-02-omu',
        'info_dict': {
            'id': '2997',
            'ext': 'mp4',
            'title': 'Episode 02',
            'description': 'md5:2927701ea2f7e901de8bfa8d39b2852d',
            'series': 'The Asterisk War (OmU.)',
            'season_number': 1,
            'episode': 'Episode 02',
            'episode_number': 2,
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the episode page at *url*.

        Steps, in order: download the page, locate the m3u8 reference,
        extract the HLS formats, look for JSON-LD metadata (falling back to
        an empty dict), and search the markup for a title.  The JSON-LD dict
        and the page-derived dict are then combined with ``merge_dicts``,
        with the JSON-LD dict passed first.
        """
        episode_id = self._match_id(url)
        page = self._download_webpage(url, episode_id)

        # The manifest location appears in the page as a quoted ``file: ...``
        # value.  No default is given for this search, unlike the title
        # search below.
        manifest_ref = self._search_regex(
            r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
            page, 'm3u8 url', group='url')
        # Resolve against the page URL so a relative reference still works.
        manifest_url = urljoin(url, manifest_ref)

        hls_formats = self._extract_m3u8_formats(
            manifest_url, episode_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')

        json_ld_info = self._search_json_ld(page, episode_id, default={})

        # Two candidate locations for the title: the ``title`` attribute of
        # the ``episode_h1`` heading, then the text of the ``episode_title``
        # span.  ``default=None`` keeps a missing title from raising here.
        title_patterns = (
            r'<h1[^>]+\bclass=["\']episode_h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
            r'<span[^>]+\bclass=["\']episode_title["\'][^>]*>(?P<title>[^<]+)',
        )
        page_title = self._search_regex(
            title_patterns, page, 'title', default=None, group='title')

        page_info = {
            'id': episode_id,
            'title': page_title,
            'formats': hls_formats,
        }
        # NOTE(review): presumably merge_dicts lets values from the first
        # dict win over later ones - confirm against ..utils.
        return merge_dicts(json_ld_info, page_info)