summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Sergey M․ <[email protected]> 2017-04-08 19:41:14 +0700
committer Sergey M․ <[email protected]> 2017-04-08 19:41:14 +0700
commit e18f1da97a8840522b0cefac3af0995751733d67 (patch)
tree fc86468b600dc38277455212a07d6c12150e0c40
parent 78280352ca8e4af4e82fc92228200aef3f06ce53 (diff)
download youtube-dl-e18f1da97a8840522b0cefac3af0995751733d67.tar.gz
youtube-dl-e18f1da97a8840522b0cefac3af0995751733d67.zip
[ceskatelevize] Improve extraction and remove URL replacement hacks
-rw-r--r-- youtube_dl/extractor/ceskatelevize.py 61
1 file changed, 24 insertions, 37 deletions
diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py
index dd2529a6d..0daee313f 100644
--- a/youtube_dl/extractor/ceskatelevize.py
+++ b/youtube_dl/extractor/ceskatelevize.py
@@ -18,7 +18,7 @@ from ..utils import (
class CeskaTelevizeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
+ _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
_TESTS = [{
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
'info_dict': {
@@ -62,40 +62,12 @@ class CeskaTelevizeIE(InfoExtractor):
},
'skip': 'Georestricted to Czech Republic',
}, {
- # video with 18+ caution trailer
- 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
- 'info_dict': {
- 'id': '215562210900007-bogotart',
- 'title': 'Queer: Bogotart',
- 'description': 'Alternativní průvodce současným queer světem',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': '61924494876844842',
- 'ext': 'mp4',
- 'title': 'Queer: Bogotart (Varování 18+)',
- 'duration': 10.2,
- },
- }, {
- 'info_dict': {
- 'id': '61924494877068022',
- 'ext': 'mp4',
- 'title': 'Queer: Bogotart (Queer)',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 1558.3,
- },
- }],
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
+ 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')
-
- mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('id')
+ playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
@@ -103,13 +75,28 @@ class CeskaTelevizeIE(InfoExtractor):
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
- typ = self._html_search_regex(
- r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
- episode_id = self._html_search_regex(
- r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')
+ type_ = None
+ episode_id = None
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
+ default='{}'), playlist_id)
+ if playlist:
+ type_ = playlist.get('type')
+ episode_id = playlist.get('id')
+
+ if not type_:
+ type_ = self._html_search_regex(
+ r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
+ webpage, 'type')
+ if not episode_id:
+ episode_id = self._html_search_regex(
+ r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
+ webpage, 'episode_id')
data = {
- 'playlist[0][type]': typ,
+ 'playlist[0][type]': type_,
'playlist[0][id]': episode_id,
'requestUrl': compat_urllib_parse_urlparse(url).path,
'requestSource': 'iVysilani',