about | summary | refs | log | tree | commit | diff | homepage
path: root/youtube_dl/extractor/redtube.py
diff options
context:
space:
mode:
author: Sergey M․ <[email protected]> 2018-02-02 22:32:53 +0700
committer: Sergey M․ <[email protected]> 2018-02-02 22:32:53 +0700
commit: 1367c798e3fd9ab29ebfa2ad62b90af6b793d653 (patch)
tree: bcf6ee99d9bdde99c0fe042c61ff2dd169b8cbd3 /youtube_dl/extractor/redtube.py
parent: 9a340af37ec595783f3c87a9347b4f8ea6713344 (diff)
download: youtube-dl-1367c798e3fd9ab29ebfa2ad62b90af6b793d653.tar.gz
youtube-dl-1367c798e3fd9ab29ebfa2ad62b90af6b793d653.zip
[redtube] Fix metadata extraction (closes #15472)
Diffstat (limited to 'youtube_dl/extractor/redtube.py')
-rw-r--r-- youtube_dl/extractor/redtube.py 12
1 files changed, 7 insertions, 5 deletions
diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py
index f70a75256..843e45d36 100644
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -46,9 +46,10 @@ class RedTubeIE(InfoExtractor):
raise ExtractorError('Video %s has been removed' % video_id, expected=True)
title = self._html_search_regex(
- (r'<h1 class="videoTitle[^"]*">(?P<title>.+?)</h1>',
- r'videoTitle\s*:\s*(["\'])(?P<title>)\1'),
- webpage, 'title', group='title')
+ (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
+ r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
+ webpage, 'title', group='title',
+ default=None) or self._og_search_title(webpage)
formats = []
sources = self._parse_json(
@@ -87,12 +88,13 @@ class RedTubeIE(InfoExtractor):
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._search_regex(
- r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<',
+ r'<span[^>]+>ADDED ([^<]+)<',
webpage, 'upload date', fatal=False))
duration = int_or_none(self._search_regex(
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
view_count = str_to_int(self._search_regex(
- r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)',
+ (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)',
+ r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)'),
webpage, 'view count', fatal=False))
# No self-labeling, but they describe themselves as