aboutsummaryrefslogtreecommitdiffhomepage
path: root/youtube_dl/extractor/elpais.py
diff options
context:
space:
mode:
authorSergey M․ <[email protected]>2016-03-05 16:43:29 +0600
committerSergey M․ <[email protected]>2016-03-05 16:43:29 +0600
commitd1b29d13421fd91093fdf3699aea17d8ba4b636b (patch)
treeb6753fb559928b1521c8d25c5abbe0c78edfbf6a /youtube_dl/extractor/elpais.py
parent845817aadfa5f811afe5834c0b34f50f6c3a2a67 (diff)
downloadyoutube-dl-d1b29d13421fd91093fdf3699aea17d8ba4b636b.tar.gz
youtube-dl-d1b29d13421fd91093fdf3699aea17d8ba4b636b.zip
[elpais] Add support for alternative layout (Closes #8744)
Diffstat (limited to 'youtube_dl/extractor/elpais.py')
-rw-r--r--youtube_dl/extractor/elpais.py31
1 files changed, 21 insertions, 10 deletions
diff --git a/youtube_dl/extractor/elpais.py b/youtube_dl/extractor/elpais.py
index 00a69e631..8c725a4e6 100644
--- a/youtube_dl/extractor/elpais.py
+++ b/youtube_dl/extractor/elpais.py
@@ -9,7 +9,7 @@ class ElPaisIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
IE_DESC = 'El País'
- _TEST = {
+ _TESTS = [{
'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
'md5': '98406f301f19562170ec071b83433d55',
'info_dict': {
@@ -19,30 +19,41 @@ class ElPaisIE(InfoExtractor):
'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
'upload_date': '20140206',
}
- }
+ }, {
+ 'url': 'http://elcomidista.elpais.com/elcomidista/2016/02/24/articulo/1456340311_668921.html#?id_externo_nwl=newsletter_diaria20160303t',
+ 'md5': '3bd5b09509f3519d7d9e763179b013de',
+ 'info_dict': {
+ 'id': '1456340311_668921',
+ 'ext': 'mp4',
+ 'title': 'Cómo hacer el mejor café con cafetera italiana',
+ 'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.',
+ 'upload_date': '20160303',
+ }
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
prefix = self._html_search_regex(
- r'var url_cache = "([^"]+)";', webpage, 'URL prefix')
+ r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')
video_suffix = self._search_regex(
- r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL')
+ r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
video_url = prefix + video_suffix
thumbnail_suffix = self._search_regex(
- r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL',
- fatal=False)
+ r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
+ webpage, 'thumbnail URL', fatal=False)
thumbnail = (
None if thumbnail_suffix is None
else prefix + thumbnail_suffix)
title = self._html_search_regex(
- '<h2 class="entry-header entry-title.*?>(.*?)</h2>',
+ (r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title',
+ r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'),
webpage, 'title')
- date_str = self._search_regex(
+ upload_date = unified_strdate(self._search_regex(
r'<p class="date-header date-int updated"\s+title="([^"]+)">',
- webpage, 'upload date', fatal=False)
- upload_date = (None if date_str is None else unified_strdate(date_str))
+ webpage, 'upload date', default=None) or self._html_search_meta(
+ 'datePublished', webpage, 'timestamp'))
return {
'id': video_id,