about | summary | refs | log | tree | commit | diff | homepage
path: root/youtube_dl/extractor/wsj.py
diff options
context:
space:
mode:
author Filippo Valsorda <[email protected]> 2018-01-07 15:03:28 +0100
committer GitHub <[email protected]> 2018-01-07 15:03:28 +0100
commit 97bc05116e15df3cf1b7b7216c60fad7ee0acc9f (patch)
tree 453059413184d22a44241d251a82196c5d2461eb /youtube_dl/extractor/wsj.py
parent 7608a91ee7b40c74a35c30b44731606382702592 (diff)
parent 0a5b1295b7c1aa6395b65ee137087c540b37b32b (diff)
download youtube-dl-97bc05116e15df3cf1b7b7216c60fad7ee0acc9f.tar.gz
download youtube-dl-97bc05116e15df3cf1b7b7216c60fad7ee0acc9f.zip
Merge branch 'master' into totalwebcasting (totalwebcasting)
Diffstat (limited to 'youtube_dl/extractor/wsj.py')
-rw-r--r-- youtube_dl/extractor/wsj.py 58
1 files changed, 46 insertions, 12 deletions
diff --git a/youtube_dl/extractor/wsj.py b/youtube_dl/extractor/wsj.py
index deb7483ae..67236f377 100644
--- a/youtube_dl/extractor/wsj.py
+++ b/youtube_dl/extractor/wsj.py
@@ -10,12 +10,14 @@ from ..utils import (
class WSJIE(InfoExtractor):
- _VALID_URL = r'''(?x)https?://
- (?:
- video-api\.wsj\.com/api-video/player/iframe\.html\?guid=|
- (?:www\.)?wsj\.com/video/[^/]+/
- )
- (?P<id>[a-zA-Z0-9-]+)'''
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
+ https?://(?:www\.)?(?:wsj|barrons)\.com/video/(?:[^/]+/)+|
+ wsj:
+ )
+ (?P<id>[a-fA-F0-9-]{36})
+ '''
IE_DESC = 'Wall Street Journal'
_TESTS = [{
'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
@@ -33,17 +35,28 @@ class WSJIE(InfoExtractor):
}, {
'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html',
'only_matching': True,
+ }, {
+ 'url': 'http://www.barrons.com/video/capitalism-deserves-more-respect-from-millennials/F301217E-6F46-43AE-B8D2-B7180D642EE9.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.wsj.com/video/series/a-brief-history-of/the-modern-cell-carrier-how-we-got-here/980E2187-401D-48A1-B82B-1486CEE06CB9',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- api_url = (
- 'http://video-api.wsj.com/api-video/find_all_videos.asp?'
- 'type=guid&count=1&query=%s&fields=type,hls,videoMP4List,'
- 'thumbnailList,author,description,name,duration,videoURL,'
- 'titletag,formattedCreationDate,keywords,editor' % video_id)
- info = self._download_json(api_url, video_id)['items'][0]
+ info = self._download_json(
+ 'http://video-api.wsj.com/api-video/find_all_videos.asp', video_id,
+ query={
+ 'type': 'guid',
+ 'count': 1,
+ 'query': video_id,
+ 'fields': ','.join((
+ 'type', 'hls', 'videoMP4List', 'thumbnailList', 'author',
+ 'description', 'name', 'duration', 'videoURL', 'titletag',
+ 'formattedCreationDate', 'keywords', 'editor')),
+ })['items'][0]
title = info.get('name', info.get('titletag'))
formats = []
@@ -87,3 +100,24 @@ class WSJIE(InfoExtractor):
'title': title,
'categories': info.get('keywords'),
}
+
+
+class WSJArticleIE(InfoExtractor):
+ _VALID_URL = r'(?i)https?://(?:www\.)?wsj\.com/articles/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.wsj.com/articles/dont-like-china-no-pandas-for-you-1490366939?',
+ 'info_dict': {
+ 'id': '4B13FA62-1D8C-45DB-8EA1-4105CB20B362',
+ 'ext': 'mp4',
+ 'upload_date': '20170221',
+ 'uploader_id': 'ralcaraz',
+ 'title': 'Bao Bao the Panda Leaves for China',
+ }
+ }
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ webpage = self._download_webpage(url, article_id)
+ video_id = self._search_regex(
+ r'data-src=["\']([a-fA-F0-9-]{36})', webpage, 'video id')
+ return self.url_result('wsj:%s' % video_id, WSJIE.ie_key(), video_id)