summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Philipp Hagemeister <[email protected]>, 2014-11-21 22:36:24 +0100
committer: Philipp Hagemeister <[email protected]>, 2014-11-21 22:36:24 +0100
commit: 6127693ed9b7157bff55dd495a1da66e0f61c4d1 (patch)
tree: 971fd2010703f752b86cb6a101697fd6e91ff94a
parent: 71069d215791a85dc627ec3dcfa0a87f3d5643eb (diff)
download: youtube-dl-6127693ed9b7157bff55dd495a1da66e0f61c4d1.tar.gz
youtube-dl-6127693ed9b7157bff55dd495a1da66e0f61c4d1.zip
[folketinget] Add extractor (Fixes #4262)
-rw-r--r-- youtube_dl/extractor/__init__.py | 1
-rw-r--r-- youtube_dl/extractor/folketinget.py | 75
2 files changed, 76 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index f45ce05ab..70708e41b 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -115,6 +115,7 @@ from .fktv import (
FKTVPosteckeIE,
)
from .flickr import FlickrIE
+from .folketinget import FolketingetIE
from .fourtube import FourTubeIE
from .franceculture import FranceCultureIE
from .franceinter import FranceInterIE
diff --git a/youtube_dl/extractor/folketinget.py b/youtube_dl/extractor/folketinget.py
new file mode 100644
index 000000000..68e2db943
--- /dev/null
+++ b/youtube_dl/extractor/folketinget.py
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ xpath_text,
+)
+
+
+class FolketingetIE(InfoExtractor):  # Extractor for ft.dk (Danish parliament) web TV video pages
+ IE_DESC = 'Folketinget (ft.dk; Danish parliament)'
+ _VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'  # "id" group: the digits right before ".aspx"
+ _TEST = {  # sample URL plus the info_dict fields expected for it
+ 'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
+ 'info_dict': {
+ 'id': '1165642',
+ 'ext': 'mp4',
+ 'title': 'Åbent samråd i Erhvervsudvalget',
+ 'description': 'Åbent samråd med erhvervs- og vækstministeren om regeringens politik på teleområdet',
+ 'view_count': int,
+ 'width': 768,
+ 'height': 432,
+ 'tbr': 928000,
+ 'timestamp': 1416493800,
+ 'upload_date': '20141120',
+ 'duration': 3960,
+ },
+ 'params': {
+ 'skip_download': 'rtmpdump required',
+ }
+ }
+
+ def _real_extract(self, url):  # combines data scraped from the page HTML with the player's XML document
+ video_id = self._match_id(url)  # presumably the "id" group of _VALID_URL -- confirm in InfoExtractor
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(webpage)
+ description = self._html_search_regex(
+ r'(?s)<div class="video-item-agenda"[^>]*>(.*?)<',  # captures the agenda div's text up to the next "<"
+ webpage, 'description', fatal=False)  # fatal=False: presumably a missing div is tolerated -- confirm
+
+ player_params = compat_parse_qs(self._search_regex(  # query string of the Flash player's embed URL
+ r'<embed src="http://ft\.arkena\.tv/flash/ftplayer\.swf\?([^"]+)"',  # NOTE(review): only matches an http:// embed URL
+ webpage, 'player params'))
+ xml_url = player_params['xml'][0]  # first value of the "xml" query parameter
+ doc = self._download_xml(xml_url, video_id)
+
+ timestamp = parse_iso8601(xpath_text(doc, './/date'))  # this and the fields below are read from elements of the XML document
+ duration = parse_duration(xpath_text(doc, './/duration'))
+ width = int_or_none(xpath_text(doc, './/width'))
+ height = int_or_none(xpath_text(doc, './/height'))
+ view_count = int_or_none(xpath_text(doc, './/views'))
+
+ formats = [{  # one entry per .//streams/stream element
+ 'format_id': n.attrib['bitrate'],  # the bitrate attribute doubles as the format id
+ 'url': xpath_text(n, './url', fatal=True),
+ 'tbr': int_or_none(n.attrib['bitrate']),  # NOTE(review): _TEST expects 928000, which looks like bit/s -- confirm against the unit tbr is documented in
+ } for n in doc.findall('.//streams/stream')]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'width': width,  # width/height are attached to the video as a whole, not to individual formats
+ 'height': height,
+ 'duration': duration,
+ 'view_count': view_count,
+ }