From c4251b9aaa9a69e7f7b55197b3907e52b17150d4 Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Fri, 13 Jan 2017 10:08:51 +0100
Subject: [common] add possibility to customize akamai manifest host

---
 youtube_dl/extractor/common.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 6fa7c334e..dce8c7d0d 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1967,10 +1967,13 @@ class InfoExtractor(object):
                 entries.append(media_info)
         return entries
 
-    def _extract_akamai_formats(self, manifest_url, video_id):
+    def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
         formats = []
         hdcore_sign = 'hdcore=3.7.0'
-        f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+        f4m_url = re.sub(r'(https?://[^/+])/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+        hds_host = hosts.get('hds')
+        if hds_host:
+            f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
         if 'hdcore=' not in f4m_url:
             f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
         f4m_formats = self._extract_f4m_formats(
@@ -1978,7 +1981,10 @@ class InfoExtractor(object):
         for entry in f4m_formats:
             entry.update({'extra_param_to_segment_url': hdcore_sign})
         formats.extend(f4m_formats)
-        m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
+        m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
+        hls_host = hosts.get('hls')
+        if hls_host:
+            m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
         formats.extend(self._extract_m3u8_formats(
             m3u8_url, video_id, 'mp4', 'm3u8_native',
             m3u8_id='hls', fatal=False))
-- 
cgit v1.2.3


From d04621daf451d601dba80dc0f2baa29e404e4ca6 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 29 Jan 2017 05:36:53 +0700
Subject: [extractor/common] Fix duration per dash segment (closes #11868)

---
 youtube_dl/extractor/common.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index dce8c7d0d..a3048fb59 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1751,14 +1751,16 @@ class InfoExtractor(object):
                             # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
                             # or any YouTube dashsegments video
                             fragments = []
-                            s_num = 0
-                            for segment_url in representation_ms_info['segment_urls']:
-                                s = representation_ms_info['s'][s_num]
+                            segment_index = 0
+                            timescale = representation_ms_info['timescale']
+                            for s in representation_ms_info['s']:
+                                duration = float_or_none(s['d'], timescale)
                                 for r in range(s.get('r', 0) + 1):
                                     fragments.append({
-                                        'url': segment_url,
-                                        'duration': float_or_none(s['d'], representation_ms_info['timescale']),
+                                        'url': representation_ms_info['segment_urls'][segment_index],
+                                        'duration': duration,
                                     })
+                                    segment_index += 1
                             representation_ms_info['fragments'] = fragments
                         # NB: MPD manifest may contain direct URLs to unfragmented media.
                         # No fragments key is present in this case.
-- 
cgit v1.2.3


From c58c2d63cbde07af66885829b7c3dbcdfbc096dd Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 29 Jan 2017 05:56:43 +0700
Subject: [extractor/common] Document forgotten fragment base and path
 interfaces

---
 youtube_dl/extractor/common.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a3048fb59..fb484b6f2 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -121,9 +121,19 @@ class InfoExtractor(object):
                                  download, lower-case.
                                  "http", "https", "rtsp", "rtmp", "rtmpe",
                                  "m3u8", "m3u8_native" or "http_dash_segments".
-                    * fragments  A list of fragments of the fragmented media,
-                                 with the following entries:
-                                 * "url" (mandatory) - fragment's URL
+                    * fragment_base_url
+                                 Base URL for fragments. Each fragment's path
+                                 value (if present) will be relative to
+                                 this URL.
+                    * fragments  A list of fragments of a fragmented media.
+                                 Each fragment entry must contain either an url
+                                 or a path. If an url is present it should be
+                                 considered by a client. Otherwise both path and
+                                 fragment_base_url must be present. Here is
+                                 the list of all potential fields:
+                                 * "url" - fragment's URL
+                                 * "path" - fragment's path relative to
+                                            fragment_base_url
                                  * "duration" (optional, int or float)
                                  * "filesize" (optional, int)
                     * preference Order number of this format. If this field is
-- 
cgit v1.2.3


From e228616c6e73561f0c6d32d6b681bbba321c06aa Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 29 Jan 2017 06:57:39 +0700
Subject: [extractor/common] Fix initialization template (closes #11605, closes
 #11825)

---
 youtube_dl/extractor/common.py | 48 ++++++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 14 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index fb484b6f2..5a15a9536 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1637,12 +1637,12 @@ class InfoExtractor(object):
                 segment_template = element.find(_add_ns('SegmentTemplate'))
                 if segment_template is not None:
                     extract_common(segment_template)
-                    media_template = segment_template.get('media')
-                    if media_template:
-                        ms_info['media_template'] = media_template
+                    media = segment_template.get('media')
+                    if media:
+                        ms_info['media'] = media
                     initialization = segment_template.get('initialization')
                     if initialization:
-                        ms_info['initialization_url'] = initialization
+                        ms_info['initialization'] = initialization
                     else:
                         extract_Initialization(segment_template)
             return ms_info
@@ -1686,6 +1686,7 @@ class InfoExtractor(object):
                         lang = representation_attrib.get('lang')
                         url_el = representation.find(_add_ns('BaseURL'))
                         filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
+                        bandwidth = int_or_none(representation_attrib.get('bandwidth'))
                         f = {
                             'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
                             'url': base_url,
@@ -1693,7 +1694,7 @@ class InfoExtractor(object):
                             'ext': mimetype2ext(mime_type),
                             'width': int_or_none(representation_attrib.get('width')),
                             'height': int_or_none(representation_attrib.get('height')),
-                            'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
+                            'tbr': int_or_none(bandwidth, 1000),
                             'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
                             'fps': int_or_none(representation_attrib.get('frameRate')),
                             'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
@@ -1702,13 +1703,32 @@ class InfoExtractor(object):
                         }
                         f.update(parse_codecs(representation_attrib.get('codecs')))
                         representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
-                        if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
 
-                            media_template = representation_ms_info['media_template']
-                            media_template = media_template.replace('$RepresentationID$', representation_id)
-                            media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
-                            media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', r'%(\1)\2', media_template)
-                            media_template.replace('$$', '$')
+                        def prepare_template(template_name, identifiers):
+                            t = representation_ms_info[template_name]
+                            t = t.replace('$RepresentationID$', representation_id)
+                            t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
+                            t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
+                            t.replace('$$', '$')
+                            return t
+
+                        # @initialization is a regular template like @media one
+                        # so it should be handled just the same way (see
+                        # https://github.com/rg3/youtube-dl/issues/11605)
+                        if 'initialization' in representation_ms_info:
+                            initialization_template = prepare_template(
+                                'initialization',
+                                # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
+                                # $Time$ shall not be included for @initialization thus
+                                # only $Bandwidth$ remains
+                                ('Bandwidth', ))
+                            representation_ms_info['initialization_url'] = initialization_template % {
+                                'Bandwidth': bandwidth,
+                            }
+
+                        if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
+
+                            media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
 
                             # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
                             # can't be used at the same time
@@ -1720,7 +1740,7 @@ class InfoExtractor(object):
                                 representation_ms_info['fragments'] = [{
                                     'url': media_template % {
                                         'Number': segment_number,
-                                        'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
+                                        'Bandwidth': bandwidth,
                                     },
                                     'duration': segment_duration,
                                 } for segment_number in range(
@@ -1738,7 +1758,7 @@ class InfoExtractor(object):
                                 def add_segment_url():
                                     segment_url = media_template % {
                                         'Time': segment_time,
-                                        'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
+                                        'Bandwidth': bandwidth,
                                         'Number': segment_number,
                                     }
                                     representation_ms_info['fragments'].append({
@@ -1780,7 +1800,7 @@ class InfoExtractor(object):
                                 'protocol': 'http_dash_segments',
                             })
                             if 'initialization_url' in representation_ms_info:
-                                initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
+                                initialization_url = representation_ms_info['initialization_url']
                                 if not f.get('url'):
                                     f['url'] = initialization_url
                                 f['fragments'].append({'url': initialization_url})
-- 
cgit v1.2.3


From 45024183aea169dc898902388f782485de02cbac Mon Sep 17 00:00:00 2001
From: Mattias Wadman <mattias.wadman@gmail.com>
Date: Fri, 3 Feb 2017 05:10:13 +0100
Subject: [infoq] Add audio only format if available (#11565)

* [infoq] Add audio only format if available

Refactor cookie code into a function.
Renamed formats to http_video, http_audio, rtmp_video
Renamed extract functions to video instead of videos as they return
one or no video.

* [infoq] Rename to _extract_cookies as it more than one

* [infoq] Remove redundant determine_ext

* [infoq] Add comment about hardcoded URL

* [infoq] Use _hidden_inputs instead of messy regex

* [infoq] Probe if audio URL is valid

Make it possible to pass headers to _is_valid_url

* [infoq] Add audio only test
---
 youtube_dl/extractor/common.py |  4 +--
 youtube_dl/extractor/infoq.py  | 63 +++++++++++++++++++++++++++++++++++-------
 2 files changed, 55 insertions(+), 12 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 5a15a9536..2c8ec1417 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1025,13 +1025,13 @@ class InfoExtractor(object):
                 unique_formats.append(f)
         formats[:] = unique_formats
 
-    def _is_valid_url(self, url, video_id, item='video'):
+    def _is_valid_url(self, url, video_id, item='video', headers={}):
         url = self._proto_relative_url(url, scheme='http:')
         # For now assume non HTTP(S) URLs always valid
         if not (url.startswith('http://') or url.startswith('https://')):
             return True
         try:
-            self._request_webpage(url, video_id, 'Checking %s URL' % item)
+            self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
             return True
         except ExtractorError as e:
             if isinstance(e.cause, compat_urllib_error.URLError):
diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py
index cca0b8a93..9fb71e8ef 100644
--- a/youtube_dl/extractor/infoq.py
+++ b/youtube_dl/extractor/infoq.py
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
 
 import base64
 
-from ..compat import compat_urllib_parse_unquote
+from ..compat import (
+    compat_urllib_parse_unquote,
+    compat_urlparse,
+)
 from ..utils import determine_ext
 from .bokecc import BokeCCBaseIE
 
@@ -33,9 +36,21 @@ class InfoQIE(BokeCCBaseIE):
             'ext': 'flv',
             'description': 'md5:308d981fb28fa42f49f9568322c683ff',
         },
+    }, {
+        'url': 'https://www.infoq.com/presentations/Simple-Made-Easy',
+        'md5': '0e34642d4d9ef44bf86f66f6399672db',
+        'info_dict': {
+            'id': 'Simple-Made-Easy',
+            'title': 'Simple Made Easy',
+            'ext': 'mp3',
+            'description': 'md5:3e0e213a8bbd074796ef89ea35ada25b',
+        },
+        'params': {
+            'format': 'bestaudio',
+        },
     }]
 
-    def _extract_rtmp_videos(self, webpage):
+    def _extract_rtmp_video(self, webpage):
         # The server URL is hardcoded
         video_url = 'rtmpe://video.infoq.com/cfx/st/'
 
@@ -47,28 +62,53 @@ class InfoQIE(BokeCCBaseIE):
         playpath = 'mp4:' + real_id
 
         return [{
-            'format_id': 'rtmp',
+            'format_id': 'rtmp_video',
             'url': video_url,
             'ext': determine_ext(playpath),
             'play_path': playpath,
         }]
 
-    def _extract_http_videos(self, webpage):
-        http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
-
+    def _extract_cookies(self, webpage):
         policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
         signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
         key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
+        return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
+            policy, signature, key_pair_id)
 
+    def _extract_http_video(self, webpage):
+        http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
         return [{
-            'format_id': 'http',
+            'format_id': 'http_video',
             'url': http_video_url,
             'http_headers': {
-                'Cookie': 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
-                    policy, signature, key_pair_id),
+                'Cookie': self._extract_cookies(webpage)
             },
         }]
 
+    def _extract_http_audio(self, webpage, video_id):
+        fields = self._hidden_inputs(webpage)
+        http_audio_url = fields['filename']
+        if http_audio_url is None:
+            return []
+
+        cookies_header = {'Cookie': self._extract_cookies(webpage)}
+
+        # base URL is found in the Location header in the response returned by
+        # GET https://www.infoq.com/mp3download.action?filename=... when logged in.
+        http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
+
+        # audio file seem to be missing some times even if there is a download link
+        # so probe URL to make sure
+        if not self._is_valid_url(http_audio_url, video_id, headers=cookies_header):
+            return []
+
+        return [{
+            'format_id': 'http_audio',
+            'url': http_audio_url,
+            'vcodec': 'none',
+            'http_headers': cookies_header,
+        }]
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
@@ -80,7 +120,10 @@ class InfoQIE(BokeCCBaseIE):
             # for China videos, HTTP video URL exists but always fails with 403
             formats = self._extract_bokecc_formats(webpage, video_id)
         else:
-            formats = self._extract_rtmp_videos(webpage) + self._extract_http_videos(webpage)
+            formats = (
+                self._extract_rtmp_video(webpage) +
+                self._extract_http_video(webpage) +
+                self._extract_http_audio(webpage, video_id))
 
         self._sort_formats(formats)
 
-- 
cgit v1.2.3


From 2aec7256ae3ef877cc57f7d27f0fa171a7f25a98 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Mon, 6 Feb 2017 00:20:30 +0700
Subject: [extractor/common] Speed-up media tags regex (closes #11979)

---
 youtube_dl/extractor/common.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 2c8ec1417..519188622 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1959,7 +1959,12 @@ class InfoExtractor(object):
         media_tags = [(media_tag, media_type, '')
                       for media_tag, media_type
                       in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
-        media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
+        media_tags.extend(re.findall(
+            # We only allow video|audio followed by a whitespace or '>'.
+            # Allowing more characters may end up in significant slow down (see
+            # https://github.com/rg3/youtube-dl/issues/11979, example URL:
+            # http://www.porntrex.com/maps/videositemap.xml).
+            r'(?s)(<(?P<tag>video|audio)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
         for media_tag, media_type, media_content in media_tags:
             media_info = {
                 'formats': [],
-- 
cgit v1.2.3


From 242a14a1f6ea14b8e7de9aa1eddaf938de825e1e Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Tue, 7 Feb 2017 00:22:16 +0700
Subject: [extractor/common] Fix audio only with audio group in m3u8 (closes
 #11995)

---
 youtube_dl/extractor/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 519188622..0b4e2ac20 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1315,8 +1315,8 @@ class InfoExtractor(object):
                         'abr': abr,
                     })
                 f.update(parse_codecs(last_info.get('CODECS')))
-                if audio_in_video_stream.get(last_info.get('AUDIO')) is False:
-                    # TODO: update acodec for for audio only formats with the same GROUP-ID
+                if audio_in_video_stream.get(last_info.get('AUDIO')) is False and f['vcodec'] != 'none':
+                    # TODO: update acodec for audio only formats with the same GROUP-ID
                     f['acodec'] = 'none'
                 formats.append(f)
                 last_info = {}
-- 
cgit v1.2.3


From 08a00eef79c8f8b12211513b241404394ef6120c Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Fri, 10 Feb 2017 16:51:41 +0100
Subject: [extractor/common] skip m3u8 manifests protected with Adobe Flash
 Access

---
 youtube_dl/extractor/common.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 0b4e2ac20..9681453ca 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1208,6 +1208,9 @@ class InfoExtractor(object):
         m3u8_doc, urlh = res
         m3u8_url = urlh.geturl()
 
+        if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
+            return []
+
         formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
 
         format_url = lambda u: (
-- 
cgit v1.2.3


From a4a554a79354981fcab55de8eaab7b95a40bbb48 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Thu, 16 Feb 2017 23:42:36 +0800
Subject: [generic] Try parsing JWPlayer embedded videos (closes #12030)

---
 ChangeLog                                |   6 ++
 youtube_dl/extractor/archiveorg.py       |   4 +-
 youtube_dl/extractor/common.py           | 118 +++++++++++++++++++++++++++
 youtube_dl/extractor/generic.py          |  20 +++++
 youtube_dl/extractor/jwplatform.py       | 132 +------------------------------
 youtube_dl/extractor/ondemandkorea.py    |   4 +-
 youtube_dl/extractor/pornhub.py          |  44 -----------
 youtube_dl/extractor/pornoxo.py          |   4 +-
 youtube_dl/extractor/rentv.py            |   3 +-
 youtube_dl/extractor/rudo.py             |   4 +-
 youtube_dl/extractor/screencastomatic.py |   4 +-
 youtube_dl/extractor/sendtonews.py       |   4 +-
 youtube_dl/extractor/thisav.py           |   4 +-
 youtube_dl/extractor/tvnoe.py            |   4 +-
 youtube_dl/extractor/vidzi.py            |   4 +-
 youtube_dl/extractor/wimp.py             |   4 +-
 16 files changed, 166 insertions(+), 197 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/ChangeLog b/ChangeLog
index 8ef8a8307..4e69b03d0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+version <unreleased>
+
+Extractors
++ [generic] Support complex JWPlayer embedded videos (#12030)
+
+
 version 2017.02.16
 
 Core
diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py
index 486dff82d..e21045bed 100644
--- a/youtube_dl/extractor/archiveorg.py
+++ b/youtube_dl/extractor/archiveorg.py
@@ -1,13 +1,13 @@
 from __future__ import unicode_literals
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     unified_strdate,
     clean_html,
 )
 
 
-class ArchiveOrgIE(JWPlatformBaseIE):
+class ArchiveOrgIE(InfoExtractor):
     IE_NAME = 'archive.org'
     IE_DESC = 'archive.org videos'
     _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 9681453ca..f6ff56eda 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -40,6 +40,7 @@ from ..utils import (
     fix_xml_ampersands,
     float_or_none,
     int_or_none,
+    js_to_json,
     parse_iso8601,
     RegexNotFoundError,
     sanitize_filename,
@@ -2073,6 +2074,123 @@ class InfoExtractor(object):
                     })
         return formats
 
+    @staticmethod
+    def _find_jwplayer_data(webpage):
+        mobj = re.search(
+            r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
+            webpage)
+        if mobj:
+            return mobj.group('options')
+
+    def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
+        jwplayer_data = self._parse_json(
+            self._find_jwplayer_data(webpage), video_id,
+            transform_source=js_to_json)
+        return self._parse_jwplayer_data(
+            jwplayer_data, video_id, *args, **kwargs)
+
+    def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
+                             m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+        # JWPlayer backward compatibility: flattened playlists
+        # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
+        if 'playlist' not in jwplayer_data:
+            jwplayer_data = {'playlist': [jwplayer_data]}
+
+        entries = []
+
+        # JWPlayer backward compatibility: single playlist item
+        # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
+        if not isinstance(jwplayer_data['playlist'], list):
+            jwplayer_data['playlist'] = [jwplayer_data['playlist']]
+
+        for video_data in jwplayer_data['playlist']:
+            # JWPlayer backward compatibility: flattened sources
+            # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
+            if 'sources' not in video_data:
+                video_data['sources'] = [video_data]
+
+            this_video_id = video_id or video_data['mediaid']
+
+            formats = []
+            for source in video_data['sources']:
+                source_url = self._proto_relative_url(source['file'])
+                if base_url:
+                    source_url = compat_urlparse.urljoin(base_url, source_url)
+                source_type = source.get('type') or ''
+                ext = mimetype2ext(source_type) or determine_ext(source_url)
+                if source_type == 'hls' or ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
+                elif ext == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        source_url, this_video_id, mpd_id=mpd_id, fatal=False))
+                # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
+                elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
+                    formats.append({
+                        'url': source_url,
+                        'vcodec': 'none',
+                        'ext': ext,
+                    })
+                else:
+                    height = int_or_none(source.get('height'))
+                    if height is None:
+                        # Often no height is provided but there is a label in
+                        # format like 1080p.
+                        height = int_or_none(self._search_regex(
+                            r'^(\d{3,})[pP]$', source.get('label') or '',
+                            'height', default=None))
+                    a_format = {
+                        'url': source_url,
+                        'width': int_or_none(source.get('width')),
+                        'height': height,
+                        'ext': ext,
+                    }
+                    if source_url.startswith('rtmp'):
+                        a_format['ext'] = 'flv'
+
+                        # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
+                        # of jwplayer.flash.swf
+                        rtmp_url_parts = re.split(
+                            r'((?:mp4|mp3|flv):)', source_url, 1)
+                        if len(rtmp_url_parts) == 3:
+                            rtmp_url, prefix, play_path = rtmp_url_parts
+                            a_format.update({
+                                'url': rtmp_url,
+                                'play_path': prefix + play_path,
+                            })
+                        if rtmp_params:
+                            a_format.update(rtmp_params)
+                    formats.append(a_format)
+            self._sort_formats(formats)
+
+            subtitles = {}
+            tracks = video_data.get('tracks')
+            if tracks and isinstance(tracks, list):
+                for track in tracks:
+                    if track.get('kind') != 'captions':
+                        continue
+                    track_url = urljoin(base_url, track.get('file'))
+                    if not track_url:
+                        continue
+                    subtitles.setdefault(track.get('label') or 'en', []).append({
+                        'url': self._proto_relative_url(track_url)
+                    })
+
+            entries.append({
+                'id': this_video_id,
+                'title': video_data['title'] if require_title else video_data.get('title'),
+                'description': video_data.get('description'),
+                'thumbnail': self._proto_relative_url(video_data.get('image')),
+                'timestamp': int_or_none(video_data.get('pubdate')),
+                'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
+                'subtitles': subtitles,
+                'formats': formats,
+            })
+        if len(entries) == 1:
+            return entries[0]
+        else:
+            return self.playlist_result(entries)
+
     def _live_title(self, name):
         """ Generate the title for a live video """
         now = datetime.datetime.now()
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index a2b0298ec..3db31debe 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -20,6 +20,7 @@ from ..utils import (
     float_or_none,
     HEADRequest,
     is_html,
+    js_to_json,
     orderedSet,
     sanitized_Request,
     smuggle_url,
@@ -961,6 +962,16 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             }
         },
+        # Complex jwplayer
+        {
+            'url': 'http://www.indiedb.com/games/king-machine/videos',
+            'info_dict': {
+                'id': 'videos',
+                'ext': 'mp4',
+                'title': 'king machine trailer 1',
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+        },
         # rtl.nl embed
         {
             'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
@@ -2488,6 +2499,15 @@ class GenericIE(InfoExtractor):
                 self._sort_formats(entry['formats'])
             return self.playlist_result(entries)
 
+        jwplayer_data_str = self._find_jwplayer_data(webpage)
+        if jwplayer_data_str:
+            try:
+                jwplayer_data = self._parse_json(
+                    jwplayer_data_str, video_id, transform_source=js_to_json)
+                return self._parse_jwplayer_data(jwplayer_data, video_id)
+            except ExtractorError:
+                pass
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py
index aff7ab49a..33d55f770 100644
--- a/youtube_dl/extractor/jwplatform.py
+++ b/youtube_dl/extractor/jwplatform.py
@@ -4,139 +4,9 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
-    determine_ext,
-    float_or_none,
-    int_or_none,
-    js_to_json,
-    mimetype2ext,
-    urljoin,
-)
 
 
-class JWPlatformBaseIE(InfoExtractor):
-    @staticmethod
-    def _find_jwplayer_data(webpage):
-        # TODO: Merge this with JWPlayer-related codes in generic.py
-
-        mobj = re.search(
-            r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
-            webpage)
-        if mobj:
-            return mobj.group('options')
-
-    def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
-        jwplayer_data = self._parse_json(
-            self._find_jwplayer_data(webpage), video_id,
-            transform_source=js_to_json)
-        return self._parse_jwplayer_data(
-            jwplayer_data, video_id, *args, **kwargs)
-
-    def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
-                             m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
-        # JWPlayer backward compatibility: flattened playlists
-        # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
-        if 'playlist' not in jwplayer_data:
-            jwplayer_data = {'playlist': [jwplayer_data]}
-
-        entries = []
-
-        # JWPlayer backward compatibility: single playlist item
-        # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
-        if not isinstance(jwplayer_data['playlist'], list):
-            jwplayer_data['playlist'] = [jwplayer_data['playlist']]
-
-        for video_data in jwplayer_data['playlist']:
-            # JWPlayer backward compatibility: flattened sources
-            # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
-            if 'sources' not in video_data:
-                video_data['sources'] = [video_data]
-
-            this_video_id = video_id or video_data['mediaid']
-
-            formats = []
-            for source in video_data['sources']:
-                source_url = self._proto_relative_url(source['file'])
-                if base_url:
-                    source_url = compat_urlparse.urljoin(base_url, source_url)
-                source_type = source.get('type') or ''
-                ext = mimetype2ext(source_type) or determine_ext(source_url)
-                if source_type == 'hls' or ext == 'm3u8':
-                    formats.extend(self._extract_m3u8_formats(
-                        source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
-                elif ext == 'mpd':
-                    formats.extend(self._extract_mpd_formats(
-                        source_url, this_video_id, mpd_id=mpd_id, fatal=False))
-                # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
-                elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
-                    formats.append({
-                        'url': source_url,
-                        'vcodec': 'none',
-                        'ext': ext,
-                    })
-                else:
-                    height = int_or_none(source.get('height'))
-                    if height is None:
-                        # Often no height is provided but there is a label in
-                        # format like 1080p.
-                        height = int_or_none(self._search_regex(
-                            r'^(\d{3,})[pP]$', source.get('label') or '',
-                            'height', default=None))
-                    a_format = {
-                        'url': source_url,
-                        'width': int_or_none(source.get('width')),
-                        'height': height,
-                        'ext': ext,
-                    }
-                    if source_url.startswith('rtmp'):
-                        a_format['ext'] = 'flv'
-
-                        # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
-                        # of jwplayer.flash.swf
-                        rtmp_url_parts = re.split(
-                            r'((?:mp4|mp3|flv):)', source_url, 1)
-                        if len(rtmp_url_parts) == 3:
-                            rtmp_url, prefix, play_path = rtmp_url_parts
-                            a_format.update({
-                                'url': rtmp_url,
-                                'play_path': prefix + play_path,
-                            })
-                        if rtmp_params:
-                            a_format.update(rtmp_params)
-                    formats.append(a_format)
-            self._sort_formats(formats)
-
-            subtitles = {}
-            tracks = video_data.get('tracks')
-            if tracks and isinstance(tracks, list):
-                for track in tracks:
-                    if track.get('kind') != 'captions':
-                        continue
-                    track_url = urljoin(base_url, track.get('file'))
-                    if not track_url:
-                        continue
-                    subtitles.setdefault(track.get('label') or 'en', []).append({
-                        'url': self._proto_relative_url(track_url)
-                    })
-
-            entries.append({
-                'id': this_video_id,
-                'title': video_data['title'] if require_title else video_data.get('title'),
-                'description': video_data.get('description'),
-                'thumbnail': self._proto_relative_url(video_data.get('image')),
-                'timestamp': int_or_none(video_data.get('pubdate')),
-                'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
-                'subtitles': subtitles,
-                'formats': formats,
-            })
-        if len(entries) == 1:
-            return entries[0]
-        else:
-            return self.playlist_result(entries)
-
-
-class JWPlatformIE(JWPlatformBaseIE):
+class JWPlatformIE(InfoExtractor):
     _VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
     _TEST = {
         'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py
index de1d6b08a..dcd157777 100644
--- a/youtube_dl/extractor/ondemandkorea.py
+++ b/youtube_dl/extractor/ondemandkorea.py
@@ -1,14 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     js_to_json,
 )
 
 
-class OnDemandKoreaIE(JWPlatformBaseIE):
+class OnDemandKoreaIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
     _TEST = {
         'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 7a2737032..9b413590a 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -169,50 +169,6 @@ class PornHubIE(InfoExtractor):
         comment_count = self._extract_count(
             r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
 
-        """
-        video_variables = {}
-        for video_variablename, quote, video_variable in re.findall(
-                r'(player_quality_[0-9]{3,4}p\w+)\s*=\s*(["\'])(.+?)\2;', webpage):
-            video_variables[video_variablename] = video_variable
-
-        video_urls = []
-        for encoded_video_url in re.findall(
-                r'player_quality_[0-9]{3,4}p\s*=(.+?);', webpage):
-            for varname, varval in video_variables.items():
-                encoded_video_url = encoded_video_url.replace(varname, varval)
-            video_urls.append(re.sub(r'[\s+]', '', encoded_video_url))
-
-        if webpage.find('"encrypted":true') != -1:
-            password = compat_urllib_parse_unquote_plus(
-                self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
-            video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
-
-        formats = []
-        for video_url in video_urls:
-            path = compat_urllib_parse_urlparse(video_url).path
-            extension = os.path.splitext(path)[1][1:]
-            format = path.split('/')[5].split('_')[:2]
-            format = '-'.join(format)
-
-            m = re.match(r'^(?P<height>[0-9]+)[pP]-(?P<tbr>[0-9]+)[kK]$', format)
-            if m is None:
-                height = None
-                tbr = None
-            else:
-                height = int(m.group('height'))
-                tbr = int(m.group('tbr'))
-
-            formats.append({
-                'url': video_url,
-                'ext': extension,
-                'format': format,
-                'format_id': format,
-                'tbr': tbr,
-                'height': height,
-            })
-        self._sort_formats(formats)
-        """
-
         page_params = self._parse_json(self._search_regex(
             r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
             webpage, 'page parameters', group='data', default='{}'),
diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py
index 1a0cce7e0..2831368b6 100644
--- a/youtube_dl/extractor/pornoxo.py
+++ b/youtube_dl/extractor/pornoxo.py
@@ -2,13 +2,13 @@ from __future__ import unicode_literals
 
 import re
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     str_to_int,
 )
 
 
-class PornoXOIE(JWPlatformBaseIE):
+class PornoXOIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html'
     _TEST = {
         'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html',
diff --git a/youtube_dl/extractor/rentv.py b/youtube_dl/extractor/rentv.py
index 422c02cff..d338b3a93 100644
--- a/youtube_dl/extractor/rentv.py
+++ b/youtube_dl/extractor/rentv.py
@@ -2,11 +2,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from .jwplatform import JWPlatformBaseIE
 from ..compat import compat_str
 
 
-class RENTVIE(JWPlatformBaseIE):
+class RENTVIE(InfoExtractor):
     _VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://ren.tv/video/epizod/118577',
diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py
index 3bfe934d8..51644011e 100644
--- a/youtube_dl/extractor/rudo.py
+++ b/youtube_dl/extractor/rudo.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     js_to_json,
     get_element_by_class,
@@ -11,7 +11,7 @@ from ..utils import (
 )
 
 
-class RudoIE(JWPlatformBaseIE):
+class RudoIE(InfoExtractor):
     _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)'
 
     _TEST = {
diff --git a/youtube_dl/extractor/screencastomatic.py b/youtube_dl/extractor/screencastomatic.py
index 94a2a37d2..b5e76c9af 100644
--- a/youtube_dl/extractor/screencastomatic.py
+++ b/youtube_dl/extractor/screencastomatic.py
@@ -1,11 +1,11 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import js_to_json
 
 
-class ScreencastOMaticIE(JWPlatformBaseIE):
+class ScreencastOMaticIE(InfoExtractor):
     _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
     _TEST = {
         'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
diff --git a/youtube_dl/extractor/sendtonews.py b/youtube_dl/extractor/sendtonews.py
index 9880a5a78..9d9652949 100644
--- a/youtube_dl/extractor/sendtonews.py
+++ b/youtube_dl/extractor/sendtonews.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     float_or_none,
     parse_iso8601,
@@ -14,7 +14,7 @@ from ..utils import (
 )
 
 
-class SendtoNewsIE(JWPlatformBaseIE):
+class SendtoNewsIE(InfoExtractor):
     _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)'
 
     _TEST = {
diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py
index 4473a3c77..b7b3568cb 100644
--- a/youtube_dl/extractor/thisav.py
+++ b/youtube_dl/extractor/thisav.py
@@ -3,11 +3,11 @@ from __future__ import unicode_literals
 
 import re
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import remove_end
 
 
-class ThisAVIE(JWPlatformBaseIE):
+class ThisAVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
     _TESTS = [{
         'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html',
diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py
index 6d5c74826..1a5b76bf2 100644
--- a/youtube_dl/extractor/tvnoe.py
+++ b/youtube_dl/extractor/tvnoe.py
@@ -1,7 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     clean_html,
     get_element_by_class,
@@ -9,7 +9,7 @@ from ..utils import (
 )
 
 
-class TVNoeIE(JWPlatformBaseIE):
+class TVNoeIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://www.tvnoe.cz/video/10362',
diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py
index 9950c62ad..1f1828fce 100644
--- a/youtube_dl/extractor/vidzi.py
+++ b/youtube_dl/extractor/vidzi.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .jwplatform import JWPlatformBaseIE
+from .common import InfoExtractor
 from ..utils import (
     decode_packed_codes,
     js_to_json,
@@ -12,7 +12,7 @@ from ..utils import (
 )
 
 
-class VidziIE(JWPlatformBaseIE):
+class VidziIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?vidzi\.tv/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
     _TESTS = [{
         'url': 'http://vidzi.tv/cghql9yq6emu.html',
diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py
index 54eb51427..c022fb33e 100644
--- a/youtube_dl/extractor/wimp.py
+++ b/youtube_dl/extractor/wimp.py
@@ -1,10 +1,10 @@
 from __future__ import unicode_literals
 
+from .common import InfoExtractor
 from .youtube import YoutubeIE
-from .jwplatform import JWPlatformBaseIE
 
 
-class WimpIE(JWPlatformBaseIE):
+class WimpIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://www.wimp.com/maru-is-exhausted/',
-- 
cgit v1.2.3


From 773f291dcbce486fefe24e1abd29735d374d0a9e Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sat, 4 Feb 2017 18:49:58 +0700
Subject: Add experimental geo restriction bypass mechanism Based on faking
 X-Forwarded-For HTTP header

---
 youtube_dl/YoutubeDL.py        |  17 +++
 youtube_dl/__init__.py         |   2 +
 youtube_dl/extractor/common.py |  48 +++++++-
 youtube_dl/options.py          |  12 ++
 youtube_dl/utils.py            | 267 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 340 insertions(+), 6 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index a7bf5a1b0..ebace6b57 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -56,6 +56,8 @@ from .utils import (
     ExtractorError,
     format_bytes,
     formatSeconds,
+    GeoRestrictedError,
+    ISO3166Utils,
     locked_file,
     make_HTTPS_handler,
     MaxDownloadsReached,
@@ -272,6 +274,13 @@ class YoutubeDL(object):
                        If it returns None, the video is downloaded.
                        match_filter_func in utils.py is one example for this.
     no_color:          Do not emit color codes in output.
+    bypass_geo_restriction:
+                       Bypass geographic restriction via faking X-Forwarded-For
+                       HTTP header (experimental)
+    bypass_geo_restriction_as_country:
+                       Two-letter ISO 3166-2 country code that will be used for
+                       explicit geographic restriction bypassing via faking
+                       X-Forwarded-For HTTP header (experimental)
 
     The following options determine which downloader is picked:
     external_downloader: Executable of the external downloader to call.
@@ -707,6 +716,14 @@ class YoutubeDL(object):
                     return self.process_ie_result(ie_result, download, extra_info)
                 else:
                     return ie_result
+            except GeoRestrictedError as e:
+                msg = e.msg
+                if e.countries:
+                    msg += '\nThis video is available in %s.' % ', '.join(
+                        map(ISO3166Utils.short2full, e.countries))
+                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
+                self.report_error(msg)
+                break
             except ExtractorError as e:  # An error we somewhat expected
                 self.report_error(compat_str(e), e.format_traceback())
                 break
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 5c5b8094b..94f461a78 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -414,6 +414,8 @@ def _real_main(argv=None):
         'cn_verification_proxy': opts.cn_verification_proxy,
         'geo_verification_proxy': opts.geo_verification_proxy,
         'config_location': opts.config_location,
+        'bypass_geo_restriction': opts.bypass_geo_restriction,
+        'bypass_geo_restriction_as_country': opts.bypass_geo_restriction_as_country,
     }
 
     with YoutubeDL(ydl_opts) as ydl:
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index f6ff56eda..96815099d 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -6,6 +6,7 @@ import hashlib
 import json
 import netrc
 import os
+import random
 import re
 import socket
 import sys
@@ -39,6 +40,8 @@ from ..utils import (
     ExtractorError,
     fix_xml_ampersands,
     float_or_none,
+    GeoRestrictedError,
+    GeoUtils,
     int_or_none,
     js_to_json,
     parse_iso8601,
@@ -320,17 +323,25 @@ class InfoExtractor(object):
     _real_extract() methods and define a _VALID_URL regexp.
     Probably, they should also be added to the list of extractors.
 
+    _BYPASS_GEO attribute may be set to False in order to disable
+    geo restriction bypass mechanisms for a particular extractor.
+    Though it won't disable explicit geo restriction bypass based on
+    country code provided with bypass_geo_restriction_as_country.
+
     Finally, the _WORKING attribute should be set to False for broken IEs
     in order to warn the users and skip the tests.
     """
 
     _ready = False
     _downloader = None
+    _x_forwarded_for_ip = None
+    _BYPASS_GEO = True
     _WORKING = True
 
     def __init__(self, downloader=None):
         """Constructor. Receives an optional downloader."""
         self._ready = False
+        self._x_forwarded_for_ip = None
         self.set_downloader(downloader)
 
     @classmethod
@@ -359,6 +370,10 @@ class InfoExtractor(object):
 
     def initialize(self):
         """Initializes an instance (authentication, etc)."""
+        if not self._x_forwarded_for_ip:
+            country_code = self._downloader.params.get('bypass_geo_restriction_as_country', None)
+            if country_code:
+                self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
         if not self._ready:
             self._real_initialize()
             self._ready = True
@@ -366,8 +381,22 @@ class InfoExtractor(object):
     def extract(self, url):
         """Extracts URL information and returns it in list of dicts."""
         try:
-            self.initialize()
-            return self._real_extract(url)
+            for _ in range(2):
+                try:
+                    self.initialize()
+                    return self._real_extract(url)
+                except GeoRestrictedError as e:
+                    if (not self._downloader.params.get('bypass_geo_restriction_as_country', None) and
+                            self._BYPASS_GEO and
+                            self._downloader.params.get('bypass_geo_restriction', True) and
+                            not self._x_forwarded_for_ip and
+                            e.countries):
+                        self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries))
+                        if self._x_forwarded_for_ip:
+                            self.report_warning(
+                                'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
+                            continue
+                    raise
         except ExtractorError:
             raise
         except compat_http_client.IncompleteRead as e:
@@ -434,6 +463,15 @@ class InfoExtractor(object):
         if isinstance(url_or_request, (compat_str, str)):
             url_or_request = url_or_request.partition('#')[0]
 
+        # Some sites check X-Forwarded-For HTTP header in order to figure out
+        # the origin of the client behind proxy. This allows bypassing geo
+        # restriction by faking this header's value to IP that belongs to some
+        # geo unrestricted country. We will do so once we encounter any
+        # geo restriction error.
+        if self._x_forwarded_for_ip:
+            if 'X-Forwarded-For' not in headers:
+                headers['X-Forwarded-For'] = self._x_forwarded_for_ip
+
         urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
         if urlh is False:
             assert not fatal
@@ -609,10 +647,8 @@ class InfoExtractor(object):
             expected=True)
 
     @staticmethod
-    def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'):
-        raise ExtractorError(
-            '%s. You might want to use --proxy to workaround.' % msg,
-            expected=True)
+    def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
+        raise GeoRestrictedError(msg, countries=countries)
 
     # Methods for following #608
     @staticmethod
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index deff54324..2e194f6dc 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -549,6 +549,18 @@ def parseOpts(overrideArguments=None):
             'Upper bound of a range for randomized sleep before each download '
             '(maximum possible number of seconds to sleep). Must only be used '
             'along with --min-sleep-interval.'))
+    workarounds.add_option(
+        '--bypass-geo',
+        action='store_true', dest='bypass_geo_restriction', default=True,
+        help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)')
+    workarounds.add_option(
+        '--no-bypass-geo',
+        action='store_false', dest='bypass_geo_restriction', default=True,
+        help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)')
+    workarounds.add_option(
+        '--bypass-geo-as-country', metavar='CODE',
+        dest='bypass_geo_restriction_as_country', default=None,
+        help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)')
 
     verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
     verbosity.add_option(
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 3f9e592e3..4e76b6b7b 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -23,6 +23,7 @@ import operator
 import os
 import pipes
 import platform
+import random
 import re
 import socket
 import ssl
@@ -747,6 +748,18 @@ class RegexNotFoundError(ExtractorError):
     pass
 
 
+class GeoRestrictedError(ExtractorError):
+    """Geographic restriction Error exception.
+
+    This exception may be thrown when a video is not available from your
+    geographic location due to geographic restrictions imposed by a website.
+    """
+    def __init__(self, msg, countries=None):
+        super(GeoRestrictedError, self).__init__(msg, expected=True)
+        self.msg = msg
+        self.countries = countries
+
+
 class DownloadError(YoutubeDLError):
     """Download Error exception.
 
@@ -3027,6 +3040,260 @@ class ISO3166Utils(object):
         return cls._country_map.get(code.upper())
 
 
+class GeoUtils(object):
+    # Major IPv4 address blocks per country
+    _country_ip_map = {
+        'AD': '85.94.160.0/19',
+        'AE': '94.200.0.0/13',
+        'AF': '149.54.0.0/17',
+        'AG': '209.59.64.0/18',
+        'AI': '204.14.248.0/21',
+        'AL': '46.99.0.0/16',
+        'AM': '46.70.0.0/15',
+        'AO': '105.168.0.0/13',
+        'AP': '159.117.192.0/21',
+        'AR': '181.0.0.0/12',
+        'AS': '202.70.112.0/20',
+        'AT': '84.112.0.0/13',
+        'AU': '1.128.0.0/11',
+        'AW': '181.41.0.0/18',
+        'AZ': '5.191.0.0/16',
+        'BA': '31.176.128.0/17',
+        'BB': '65.48.128.0/17',
+        'BD': '114.130.0.0/16',
+        'BE': '57.0.0.0/8',
+        'BF': '129.45.128.0/17',
+        'BG': '95.42.0.0/15',
+        'BH': '37.131.0.0/17',
+        'BI': '154.117.192.0/18',
+        'BJ': '137.255.0.0/16',
+        'BL': '192.131.134.0/24',
+        'BM': '196.12.64.0/18',
+        'BN': '156.31.0.0/16',
+        'BO': '161.56.0.0/16',
+        'BQ': '161.0.80.0/20',
+        'BR': '152.240.0.0/12',
+        'BS': '24.51.64.0/18',
+        'BT': '119.2.96.0/19',
+        'BW': '168.167.0.0/16',
+        'BY': '178.120.0.0/13',
+        'BZ': '179.42.192.0/18',
+        'CA': '99.224.0.0/11',
+        'CD': '41.243.0.0/16',
+        'CF': '196.32.200.0/21',
+        'CG': '197.214.128.0/17',
+        'CH': '85.0.0.0/13',
+        'CI': '154.232.0.0/14',
+        'CK': '202.65.32.0/19',
+        'CL': '152.172.0.0/14',
+        'CM': '165.210.0.0/15',
+        'CN': '36.128.0.0/10',
+        'CO': '181.240.0.0/12',
+        'CR': '201.192.0.0/12',
+        'CU': '152.206.0.0/15',
+        'CV': '165.90.96.0/19',
+        'CW': '190.88.128.0/17',
+        'CY': '46.198.0.0/15',
+        'CZ': '88.100.0.0/14',
+        'DE': '53.0.0.0/8',
+        'DJ': '197.241.0.0/17',
+        'DK': '87.48.0.0/12',
+        'DM': '192.243.48.0/20',
+        'DO': '152.166.0.0/15',
+        'DZ': '41.96.0.0/12',
+        'EC': '186.68.0.0/15',
+        'EE': '90.190.0.0/15',
+        'EG': '156.160.0.0/11',
+        'ER': '196.200.96.0/20',
+        'ES': '88.0.0.0/11',
+        'ET': '196.188.0.0/14',
+        'EU': '2.16.0.0/13',
+        'FI': '91.152.0.0/13',
+        'FJ': '144.120.0.0/16',
+        'FM': '119.252.112.0/20',
+        'FO': '88.85.32.0/19',
+        'FR': '90.0.0.0/9',
+        'GA': '41.158.0.0/15',
+        'GB': '25.0.0.0/8',
+        'GD': '74.122.88.0/21',
+        'GE': '31.146.0.0/16',
+        'GF': '161.22.64.0/18',
+        'GG': '62.68.160.0/19',
+        'GH': '45.208.0.0/14',
+        'GI': '85.115.128.0/19',
+        'GL': '88.83.0.0/19',
+        'GM': '160.182.0.0/15',
+        'GN': '197.149.192.0/18',
+        'GP': '104.250.0.0/19',
+        'GQ': '105.235.224.0/20',
+        'GR': '94.64.0.0/13',
+        'GT': '168.234.0.0/16',
+        'GU': '168.123.0.0/16',
+        'GW': '197.214.80.0/20',
+        'GY': '181.41.64.0/18',
+        'HK': '113.252.0.0/14',
+        'HN': '181.210.0.0/16',
+        'HR': '93.136.0.0/13',
+        'HT': '148.102.128.0/17',
+        'HU': '84.0.0.0/14',
+        'ID': '39.192.0.0/10',
+        'IE': '87.32.0.0/12',
+        'IL': '79.176.0.0/13',
+        'IM': '5.62.80.0/20',
+        'IN': '117.192.0.0/10',
+        'IO': '203.83.48.0/21',
+        'IQ': '37.236.0.0/14',
+        'IR': '2.176.0.0/12',
+        'IS': '82.221.0.0/16',
+        'IT': '79.0.0.0/10',
+        'JE': '87.244.64.0/18',
+        'JM': '72.27.0.0/17',
+        'JO': '176.29.0.0/16',
+        'JP': '126.0.0.0/8',
+        'KE': '105.48.0.0/12',
+        'KG': '158.181.128.0/17',
+        'KH': '36.37.128.0/17',
+        'KI': '103.25.140.0/22',
+        'KM': '197.255.224.0/20',
+        'KN': '198.32.32.0/19',
+        'KP': '175.45.176.0/22',
+        'KR': '175.192.0.0/10',
+        'KW': '37.36.0.0/14',
+        'KY': '64.96.0.0/15',
+        'KZ': '2.72.0.0/13',
+        'LA': '115.84.64.0/18',
+        'LB': '178.135.0.0/16',
+        'LC': '192.147.231.0/24',
+        'LI': '82.117.0.0/19',
+        'LK': '112.134.0.0/15',
+        'LR': '41.86.0.0/19',
+        'LS': '129.232.0.0/17',
+        'LT': '78.56.0.0/13',
+        'LU': '188.42.0.0/16',
+        'LV': '46.109.0.0/16',
+        'LY': '41.252.0.0/14',
+        'MA': '105.128.0.0/11',
+        'MC': '88.209.64.0/18',
+        'MD': '37.246.0.0/16',
+        'ME': '178.175.0.0/17',
+        'MF': '74.112.232.0/21',
+        'MG': '154.126.0.0/17',
+        'MH': '117.103.88.0/21',
+        'MK': '77.28.0.0/15',
+        'ML': '154.118.128.0/18',
+        'MM': '37.111.0.0/17',
+        'MN': '49.0.128.0/17',
+        'MO': '60.246.0.0/16',
+        'MP': '202.88.64.0/20',
+        'MQ': '109.203.224.0/19',
+        'MR': '41.188.64.0/18',
+        'MS': '208.90.112.0/22',
+        'MT': '46.11.0.0/16',
+        'MU': '105.16.0.0/12',
+        'MV': '27.114.128.0/18',
+        'MW': '105.234.0.0/16',
+        'MX': '187.192.0.0/11',
+        'MY': '175.136.0.0/13',
+        'MZ': '197.218.0.0/15',
+        'NA': '41.182.0.0/16',
+        'NC': '101.101.0.0/18',
+        'NE': '197.214.0.0/18',
+        'NF': '203.17.240.0/22',
+        'NG': '105.112.0.0/12',
+        'NI': '186.76.0.0/15',
+        'NL': '145.96.0.0/11',
+        'NO': '84.208.0.0/13',
+        'NP': '36.252.0.0/15',
+        'NR': '203.98.224.0/19',
+        'NU': '49.156.48.0/22',
+        'NZ': '49.224.0.0/14',
+        'OM': '5.36.0.0/15',
+        'PA': '186.72.0.0/15',
+        'PE': '186.160.0.0/14',
+        'PF': '123.50.64.0/18',
+        'PG': '124.240.192.0/19',
+        'PH': '49.144.0.0/13',
+        'PK': '39.32.0.0/11',
+        'PL': '83.0.0.0/11',
+        'PM': '70.36.0.0/20',
+        'PR': '66.50.0.0/16',
+        'PS': '188.161.0.0/16',
+        'PT': '85.240.0.0/13',
+        'PW': '202.124.224.0/20',
+        'PY': '181.120.0.0/14',
+        'QA': '37.210.0.0/15',
+        'RE': '139.26.0.0/16',
+        'RO': '79.112.0.0/13',
+        'RS': '178.220.0.0/14',
+        'RU': '5.136.0.0/13',
+        'RW': '105.178.0.0/15',
+        'SA': '188.48.0.0/13',
+        'SB': '202.1.160.0/19',
+        'SC': '154.192.0.0/11',
+        'SD': '154.96.0.0/13',
+        'SE': '78.64.0.0/12',
+        'SG': '152.56.0.0/14',
+        'SI': '188.196.0.0/14',
+        'SK': '78.98.0.0/15',
+        'SL': '197.215.0.0/17',
+        'SM': '89.186.32.0/19',
+        'SN': '41.82.0.0/15',
+        'SO': '197.220.64.0/19',
+        'SR': '186.179.128.0/17',
+        'SS': '105.235.208.0/21',
+        'ST': '197.159.160.0/19',
+        'SV': '168.243.0.0/16',
+        'SX': '190.102.0.0/20',
+        'SY': '5.0.0.0/16',
+        'SZ': '41.84.224.0/19',
+        'TC': '65.255.48.0/20',
+        'TD': '154.68.128.0/19',
+        'TG': '196.168.0.0/14',
+        'TH': '171.96.0.0/13',
+        'TJ': '85.9.128.0/18',
+        'TK': '27.96.24.0/21',
+        'TL': '180.189.160.0/20',
+        'TM': '95.85.96.0/19',
+        'TN': '197.0.0.0/11',
+        'TO': '175.176.144.0/21',
+        'TR': '78.160.0.0/11',
+        'TT': '186.44.0.0/15',
+        'TV': '202.2.96.0/19',
+        'TW': '120.96.0.0/11',
+        'TZ': '156.156.0.0/14',
+        'UA': '93.72.0.0/13',
+        'UG': '154.224.0.0/13',
+        'US': '3.0.0.0/8',
+        'UY': '167.56.0.0/13',
+        'UZ': '82.215.64.0/18',
+        'VA': '212.77.0.0/19',
+        'VC': '24.92.144.0/20',
+        'VE': '186.88.0.0/13',
+        'VG': '172.103.64.0/18',
+        'VI': '146.226.0.0/16',
+        'VN': '14.160.0.0/11',
+        'VU': '202.80.32.0/20',
+        'WF': '117.20.32.0/21',
+        'WS': '202.4.32.0/19',
+        'YE': '134.35.0.0/16',
+        'YT': '41.242.116.0/22',
+        'ZA': '41.0.0.0/11',
+        'ZM': '165.56.0.0/13',
+        'ZW': '41.85.192.0/19',
+    }
+
+    @classmethod
+    def random_ipv4(cls, code):
+        block = cls._country_ip_map.get(code.upper())
+        if not block:
+            return None
+        addr, preflen = block.split('/')
+        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
+        addr_max = addr_min | (0xffffffff >> int(preflen))
+        return socket.inet_ntoa(
+            compat_struct_pack('!I', random.randint(addr_min, addr_max)))
+
+
 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
     def __init__(self, proxies=None):
         # Set default handlers
-- 
cgit v1.2.3


From 0016b84e16965a07c52946c4672363153e8b18a2 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sat, 4 Feb 2017 21:06:07 +0700
Subject: Add faked X-Forwarded-For to formats' HTTP headers

---
 youtube_dl/YoutubeDL.py        | 14 ++++++++++++++
 youtube_dl/extractor/common.py |  5 ++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index ebace6b57..1c04e46c1 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -864,8 +864,14 @@ class YoutubeDL(object):
             if self.params.get('playlistrandom', False):
                 random.shuffle(entries)
 
+            x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+
             for i, entry in enumerate(entries, 1):
                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
+                # This __x_forwarded_for_ip thing is a bit ugly but requires
+                # minimal changes
+                if x_forwarded_for:
+                    entry['__x_forwarded_for_ip'] = x_forwarded_for
                 extra = {
                     'n_entries': n_entries,
                     'playlist': playlist,
@@ -1250,6 +1256,11 @@ class YoutubeDL(object):
         if cookies:
             res['Cookie'] = cookies
 
+        if 'X-Forwarded-For' not in res:
+            x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
+            if x_forwarded_for_ip:
+                res['X-Forwarded-For'] = x_forwarded_for_ip
+
         return res
 
     def _calc_cookies(self, info_dict):
@@ -1392,6 +1403,9 @@ class YoutubeDL(object):
             full_format_info = info_dict.copy()
             full_format_info.update(format)
             format['http_headers'] = self._calc_headers(full_format_info)
+        # Remove private housekeeping stuff
+        if '__x_forwarded_for_ip' in info_dict:
+            del info_dict['__x_forwarded_for_ip']
 
         # TODO Central sorting goes here
 
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 96815099d..c1f7f28a0 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -384,7 +384,10 @@ class InfoExtractor(object):
             for _ in range(2):
                 try:
                     self.initialize()
-                    return self._real_extract(url)
+                    ie_result = self._real_extract(url)
+                    if self._x_forwarded_for_ip:
+                        ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
+                    return ie_result
                 except GeoRestrictedError as e:
                     if (not self._downloader.params.get('bypass_geo_restriction_as_country', None) and
                             self._BYPASS_GEO and
-- 
cgit v1.2.3


From 0a840f584c3f1fedb6957c05587dec697143f2d5 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 19 Feb 2017 01:53:41 +0700
Subject: Rename bypass geo restriction options

---
 youtube_dl/YoutubeDL.py        |  5 ++---
 youtube_dl/__init__.py         |  4 ++--
 youtube_dl/extractor/common.py |  8 ++++----
 youtube_dl/options.py          | 12 ++++++------
 4 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 1c04e46c1..68000dea2 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -274,10 +274,9 @@ class YoutubeDL(object):
                        If it returns None, the video is downloaded.
                        match_filter_func in utils.py is one example for this.
     no_color:          Do not emit color codes in output.
-    bypass_geo_restriction:
-                       Bypass geographic restriction via faking X-Forwarded-For
+    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                        HTTP header (experimental)
-    bypass_geo_restriction_as_country:
+    geo_bypass_country:
                        Two-letter ISO 3166-2 country code that will be used for
                        explicit geographic restriction bypassing via faking
                        X-Forwarded-For HTTP header (experimental)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 94f461a78..f91d29a7b 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -414,8 +414,8 @@ def _real_main(argv=None):
         'cn_verification_proxy': opts.cn_verification_proxy,
         'geo_verification_proxy': opts.geo_verification_proxy,
         'config_location': opts.config_location,
-        'bypass_geo_restriction': opts.bypass_geo_restriction,
-        'bypass_geo_restriction_as_country': opts.bypass_geo_restriction_as_country,
+        'geo_bypass': opts.geo_bypass,
+        'geo_bypass_country': opts.geo_bypass_country,
     }
 
     with YoutubeDL(ydl_opts) as ydl:
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index c1f7f28a0..6eb6a25b8 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -326,7 +326,7 @@ class InfoExtractor(object):
     _BYPASS_GEO attribute may be set to False in order to disable
     geo restriction bypass mechanisms for a particular extractor.
     Though it won't disable explicit geo restriction bypass based on
-    country code provided with bypass_geo_restriction_as_country.
+    country code provided with geo_bypass_country.
 
     Finally, the _WORKING attribute should be set to False for broken IEs
     in order to warn the users and skip the tests.
@@ -371,7 +371,7 @@ class InfoExtractor(object):
     def initialize(self):
         """Initializes an instance (authentication, etc)."""
         if not self._x_forwarded_for_ip:
-            country_code = self._downloader.params.get('bypass_geo_restriction_as_country', None)
+            country_code = self._downloader.params.get('geo_bypass_country', None)
             if country_code:
                 self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
         if not self._ready:
@@ -389,9 +389,9 @@ class InfoExtractor(object):
                         ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
                     return ie_result
                 except GeoRestrictedError as e:
-                    if (not self._downloader.params.get('bypass_geo_restriction_as_country', None) and
+                    if (not self._downloader.params.get('geo_bypass_country', None) and
                             self._BYPASS_GEO and
-                            self._downloader.params.get('bypass_geo_restriction', True) and
+                            self._downloader.params.get('geo_bypass', True) and
                             not self._x_forwarded_for_ip and
                             e.countries):
                         self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries))
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 2e194f6dc..ae3f50754 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -550,16 +550,16 @@ def parseOpts(overrideArguments=None):
             '(maximum possible number of seconds to sleep). Must only be used '
             'along with --min-sleep-interval.'))
     workarounds.add_option(
-        '--bypass-geo',
-        action='store_true', dest='bypass_geo_restriction', default=True,
+        '--geo-bypass',
+        action='store_true', dest='geo_bypass', default=True,
         help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)')
     workarounds.add_option(
-        '--no-bypass-geo',
-        action='store_false', dest='bypass_geo_restriction', default=True,
+        '--no-geo-bypass',
+        action='store_false', dest='geo_bypass', default=True,
         help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header (experimental)')
     workarounds.add_option(
-        '--bypass-geo-as-country', metavar='CODE',
-        dest='bypass_geo_restriction_as_country', default=None,
+        '--geo-bypass-country', metavar='CODE',
+        dest='geo_bypass_country', default=None,
         help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code (experimental)')
 
     verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
-- 
cgit v1.2.3


From 4248dad92bd87650c791194276296b148f668e68 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 19 Feb 2017 03:53:23 +0700
Subject: Improve geo bypass mechanism * Rename options to preffixly match with
 --geo-verification-proxy * Introduce _GEO_COUNTRIES for extractors *
 Implement faking IP right away for sites with known geo restriction

---
 youtube_dl/extractor/common.py        | 57 +++++++++++++++++++++++++----------
 youtube_dl/extractor/dramafever.py    |  3 +-
 youtube_dl/extractor/go.py            |  3 +-
 youtube_dl/extractor/itv.py           |  4 ++-
 youtube_dl/extractor/nrk.py           |  4 ++-
 youtube_dl/extractor/ondemandkorea.py |  3 +-
 youtube_dl/extractor/pbs.py           |  5 ++-
 youtube_dl/extractor/srgssr.py        |  6 ++--
 youtube_dl/extractor/svt.py           |  4 ++-
 youtube_dl/extractor/vbox7.py         |  3 +-
 youtube_dl/extractor/vgtv.py          |  5 +--
 youtube_dl/extractor/viki.py          |  2 +-
 youtube_dl/utils.py                   |  2 +-
 13 files changed, 71 insertions(+), 30 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 6eb6a25b8..272da74b6 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -323,10 +323,15 @@ class InfoExtractor(object):
     _real_extract() methods and define a _VALID_URL regexp.
     Probably, they should also be added to the list of extractors.
 
-    _BYPASS_GEO attribute may be set to False in order to disable
+    _GEO_BYPASS attribute may be set to False in order to disable
     geo restriction bypass mechanisms for a particular extractor.
     Though it won't disable explicit geo restriction bypass based on
-    country code provided with geo_bypass_country.
+    country code provided with geo_bypass_country. (experimental)
+
+    _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
+    countries for this extractor. One of these countries will be used by
+    geo restriction bypass mechanism right away in order to bypass
+    geo restriction, of course, if the mechanism is not disabled. (experimental)
 
     Finally, the _WORKING attribute should be set to False for broken IEs
     in order to warn the users and skip the tests.
@@ -335,7 +340,8 @@ class InfoExtractor(object):
     _ready = False
     _downloader = None
     _x_forwarded_for_ip = None
-    _BYPASS_GEO = True
+    _GEO_BYPASS = True
+    _GEO_COUNTRIES = None
     _WORKING = True
 
     def __init__(self, downloader=None):
@@ -370,13 +376,27 @@ class InfoExtractor(object):
 
     def initialize(self):
         """Initializes an instance (authentication, etc)."""
+        self.__initialize_geo_bypass()
+        if not self._ready:
+            self._real_initialize()
+            self._ready = True
+
+    def __initialize_geo_bypass(self):
         if not self._x_forwarded_for_ip:
             country_code = self._downloader.params.get('geo_bypass_country', None)
+            # If there is no explicit country for geo bypass specified and
+            # the extractor is known to be geo restricted let's fake IP
+            # as X-Forwarded-For right away.
+            if (not country_code and
+                    self._GEO_BYPASS and
+                    self._downloader.params.get('geo_bypass', True) and
+                    self._GEO_COUNTRIES):
+                country_code = random.choice(self._GEO_COUNTRIES)
             if country_code:
                 self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
-        if not self._ready:
-            self._real_initialize()
-            self._ready = True
+                if self._downloader.params.get('verbose', False):
+                    self._downloader.to_stdout(
+                        '[debug] Using fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
 
     def extract(self, url):
         """Extracts URL information and returns it in list of dicts."""
@@ -389,16 +409,8 @@ class InfoExtractor(object):
                         ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
                     return ie_result
                 except GeoRestrictedError as e:
-                    if (not self._downloader.params.get('geo_bypass_country', None) and
-                            self._BYPASS_GEO and
-                            self._downloader.params.get('geo_bypass', True) and
-                            not self._x_forwarded_for_ip and
-                            e.countries):
-                        self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries))
-                        if self._x_forwarded_for_ip:
-                            self.report_warning(
-                                'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
-                            continue
+                    if self.__maybe_fake_ip_and_retry(e.countries):
+                        continue
                     raise
         except ExtractorError:
             raise
@@ -407,6 +419,19 @@ class InfoExtractor(object):
         except (KeyError, StopIteration) as e:
             raise ExtractorError('An extractor error has occurred.', cause=e)
 
+    def __maybe_fake_ip_and_retry(self, countries):
+        if (not self._downloader.params.get('geo_bypass_country', None) and
+                self._GEO_BYPASS and
+                self._downloader.params.get('geo_bypass', True) and
+                not self._x_forwarded_for_ip and
+                countries):
+            self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(countries))
+            if self._x_forwarded_for_ip:
+                self.report_warning(
+                    'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
+                return True
+        return False
+
     def set_downloader(self, downloader):
         """Sets the downloader for this IE."""
         self._downloader = downloader
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py
index 755db806a..e7abc8889 100644
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@@ -20,6 +20,7 @@ from ..utils import (
 class DramaFeverBaseIE(AMPIE):
     _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
     _NETRC_MACHINE = 'dramafever'
+    _GEO_COUNTRIES = ['US', 'CA']
 
     _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
 
@@ -118,7 +119,7 @@ class DramaFeverIE(DramaFeverBaseIE):
             if isinstance(e.cause, compat_HTTPError):
                 self.raise_geo_restricted(
                     msg='Currently unavailable in your country',
-                    countries=['US', 'CA'])
+                    countries=self._GEO_COUNTRIES)
             raise
 
         series_id, episode_number = video_id.split('.')
diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py
index ec902c670..b205bfc7c 100644
--- a/youtube_dl/extractor/go.py
+++ b/youtube_dl/extractor/go.py
@@ -37,6 +37,7 @@ class GoIE(AdobePassIE):
         }
     }
     _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
+    _GEO_COUNTRIES = ['US']
     _TESTS = [{
         'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
         'info_dict': {
@@ -104,7 +105,7 @@ class GoIE(AdobePassIE):
                         for error in errors:
                             if error.get('code') == 1002:
                                 self.raise_geo_restricted(
-                                    error['message'], countries=['US'])
+                                    error['message'], countries=self._GEO_COUNTRIES)
                         error_message = ', '.join([error['message'] for error in errors])
                         raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
                     asset_url += '?' + entitlement['uplynkData']['sessionKey']
diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py
index aabde15f3..021c6b278 100644
--- a/youtube_dl/extractor/itv.py
+++ b/youtube_dl/extractor/itv.py
@@ -24,6 +24,7 @@ from ..utils import (
 
 class ITVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
+    _GEO_COUNTRIES = ['GB']
     _TEST = {
         'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
         'info_dict': {
@@ -101,7 +102,8 @@ class ITVIE(InfoExtractor):
             fault_code = xpath_text(resp_env, './/faultcode')
             fault_string = xpath_text(resp_env, './/faultstring')
             if fault_code == 'InvalidGeoRegion':
-                self.raise_geo_restricted(msg=fault_string, countries=['GB'])
+                self.raise_geo_restricted(
+                    msg=fault_string, countries=self._GEO_COUNTRIES)
             raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
         title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
         video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index 78ece33e1..13af9ed1f 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -14,6 +14,7 @@ from ..utils import (
 
 
 class NRKBaseIE(InfoExtractor):
+    _GEO_COUNTRIES = ['NO']
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -93,7 +94,8 @@ class NRKBaseIE(InfoExtractor):
             # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
             if 'IsGeoBlocked' in message_type:
                 self.raise_geo_restricted(
-                    msg=MESSAGES.get('ProgramIsGeoBlocked'), countries=['NO'])
+                    msg=MESSAGES.get('ProgramIsGeoBlocked'),
+                    countries=self._GEO_COUNTRIES)
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, MESSAGES.get(
                     message_type, message_type)),
diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py
index 0c85d549e..df1ce3c1d 100644
--- a/youtube_dl/extractor/ondemandkorea.py
+++ b/youtube_dl/extractor/ondemandkorea.py
@@ -10,6 +10,7 @@ from ..utils import (
 
 class OnDemandKoreaIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
+    _GEO_COUNTRIES = ['US', 'CA']
     _TEST = {
         'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
         'info_dict': {
@@ -36,7 +37,7 @@ class OnDemandKoreaIE(InfoExtractor):
         if 'msg_block_01.png' in webpage:
             self.raise_geo_restricted(
                 msg='This content is not available in your region',
-                countries=['US', 'CA'])
+                countries=self._GEO_COUNTRIES)
 
         if 'This video is only available to ODK PLUS members.' in webpage:
             raise ExtractorError(
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 64f47bae3..3e51b4dd7 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -193,6 +193,8 @@ class PBSIE(InfoExtractor):
         )
     ''' % '|'.join(list(zip(*_STATIONS))[0])
 
+    _GEO_COUNTRIES = ['US']
+
     _TESTS = [
         {
             'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
@@ -492,7 +494,8 @@ class PBSIE(InfoExtractor):
                 message = self._ERRORS.get(
                     redirect_info['http_code'], redirect_info['message'])
                 if redirect_info['http_code'] == 403:
-                    self.raise_geo_restricted(msg=message, countries=['US'])
+                    self.raise_geo_restricted(
+                        msg=message, countries=self._GEO_COUNTRIES)
                 raise ExtractorError(
                     '%s said: %s' % (self.IE_NAME, message), expected=True)
 
diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py
index a35a0a538..bb73eb1d5 100644
--- a/youtube_dl/extractor/srgssr.py
+++ b/youtube_dl/extractor/srgssr.py
@@ -14,7 +14,8 @@ from ..utils import (
 
 class SRGSSRIE(InfoExtractor):
     _VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)'
-    _BYPASS_GEO = False
+    _GEO_BYPASS = False
+    _GEO_COUNTRIES = ['CH']
 
     _ERRORS = {
         'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.',
@@ -43,7 +44,8 @@ class SRGSSRIE(InfoExtractor):
         if media_data.get('block') and media_data['block'] in self._ERRORS:
             message = self._ERRORS[media_data['block']]
             if media_data['block'] == 'GEOBLOCK':
-                self.raise_geo_restricted(msg=message, countries=['CH'])
+                self.raise_geo_restricted(
+                    msg=message, countries=self._GEO_COUNTRIES)
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, message), expected=True)
 
diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py
index f2a2200bf..9e2c9fcc6 100644
--- a/youtube_dl/extractor/svt.py
+++ b/youtube_dl/extractor/svt.py
@@ -13,6 +13,7 @@ from ..utils import (
 
 
 class SVTBaseIE(InfoExtractor):
+    _GEO_COUNTRIES = ['SE']
     def _extract_video(self, video_info, video_id):
         formats = []
         for vr in video_info['videoReferences']:
@@ -39,7 +40,8 @@ class SVTBaseIE(InfoExtractor):
                 })
         if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
             self.raise_geo_restricted(
-                'This video is only available in Sweden', countries=['SE'])
+                'This video is only available in Sweden',
+                countries=self._GEO_COUNTRIES)
         self._sort_formats(formats)
 
         subtitles = {}
diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py
index f86d804c1..8152acefd 100644
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@@ -20,6 +20,7 @@ class Vbox7IE(InfoExtractor):
                         )
                         (?P<id>[\da-fA-F]+)
                     '''
+    _GEO_COUNTRIES = ['BG']
     _TESTS = [{
         'url': 'http://vbox7.com/play:0946fff23c',
         'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
@@ -78,7 +79,7 @@ class Vbox7IE(InfoExtractor):
         video_url = video['src']
 
         if '/na.mp4' in video_url:
-            self.raise_geo_restricted(countries=['BG'])
+            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
 
         uploader = video.get('uploader')
 
diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py
index 1709fd6bb..0f8c156a7 100644
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dl/extractor/vgtv.py
@@ -14,7 +14,7 @@ from ..utils import (
 
 class VGTVIE(XstreamIE):
     IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
-    _BYPASS_GEO = False
+    _GEO_BYPASS = False
 
     _HOST_TO_APPNAME = {
         'vgtv.no': 'vgtv',
@@ -218,7 +218,8 @@ class VGTVIE(XstreamIE):
             properties = try_get(
                 data, lambda x: x['streamConfiguration']['properties'], list)
             if properties and 'geoblocked' in properties:
-                raise self.raise_geo_restricted(countries=['NO'])
+                raise self.raise_geo_restricted(
+                    countries=[host.rpartition('.')[-1].partition('/')[0].upper()])
 
         self._sort_formats(info['formats'])
 
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index 68a74e246..e9c8bf824 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -27,7 +27,7 @@ class VikiBaseIE(InfoExtractor):
     _APP_VERSION = '2.2.5.1428709186'
     _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
 
-    _BYPASS_GEO = False
+    _GEO_BYPASS = False
     _NETRC_MACHINE = 'viki'
 
     _token = None
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index cbf7639c5..17b83794a 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -3291,7 +3291,7 @@ class GeoUtils(object):
         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
         addr_max = addr_min | (0xffffffff >> int(preflen))
         return compat_str(socket.inet_ntoa(
-            compat_struct_pack('!I', random.randint(addr_min, addr_max))))
+            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
 
 
 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
-- 
cgit v1.2.3


From 3ccdde8cb76cacb7b2b64469ca51d3b1877da1f6 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Mon, 20 Feb 2017 23:21:15 +0700
Subject: [extractor/common] Emphasize geo bypass APIs are experimental

---
 youtube_dl/extractor/common.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 272da74b6..1ae264722 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -333,6 +333,9 @@ class InfoExtractor(object):
     geo restriction bypass mechanism right away in order to bypass
     geo restriction, of course, if the mechanism is not disabled. (experimental)
 
+    NB: both these geo attributes are experimental and may change in future
+    or be completely removed.
+
     Finally, the _WORKING attribute should be set to False for broken IEs
     in order to warn the users and skip the tests.
     """
-- 
cgit v1.2.3


From e39b5d4ab83de7a466c6d4c9528d385758566b22 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Tue, 21 Feb 2017 23:00:43 +0700
Subject: [extractor/common] Allow calling _initialize_geo_bypass from
 extractors (#11970)

---
 youtube_dl/extractor/common.py | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 1ae264722..86aff3312 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -379,12 +379,31 @@ class InfoExtractor(object):
 
     def initialize(self):
         """Initializes an instance (authentication, etc)."""
-        self.__initialize_geo_bypass()
+        self._initialize_geo_bypass(self._GEO_COUNTRIES)
         if not self._ready:
             self._real_initialize()
             self._ready = True
 
-    def __initialize_geo_bypass(self):
+    def _initialize_geo_bypass(self, countries):
+        """
+        Initialize geo restriction bypass mechanism.
+
+        This method is used to initialize geo bypass mechanism based on faking
+        X-Forwarded-For HTTP header. A random country from provided country list
+        is selected and a random IP brlonging to this country is generated. This
+        IP will be passed as X-Forwarded-For HTTP header in all subsequent
+        HTTP requests.
+        Method does nothing if no countries are specified.
+
+        This method will be used for initial geo bypass mechanism initialization
+        during the instance initialization with _GEO_COUNTRIES.
+
+        You may also manually call it from extractor's code if geo countries
+        information is not available beforehand (e.g. obtained during
+        extraction) or due to some another reason.
+        """
+        if not countries:
+            return
         if not self._x_forwarded_for_ip:
             country_code = self._downloader.params.get('geo_bypass_country', None)
             # If there is no explicit country for geo bypass specified and
@@ -393,8 +412,8 @@ class InfoExtractor(object):
             if (not country_code and
                     self._GEO_BYPASS and
                     self._downloader.params.get('geo_bypass', True) and
-                    self._GEO_COUNTRIES):
-                country_code = random.choice(self._GEO_COUNTRIES)
+                    countries):
+                country_code = random.choice(countries)
             if country_code:
                 self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
                 if self._downloader.params.get('verbose', False):
-- 
cgit v1.2.3


From dc0a869e5ee7a75218a759706bb11f17c4de6b72 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Tue, 21 Feb 2017 23:05:31 +0700
Subject: [extractor/common] Fix typo

---
 youtube_dl/extractor/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 86aff3312..6d4789d96 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -390,7 +390,7 @@ class InfoExtractor(object):
 
         This method is used to initialize geo bypass mechanism based on faking
         X-Forwarded-For HTTP header. A random country from provided country list
-        is selected and a random IP brlonging to this country is generated. This
+        is selected and a random IP belonging to this country is generated. This
         IP will be passed as X-Forwarded-For HTTP header in all subsequent
         HTTP requests.
         Method does nothing if no countries are specified.
-- 
cgit v1.2.3


From 336a76551b92db1c040cbf3c4a9b1857e125ad45 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Tue, 21 Feb 2017 23:09:41 +0700
Subject: [extractor/common] Do not quit _initialize_geo_bypass on empty
 countries

---
 youtube_dl/extractor/common.py | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 6d4789d96..a34fbbc9b 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -393,7 +393,6 @@ class InfoExtractor(object):
         is selected and a random IP belonging to this country is generated. This
         IP will be passed as X-Forwarded-For HTTP header in all subsequent
         HTTP requests.
-        Method does nothing if no countries are specified.
 
         This method will be used for initial geo bypass mechanism initialization
         during the instance initialization with _GEO_COUNTRIES.
@@ -402,8 +401,6 @@ class InfoExtractor(object):
         information is not available beforehand (e.g. obtained during
         extraction) or due to some another reason.
         """
-        if not countries:
-            return
         if not self._x_forwarded_for_ip:
             country_code = self._downloader.params.get('geo_bypass_country', None)
             # If there is no explicit country for geo bypass specified and
-- 
cgit v1.2.3


From eea0716cae1290fe08faea89e24a58ec91098638 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Tue, 21 Feb 2017 23:14:33 +0700
Subject: [extractor/common] Print origin country for fake IP

---
 youtube_dl/extractor/common.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a34fbbc9b..4252d6825 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -415,7 +415,8 @@ class InfoExtractor(object):
                 self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
                 if self._downloader.params.get('verbose', False):
                     self._downloader.to_stdout(
-                        '[debug] Using fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
+                        '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
+                        % (self._x_forwarded_for_ip, country_code.upper()))
 
     def extract(self, url):
         """Extracts URL information and returns it in list of dicts."""
@@ -444,10 +445,12 @@ class InfoExtractor(object):
                 self._downloader.params.get('geo_bypass', True) and
                 not self._x_forwarded_for_ip and
                 countries):
-            self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(countries))
+            country_code = random.choice(countries)
+            self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
             if self._x_forwarded_for_ip:
                 self.report_warning(
-                    'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
+                    'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.'
+                    % (self._x_forwarded_for_ip, country_code.upper()))
                 return True
         return False
 
-- 
cgit v1.2.3


From eeb0a9568442a4dbbf3478579abe2696fbe890e2 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sat, 25 Feb 2017 18:40:05 +0800
Subject: [extractor/common] Add 'preference' to _parse_html5_media_entries

Some websites, like NJPWorld, put different qualities on different
player pages.
---
 youtube_dl/extractor/common.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 4252d6825..eb3c091aa 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2010,7 +2010,7 @@ class InfoExtractor(object):
                 })
         return formats
 
-    def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None):
+    def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
         def absolute_url(video_url):
             return compat_urlparse.urljoin(base_url, video_url)
 
@@ -2032,7 +2032,8 @@ class InfoExtractor(object):
                 is_plain_url = False
                 formats = self._extract_m3u8_formats(
                     full_url, video_id, ext='mp4',
-                    entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id)
+                    entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
+                    preference=preference)
             elif ext == 'mpd':
                 is_plain_url = False
                 formats = self._extract_mpd_formats(
-- 
cgit v1.2.3


From ed0cf9b38394b28bae5f05fd6b00c85a9c0e6755 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 5 Mar 2017 23:22:27 +0700
Subject: [extractor/common] Move jwplayer formats extraction in separate
 method

---
 youtube_dl/extractor/common.py | 106 ++++++++++++++++++++++-------------------
 1 file changed, 56 insertions(+), 50 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index eb3c091aa..9b73a948c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2198,56 +2198,7 @@ class InfoExtractor(object):
 
             this_video_id = video_id or video_data['mediaid']
 
-            formats = []
-            for source in video_data['sources']:
-                source_url = self._proto_relative_url(source['file'])
-                if base_url:
-                    source_url = compat_urlparse.urljoin(base_url, source_url)
-                source_type = source.get('type') or ''
-                ext = mimetype2ext(source_type) or determine_ext(source_url)
-                if source_type == 'hls' or ext == 'm3u8':
-                    formats.extend(self._extract_m3u8_formats(
-                        source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
-                elif ext == 'mpd':
-                    formats.extend(self._extract_mpd_formats(
-                        source_url, this_video_id, mpd_id=mpd_id, fatal=False))
-                # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
-                elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
-                    formats.append({
-                        'url': source_url,
-                        'vcodec': 'none',
-                        'ext': ext,
-                    })
-                else:
-                    height = int_or_none(source.get('height'))
-                    if height is None:
-                        # Often no height is provided but there is a label in
-                        # format like 1080p.
-                        height = int_or_none(self._search_regex(
-                            r'^(\d{3,})[pP]$', source.get('label') or '',
-                            'height', default=None))
-                    a_format = {
-                        'url': source_url,
-                        'width': int_or_none(source.get('width')),
-                        'height': height,
-                        'ext': ext,
-                    }
-                    if source_url.startswith('rtmp'):
-                        a_format['ext'] = 'flv'
-
-                        # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
-                        # of jwplayer.flash.swf
-                        rtmp_url_parts = re.split(
-                            r'((?:mp4|mp3|flv):)', source_url, 1)
-                        if len(rtmp_url_parts) == 3:
-                            rtmp_url, prefix, play_path = rtmp_url_parts
-                            a_format.update({
-                                'url': rtmp_url,
-                                'play_path': prefix + play_path,
-                            })
-                        if rtmp_params:
-                            a_format.update(rtmp_params)
-                    formats.append(a_format)
+            formats = self._parse_jwplayer_formats(video_data['sources'], this_video_id)
             self._sort_formats(formats)
 
             subtitles = {}
@@ -2278,6 +2229,61 @@ class InfoExtractor(object):
         else:
             return self.playlist_result(entries)
 
+    def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
+                                m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+        formats = []
+        for source in jwplayer_sources_data :
+            source_url = self._proto_relative_url(source['file'])
+            if base_url:
+                source_url = compat_urlparse.urljoin(base_url, source_url)
+            source_type = source.get('type') or ''
+            ext = mimetype2ext(source_type) or determine_ext(source_url)
+            if source_type == 'hls' or ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
+            elif ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    source_url, video_id, mpd_id=mpd_id, fatal=False))
+            # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
+            elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
+                formats.append({
+                    'url': source_url,
+                    'vcodec': 'none',
+                    'ext': ext,
+                })
+            else:
+                height = int_or_none(source.get('height'))
+                if height is None:
+                    # Often no height is provided but there is a label in
+                    # format like 1080p.
+                    height = int_or_none(self._search_regex(
+                        r'^(\d{3,})[pP]$', source.get('label') or '',
+                        'height', default=None))
+                a_format = {
+                    'url': source_url,
+                    'width': int_or_none(source.get('width')),
+                    'height': height,
+                    'ext': ext,
+                }
+                if source_url.startswith('rtmp'):
+                    a_format['ext'] = 'flv'
+
+                    # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
+                    # of jwplayer.flash.swf
+                    rtmp_url_parts = re.split(
+                        r'((?:mp4|mp3|flv):)', source_url, 1)
+                    if len(rtmp_url_parts) == 3:
+                        rtmp_url, prefix, play_path = rtmp_url_parts
+                        a_format.update({
+                            'url': rtmp_url,
+                            'play_path': prefix + play_path,
+                        })
+                    if rtmp_params:
+                        a_format.update(rtmp_params)
+                formats.append(a_format)
+        return formats
+
+
     def _live_title(self, name):
         """ Generate the title for a live video """
         now = datetime.datetime.now()
-- 
cgit v1.2.3


From 0236cd0dfde1dda540c1067a9c5982d482005c47 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 5 Mar 2017 23:25:03 +0700
Subject: [extractor/common] Improve height extraction and extract bitrate

---
 youtube_dl/extractor/common.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 9b73a948c..2887db0c3 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2240,12 +2240,14 @@ class InfoExtractor(object):
             ext = mimetype2ext(source_type) or determine_ext(source_url)
             if source_type == 'hls' or ext == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
-                    source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
+                    source_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id=m3u8_id, fatal=False))
             elif ext == 'mpd':
                 formats.extend(self._extract_mpd_formats(
                     source_url, video_id, mpd_id=mpd_id, fatal=False))
             # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
-            elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
+            elif source_type.startswith('audio') or ext in (
+                    'oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
                 formats.append({
                     'url': source_url,
                     'vcodec': 'none',
@@ -2255,19 +2257,19 @@ class InfoExtractor(object):
                 height = int_or_none(source.get('height'))
                 if height is None:
                     # Often no height is provided but there is a label in
-                    # format like 1080p.
+                    # format like "1080p", "720p SD", or 1080.
                     height = int_or_none(self._search_regex(
-                        r'^(\d{3,})[pP]$', source.get('label') or '',
+                        r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
                         'height', default=None))
                 a_format = {
                     'url': source_url,
                     'width': int_or_none(source.get('width')),
                     'height': height,
+                    'tbr': int_or_none(source.get('bitrate')),
                     'ext': ext,
                 }
                 if source_url.startswith('rtmp'):
                     a_format['ext'] = 'flv'
-
                     # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
                     # of jwplayer.flash.swf
                     rtmp_url_parts = re.split(
-- 
cgit v1.2.3


From 1a2192cb904ff42a309ab2c2477fc226f8651f33 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 5 Mar 2017 23:28:32 +0700
Subject: [extractor/common] Pass arguments to _parse_jwplayer_formats and PEP8

---
 youtube_dl/extractor/common.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 2887db0c3..78dc5be24 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2198,7 +2198,9 @@ class InfoExtractor(object):
 
             this_video_id = video_id or video_data['mediaid']
 
-            formats = self._parse_jwplayer_formats(video_data['sources'], this_video_id)
+            formats = self._parse_jwplayer_formats(
+                video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
+                mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
             self._sort_formats(formats)
 
             subtitles = {}
@@ -2232,7 +2234,7 @@ class InfoExtractor(object):
     def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
                                 m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
         formats = []
-        for source in jwplayer_sources_data :
+        for source in jwplayer_sources_data:
             source_url = self._proto_relative_url(source['file'])
             if base_url:
                 source_url = compat_urlparse.urljoin(base_url, source_url)
@@ -2285,7 +2287,6 @@ class InfoExtractor(object):
                 formats.append(a_format)
         return formats
 
-
     def _live_title(self, name):
         """ Generate the title for a live video """
         now = datetime.datetime.now()
-- 
cgit v1.2.3


From b51dc9db0e6ffc6a7725d92fa2c5de45a5b1be20 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Thu, 16 Mar 2017 03:30:53 +0700
Subject: [extractor/common] Extract SMIL formats from jwplayer

---
 youtube_dl/extractor/common.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 78dc5be24..b51799bfa 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2247,6 +2247,9 @@ class InfoExtractor(object):
             elif ext == 'mpd':
                 formats.extend(self._extract_mpd_formats(
                     source_url, video_id, mpd_id=mpd_id, fatal=False))
+            elif ext == 'smil':
+                formats.extend(self._extract_smil_formats(
+                    source_url, video_id, fatal=False))
             # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
             elif source_type.startswith('audio') or ext in (
                     'oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
-- 
cgit v1.2.3


From 46b18f2349670d395b9d84a57ee3d9b5d221ff4b Mon Sep 17 00:00:00 2001
From: John Hawkinson <jhawk@mit.edu>
Date: Wed, 8 Mar 2017 18:13:54 -0500
Subject: [BostonGlobe] New. Nonstandard version of Brightcove.

Has a "data-brightcove-video-id" instead of a "data-video-id," otherwise
pretty much just Brightcove. Except the Globe isn't all Brightcove
videos, so fallback to Generic, too.

Also, abstract playlist_from_matches() from generic.py to common.py, and use
it here.

History of these changes can be found in
51170427d4b1143572a498dedaee61863a5b2c5b.
---
 youtube_dl/extractor/bostonglobe.py | 72 ++++++++++++++++++++++++++++++++++
 youtube_dl/extractor/common.py      | 28 +++++++++-----
 youtube_dl/extractor/extractors.py  |  1 +
 youtube_dl/extractor/generic.py     | 77 +++++++++++++++++--------------------
 4 files changed, 126 insertions(+), 52 deletions(-)
 create mode 100644 youtube_dl/extractor/bostonglobe.py

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/bostonglobe.py b/youtube_dl/extractor/bostonglobe.py
new file mode 100644
index 000000000..57882fbee
--- /dev/null
+++ b/youtube_dl/extractor/bostonglobe.py
@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+from ..utils import (
+    extract_attributes,
+)
+
+
+class BostonGlobeIE(InfoExtractor):
+    _VALID_URL = r'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P<id>[^/]+)/\w+(?:\.html)?'
+    _TESTS = [
+        {
+            'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html',
+            'md5': '0a62181079c85c2d2b618c9a738aedaf',
+            'info_dict': {
+                'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood',
+                'id': '5320421710001',
+                'ext': 'mp4',
+                'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.',
+                'timestamp': 1486877593,
+                'upload_date': '20170212',
+                'uploader_id': '245991542',
+            },
+        },
+        {
+            # Embedded youtube video; we hand it off to the Generic extractor.
+            'url': 'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html',
+            'md5': '582b40327089d5c0c949b3c54b13c24b',
+            'info_dict': {
+                'title': "Who Is Matt Damon's Favorite Batman?",
+                'id': 'ZW1QCnlA6Qc',
+                'ext': 'mp4',
+                'upload_date': '20170217',
+                'description': 'md5:3b3dccb9375867e0b4d527ed87d307cb',
+                'uploader': 'The Late Late Show with James Corden',
+                'uploader_id': 'TheLateLateShow',
+            },
+            'expected_warnings': ['404'],
+        },
+    ]
+
+    def _real_extract(self, url):
+        page_id = self._match_id(url)
+        webpage = self._download_webpage(url, page_id)
+
+        page_title = self._og_search_title(webpage, default=None)
+
+        # <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject">
+        entries = []
+        for video in re.findall(r'(?i)(<video[^>]+>)', webpage):
+            attrs = extract_attributes(video)
+
+            video_id = attrs.get('data-brightcove-video-id')
+            account_id = attrs.get('data-account')
+            player_id = attrs.get('data-player')
+            embed = attrs.get('data-embed')
+
+            if video_id and account_id and player_id and embed:
+                entries.append(
+                    'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
+                    % (account_id, player_id, embed, video_id))
+
+        if len(entries) == 0:
+            return self.url_result(url, 'Generic')
+        elif len(entries) == 1:
+            return self.url_result(entries[0], 'BrightcoveNew')
+        else:
+            return self.playlist_from_matches(entries, page_id, page_title, ie='BrightcoveNew')
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index b51799bfa..0852b8e8c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -36,34 +36,35 @@ from ..utils import (
     clean_html,
     compiled_regex_type,
     determine_ext,
+    determine_protocol,
     error_to_compat_str,
     ExtractorError,
+    extract_attributes,
     fix_xml_ampersands,
     float_or_none,
     GeoRestrictedError,
     GeoUtils,
     int_or_none,
     js_to_json,
+    mimetype2ext,
+    orderedSet,
+    parse_codecs,
+    parse_duration,
     parse_iso8601,
+    parse_m3u8_attributes,
     RegexNotFoundError,
-    sanitize_filename,
     sanitized_Request,
+    sanitize_filename,
     unescapeHTML,
     unified_strdate,
     unified_timestamp,
+    update_Request,
+    update_url_query,
+    urljoin,
     url_basename,
     xpath_element,
     xpath_text,
     xpath_with_ns,
-    determine_protocol,
-    parse_duration,
-    mimetype2ext,
-    update_Request,
-    update_url_query,
-    parse_m3u8_attributes,
-    extract_attributes,
-    parse_codecs,
-    urljoin,
 )
 
 
@@ -714,6 +715,13 @@ class InfoExtractor(object):
             video_info['title'] = video_title
         return video_info
 
+    def playlist_from_matches(self, matches, video_id, video_title, getter=None, ie=None):
+        urlrs = orderedSet(
+            self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
+            for m in matches)
+        return self.playlist_result(
+            urlrs, playlist_id=video_id, playlist_title=video_title)
+
     @staticmethod
     def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
         """Returns a playlist"""
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 97d68d9ca..40a5c9842 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -117,6 +117,7 @@ from .bleacherreport import (
 from .blinkx import BlinkxIE
 from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE
+from .bostonglobe import BostonGlobeIE
 from .bpb import BpbIE
 from .br import BRIE
 from .bravotv import BravoTVIE
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 0fcb3fdac..a71d6bac0 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1841,14 +1841,6 @@ class GenericIE(InfoExtractor):
         video_description = self._og_search_description(webpage, default=None)
         video_thumbnail = self._og_search_thumbnail(webpage, default=None)
 
-        # Helper method
-        def _playlist_from_matches(matches, getter=None, ie=None):
-            urlrs = orderedSet(
-                self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
-                for m in matches)
-            return self.playlist_result(
-                urlrs, playlist_id=video_id, playlist_title=video_title)
-
         # Look for Brightcove Legacy Studio embeds
         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
         if bc_urls:
@@ -1869,28 +1861,28 @@ class GenericIE(InfoExtractor):
         # Look for Brightcove New Studio embeds
         bc_urls = BrightcoveNewIE._extract_urls(webpage)
         if bc_urls:
-            return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
+            return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew')
 
         # Look for ThePlatform embeds
         tp_urls = ThePlatformIE._extract_urls(webpage)
         if tp_urls:
-            return _playlist_from_matches(tp_urls, ie='ThePlatform')
+            return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
 
         # Look for Vessel embeds
         vessel_urls = VesselIE._extract_urls(webpage)
         if vessel_urls:
-            return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
+            return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
 
         # Look for embedded rtl.nl player
         matches = re.findall(
             r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
             webpage)
         if matches:
-            return _playlist_from_matches(matches, ie='RtlNl')
+            return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
 
         vimeo_urls = VimeoIE._extract_urls(url, webpage)
         if vimeo_urls:
-            return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
+            return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
 
         vid_me_embed_url = self._search_regex(
             r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
@@ -1912,25 +1904,25 @@ class GenericIE(InfoExtractor):
                 (?:embed|v|p)/.+?)
             \1''', webpage)
         if matches:
-            return _playlist_from_matches(
-                matches, lambda m: unescapeHTML(m[1]))
+            return self.playlist_from_matches(
+                matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
 
         # Look for lazyYT YouTube embed
         matches = re.findall(
             r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
         if matches:
-            return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
+            return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
 
         # Look for Wordpress "YouTube Video Importer" plugin
         matches = re.findall(r'''(?x)<div[^>]+
             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
         if matches:
-            return _playlist_from_matches(matches, lambda m: m[-1])
+            return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
 
         matches = DailymotionIE._extract_urls(webpage)
         if matches:
-            return _playlist_from_matches(matches)
+            return self.playlist_from_matches(matches, video_id, video_title)
 
         # Look for embedded Dailymotion playlist player (#3822)
         m = re.search(
@@ -1939,8 +1931,8 @@ class GenericIE(InfoExtractor):
             playlists = re.findall(
                 r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
             if playlists:
-                return _playlist_from_matches(
-                    playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
+                return self.playlist_from_matches(
+                    playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
 
         # Look for embedded Wistia player
         match = re.search(
@@ -2047,8 +2039,9 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
             if embeds:
-                return _playlist_from_matches(
-                    embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
+                return self.playlist_from_matches(
+                    embeds, video_id, video_title,
+                    getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
 
         # Look for Aparat videos
         mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
@@ -2110,13 +2103,13 @@ class GenericIE(InfoExtractor):
         # Look for funnyordie embed
         matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
         if matches:
-            return _playlist_from_matches(
-                matches, getter=unescapeHTML, ie='FunnyOrDie')
+            return self.playlist_from_matches(
+                matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
 
         # Look for BBC iPlayer embed
         matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
         if matches:
-            return _playlist_from_matches(matches, ie='BBCCoUk')
+            return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
 
         # Look for embedded RUTV player
         rutv_url = RUTVIE._extract_url(webpage)
@@ -2131,32 +2124,32 @@ class GenericIE(InfoExtractor):
         # Look for embedded SportBox player
         sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
         if sportbox_urls:
-            return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
+            return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
 
         # Look for embedded XHamster player
         xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
         if xhamster_urls:
-            return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
+            return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
 
         # Look for embedded TNAFlixNetwork player
         tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
         if tnaflix_urls:
-            return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
+            return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
 
         # Look for embedded PornHub player
         pornhub_urls = PornHubIE._extract_urls(webpage)
         if pornhub_urls:
-            return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
+            return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
 
         # Look for embedded DrTuber player
         drtuber_urls = DrTuberIE._extract_urls(webpage)
         if drtuber_urls:
-            return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
+            return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
 
         # Look for embedded RedTube player
         redtube_urls = RedTubeIE._extract_urls(webpage)
         if redtube_urls:
-            return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
+            return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
 
         # Look for embedded Tvigle player
         mobj = re.search(
@@ -2202,12 +2195,12 @@ class GenericIE(InfoExtractor):
         # Look for embedded soundcloud player
         soundcloud_urls = SoundcloudIE._extract_urls(webpage)
         if soundcloud_urls:
-            return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
+            return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
 
         # Look for tunein player
         tunein_urls = TuneInBaseIE._extract_urls(webpage)
         if tunein_urls:
-            return _playlist_from_matches(tunein_urls)
+            return self.playlist_from_matches(tunein_urls, video_id, video_title)
 
         # Look for embedded mtvservices player
         mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
@@ -2490,35 +2483,35 @@ class GenericIE(InfoExtractor):
         # Look for DBTV embeds
         dbtv_urls = DBTVIE._extract_urls(webpage)
         if dbtv_urls:
-            return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
+            return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
 
         # Look for Videa embeds
         videa_urls = VideaIE._extract_urls(webpage)
         if videa_urls:
-            return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
+            return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
 
         # Look for 20 minuten embeds
         twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
         if twentymin_urls:
-            return _playlist_from_matches(
-                twentymin_urls, ie=TwentyMinutenIE.ie_key())
+            return self.playlist_from_matches(
+                twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
 
         # Look for Openload embeds
         openload_urls = OpenloadIE._extract_urls(webpage)
         if openload_urls:
-            return _playlist_from_matches(
-                openload_urls, ie=OpenloadIE.ie_key())
+            return self.playlist_from_matches(
+                openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
 
         # Look for VideoPress embeds
         videopress_urls = VideoPressIE._extract_urls(webpage)
         if videopress_urls:
-            return _playlist_from_matches(
-                videopress_urls, ie=VideoPressIE.ie_key())
+            return self.playlist_from_matches(
+                videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
 
         # Look for Rutube embeds
         rutube_urls = RutubeIE._extract_urls(webpage)
         if rutube_urls:
-            return _playlist_from_matches(
+            return self.playlist_from_matches(
                 rutube_urls, ie=RutubeIE.ie_key())
 
         # Looking for http://schema.org/VideoObject
-- 
cgit v1.2.3


From c73e330e7adc9c0c15ac51aeea8fbb7dad95351a Mon Sep 17 00:00:00 2001
From: Random User <rndusr@posteo.de>
Date: Sat, 25 Mar 2017 19:38:30 +0100
Subject: _find_jwplayer_data() returns dict or None

This simplifies code for callers of `_find_jwplayer_data()` which no longer have
to run `_parse_json()` on the return value.

It also makes sure that `_find_jwplayer_data()` returns either a `dict` or
`None` and nothing else.
---
 youtube_dl/extractor/common.py  | 18 ++++++++++++------
 youtube_dl/extractor/generic.py | 12 ++++--------
 youtube_dl/extractor/tvnoe.py   |  5 ++---
 3 files changed, 18 insertions(+), 17 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index eb3c091aa..c2ca73ee1 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2161,18 +2161,24 @@ class InfoExtractor(object):
                     })
         return formats
 
-    @staticmethod
-    def _find_jwplayer_data(webpage):
+    def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
         mobj = re.search(
             r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
             webpage)
         if mobj:
-            return mobj.group('options')
+            try:
+                jwplayer_data = self._parse_json(mobj.group('options'),
+                                                 video_id=video_id,
+                                                 transform_source=transform_source)
+            except ExtractorError:
+                pass
+            else:
+                if isinstance(jwplayer_data, dict):
+                    return jwplayer_data
 
     def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
-        jwplayer_data = self._parse_json(
-            self._find_jwplayer_data(webpage), video_id,
-            transform_source=js_to_json)
+        jwplayer_data = self._find_jwplayer_data(
+            webpage, video_id, transform_source=js_to_json)
         return self._parse_jwplayer_data(
             jwplayer_data, video_id, *args, **kwargs)
 
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index c8c103ae3..3fe0237b6 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -2518,14 +2518,10 @@ class GenericIE(InfoExtractor):
                 self._sort_formats(entry['formats'])
             return self.playlist_result(entries)
 
-        jwplayer_data_str = self._find_jwplayer_data(webpage)
-        if jwplayer_data_str:
-            try:
-                jwplayer_data = self._parse_json(
-                    jwplayer_data_str, video_id, transform_source=js_to_json)
-                return self._parse_jwplayer_data(jwplayer_data, video_id)
-            except ExtractorError:
-                pass
+        jwplayer_data = self._find_jwplayer_data(
+            webpage, video_id, transform_source=js_to_json)
+        if jwplayer_data:
+            return self._parse_jwplayer_data(jwplayer_data, video_id)
 
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py
index 1a5b76bf2..26a5aeae4 100644
--- a/youtube_dl/extractor/tvnoe.py
+++ b/youtube_dl/extractor/tvnoe.py
@@ -31,9 +31,8 @@ class TVNoeIE(InfoExtractor):
             r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe URL')
 
         ifs_page = self._download_webpage(iframe_url, video_id)
-        jwplayer_data = self._parse_json(
-            self._find_jwplayer_data(ifs_page),
-            video_id, transform_source=js_to_json)
+        jwplayer_data = self._find_jwplayer_data(
+            ifs_page, video_id, transform_source=js_to_json)
         info_dict = self._parse_jwplayer_data(
             jwplayer_data, video_id, require_title=False, base_url=iframe_url)
 
-- 
cgit v1.2.3


From 4457823dda410c5406f5ab5474b9b1f9325fa7ed Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 2 Apr 2017 03:56:49 +0700
Subject: [extractor/common] Move censorship checks to a separate method and
 add check for just another ISP

---
 youtube_dl/extractor/common.py | 48 +++++++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 19 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 6c3c095f7..cdfa7000b 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -547,6 +547,34 @@ class InfoExtractor(object):
 
         return encoding
 
+    def __check_blocked(self, content):
+        first_block = content[:512]
+        if ('<title>Access to this site is blocked</title>' in content and
+                'Websense' in first_block):
+            msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
+            blocked_iframe = self._html_search_regex(
+                r'<iframe src="([^"]+)"', content,
+                'Websense information URL', default=None)
+            if blocked_iframe:
+                msg += ' Visit %s for more details' % blocked_iframe
+            raise ExtractorError(msg, expected=True)
+        if '<title>The URL you requested has been blocked</title>' in first_block:
+            msg = (
+                'Access to this webpage has been blocked by Indian censorship. '
+                'Use a VPN or proxy server (with --proxy) to route around it.')
+            block_msg = self._html_search_regex(
+                r'</h1><p>(.*?)</p>',
+                content, 'block message', default=None)
+            if block_msg:
+                msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
+            raise ExtractorError(msg, expected=True)
+        if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content and
+                'blocklist.rkn.gov.ru' in content):
+            raise ExtractorError(
+                'Access to this webpage has been blocked by decision of the Russian government. '
+                'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
+                expected=True)
+
     def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
         content_type = urlh.headers.get('Content-Type', '')
         webpage_bytes = urlh.read()
@@ -588,25 +616,7 @@ class InfoExtractor(object):
         except LookupError:
             content = webpage_bytes.decode('utf-8', 'replace')
 
-        if ('<title>Access to this site is blocked</title>' in content and
-                'Websense' in content[:512]):
-            msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
-            blocked_iframe = self._html_search_regex(
-                r'<iframe src="([^"]+)"', content,
-                'Websense information URL', default=None)
-            if blocked_iframe:
-                msg += ' Visit %s for more details' % blocked_iframe
-            raise ExtractorError(msg, expected=True)
-        if '<title>The URL you requested has been blocked</title>' in content[:512]:
-            msg = (
-                'Access to this webpage has been blocked by Indian censorship. '
-                'Use a VPN or proxy server (with --proxy) to route around it.')
-            block_msg = self._html_search_regex(
-                r'</h1><p>(.*?)</p>',
-                content, 'block message', default=None)
-            if block_msg:
-                msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
-            raise ExtractorError(msg, expected=True)
+        self.__check_blocked(content)
 
         return content
 
-- 
cgit v1.2.3


From fd47550885fc0abcdddbfdec70e00bc0d6a9865e Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 2 Apr 2017 04:42:10 +0700
Subject: [extractor/common] Add coding cookie

---
 youtube_dl/extractor/common.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index cdfa7000b..ae8af61de 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import base64
-- 
cgit v1.2.3


From 40fcba5edb0f54f09e33a193a0ffefb5668ca694 Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Wed, 12 Apr 2017 20:38:43 +0100
Subject: improve coding style

---
 youtube_dl/YoutubeDL.py                       | 2 +-
 youtube_dl/compat.py                          | 2 +-
 youtube_dl/downloader/rtmp.py                 | 2 +-
 youtube_dl/extractor/bbc.py                   | 4 ++--
 youtube_dl/extractor/common.py                | 2 +-
 youtube_dl/extractor/rudo.py                  | 2 +-
 youtube_dl/extractor/viewlift.py              | 2 +-
 youtube_dl/extractor/vlive.py                 | 4 ++--
 youtube_dl/postprocessor/ffmpeg.py            | 2 +-
 youtube_dl/postprocessor/metadatafromtitle.py | 2 +-
 10 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 54bc8b06d..7953670a7 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -855,7 +855,7 @@ class YoutubeDL(object):
 
             return self.process_ie_result(
                 new_result, download=download, extra_info=extra_info)
-        elif result_type == 'playlist' or result_type == 'multi_video':
+        elif result_type in ('playlist', 'multi_video'):
             # We process each entry in the playlist
             playlist = ie_result.get('title') or ie_result.get('id')
             self.to_screen('[download] Downloading playlist: %s' % playlist)
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 0c119e417..39527117f 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -2692,7 +2692,7 @@ else:
                 userhome = pwent.pw_dir
             userhome = userhome.rstrip('/')
             return (userhome + path[i:]) or '/'
-    elif compat_os_name == 'nt' or compat_os_name == 'ce':
+    elif compat_os_name in ('nt', 'ce'):
         def compat_expanduser(path):
             """Expand ~ and ~user constructs.
 
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py
index 9de6e70bb..b823b5171 100644
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@@ -169,7 +169,7 @@ class RtmpFD(FileDownloader):
             self.report_error('[rtmpdump] Could not connect to RTMP server.')
             return False
 
-        while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live:
+        while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
             prevsize = os.path.getsize(encodeFilename(tmpfilename))
             self.to_screen('[rtmpdump] %s bytes' % prevsize)
             time.sleep(5.0)  # This seems to be needed
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index 0e05b782b..dd65b8d86 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -409,7 +409,7 @@ class BBCCoUkIE(InfoExtractor):
                 description = smp_config['summary']
                 for item in smp_config['items']:
                     kind = item['kind']
-                    if kind != 'programme' and kind != 'radioProgramme':
+                    if kind not in ('programme', 'radioProgramme'):
                         continue
                     programme_id = item.get('vpid')
                     duration = int_or_none(item.get('duration'))
@@ -450,7 +450,7 @@ class BBCCoUkIE(InfoExtractor):
 
         for item in self._extract_items(playlist):
             kind = item.get('kind')
-            if kind != 'programme' and kind != 'radioProgramme':
+            if kind not in ('programme', 'radioProgramme'):
                 continue
             title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
             description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index ae8af61de..dcc9d628a 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1779,7 +1779,7 @@ class InfoExtractor(object):
                     if content_type == 'text':
                         # TODO implement WebVTT downloading
                         pass
-                    elif content_type == 'video' or content_type == 'audio':
+                    elif content_type in ('video', 'audio'):
                         base_url = ''
                         for element in (representation, adaptation_set, period, mpd_doc):
                             base_url_e = element.find(_add_ns('BaseURL'))
diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py
index 51644011e..f036f6757 100644
--- a/youtube_dl/extractor/rudo.py
+++ b/youtube_dl/extractor/rudo.py
@@ -26,7 +26,7 @@ class RudoIE(InfoExtractor):
     }
 
     @classmethod
-    def _extract_url(self, webpage):
+    def _extract_url(cls, webpage):
         mobj = re.search(
             r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
             webpage)
diff --git a/youtube_dl/extractor/viewlift.py b/youtube_dl/extractor/viewlift.py
index 18735cfb2..1f29c273f 100644
--- a/youtube_dl/extractor/viewlift.py
+++ b/youtube_dl/extractor/viewlift.py
@@ -68,7 +68,7 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
             type_ = source.get('type')
             ext = determine_ext(file_)
             format_id = source.get('label') or ext
-            if all(v == 'm3u8' or v == 'hls' for v in (type_, ext)):
+            if all(v in ('m3u8', 'hls') for v in (type_, ext)):
                 formats.extend(self._extract_m3u8_formats(
                     file_, video_id, 'mp4', m3u8_id='hls'))
             else:
diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py
index b9718901b..e58940607 100644
--- a/youtube_dl/extractor/vlive.py
+++ b/youtube_dl/extractor/vlive.py
@@ -70,9 +70,9 @@ class VLiveIE(InfoExtractor):
         status, long_video_id, key = params[2], params[5], params[6]
         status = remove_start(status, 'PRODUCT_')
 
-        if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR':
+        if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
             return self._live(video_id, webpage)
-        elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO':
+        elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
             if long_video_id and key:
                 return self._replay(video_id, webpage, long_video_id, key)
             else:
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 7c162d92a..665109558 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -552,7 +552,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
             sub_filenames.append(old_file)
             new_file = subtitles_filename(filename, lang, new_ext)
 
-            if ext == 'dfxp' or ext == 'ttml' or ext == 'tt':
+            if ext in ('dfxp', 'ttml', 'tt'):
                 self._downloader.report_warning(
                     'You have requested to convert dfxp (TTML) subtitles into another format, '
                     'which results in style information loss')
diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py
index 164edd3a8..a7d637a3c 100644
--- a/youtube_dl/postprocessor/metadatafromtitle.py
+++ b/youtube_dl/postprocessor/metadatafromtitle.py
@@ -26,7 +26,7 @@ class MetadataFromTitlePP(PostProcessor):
             regex += r'(?P<' + match.group(1) + '>.+)'
             lastpos = match.end()
         if lastpos < len(fmt):
-            regex += re.escape(fmt[lastpos:len(fmt)])
+            regex += re.escape(fmt[lastpos:])
         return regex
 
     def run(self, info):
-- 
cgit v1.2.3


From bf1b87cd919f07b7fef204838be73981e122ee11 Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Mon, 17 Apr 2017 08:48:24 +0100
Subject: [common] Relax JWPlayer regex and remove duplicate urls(#12768)

---
 youtube_dl/extractor/common.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index dcc9d628a..12e010a0d 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2182,7 +2182,7 @@ class InfoExtractor(object):
 
     def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
         mobj = re.search(
-            r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
+            r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\).*?\.setup\s*\((?P<options>[^)]+)\)',
             webpage)
         if mobj:
             try:
@@ -2258,11 +2258,17 @@ class InfoExtractor(object):
 
     def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
                                 m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+        urls = []
         formats = []
         for source in jwplayer_sources_data:
-            source_url = self._proto_relative_url(source['file'])
+            source_url = self._proto_relative_url(source.get('file'))
+            if not source_url:
+                continue
             if base_url:
                 source_url = compat_urlparse.urljoin(base_url, source_url)
+            if source_url in urls:
+                continue
+            urls.append(source_url)
             source_type = source.get('type') or ''
             ext = mimetype2ext(source_type) or determine_ext(source_url)
             if source_type == 'hls' or ext == 'm3u8':
-- 
cgit v1.2.3


From bae1404893341ed89f4c9b556aa4068c13ed9f7a Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Tue, 18 Apr 2017 22:21:38 +0700
Subject: [extractor/common] Add support for video of WebPage context in
 _json_ld (closes #12778)

---
 youtube_dl/extractor/common.py | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 12e010a0d..61d97ab72 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -976,6 +976,22 @@ class InfoExtractor(object):
             return info
         if isinstance(json_ld, dict):
             json_ld = [json_ld]
+
+        def extract_video_object(e):
+            assert e['@type'] == 'VideoObject'
+            info.update({
+                'url': e.get('contentUrl'),
+                'title': unescapeHTML(e.get('name')),
+                'description': unescapeHTML(e.get('description')),
+                'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
+                'duration': parse_duration(e.get('duration')),
+                'timestamp': unified_timestamp(e.get('uploadDate')),
+                'filesize': float_or_none(e.get('contentSize')),
+                'tbr': int_or_none(e.get('bitrate')),
+                'width': int_or_none(e.get('width')),
+                'height': int_or_none(e.get('height')),
+            })
+
         for e in json_ld:
             if e.get('@context') == 'http://schema.org':
                 item_type = e.get('@type')
@@ -1000,18 +1016,11 @@ class InfoExtractor(object):
                         'description': unescapeHTML(e.get('articleBody')),
                     })
                 elif item_type == 'VideoObject':
-                    info.update({
-                        'url': e.get('contentUrl'),
-                        'title': unescapeHTML(e.get('name')),
-                        'description': unescapeHTML(e.get('description')),
-                        'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
-                        'duration': parse_duration(e.get('duration')),
-                        'timestamp': unified_timestamp(e.get('uploadDate')),
-                        'filesize': float_or_none(e.get('contentSize')),
-                        'tbr': int_or_none(e.get('bitrate')),
-                        'width': int_or_none(e.get('width')),
-                        'height': int_or_none(e.get('height')),
-                    })
+                    extract_video_object(e)
+                elif item_type == 'WebPage':
+                    video = e.get('video')
+                    if isinstance(video, dict) and video.get('@type') == 'VideoObject':
+                        extract_video_object(video)
                 break
         return dict((k, v) for k, v in info.items() if v is not None)
 
-- 
cgit v1.2.3


From cb2520802d7b8efdc71d6d97aeca984b5f878716 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sat, 22 Apr 2017 07:01:00 +0700
Subject: [extractor/common] Improve m3u8 extraction (closes #12211) * Extract
 m3u8 parsing to separate method * Improve rendition groups extraction * Build
 stream name according stream GROUP-ID * Ignore reference to AUDIO group
 without URI when stream has no CODECS + Add test coverage for parsing m3u8
 from #11507, #11995, #12211 and twitch vod

---
 .gitignore                                   |   1 -
 test/test_InfoExtractor.py                   | 308 ++++++++++++++++++++++++++-
 test/testdata/m3u8/pluzz_francetv_11507.m3u8 |  14 ++
 test/testdata/m3u8/teamcoco_11995.m3u8       |  16 ++
 test/testdata/m3u8/toggle_mobile_12211.m3u8  |  13 ++
 test/testdata/m3u8/twitch_vod.m3u8           |  20 ++
 youtube_dl/extractor/common.py               | 154 +++++++++-----
 7 files changed, 466 insertions(+), 60 deletions(-)
 create mode 100644 test/testdata/m3u8/pluzz_francetv_11507.m3u8
 create mode 100644 test/testdata/m3u8/teamcoco_11995.m3u8
 create mode 100644 test/testdata/m3u8/toggle_mobile_12211.m3u8
 create mode 100644 test/testdata/m3u8/twitch_vod.m3u8

(limited to 'youtube_dl/extractor/common.py')

diff --git a/.gitignore b/.gitignore
index 9ce4b5e2d..bef084baa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,7 +36,6 @@ updates_key.pem
 *.swf
 *.part
 *.swp
-test/testdata
 test/local_parameters.json
 .tox
 youtube-dl.zsh
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 881197afb..57205f6ae 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -3,12 +3,13 @@
 from __future__ import unicode_literals
 
 # Allow direct execution
+import io
 import os
 import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from test.helper import FakeYDL, expect_dict
+from test.helper import FakeYDL, expect_dict, expect_value
 from youtube_dl.extractor.common import InfoExtractor
 from youtube_dl.extractor import YoutubeIE, get_info_extractor
 from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
@@ -175,6 +176,311 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 }]
             })
 
+    def test_parse_m3u8_formats(self):
+        _TEST_CASES = [
+            (
+                # https://github.com/rg3/youtube-dl/issues/11507
+                # http://pluzz.francetv.fr/videos/le_ministere.html
+                'pluzz_francetv_11507',
+                'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+                [{
+                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
+                    'ext': 'mp4',
+                    'format_id': 'meta',
+                    'format_note': 'Quality selection URL',
+                    'protocol': 'm3u8',
+                    'preference': -100,
+                    'resolution': 'multiple'
+                }, {
+                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0',
+                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0',
+                    'ext': 'mp4',
+                    'format_id': '180',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.66.30',
+                    'tbr': 180,
+                    'width': 256,
+                    'height': 144,
+                }, {
+                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0',
+                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0',
+                    'ext': 'mp4',
+                    'format_id': '303',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.66.30',
+                    'tbr': 303,
+                    'width': 320,
+                    'height': 180,
+                }, {
+                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0',
+                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0',
+                    'ext': 'mp4',
+                    'format_id': '575',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.66.30',
+                    'tbr': 575,
+                    'width': 512,
+                    'height': 288,
+                }, {
+                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0',
+                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0',
+                    'ext': 'mp4',
+                    'format_id': '831',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.77.30',
+                    'tbr': 831,
+                    'width': 704,
+                    'height': 396,
+                }, {
+                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0',
+                    'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0',
+                    'ext': 'mp4',
+                    'protocol': 'm3u8',
+                    'format_id': '1467',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.77.30',
+                    'tbr': 1467,
+                    'width': 1024,
+                    'height': 576,
+                }]
+            ),
+            (
+                # https://github.com/rg3/youtube-dl/issues/11995
+                # http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor
+                'teamcoco_11995',
+                'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                [{
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'meta',
+                    'format_note': 'Quality selection URL',
+                    'protocol': 'm3u8',
+                    'preference': -100,
+                    'resolution': 'multiple',
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'audio-0-Default',
+                    'protocol': 'm3u8',
+                    'vcodec': 'none',
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'audio-1-Default',
+                    'protocol': 'm3u8',
+                    'vcodec': 'none',
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '71',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.5',
+                    'vcodec': 'none',
+                    'tbr': 71,
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '413',
+                    'protocol': 'm3u8',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.42001e',
+                    'tbr': 413,
+                    'width': 400,
+                    'height': 224,
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '522',
+                    'protocol': 'm3u8',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.42001e',
+                    'tbr': 522,
+                    'width': 400,
+                    'height': 224,
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-1m_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-1m_v4.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '1205',
+                    'protocol': 'm3u8',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4d001e',
+                    'tbr': 1205,
+                    'width': 640,
+                    'height': 360,
+                }, {
+                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-2m_v4.m3u8',
+                    'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-2m_v4.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '2374',
+                    'protocol': 'm3u8',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4d001f',
+                    'tbr': 2374,
+                    'width': 1024,
+                    'height': 576,
+                }]
+            ),
+            (
+                # https://github.com/rg3/youtube-dl/issues/12211
+                # http://video.toggle.sg/en/series/whoopie-s-world/ep3/478601
+                'toggle_mobile_12211',
+                'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+                [{
+                    'url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'meta',
+                    'format_note': 'Quality selection URL',
+                    'protocol': 'm3u8',
+                    'preference': -100,
+                    'resolution': 'multiple'
+                }, {
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'audio-English',
+                    'protocol': 'm3u8',
+                    'language': 'eng',
+                    'vcodec': 'none',
+                }, {
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'audio-Undefined',
+                    'protocol': 'm3u8',
+                    'language': 'und',
+                    'vcodec': 'none',
+                }, {
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8',
+                    'manifest_url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '155',
+                    'protocol': 'm3u8',
+                    'tbr': 155,
+                    'width': 320,
+                    'height': 180,
+                }, {
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8',
+                    'manifest_url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '502',
+                    'protocol': 'm3u8',
+                    'tbr': 502,
+                    'width': 480,
+                    'height': 270,
+                }, {
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8',
+                    'manifest_url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '827',
+                    'protocol': 'm3u8',
+                    'tbr': 827,
+                    'width': 640,
+                    'height': 360,
+                }, {
+                    'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8',
+                    'manifest_url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '1396',
+                    'protocol': 'm3u8',
+                    'tbr': 1396,
+                    'width': 854,
+                    'height': 480,
+                }]
+            ),
+            (
+                # http://www.twitch.tv/riotgames/v/6528877
+                'twitch_vod',
+                'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+                [{
+                    'url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
+                    'ext': 'mp4',
+                    'format_id': 'meta',
+                    'format_note': 'Quality selection URL',
+                    'protocol': 'm3u8',
+                    'preference': -100,
+                    'resolution': 'multiple'
+                }, {
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'Audio Only',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'none',
+                    'tbr': 182,
+                }, {
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'Mobile',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.42C00D',
+                    'tbr': 280,
+                    'width': 400,
+                    'height': 226,
+                }, {
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'Low',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.42C01E',
+                    'tbr': 628,
+                    'width': 640,
+                    'height': 360,
+                }, {
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'Medium',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.42C01E',
+                    'tbr': 893,
+                    'width': 852,
+                    'height': 480,
+                }, {
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'High',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.42C01F',
+                    'tbr': 1603,
+                    'width': 1280,
+                    'height': 720,
+                }, {
+                    'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8',
+                    'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'Source',
+                    'protocol': 'm3u8',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc1.100.31',
+                    'tbr': 3214,
+                    'width': 1280,
+                    'height': 720,
+                }]
+            )
+        ]
+
+        for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
+            with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
+                         mode='r', encoding='utf-8') as f:
+                formats = self.ie._parse_m3u8_formats(
+                    f.read(), m3u8_url, ext='mp4')
+                self.ie._sort_formats(formats)
+                expect_value(self, formats, expected_formats, None)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/testdata/m3u8/pluzz_francetv_11507.m3u8 b/test/testdata/m3u8/pluzz_francetv_11507.m3u8
new file mode 100644
index 000000000..0809f5aa0
--- /dev/null
+++ b/test/testdata/m3u8/pluzz_francetv_11507.m3u8
@@ -0,0 +1,14 @@
+#EXTM3U
+    #EXT-X-VERSION:5
+    #EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Francais",DEFAULT=NO,FORCED=NO,URI="http://replayftv-pmd.francetv.fr/subtitles/2017/16/156589847-1492488987.m3u8",LANGUAGE="fra"
+    #EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aac",LANGUAGE="fra",NAME="Francais",DEFAULT=YES, AUTOSELECT=YES
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=180000,RESOLUTION=256x144,CODECS="avc1.66.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=303000,RESOLUTION=320x180,CODECS="avc1.66.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=575000,RESOLUTION=512x288,CODECS="avc1.66.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=831000,RESOLUTION=704x396,CODECS="avc1.77.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0
+#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=1467000,RESOLUTION=1024x576,CODECS="avc1.77.30, mp4a.40.2"
+http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0
diff --git a/test/testdata/m3u8/teamcoco_11995.m3u8 b/test/testdata/m3u8/teamcoco_11995.m3u8
new file mode 100644
index 000000000..a6e421697
--- /dev/null
+++ b/test/testdata/m3u8/teamcoco_11995.m3u8
@@ -0,0 +1,16 @@
+#EXTM3U
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-0",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8"
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-1",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=37862000,CODECS="avc1.4d001f",URI="hls/CONAN_020217_Highlight_show-2m_iframe.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=18750000,CODECS="avc1.4d001e",URI="hls/CONAN_020217_Highlight_show-1m_iframe.m3u8"
+#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=6535000,CODECS="avc1.42001e",URI="hls/CONAN_020217_Highlight_show-400k_iframe.m3u8"
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2374000,RESOLUTION=1024x576,CODECS="avc1.4d001f,mp4a.40.2",AUDIO="audio-0"
+hls/CONAN_020217_Highlight_show-2m_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1205000,RESOLUTION=640x360,CODECS="avc1.4d001e,mp4a.40.2",AUDIO="audio-0"
+hls/CONAN_020217_Highlight_show-1m_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=522000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.2",AUDIO="audio-0"
+hls/CONAN_020217_Highlight_show-400k_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=413000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.5",AUDIO="audio-1"
+hls/CONAN_020217_Highlight_show-400k_v4.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=71000,CODECS="mp4a.40.5",AUDIO="audio-1"
+hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8
diff --git a/test/testdata/m3u8/toggle_mobile_12211.m3u8 b/test/testdata/m3u8/toggle_mobile_12211.m3u8
new file mode 100644
index 000000000..69604e683
--- /dev/null
+++ b/test/testdata/m3u8/toggle_mobile_12211.m3u8
@@ -0,0 +1,13 @@
+#EXTM3U
+#EXT-X-VERSION:4
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="eng",NAME="English",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8"
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="und",NAME="Undefined",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8"
+
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=155648,RESOLUTION=320x180,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=502784,RESOLUTION=480x270,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=827392,RESOLUTION=640x360,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1396736,RESOLUTION=854x480,AUDIO="audio"
+http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8
diff --git a/test/testdata/m3u8/twitch_vod.m3u8 b/test/testdata/m3u8/twitch_vod.m3u8
new file mode 100644
index 000000000..7617277ca
--- /dev/null
+++ b/test/testdata/m3u8/twitch_vod.m3u8
@@ -0,0 +1,20 @@
+#EXTM3U
+#EXT-X-TWITCH-INFO:ORIGIN="s3",CLUSTER="edgecast_vod",REGION="EU",MANIFEST-CLUSTER="edgecast_vod",USER-IP="109.171.17.81"
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="chunked",NAME="Source",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=3214134,CODECS="avc1.100.31,mp4a.40.2",RESOLUTION="1280x720",VIDEO="chunked"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="high",NAME="High",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1603789,CODECS="avc1.42C01F,mp4a.40.2",RESOLUTION="1280x720",VIDEO="high"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="medium",NAME="Medium",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=893387,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="852x480",VIDEO="medium"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="low",NAME="Low",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=628347,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="640x360",VIDEO="low"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="mobile",NAME="Mobile",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=280474,CODECS="avc1.42C00D,mp4a.40.2",RESOLUTION="400x226",VIDEO="mobile"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8
+#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="audio_only",NAME="Audio Only",AUTOSELECT=NO,DEFAULT=NO
+#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=182725,CODECS="mp4a.40.2",VIDEO="audio_only"
+https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 61d97ab72..359c549c5 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1312,17 +1312,25 @@ class InfoExtractor(object):
                               entry_protocol='m3u8', preference=None,
                               m3u8_id=None, note=None, errnote=None,
                               fatal=True, live=False):
-
         res = self._download_webpage_handle(
             m3u8_url, video_id,
             note=note or 'Downloading m3u8 information',
             errnote=errnote or 'Failed to download m3u8 information',
             fatal=fatal)
+
         if res is False:
             return []
+
         m3u8_doc, urlh = res
         m3u8_url = urlh.geturl()
 
+        return self._parse_m3u8_formats(
+            m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
+            preference=preference, m3u8_id=m3u8_id, live=live)
+
+    def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
+                            entry_protocol='m3u8', preference=None,
+                            m3u8_id=None, live=False):
         if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
             return []
 
@@ -1333,19 +1341,21 @@ class InfoExtractor(object):
             if re.match(r'^https?://', u)
             else compat_urlparse.urljoin(m3u8_url, u))
 
-        # We should try extracting formats only from master playlists [1], i.e.
-        # playlists that describe available qualities. On the other hand media
-        # playlists [2] should be returned as is since they contain just the media
-        # without qualities renditions.
+        # References:
+        # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
+        # 2. https://github.com/rg3/youtube-dl/issues/12211
+
+        # We should try extracting formats only from master playlists [1, 4.3.4],
+        # i.e. playlists that describe available qualities. On the other hand
+        # media playlists [1, 4.3.3] should be returned as is since they contain
+        # just the media without qualities renditions.
         # Fortunately, master playlist can be easily distinguished from media
-        # playlist based on particular tags availability. As of [1, 2] master
-        # playlist tags MUST NOT appear in a media playist and vice versa.
-        # As of [3] #EXT-X-TARGETDURATION tag is REQUIRED for every media playlist
-        # and MUST NOT appear in master playlist thus we can clearly detect media
-        # playlist with this criterion.
-        # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.4
-        # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
-        # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
+        # playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
+        # master playlist tags MUST NOT appear in a media playist and vice versa.
+        # As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
+        # media playlist and MUST NOT appear in master playlist thus we can
+        # clearly detect media playlist with this criterion.
+
         if '#EXT-X-TARGETDURATION' in m3u8_doc:  # media playlist, return as is
             return [{
                 'url': m3u8_url,
@@ -1354,52 +1364,67 @@ class InfoExtractor(object):
                 'protocol': entry_protocol,
                 'preference': preference,
             }]
-        audio_in_video_stream = {}
-        last_info = {}
-        last_media = {}
+
+        groups = {}
+        last_stream_inf = {}
+
+        def extract_media(x_media_line):
+            media = parse_m3u8_attributes(x_media_line)
+            # As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED
+            media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
+            if not (media_type and group_id and name):
+                return
+            groups.setdefault(group_id, []).append(media)
+            if media_type not in ('VIDEO', 'AUDIO'):
+                return
+            media_url = media.get('URI')
+            if media_url:
+                format_id = []
+                for v in (group_id, name):
+                    if v:
+                        format_id.append(v)
+                f = {
+                    'format_id': '-'.join(format_id),
+                    'url': format_url(media_url),
+                    'language': media.get('LANGUAGE'),
+                    'ext': ext,
+                    'protocol': entry_protocol,
+                    'preference': preference,
+                }
+                if media_type == 'AUDIO':
+                    f['vcodec'] = 'none'
+                formats.append(f)
+
+        def build_stream_name():
+            # Despite specification does not mention NAME attribute for
+            # EXT-X-STREAM-INF it still sometimes may be present
+            stream_name = last_stream_inf.get('NAME')
+            if stream_name:
+                return stream_name
+            # If there is no NAME in EXT-X-STREAM-INF it will be obtained
+            # from corresponding rendition group
+            stream_group_id = last_stream_inf.get('VIDEO')
+            if not stream_group_id:
+                return
+            stream_group = groups.get(stream_group_id)
+            if not stream_group:
+                return stream_group_id
+            rendition = stream_group[0]
+            return rendition.get('NAME') or stream_group_id
+
         for line in m3u8_doc.splitlines():
             if line.startswith('#EXT-X-STREAM-INF:'):
-                last_info = parse_m3u8_attributes(line)
+                last_stream_inf = parse_m3u8_attributes(line)
             elif line.startswith('#EXT-X-MEDIA:'):
-                media = parse_m3u8_attributes(line)
-                media_type = media.get('TYPE')
-                if media_type in ('VIDEO', 'AUDIO'):
-                    group_id = media.get('GROUP-ID')
-                    media_url = media.get('URI')
-                    if media_url:
-                        format_id = []
-                        for v in (group_id, media.get('NAME')):
-                            if v:
-                                format_id.append(v)
-                        f = {
-                            'format_id': '-'.join(format_id),
-                            'url': format_url(media_url),
-                            'language': media.get('LANGUAGE'),
-                            'ext': ext,
-                            'protocol': entry_protocol,
-                            'preference': preference,
-                        }
-                        if media_type == 'AUDIO':
-                            f['vcodec'] = 'none'
-                            if group_id and not audio_in_video_stream.get(group_id):
-                                audio_in_video_stream[group_id] = False
-                        formats.append(f)
-                    else:
-                        # When there is no URI in EXT-X-MEDIA let this tag's
-                        # data be used by regular URI lines below
-                        last_media = media
-                        if media_type == 'AUDIO' and group_id:
-                            audio_in_video_stream[group_id] = True
+                extract_media(line)
             elif line.startswith('#') or not line.strip():
                 continue
             else:
-                tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000)
+                tbr = int_or_none(last_stream_inf.get('AVERAGE-BANDWIDTH') or last_stream_inf.get('BANDWIDTH'), scale=1000)
                 format_id = []
                 if m3u8_id:
                     format_id.append(m3u8_id)
-                # Despite specification does not mention NAME attribute for
-                # EXT-X-STREAM-INF it still sometimes may be present
-                stream_name = last_info.get('NAME') or last_media.get('NAME')
+                stream_name = build_stream_name()
                 # Bandwidth of live streams may differ over time thus making
                 # format_id unpredictable. So it's better to keep provided
                 # format_id intact.
@@ -1412,11 +1437,11 @@ class InfoExtractor(object):
                     'manifest_url': manifest_url,
                     'tbr': tbr,
                     'ext': ext,
-                    'fps': float_or_none(last_info.get('FRAME-RATE')),
+                    'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
                     'protocol': entry_protocol,
                     'preference': preference,
                 }
-                resolution = last_info.get('RESOLUTION')
+                resolution = last_stream_inf.get('RESOLUTION')
                 if resolution:
                     mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
                     if mobj:
@@ -1432,13 +1457,26 @@ class InfoExtractor(object):
                         'vbr': vbr,
                         'abr': abr,
                     })
-                f.update(parse_codecs(last_info.get('CODECS')))
-                if audio_in_video_stream.get(last_info.get('AUDIO')) is False and f['vcodec'] != 'none':
-                    # TODO: update acodec for audio only formats with the same GROUP-ID
-                    f['acodec'] = 'none'
+                codecs = parse_codecs(last_stream_inf.get('CODECS'))
+                f.update(codecs)
+                audio_group_id = last_stream_inf.get('AUDIO')
+                # As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
+                # references a rendition group MUST have a CODECS attribute.
+                # However, this is not always respected, for example, [2]
+                # contains EXT-X-STREAM-INF tag which references AUDIO
+                # rendition group but does not have CODECS and despite
+                # referencing audio group an audio group, it represents
+                # a complete (with audio and video) format. So, for such cases
+                # we will ignore references to rendition groups and treat them
+                # as complete formats.
+                if audio_group_id and codecs and f.get('vcodec') != 'none':
+                    audio_group = groups.get(audio_group_id)
+                    if audio_group and audio_group[0].get('URI'):
+                        # TODO: update acodec for audio only formats with
+                        # the same GROUP-ID
+                        f['acodec'] = 'none'
                 formats.append(f)
-                last_info = {}
-                last_media = {}
+                last_stream_inf = {}
         return formats
 
     @staticmethod
-- 
cgit v1.2.3


From 9c99bef704d185783b61a20ea1f5b58c4c55aba6 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 23 Apr 2017 11:33:19 +0700
Subject: [extractor/common] Use float for scaled tbr

---
 youtube_dl/extractor/common.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 359c549c5..2a099480f 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1420,7 +1420,9 @@ class InfoExtractor(object):
             elif line.startswith('#') or not line.strip():
                 continue
             else:
-                tbr = int_or_none(last_stream_inf.get('AVERAGE-BANDWIDTH') or last_stream_inf.get('BANDWIDTH'), scale=1000)
+                tbr = float_or_none(
+                    last_stream_inf.get('AVERAGE-BANDWIDTH') or
+                    last_stream_inf.get('BANDWIDTH'), scale=1000)
                 format_id = []
                 if m3u8_id:
                     format_id.append(m3u8_id)
@@ -1850,7 +1852,7 @@ class InfoExtractor(object):
                             'ext': mimetype2ext(mime_type),
                             'width': int_or_none(representation_attrib.get('width')),
                             'height': int_or_none(representation_attrib.get('height')),
-                            'tbr': int_or_none(bandwidth, 1000),
+                            'tbr': float_or_none(bandwidth, 1000),
                             'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
                             'fps': int_or_none(representation_attrib.get('frameRate')),
                             'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
-- 
cgit v1.2.3


From ddd258f92270f48649b57ba4288027a2433af079 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 23 Apr 2017 11:49:57 +0700
Subject: [test_InfoExtractor] Add m3u8 parsing test for NAME attribute in
 EXT-X-STREAM-INF tag

---
 test/test_InfoExtractor.py     | 63 +++++++++++++++++++++++++++++++++++-------
 youtube_dl/extractor/common.py |  4 ++-
 2 files changed, 56 insertions(+), 11 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 57205f6ae..2a3c3f7cb 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -361,7 +361,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'ext': 'mp4',
                     'format_id': '155',
                     'protocol': 'm3u8',
-                    'tbr': 155,
+                    'tbr': 155.648,
                     'width': 320,
                     'height': 180,
                 }, {
@@ -370,7 +370,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'ext': 'mp4',
                     'format_id': '502',
                     'protocol': 'm3u8',
-                    'tbr': 502,
+                    'tbr': 502.784,
                     'width': 480,
                     'height': 270,
                 }, {
@@ -379,7 +379,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'ext': 'mp4',
                     'format_id': '827',
                     'protocol': 'm3u8',
-                    'tbr': 827,
+                    'tbr': 827.392,
                     'width': 640,
                     'height': 360,
                 }, {
@@ -388,7 +388,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'ext': 'mp4',
                     'format_id': '1396',
                     'protocol': 'm3u8',
-                    'tbr': 1396,
+                    'tbr': 1396.736,
                     'width': 854,
                     'height': 480,
                 }]
@@ -413,7 +413,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'protocol': 'm3u8',
                     'acodec': 'mp4a.40.2',
                     'vcodec': 'none',
-                    'tbr': 182,
+                    'tbr': 182.725,
                 }, {
                     'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8',
                     'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8',
@@ -422,7 +422,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'protocol': 'm3u8',
                     'acodec': 'mp4a.40.2',
                     'vcodec': 'avc1.42C00D',
-                    'tbr': 280,
+                    'tbr': 280.474,
                     'width': 400,
                     'height': 226,
                 }, {
@@ -433,7 +433,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'protocol': 'm3u8',
                     'acodec': 'mp4a.40.2',
                     'vcodec': 'avc1.42C01E',
-                    'tbr': 628,
+                    'tbr': 628.347,
                     'width': 640,
                     'height': 360,
                 }, {
@@ -444,7 +444,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'protocol': 'm3u8',
                     'acodec': 'mp4a.40.2',
                     'vcodec': 'avc1.42C01E',
-                    'tbr': 893,
+                    'tbr': 893.387,
                     'width': 852,
                     'height': 480,
                 }, {
@@ -455,7 +455,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'protocol': 'm3u8',
                     'acodec': 'mp4a.40.2',
                     'vcodec': 'avc1.42C01F',
-                    'tbr': 1603,
+                    'tbr': 1603.789,
                     'width': 1280,
                     'height': 720,
                 }, {
@@ -466,7 +466,50 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'protocol': 'm3u8',
                     'acodec': 'mp4a.40.2',
                     'vcodec': 'avc1.100.31',
-                    'tbr': 3214,
+                    'tbr': 3214.134,
+                    'width': 1280,
+                    'height': 720,
+                }]
+            ),
+            (
+                # http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
+                # EXT-X-STREAM-INF tag with NAME attribute that is not defined
+                # in HLS specification
+                'vidio',
+                'https://www.vidio.com/videos/165683/playlist.m3u8',
+                [{
+                    'url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
+                    'ext': 'mp4',
+                    'format_id': 'meta',
+                    'format_note': 'Quality selection URL',
+                    'protocol': 'm3u8',
+                    'preference': -100,
+                    'resolution': 'multiple'
+                }, {
+                    'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8',
+                    'manifest_url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '270p 3G',
+                    'protocol': 'm3u8',
+                    'tbr': 300,
+                    'width': 480,
+                    'height': 270,
+                }, {
+                    'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8',
+                    'manifest_url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '360p SD',
+                    'protocol': 'm3u8',
+                    'tbr': 600,
+                    'width': 640,
+                    'height': 360,
+                }, {
+                    'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8',
+                    'manifest_url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8',
+                    'ext': 'mp4',
+                    'format_id': '720p HD',
+                    'protocol': 'm3u8',
+                    'tbr': 1200,
                     'width': 1280,
                     'height': 720,
                 }]
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 2a099480f..6d023106e 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1397,7 +1397,9 @@ class InfoExtractor(object):
 
         def build_stream_name():
             # Despite specification does not mention NAME attribute for
-            # EXT-X-STREAM-INF it still sometimes may be present
+            # EXT-X-STREAM-INF tag (see [1] or vidio test in
+            # test_parse_m3u8_formats) it still sometimes may be present
+            # 1. http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
             stream_name = last_stream_inf.get('NAME')
             if stream_name:
                 return stream_name
-- 
cgit v1.2.3


From 3019cb0c9976481cf2afaf0a2005d90040204fa0 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 23 Apr 2017 11:51:53 +0700
Subject: [extractor/common] Rephrase comment

---
 youtube_dl/extractor/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 6d023106e..9184e53e9 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1397,8 +1397,8 @@ class InfoExtractor(object):
 
         def build_stream_name():
             # Despite specification does not mention NAME attribute for
-            # EXT-X-STREAM-INF tag (see [1] or vidio test in
-            # test_parse_m3u8_formats) it still sometimes may be present
+            # EXT-X-STREAM-INF tag it still sometimes may be present (see [1]
+            # or vidio test in TestInfoExtractor.test_parse_m3u8_formats)
             # 1. http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
             stream_name = last_stream_inf.get('NAME')
             if stream_name:
-- 
cgit v1.2.3


From 40e41780f1d770a355f01e3c1e6fb09ff392f97e Mon Sep 17 00:00:00 2001
From: Tithen-Firion <tithen.firion.0@gmail.com>
Date: Tue, 25 Apr 2017 15:12:54 +0200
Subject: [phantomjs] add cookie support

---
 youtube_dl/extractor/common.py |  8 ++++--
 youtube_dl/utils.py            | 62 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 66 insertions(+), 4 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index dcc9d628a..e54adc9f0 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2343,10 +2343,12 @@ class InfoExtractor(object):
                 self._downloader.report_warning(msg)
         return res
 
-    def _set_cookie(self, domain, name, value, expire_time=None):
+    def _set_cookie(self, domain, name, value, expire_time=None, port=None,
+                    path='/', secure=False, discard=False, rest={}, **kwargs):
         cookie = compat_cookiejar.Cookie(
-            0, name, value, None, None, domain, None,
-            None, '/', True, False, expire_time, '', None, None, None)
+            0, name, value, port, not port is None, domain, True,
+            domain.startswith('.'), path, True, secure, expire_time,
+            discard, None, None, rest)
         self._downloader.cookiejar.set_cookie(cookie)
 
     def _get_cookies(self, url):
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 94e1b07a6..9c94b7ec9 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -3654,6 +3654,37 @@ def write_xattr(path, key, value):
                         "or the 'xattr' binary.")
 
 
+def cookie_to_dict(cookie):
+    cookie_dict = {
+        'name': cookie.name,
+        'value': cookie.value,
+    };
+    if cookie.port_specified:
+        cookie_dict['port'] = cookie.port
+    if cookie.domain_specified:
+        cookie_dict['domain'] = cookie.domain
+    if cookie.path_specified:
+        cookie_dict['path'] = cookie.path
+    if not cookie.expires is None:
+        cookie_dict['expires'] = cookie.expires
+    if not cookie.secure is None:
+        cookie_dict['secure'] = cookie.secure
+    if not cookie.discard is None:
+        cookie_dict['discard'] = cookie.discard
+    try:
+        if (cookie.has_nonstandard_attr('httpOnly') or
+            cookie.has_nonstandard_attr('httponly') or
+            cookie.has_nonstandard_attr('HttpOnly')):
+            cookie_dict['httponly'] = True
+    except TypeError:
+        pass
+    return cookie_dict
+
+
+def cookie_jar_to_list(cookie_jar):
+    return [cookie_to_dict(cookie) for cookie in cookie_jar]
+
+
 class PhantomJSwrapper(object):
     """PhantomJS wrapper class"""
 
@@ -3674,6 +3705,9 @@ class PhantomJSwrapper(object):
         var fs = require('fs');
         var read = {{ mode: 'r', charset: 'utf-8' }};
         var write = {{ mode: 'w', charset: 'utf-8' }};
+        JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
+          phantom.addCookie(x);
+        }});
         page.settings.resourceTimeout = {timeout};
         page.settings.userAgent = "{ua}";
         page.onLoadStarted = function() {{
@@ -3684,6 +3718,7 @@ class PhantomJSwrapper(object):
         }};
         var saveAndExit = function() {{
           fs.write("{html}", page.content, write);
+          fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
           phantom.exit();
         }};
         page.onLoadFinished = function(status) {{
@@ -3697,7 +3732,7 @@ class PhantomJSwrapper(object):
         page.open("");
     '''
 
-    _TMP_FILE_NAMES = ['script', 'html']
+    _TMP_FILE_NAMES = ['script', 'html', 'cookies']
 
     def __init__(self, extractor, timeout=10000):
         self.exe = check_executable('phantomjs', ['-v'])
@@ -3722,6 +3757,26 @@ class PhantomJSwrapper(object):
             except:
                 pass
 
+    def _save_cookies(self, url):
+        cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
+        for cookie in cookies:
+            if 'path' not in cookie:
+                cookie['path'] = '/'
+            if 'domain' not in cookie:
+                cookie['domain'] = compat_urlparse.urlparse(url).netloc
+        with open(self._TMP_FILES['cookies'].name, 'wb') as f:
+            f.write(json.dumps(cookies).encode('utf-8'))
+
+    def _load_cookies(self):
+        with open(self._TMP_FILES['cookies'].name, 'rb') as f:
+            cookies = json.loads(f.read().decode('utf-8'))
+        for cookie in cookies:
+            if cookie['httponly'] is True:
+                cookie['rest'] = { 'httpOnly': None }
+            if 'expiry' in cookie:
+                cookie['expire_time'] = cookie['expiry']
+            self.extractor._set_cookie(**cookie)
+
     def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
         """
         Downloads webpage (if needed) and executes JS
@@ -3765,6 +3820,8 @@ class PhantomJSwrapper(object):
         with open(self._TMP_FILES['html'].name, 'wb') as f:
             f.write(html.encode('utf-8'))
 
+        self._save_cookies(url)
+
         replaces = self.options
         replaces['url'] = url
         user_agent = headers.get('User-Agent') or std_headers['User-Agent']
@@ -3791,5 +3848,8 @@ class PhantomJSwrapper(object):
                                  + encodeArgument(err))
         with open(self._TMP_FILES['html'].name, 'rb') as f:
             html = f.read().decode('utf-8')
+
+        self._load_cookies()
+
         return (html, encodeArgument(out))
 
-- 
cgit v1.2.3


From ac9c69ace7ee22e59a44d25c87b9b53d18762ff7 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Tue, 25 Apr 2017 23:46:05 +0700
Subject: [extractor/common] Improve jwplayer regex

---
 youtube_dl/extractor/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 9184e53e9..8b3f04c61 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2233,7 +2233,7 @@ class InfoExtractor(object):
 
     def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
         mobj = re.search(
-            r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\).*?\.setup\s*\((?P<options>[^)]+)\)',
+            r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)',
             webpage)
         if mobj:
             try:
-- 
cgit v1.2.3


From ff99fe529e52b2465f1d973e69df01a6391568d6 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Tue, 25 Apr 2017 22:07:10 +0700
Subject: Don't list master m3u8 playlists in format list (closes #12832)

---
 test/test_InfoExtractor.py         | 40 --------------------------------------
 youtube_dl/extractor/anvato.py     |  7 +------
 youtube_dl/extractor/common.py     |  4 ++--
 youtube_dl/extractor/funnyordie.py |  3 +--
 youtube_dl/extractor/gamespot.py   |  3 +--
 youtube_dl/extractor/lego.py       |  2 +-
 youtube_dl/extractor/pbs.py        |  2 +-
 youtube_dl/extractor/r7.py         |  3 +--
 youtube_dl/extractor/ted.py        |  2 +-
 youtube_dl/extractor/tvp.py        |  3 +--
 youtube_dl/extractor/viewster.py   |  3 +--
 11 files changed, 11 insertions(+), 61 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 2a3c3f7cb..14abf03c4 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -184,14 +184,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 'pluzz_francetv_11507',
                 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
                 [{
-                    'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
-                    'ext': 'mp4',
-                    'format_id': 'meta',
-                    'format_note': 'Quality selection URL',
-                    'protocol': 'm3u8',
-                    'preference': -100,
-                    'resolution': 'multiple'
-                }, {
                     'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0',
                     'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0',
                     'ext': 'mp4',
@@ -254,14 +246,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 'teamcoco_11995',
                 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
                 [{
-                    'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
-                    'ext': 'mp4',
-                    'format_id': 'meta',
-                    'format_note': 'Quality selection URL',
-                    'protocol': 'm3u8',
-                    'preference': -100,
-                    'resolution': 'multiple',
-                }, {
                     'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8',
                     'ext': 'mp4',
                     'format_id': 'audio-0-Default',
@@ -334,14 +318,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 'toggle_mobile_12211',
                 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
                 [{
-                    'url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
-                    'ext': 'mp4',
-                    'format_id': 'meta',
-                    'format_note': 'Quality selection URL',
-                    'protocol': 'm3u8',
-                    'preference': -100,
-                    'resolution': 'multiple'
-                }, {
                     'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8',
                     'ext': 'mp4',
                     'format_id': 'audio-English',
@@ -398,14 +374,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 'twitch_vod',
                 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
                 [{
-                    'url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
-                    'ext': 'mp4',
-                    'format_id': 'meta',
-                    'format_note': 'Quality selection URL',
-                    'protocol': 'm3u8',
-                    'preference': -100,
-                    'resolution': 'multiple'
-                }, {
                     'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8',
                     'manifest_url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8',
                     'ext': 'mp4',
@@ -478,14 +446,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 'vidio',
                 'https://www.vidio.com/videos/165683/playlist.m3u8',
                 [{
-                    'url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
-                    'ext': 'mp4',
-                    'format_id': 'meta',
-                    'format_note': 'Quality selection URL',
-                    'protocol': 'm3u8',
-                    'preference': -100,
-                    'resolution': 'multiple'
-                }, {
                     'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8',
                     'manifest_url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8',
                     'ext': 'mp4',
diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py
index 623f44dce..9fd91c2f6 100644
--- a/youtube_dl/extractor/anvato.py
+++ b/youtube_dl/extractor/anvato.py
@@ -178,12 +178,7 @@ class AnvatoIE(InfoExtractor):
             }
 
             if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
-                # Not using _extract_m3u8_formats here as individual media
-                # playlists are also included in published_urls.
-                if tbr is None:
-                    formats.append(self._m3u8_meta_format(video_url, ext='mp4', m3u8_id='hls'))
-                    continue
-                else:
+                if tbr is not None:
                     a_format.update({
                         'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
                         'ext': 'mp4',
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 8b3f04c61..6d01f0800 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1334,7 +1334,7 @@ class InfoExtractor(object):
         if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
             return []
 
-        formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
+        formats = []
 
         format_url = lambda u: (
             u
@@ -1438,7 +1438,7 @@ class InfoExtractor(object):
                 f = {
                     'format_id': '-'.join(format_id),
                     'url': manifest_url,
-                    'manifest_url': manifest_url,
+                    'manifest_url': m3u8_url,
                     'tbr': tbr,
                     'ext': ext,
                     'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index 81c0ce9a3..49409369c 100644
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -58,8 +58,7 @@ class FunnyOrDieIE(InfoExtractor):
             m3u8_url, video_id, 'mp4', 'm3u8_native',
             m3u8_id='hls', fatal=False)
         source_formats = list(filter(
-            lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
-            m3u8_formats))
+            lambda f: f.get('vcodec') != 'none', m3u8_formats))
 
         bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)]
         bitrates.sort()
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py
index 682c49e79..00d311158 100644
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -78,8 +78,7 @@ class GameSpotIE(OnceIE):
                     if m3u8_formats:
                         self._sort_formats(m3u8_formats)
                         m3u8_formats = list(filter(
-                            lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
-                            m3u8_formats))
+                            lambda f: f.get('vcodec') != 'none', m3u8_formats))
                     if len(qualities) == len(m3u8_formats):
                         for q, m3u8_format in zip(qualities, m3u8_formats):
                             f = m3u8_format.copy()
diff --git a/youtube_dl/extractor/lego.py b/youtube_dl/extractor/lego.py
index d3bca6435..b312e77f1 100644
--- a/youtube_dl/extractor/lego.py
+++ b/youtube_dl/extractor/lego.py
@@ -86,7 +86,7 @@ class LEGOIE(InfoExtractor):
         formats = self._extract_akamai_formats(
             '%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id)
         m3u8_formats = list(filter(
-            lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+            lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none',
             formats))
         if len(m3u8_formats) == len(self._BITRATES):
             self._sort_formats(m3u8_formats)
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 3e51b4dd7..6166dc2ad 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -515,7 +515,7 @@ class PBSIE(InfoExtractor):
                     http_url = format_url
         self._remove_duplicate_formats(formats)
         m3u8_formats = list(filter(
-            lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+            lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
             formats))
         if http_url:
             for m3u8_format in m3u8_formats:
diff --git a/youtube_dl/extractor/r7.py b/youtube_dl/extractor/r7.py
index ed38c77eb..e2202d603 100644
--- a/youtube_dl/extractor/r7.py
+++ b/youtube_dl/extractor/r7.py
@@ -62,8 +62,7 @@ class R7IE(InfoExtractor):
             # m3u8 format always matches the http format, let's copy metadata from
             # one to another
             m3u8_formats = list(filter(
-                lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
-                formats))
+                lambda f: f.get('vcodec') != 'none', formats))
             if len(m3u8_formats) == 1:
                 f_copy = m3u8_formats[0].copy()
                 f_copy.update(f)
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index 1b1afab32..3f3c681ae 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -210,7 +210,7 @@ class TEDIE(InfoExtractor):
                     resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False))
 
         m3u8_formats = list(filter(
-            lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+            lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
             formats))
         if http_url:
             for m3u8_format in m3u8_formats:
diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index 06ea2b40a..c5b3288ad 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -150,8 +150,7 @@ class TVPEmbedIE(InfoExtractor):
                 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
             self._sort_formats(m3u8_formats)
             m3u8_formats = list(filter(
-                lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
-                m3u8_formats))
+                lambda f: f.get('vcodec') != 'none', m3u8_formats))
             formats.extend(m3u8_formats)
             for i, m3u8_format in enumerate(m3u8_formats, 2):
                 http_url = '%s-%d.mp4' % (video_url_base, i)
diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py
index fcf0cb100..d5d5b4c69 100644
--- a/youtube_dl/extractor/viewster.py
+++ b/youtube_dl/extractor/viewster.py
@@ -176,8 +176,7 @@ class ViewsterIE(InfoExtractor):
                     if m3u8_formats:
                         self._sort_formats(m3u8_formats)
                         m3u8_formats = list(filter(
-                            lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
-                            m3u8_formats))
+                            lambda f: f.get('vcodec') != 'none', m3u8_formats))
                     if len(qualities) == len(m3u8_formats):
                         for q, m3u8_format in zip(qualities, m3u8_formats):
                             f = m3u8_format.copy()
-- 
cgit v1.2.3


From c89b49f7432ebaed7c5032194df9240f5da4a84f Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Fri, 28 Apr 2017 03:00:14 +0700
Subject: [extractor/common] Add manifest_url for explicit group rendition
 formats

---
 youtube_dl/extractor/common.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 6d01f0800..2cb55d6af 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1386,6 +1386,7 @@ class InfoExtractor(object):
                 f = {
                     'format_id': '-'.join(format_id),
                     'url': format_url(media_url),
+                    'manifest_url': m3u8_url,
                     'language': media.get('LANGUAGE'),
                     'ext': ext,
                     'protocol': entry_protocol,
-- 
cgit v1.2.3


From 33a81c2c6f4bd180ef69d5631862637ae0c8ec8e Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 30 Apr 2017 21:11:55 +0700
Subject: [extractor/common] Extract view count from JSON-LD

---
 youtube_dl/extractor/common.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 2cb55d6af..fba15d446 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -990,6 +990,7 @@ class InfoExtractor(object):
                 'tbr': int_or_none(e.get('bitrate')),
                 'width': int_or_none(e.get('width')),
                 'height': int_or_none(e.get('height')),
+                'view_count': int_or_none(e.get('interactionCount')),
             })
 
         for e in json_ld:
-- 
cgit v1.2.3


From 55949fede6da7c2d612f56863196eadcbc583de9 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Thu, 5 May 2016 21:40:19 +0100
Subject: [common] introduce chapters field

---
 youtube_dl/extractor/common.py | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index fba15d446..9541e5b42 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -245,6 +245,10 @@ class InfoExtractor(object):
                     specified in the URL.
     end_time:       Time in seconds where the reproduction should end, as
                     specified in the URL.
+    chapters:       A list of dictionaries, with the following entries:
+                        * "start_time" - The start time of the chapter in seconds
+                        * "end_time" - The end time of the chapter in seconds
+                        * "title" (optional, string)
 
     The following fields should only be used when the video belongs to some logical
     chapter or section:
-- 
cgit v1.2.3


From ff6f9a67040c47ea1645e3f91f3153c212ebc7ca Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Thu, 4 May 2017 16:04:25 +0100
Subject: [extractor/common] fix typo in _extract_akamai_formats

---
 youtube_dl/extractor/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 9541e5b42..b9ad8461a 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2174,7 +2174,7 @@ class InfoExtractor(object):
     def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
         formats = []
         hdcore_sign = 'hdcore=3.7.0'
-        f4m_url = re.sub(r'(https?://[^/+])/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+        f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
         hds_host = hosts.get('hds')
         if hds_host:
             f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
-- 
cgit v1.2.3


From 76d5a36391480294b6c26437ccfa34cb43145a7e Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 14 May 2017 06:11:45 +0700
Subject: [extractor/common] Respect Width and Height attributes in ISM
 manifests

---
 youtube_dl/extractor/common.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index b9ad8461a..0bbb1103b 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2001,6 +2001,12 @@ class InfoExtractor(object):
             compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
 
     def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
+        """
+        Parse formats from ISM manifest.
+        References:
+         1. [MS-SSTR]: Smooth Streaming Protocol,
+            https://msdn.microsoft.com/en-us/library/ff469518.aspx
+        """
         if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
             return []
 
@@ -2022,8 +2028,11 @@ class InfoExtractor(object):
                     self.report_warning('%s is not a supported codec' % fourcc)
                     continue
                 tbr = int(track.attrib['Bitrate']) // 1000
-                width = int_or_none(track.get('MaxWidth'))
-                height = int_or_none(track.get('MaxHeight'))
+                # [1] does not mention Width and Height attributes. However,
+                # they're often present while MaxWidth and MaxHeight are
+                # missing, so should be used as fallbacks
+                width = int_or_none(track.get('MaxWidth') or track.get('Width'))
+                height = int_or_none(track.get('MaxHeight') or track.get('Height'))
                 sampling_rate = int_or_none(track.get('SamplingRate'))
 
                 track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
-- 
cgit v1.2.3


From 6f766798049418c15e3c1b62c046e507093993dd Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Tue, 16 May 2017 22:11:34 +0700
Subject: [extractor/common] Add support for schemeless URLs in
 _extract_wowza_formats (closes #13088, closes #13092)

---
 youtube_dl/extractor/common.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 0bbb1103b..74b6f1197 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2205,8 +2205,9 @@ class InfoExtractor(object):
 
     def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
         url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
-        url_base = self._search_regex(r'(?:https?|rtmp|rtsp)(://[^?]+)', url, 'format url')
-        http_base_url = 'http' + url_base
+        url_base = self._search_regex(
+            r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
+        http_base_url = self._proto_relative_url(url_base, scheme='http:')
         formats = []
         if 'm3u8' not in skip_protocols:
             formats.extend(self._extract_m3u8_formats(
-- 
cgit v1.2.3


From f2e2f0c777dc9e541d89b742ac12e7ad37d8cb8e Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Wed, 17 May 2017 22:19:33 +0700
Subject: [extractor/common] Fix rtmp and rtsp formats' URLs in
 _extract_wowza_formats

---
 youtube_dl/extractor/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 74b6f1197..fec39da8b 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2207,7 +2207,7 @@ class InfoExtractor(object):
         url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
         url_base = self._search_regex(
             r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
-        http_base_url = self._proto_relative_url(url_base, scheme='http:')
+        http_base_url = '%s:%s' % ('http', url_base)
         formats = []
         if 'm3u8' not in skip_protocols:
             formats.extend(self._extract_m3u8_formats(
@@ -2241,7 +2241,7 @@ class InfoExtractor(object):
             for protocol in ('rtmp', 'rtsp'):
                 if protocol not in skip_protocols:
                     formats.append({
-                        'url': protocol + url_base,
+                        'url': '%s:%s' % (protocol, url_base),
                         'format_id': protocol,
                         'protocol': protocol,
                     })
-- 
cgit v1.2.3


From 1afd0b0da70b6ed709c610aff98786d71511a629 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Fri, 9 Jun 2017 00:40:03 +0700
Subject: [extractor/common] Return unicode string from _match_id

---
 youtube_dl/extractor/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index fec39da8b..f027447c8 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -376,7 +376,7 @@ class InfoExtractor(object):
             cls._VALID_URL_RE = re.compile(cls._VALID_URL)
         m = cls._VALID_URL_RE.match(url)
         assert m
-        return m.group('id')
+        return compat_str(m.group('id'))
 
     @classmethod
     def working(cls):
-- 
cgit v1.2.3


From 0a268c6e11e9fb55f41b474497997bff61d97cd2 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Wed, 14 Jun 2017 22:02:15 +0700
Subject: [extractor/common] Improve jwplayer formats extraction (closes
 #13379)

---
 youtube_dl/extractor/common.py | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index f027447c8..941385ae2 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2328,6 +2328,8 @@ class InfoExtractor(object):
         urls = []
         formats = []
         for source in jwplayer_sources_data:
+            if not isinstance(source, dict):
+                continue
             source_url = self._proto_relative_url(source.get('file'))
             if not source_url:
                 continue
-- 
cgit v1.2.3


From 6a9cb29509a3b1fa2c1ffce82acb2fa7a2699c29 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Thu, 15 Jun 2017 13:04:36 +0800
Subject: [extractor/common] Fix json dumping with --geo-bypass

The line "[debug] Using fake IP %s (%s) as X-Forwarded-For." was printed
to stdout even with -j/-J, which breaks the resultant JSON.
---
 ChangeLog                      | 3 +++
 youtube_dl/extractor/common.py | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/ChangeLog b/ChangeLog
index 9b45f08e3..5cd0b3393 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
 version <unreleased>
 
+Core
+* [extractor/common] Fix json dumping with --geo-bypass
+
 Extractors
 * [bilibili] Fix extraction of videos with double quotes in titles (#13387)
 
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 941385ae2..6e415ea41 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -420,7 +420,7 @@ class InfoExtractor(object):
             if country_code:
                 self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
                 if self._downloader.params.get('verbose', False):
-                    self._downloader.to_stdout(
+                    self._downloader.to_screen(
                         '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
                         % (self._x_forwarded_for_ip, country_code.upper()))
 
-- 
cgit v1.2.3


From 96a2daa1ee5de9ddff5d0180fccb8376725de60c Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Thu, 15 Jun 2017 23:40:39 +0700
Subject: [extractor/common] Improve jwplayer subtitles extraction

---
 youtube_dl/extractor/common.py | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 6e415ea41..9751ab021 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2299,6 +2299,8 @@ class InfoExtractor(object):
             tracks = video_data.get('tracks')
             if tracks and isinstance(tracks, list):
                 for track in tracks:
+                    if not isinstance(track, dict):
+                        continue
                     if track.get('kind') != 'captions':
                         continue
                     track_url = urljoin(base_url, track.get('file'))
-- 
cgit v1.2.3


From c69701c6ab94ca2fce4fedf6390515acb55e9086 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Fri, 30 Jun 2017 22:19:06 +0700
Subject: [extractor/common] Improve _json_ld

---
 youtube_dl/extractor/common.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 9751ab021..afeb4c5da 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1002,17 +1002,17 @@ class InfoExtractor(object):
                 item_type = e.get('@type')
                 if expected_type is not None and expected_type != item_type:
                     return info
-                if item_type == 'TVEpisode':
+                if item_type in ('TVEpisode', 'Episode'):
                     info.update({
                         'episode': unescapeHTML(e.get('name')),
                         'episode_number': int_or_none(e.get('episodeNumber')),
                         'description': unescapeHTML(e.get('description')),
                     })
                     part_of_season = e.get('partOfSeason')
-                    if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
+                    if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
                         info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
                     part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
-                    if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
+                    if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
                         info['series'] = unescapeHTML(part_of_series.get('name'))
                 elif item_type == 'Article':
                     info.update({
@@ -1022,10 +1022,10 @@ class InfoExtractor(object):
                     })
                 elif item_type == 'VideoObject':
                     extract_video_object(e)
-                elif item_type == 'WebPage':
-                    video = e.get('video')
-                    if isinstance(video, dict) and video.get('@type') == 'VideoObject':
-                        extract_video_object(video)
+                    continue
+                video = e.get('video')
+                if isinstance(video, dict) and video.get('@type') == 'VideoObject':
+                    extract_video_object(video)
                 break
         return dict((k, v) for k, v in info.items() if v is not None)
 
-- 
cgit v1.2.3


From 4328ddf82b812420ffc120b4150251f751bff08c Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 9 Jul 2017 16:29:52 +0700
Subject: [extractor/common] Add support for AMP tags in
 _parse_html5_media_entries

---
 youtube_dl/extractor/common.py  |  7 +++++--
 youtube_dl/extractor/generic.py | 10 ++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index afeb4c5da..daa10885f 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2132,15 +2132,18 @@ class InfoExtractor(object):
             return is_plain_url, formats
 
         entries = []
+        # amp-video and amp-audio are very similar to their HTML5 counterparts
+        # so we wll include them right here (see
+        # https://www.ampproject.org/docs/reference/components/amp-video)
         media_tags = [(media_tag, media_type, '')
                       for media_tag, media_type
-                      in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
+                      in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)]
         media_tags.extend(re.findall(
             # We only allow video|audio followed by a whitespace or '>'.
             # Allowing more characters may end up in significant slow down (see
             # https://github.com/rg3/youtube-dl/issues/11979, example URL:
             # http://www.porntrex.com/maps/videositemap.xml).
-            r'(?s)(<(?P<tag>video|audio)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
+            r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
         for media_tag, media_type, media_content in media_tags:
             media_info = {
                 'formats': [],
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 95c38698d..919f4f987 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1770,6 +1770,16 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': [MediasetIE.ie_key()],
         },
+        {
+            # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
+            'url': 'https://tvrain.ru/amp/418921/',
+            'md5': 'cc00413936695987e8de148b67d14f1d',
+            'info_dict': {
+                'id': '418921',
+                'ext': 'mp4',
+                'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
+            },
+        },
         # {
         #     # TODO: find another test
         #     # http://schema.org/VideoObject
-- 
cgit v1.2.3


From 749ca5eced0b9a8ed1ef79f4ee80908e0ac242c8 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 16 Jul 2017 04:33:14 +0700
Subject: [extractor/common] Fix playlist_from_matches

---
 youtube_dl/extractor/common.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index daa10885f..748b4d59f 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -730,12 +730,12 @@ class InfoExtractor(object):
             video_info['title'] = video_title
         return video_info
 
-    def playlist_from_matches(self, matches, video_id, video_title, getter=None, ie=None):
-        urlrs = orderedSet(
+    def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None):
+        urls = orderedSet(
             self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
             for m in matches)
         return self.playlist_result(
-            urlrs, playlist_id=video_id, playlist_title=video_title)
+            urls, playlist_id=playlist_id, playlist_title=playlist_title)
 
     @staticmethod
     def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
-- 
cgit v1.2.3


From 1141e9104bc0f8d577f18cf28a1af58adea1248e Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sat, 5 Aug 2017 06:57:19 +0700
Subject: Use relative paths for DASH fragments (closes #12990) 10x reduced
 JSON size refs #13810

---
 youtube_dl/downloader/dash.py  | 14 ++++++++++----
 youtube_dl/extractor/common.py | 16 ++++++++++------
 2 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index 7491fdad8..576ece6db 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
 
 from .fragment import FragmentFD
 from ..compat import compat_urllib_error
+from ..utils import urljoin
 
 
 class DashSegmentsFD(FragmentFD):
@@ -12,12 +13,13 @@ class DashSegmentsFD(FragmentFD):
     FD_NAME = 'dashsegments'
 
     def real_download(self, filename, info_dict):
-        segments = info_dict['fragments'][:1] if self.params.get(
+        fragment_base_url = info_dict.get('fragment_base_url')
+        fragments = info_dict['fragments'][:1] if self.params.get(
             'test', False) else info_dict['fragments']
 
         ctx = {
             'filename': filename,
-            'total_frags': len(segments),
+            'total_frags': len(fragments),
         }
 
         self._prepare_and_start_frag_download(ctx)
@@ -26,7 +28,7 @@ class DashSegmentsFD(FragmentFD):
         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
 
         frag_index = 0
-        for i, segment in enumerate(segments):
+        for i, fragment in enumerate(fragments):
             frag_index += 1
             if frag_index <= ctx['fragment_index']:
                 continue
@@ -36,7 +38,11 @@ class DashSegmentsFD(FragmentFD):
             count = 0
             while count <= fragment_retries:
                 try:
-                    success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
+                    fragment_url = fragment.get('url')
+                    if not fragment_url:
+                        assert fragment_base_url
+                        fragment_url = urljoin(fragment_base_url, fragment['path'])
+                    success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
                     if not success:
                         return False
                     self._append_fragment(ctx, frag_content)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 748b4d59f..459e7ffd6 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1892,9 +1892,13 @@ class InfoExtractor(object):
                                 'Bandwidth': bandwidth,
                             }
 
+                        def location_key(location):
+                            return 'url' if re.match(r'^https?://', location) else 'path'
+
                         if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
 
                             media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
+                            media_location_key = location_key(media_template)
 
                             # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
                             # can't be used at the same time
@@ -1904,7 +1908,7 @@ class InfoExtractor(object):
                                     segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
                                     representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
                                 representation_ms_info['fragments'] = [{
-                                    'url': media_template % {
+                                    media_location_key: media_template % {
                                         'Number': segment_number,
                                         'Bandwidth': bandwidth,
                                     },
@@ -1928,7 +1932,7 @@ class InfoExtractor(object):
                                         'Number': segment_number,
                                     }
                                     representation_ms_info['fragments'].append({
-                                        'url': segment_url,
+                                        media_location_key: segment_url,
                                         'duration': float_or_none(segment_d, representation_ms_info['timescale']),
                                     })
 
@@ -1952,8 +1956,9 @@ class InfoExtractor(object):
                             for s in representation_ms_info['s']:
                                 duration = float_or_none(s['d'], timescale)
                                 for r in range(s.get('r', 0) + 1):
+                                    segment_uri = representation_ms_info['segment_urls'][segment_index]
                                     fragments.append({
-                                        'url': representation_ms_info['segment_urls'][segment_index],
+                                        location_key(segment_uri): segment_uri,
                                         'duration': duration,
                                     })
                                     segment_index += 1
@@ -1962,6 +1967,7 @@ class InfoExtractor(object):
                         # No fragments key is present in this case.
                         if 'fragments' in representation_ms_info:
                             f.update({
+                                'fragment_base_url': base_url,
                                 'fragments': [],
                                 'protocol': 'http_dash_segments',
                             })
@@ -1969,10 +1975,8 @@ class InfoExtractor(object):
                                 initialization_url = representation_ms_info['initialization_url']
                                 if not f.get('url'):
                                     f['url'] = initialization_url
-                                f['fragments'].append({'url': initialization_url})
+                                f['fragments'].append({location_key(initialization_url): initialization_url})
                             f['fragments'].extend(representation_ms_info['fragments'])
-                            for fragment in f['fragments']:
-                                fragment['url'] = urljoin(base_url, fragment['url'])
                         try:
                             existing_format = next(
                                 fo for fo in formats
-- 
cgit v1.2.3


From 82889d4ae517640df217f99e7744002e0deba47a Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sat, 12 Aug 2017 16:48:11 +0700
Subject: [extractor/common] Respect source's type attribute for HTML5 media
 (closes #13892)

---
 youtube_dl/extractor/common.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 459e7ffd6..4d61275fd 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2114,9 +2114,9 @@ class InfoExtractor(object):
                 return f
             return {}
 
-        def _media_formats(src, cur_media_type):
+        def _media_formats(src, type_info, cur_media_type):
             full_url = absolute_url(src)
-            ext = determine_ext(full_url)
+            ext = type_info.get('ext') or determine_ext(full_url)
             if ext == 'm3u8':
                 is_plain_url = False
                 formats = self._extract_m3u8_formats(
@@ -2165,9 +2165,9 @@ class InfoExtractor(object):
                     src = source_attributes.get('src')
                     if not src:
                         continue
-                    is_plain_url, formats = _media_formats(src, media_type)
+                    f = parse_content_type(source_attributes.get('type'))
+                    is_plain_url, formats = _media_formats(src, f, media_type)
                     if is_plain_url:
-                        f = parse_content_type(source_attributes.get('type'))
                         f.update(formats[0])
                         media_info['formats'].append(f)
                     else:
-- 
cgit v1.2.3


From ac8491fcca6f9c0f6c7904e1cf13953f912eeb39 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sat, 12 Aug 2017 17:11:35 +0700
Subject: [extractor/common] Make _family_friendly_search optional

---
 youtube_dl/extractor/common.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 4d61275fd..e565901af 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -940,7 +940,8 @@ class InfoExtractor(object):
 
     def _family_friendly_search(self, html):
         # See http://schema.org/VideoObject
-        family_friendly = self._html_search_meta('isFamilyFriendly', html)
+        family_friendly = self._html_search_meta(
+            'isFamilyFriendly', html, default=None)
 
         if not family_friendly:
             return None
-- 
cgit v1.2.3


From 868f79db41a4d81a87ef12c8bd5ef73205c9c029 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sat, 12 Aug 2017 19:24:26 +0700
Subject: [extractor/common] Fix _media_formats

---
 youtube_dl/extractor/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index e565901af..7fe888462 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2115,7 +2115,7 @@ class InfoExtractor(object):
                 return f
             return {}
 
-        def _media_formats(src, type_info, cur_media_type):
+        def _media_formats(src, cur_media_type, type_info={}):
             full_url = absolute_url(src)
             ext = type_info.get('ext') or determine_ext(full_url)
             if ext == 'm3u8':
@@ -2167,7 +2167,7 @@ class InfoExtractor(object):
                     if not src:
                         continue
                     f = parse_content_type(source_attributes.get('type'))
-                    is_plain_url, formats = _media_formats(src, f, media_type)
+                    is_plain_url, formats = _media_formats(src, media_type, f)
                     if is_plain_url:
                         f.update(formats[0])
                         media_info['formats'].append(f)
-- 
cgit v1.2.3


From 485047854376465f95309daad4966971f56728ef Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Tue, 15 Aug 2017 23:58:00 +0700
Subject: [extractor/common] Add support for float durations in
 _parse_mpd_formats (closes #13919)

---
 test/test_InfoExtractor.py           | 86 ++++++++++++++++++++++++++++++++++++
 test/testdata/mpd/float_duration.mpd | 18 ++++++++
 youtube_dl/extractor/common.py       |  2 +-
 3 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 test/testdata/mpd/float_duration.mpd

(limited to 'youtube_dl/extractor/common.py')

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 6f52e11f7..f18a823fc 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -10,6 +10,7 @@ import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from test.helper import FakeYDL, expect_dict, expect_value
+from youtube_dl.compat import compat_etree_fromstring
 from youtube_dl.extractor.common import InfoExtractor
 from youtube_dl.extractor import YoutubeIE, get_info_extractor
 from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
@@ -488,6 +489,91 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 self.ie._sort_formats(formats)
                 expect_value(self, formats, expected_formats, None)
 
+    def test_parse_mpd_formats(self):
+        _TEST_CASES = [
+            (
+                # https://github.com/rg3/youtube-dl/issues/13919
+                'float_duration',
+                'http://unknown/manifest.mpd',
+                [{
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '318597',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.42001f',
+                    'tbr': 318.597,
+                    'width': 340,
+                    'height': 192,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '638590',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.42001f',
+                    'tbr': 638.59,
+                    'width': 512,
+                    'height': 288,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '1022565',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4d001f',
+                    'tbr': 1022.565,
+                    'width': 688,
+                    'height': 384,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '2046506',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.4d001f',
+                    'tbr': 2046.506,
+                    'width': 1024,
+                    'height': 576,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '3998017',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.640029',
+                    'tbr': 3998.017,
+                    'width': 1280,
+                    'height': 720,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': '5997485',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'none',
+                    'vcodec': 'avc1.640032',
+                    'tbr': 5997.485,
+                    'width': 1920,
+                    'height': 1080,
+                }]
+            ),
+        ]
+
+        for mpd_file, mpd_url, expected_formats in _TEST_CASES:
+            with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
+                         mode='r', encoding='utf-8') as f:
+                formats = self.ie._parse_mpd_formats(
+                    compat_etree_fromstring(f.read().encode('utf-8')),
+                    mpd_url=mpd_url)
+                self.ie._sort_formats(formats)
+                expect_value(self, formats, expected_formats, None)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/testdata/mpd/float_duration.mpd b/test/testdata/mpd/float_duration.mpd
new file mode 100644
index 000000000..8dc1d2d5e
--- /dev/null
+++ b/test/testdata/mpd/float_duration.mpd
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<MPD xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:mpeg:dash:schema:mpd:2011" type="static" minBufferTime="PT2S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT6014S">
+	<Period bitstreamSwitching="true">
+		<AdaptationSet mimeType="audio/mp4" codecs="mp4a.40.2" startWithSAP="1" segmentAlignment="true">
+			<SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="ai_$RepresentationID$.mp4d" media="a_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
+			<Representation id="318597" bandwidth="61587"></Representation>
+		</AdaptationSet>
+		<AdaptationSet mimeType="video/mp4" startWithSAP="1" segmentAlignment="true">
+			<SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="vi_$RepresentationID$.mp4d" media="v_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
+			<Representation id="318597" codecs="avc1.42001f" width="340" height="192" bandwidth="318597"></Representation>
+			<Representation id="638590" codecs="avc1.42001f" width="512" height="288" bandwidth="638590"></Representation>
+			<Representation id="1022565" codecs="avc1.4d001f" width="688" height="384" bandwidth="1022565"></Representation>
+			<Representation id="2046506" codecs="avc1.4d001f" width="1024" height="576" bandwidth="2046506"></Representation>
+			<Representation id="3998017" codecs="avc1.640029" width="1280" height="720" bandwidth="3998017"></Representation>
+			<Representation id="5997485" codecs="avc1.640032" width="1920" height="1080" bandwidth="5997485"></Representation>
+		</AdaptationSet>
+	</Period>
+</MPD>
\ No newline at end of file
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 7fe888462..e747258aa 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1786,7 +1786,7 @@ class InfoExtractor(object):
                     ms_info['timescale'] = int(timescale)
                 segment_duration = source.get('duration')
                 if segment_duration:
-                    ms_info['segment_duration'] = int(segment_duration)
+                    ms_info['segment_duration'] = float(segment_duration)
 
             def extract_Initialization(source):
                 initialization = source.find(_add_ns('Initialization'))
-- 
cgit v1.2.3


From b359e977b9bdff704cd58f6f3b34185ecbe450e4 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 20 Aug 2017 14:16:58 +0700
Subject: [extractor/common] Make HLS and DASH extraction non fatal in
 _parse_html5_media_entries (closes #13970)

---
 youtube_dl/extractor/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index e747258aa..ceba4ca1c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2123,11 +2123,11 @@ class InfoExtractor(object):
                 formats = self._extract_m3u8_formats(
                     full_url, video_id, ext='mp4',
                     entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
-                    preference=preference)
+                    preference=preference, fatal=False)
             elif ext == 'mpd':
                 is_plain_url = False
                 formats = self._extract_mpd_formats(
-                    full_url, video_id, mpd_id=mpd_id)
+                    full_url, video_id, mpd_id=mpd_id, fatal=False)
             else:
                 is_plain_url = True
                 formats = [{
-- 
cgit v1.2.3


From e01c3d2ef7264b5d3d6f99e7e0b61340885ed661 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Wed, 23 Aug 2017 00:32:41 +0700
Subject: [extractor/common] Introduce _parse_xml

---
 youtube_dl/extractor/common.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index ceba4ca1c..1804c4de0 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -27,6 +27,7 @@ from ..compat import (
     compat_urllib_parse_urlencode,
     compat_urllib_request,
     compat_urlparse,
+    compat_xml_parse_error,
 )
 from ..downloader.f4m import remove_encrypted_media
 from ..utils import (
@@ -646,15 +647,29 @@ class InfoExtractor(object):
 
     def _download_xml(self, url_or_request, video_id,
                       note='Downloading XML', errnote='Unable to download XML',
-                      transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}):
+                      transform_source=None, fatal=True, encoding=None,
+                      data=None, headers={}, query={}):
         """Return the xml as an xml.etree.ElementTree.Element"""
         xml_string = self._download_webpage(
-            url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query)
+            url_or_request, video_id, note, errnote, fatal=fatal,
+            encoding=encoding, data=data, headers=headers, query=query)
         if xml_string is False:
             return xml_string
+        return self._parse_xml(
+            xml_string, video_id, transform_source=transform_source,
+            fatal=fatal)
+
+    def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
         if transform_source:
             xml_string = transform_source(xml_string)
-        return compat_etree_fromstring(xml_string.encode('utf-8'))
+        try:
+            return compat_etree_fromstring(xml_string.encode('utf-8'))
+        except compat_xml_parse_error as ve:
+            errmsg = '%s: Failed to parse XML ' % video_id
+            if fatal:
+                raise ExtractorError(errmsg, cause=ve)
+            else:
+                self.report_warning(errmsg + str(ve))
 
     def _download_json(self, url_or_request, video_id,
                        note='Downloading JSON metadata',
-- 
cgit v1.2.3


From dd121cc1cab2f077cbf68ee432e83d4d094f0f9a Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 27 Aug 2017 03:12:56 +0700
Subject: [extractor/common] Extract height from res attribute of source tag
 for HTML5 videos (closes #14034)

---
 youtube_dl/extractor/common.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 1804c4de0..b4af3f987 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2184,6 +2184,9 @@ class InfoExtractor(object):
                     f = parse_content_type(source_attributes.get('type'))
                     is_plain_url, formats = _media_formats(src, media_type, f)
                     if is_plain_url:
+                        # res attribute is not standard but seen several times
+                        # in the wild
+                        f['height'] = int_or_none(source_attributes.get('res'))
                         f.update(formats[0])
                         media_info['formats'].append(f)
                     else:
-- 
cgit v1.2.3


From 1ed4549942c34cce52b9c641cf9f532c38866149 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 27 Aug 2017 03:27:05 +0700
Subject: [extractor/common] Extract format id from label attribute of source
 tag for HTML5 videos (#14034)

---
 youtube_dl/extractor/common.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index b4af3f987..74d30ec50 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2186,7 +2186,10 @@ class InfoExtractor(object):
                     if is_plain_url:
                         # res attribute is not standard but seen several times
                         # in the wild
-                        f['height'] = int_or_none(source_attributes.get('res'))
+                        f.update({
+                            'height': int_or_none(source_attributes.get('res')),
+                            'format_id': source_attributes.get('label'),
+                        })
                         f.update(formats[0])
                         media_info['formats'].append(f)
                     else:
-- 
cgit v1.2.3


From 4ed2d7b7d1f67e499a46e507d957616e364565ca Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 17 Sep 2017 13:53:04 +0800
Subject: Fix flake8 issues after #14225

---
 youtube_dl/extractor/common.py |  2 +-
 youtube_dl/utils.py            | 33 +++++++++++++++++----------------
 2 files changed, 18 insertions(+), 17 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 317a9a76f..2bbbf8f4d 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2452,7 +2452,7 @@ class InfoExtractor(object):
     def _set_cookie(self, domain, name, value, expire_time=None, port=None,
                     path='/', secure=False, discard=False, rest={}, **kwargs):
         cookie = compat_cookiejar.Cookie(
-            0, name, value, port, not port is None, domain, True,
+            0, name, value, port, port is not None, domain, True,
             domain.startswith('.'), path, True, secure, expire_time,
             discard, None, None, rest)
         self._downloader.cookiejar.set_cookie(cookie)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index b724e0b70..acc4f987b 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -3830,23 +3830,23 @@ def cookie_to_dict(cookie):
     cookie_dict = {
         'name': cookie.name,
         'value': cookie.value,
-    };
+    }
     if cookie.port_specified:
         cookie_dict['port'] = cookie.port
     if cookie.domain_specified:
         cookie_dict['domain'] = cookie.domain
     if cookie.path_specified:
         cookie_dict['path'] = cookie.path
-    if not cookie.expires is None:
+    if cookie.expires is not None:
         cookie_dict['expires'] = cookie.expires
-    if not cookie.secure is None:
+    if cookie.secure is not None:
         cookie_dict['secure'] = cookie.secure
-    if not cookie.discard is None:
+    if cookie.discard is not None:
         cookie_dict['discard'] = cookie.discard
     try:
         if (cookie.has_nonstandard_attr('httpOnly') or
-            cookie.has_nonstandard_attr('httponly') or
-            cookie.has_nonstandard_attr('HttpOnly')):
+                cookie.has_nonstandard_attr('httponly') or
+                cookie.has_nonstandard_attr('HttpOnly')):
             cookie_dict['httponly'] = True
     except TypeError:
         pass
@@ -3957,7 +3957,7 @@ class PhantomJSwrapper(object):
             cookies = json.loads(f.read().decode('utf-8'))
         for cookie in cookies:
             if cookie['httponly'] is True:
-                cookie['rest'] = { 'httpOnly': None }
+                cookie['rest'] = {'httpOnly': None}
             if 'expiry' in cookie:
                 cookie['expire_time'] = cookie['expiry']
             self.extractor._set_cookie(**cookie)
@@ -3965,7 +3965,7 @@ class PhantomJSwrapper(object):
     def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
         """
         Downloads webpage (if needed) and executes JS
-        
+
         Params:
             url: website url
             html: optional, html code of website
@@ -3974,11 +3974,11 @@ class PhantomJSwrapper(object):
             note2: optional, displayed when executing JS
             headers: custom http headers
             jscode: code to be executed when page is loaded
-        
+
         Returns tuple with:
             * downloaded website (after JS execution)
             * anything you print with `console.log` (but not inside `page.execute`!)
-        
+
         In most cases you don't need to add any `jscode`.
         It is executed in `page.onLoadFinished`.
         `saveAndExit();` is mandatory, use it instead of `phantom.exit()`
@@ -3992,7 +3992,7 @@ class PhantomJSwrapper(object):
               else
                 window.setTimeout(check, 500);
             }
-            
+
             page.evaluate(function(){
               document.querySelector('#a').click();
             });
@@ -4024,13 +4024,14 @@ class PhantomJSwrapper(object):
         else:
             self.extractor.to_screen('%s: %s' % (video_id, note2))
 
-        p = subprocess.Popen([self.exe, '--ssl-protocol=any',
-            self._TMP_FILES['script'].name], stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE)
+        p = subprocess.Popen([
+            self.exe, '--ssl-protocol=any',
+            self._TMP_FILES['script'].name
+        ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         out, err = p.communicate()
         if p.returncode != 0:
-            raise ExtractorError('Executing JS failed\n:'
-                                 + encodeArgument(err))
+            raise ExtractorError(
+                'Executing JS failed\n:' + encodeArgument(err))
         with open(self._TMP_FILES['html'].name, 'rb') as f:
             html = f.read().decode('utf-8')
 
-- 
cgit v1.2.3


From c110944fa2f21af733b4f3168764e1b008e11514 Mon Sep 17 00:00:00 2001
From: "M.K" <mk-pmb@users.noreply.github.com>
Date: Tue, 3 Oct 2017 22:50:27 +0200
Subject: [extractor/common] Fix typo in _parse_mpd_formats

---
 youtube_dl/extractor/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 2bbbf8f4d..a878550c2 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1920,7 +1920,7 @@ class InfoExtractor(object):
                             # can't be used at the same time
                             if '%(Number' in media_template and 's' not in representation_ms_info:
                                 segment_duration = None
-                                if 'total_number' not in representation_ms_info and 'segment_duration':
+                                if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
                                     segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
                                     representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
                                 representation_ms_info['fragments'] = [{
-- 
cgit v1.2.3


From 50d808f5c942b454b3febbf1ef5d78bd204ce469 Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Thu, 12 Oct 2017 16:12:47 +0000
Subject: [common] add support for jwplayer youtube embeds

---
 youtube_dl/extractor/common.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a878550c2..a69240693 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2322,7 +2322,6 @@ class InfoExtractor(object):
             formats = self._parse_jwplayer_formats(
                 video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
                 mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
-            self._sort_formats(formats)
 
             subtitles = {}
             tracks = video_data.get('tracks')
@@ -2339,16 +2338,25 @@ class InfoExtractor(object):
                         'url': self._proto_relative_url(track_url)
                     })
 
-            entries.append({
+            entry = {
                 'id': this_video_id,
-                'title': video_data['title'] if require_title else video_data.get('title'),
+                'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
                 'description': video_data.get('description'),
                 'thumbnail': self._proto_relative_url(video_data.get('image')),
                 'timestamp': int_or_none(video_data.get('pubdate')),
                 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
                 'subtitles': subtitles,
-                'formats': formats,
-            })
+            }
+            # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
+            if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
+                entry.update({
+                    '_type': 'url_transparent',
+                    'url': formats[0]['url'],
+                })
+            else:
+                self._sort_formats(formats)
+                entry['formats'] = formats
+            entries.append(entry)
         if len(entries) == 1:
             return entries[0]
         else:
-- 
cgit v1.2.3


From 9211e3319e6006373d8b5055f7a3d0bbd734c57b Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 29 Oct 2017 07:05:55 +0700
Subject: [extractor/common] Prefix format id for audio only HLS formats

---
 youtube_dl/extractor/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a69240693..52f2055b5 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1401,7 +1401,7 @@ class InfoExtractor(object):
             media_url = media.get('URI')
             if media_url:
                 format_id = []
-                for v in (group_id, name):
+                for v in (m3u8_id, group_id, name):
                     if v:
                         format_id.append(v)
                 f = {
-- 
cgit v1.2.3


From 044eeb145556cb41485b6b6644f40b2161a4e0f1 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Wed, 1 Nov 2017 23:39:26 +0700
Subject: [extractor/common] Respect URL query in _extract_wowza_formats
 (closes #14645)

---
 youtube_dl/extractor/common.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 52f2055b5..a67ac4411 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2233,27 +2233,35 @@ class InfoExtractor(object):
         return formats
 
     def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
+        query = compat_urlparse.urlparse(url).query
         url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
         url_base = self._search_regex(
             r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
         http_base_url = '%s:%s' % ('http', url_base)
         formats = []
+
+        def manifest_url(manifest):
+            m_url = '%s/%s' % (http_base_url, manifest)
+            if query:
+                m_url += '?%s' % query
+            return m_url
+
         if 'm3u8' not in skip_protocols:
             formats.extend(self._extract_m3u8_formats(
-                http_base_url + '/playlist.m3u8', video_id, 'mp4',
+                manifest_url('playlist.m3u8'), video_id, 'mp4',
                 m3u8_entry_protocol, m3u8_id='hls', fatal=False))
         if 'f4m' not in skip_protocols:
             formats.extend(self._extract_f4m_formats(
-                http_base_url + '/manifest.f4m',
+                manifest_url('manifest.f4m'),
                 video_id, f4m_id='hds', fatal=False))
         if 'dash' not in skip_protocols:
             formats.extend(self._extract_mpd_formats(
-                http_base_url + '/manifest.mpd',
+                manifest_url('manifest.mpd'),
                 video_id, mpd_id='dash', fatal=False))
         if re.search(r'(?:/smil:|\.smil)', url_base):
             if 'smil' not in skip_protocols:
                 rtmp_formats = self._extract_smil_formats(
-                    http_base_url + '/jwplayer.smil',
+                    manifest_url('jwplayer.smil'),
                     video_id, fatal=False)
                 for rtmp_format in rtmp_formats:
                     rtsp_format = rtmp_format.copy()
-- 
cgit v1.2.3


From 48107c198bd76e611e3d4c2486cdc5403829a05a Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sat, 4 Nov 2017 22:10:55 +0700
Subject: [f4m] Prefer baseURL for relative URLs (closes #14660)

---
 youtube_dl/downloader/f4m.py   | 25 +++++++++++++++++--------
 youtube_dl/extractor/common.py | 14 +++++++-------
 2 files changed, 24 insertions(+), 15 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index c8fde9a89..fdb80f42a 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -243,8 +243,17 @@ def remove_encrypted_media(media):
                        media))
 
 
-def _add_ns(prop):
-    return '{http://ns.adobe.com/f4m/1.0}%s' % prop
+def _add_ns(prop, ver=1):
+    return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
+
+
+def get_base_url(manifest):
+    base_url = xpath_text(
+        manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
+        'base URL', default=None)
+    if base_url:
+        base_url = base_url.strip()
+    return base_url
 
 
 class F4mFD(FragmentFD):
@@ -330,13 +339,13 @@ class F4mFD(FragmentFD):
             rate, media = list(filter(
                 lambda f: int(f[0]) == requested_bitrate, formats))[0]
 
-        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
+        # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
+        man_base_url = get_base_url(doc) or man_url
+
+        base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
         bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
-        # From Adobe F4M 3.0 spec:
-        # The <baseURL> element SHALL be the base URL for all relative
-        # (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
-        # URLs should be relative to the location of the containing document.
-        boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
+        boot_info, bootstrap_url = self._parse_bootstrap_node(
+            bootstrap_node, man_base_url)
         live = boot_info['live']
         metadata_node = media.find(_add_ns('metadata'))
         if metadata_node is not None:
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a67ac4411..64fb869aa 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -29,7 +29,10 @@ from ..compat import (
     compat_urlparse,
     compat_xml_parse_error,
 )
-from ..downloader.f4m import remove_encrypted_media
+from ..downloader.f4m import (
+    get_base_url,
+    remove_encrypted_media,
+)
 from ..utils import (
     NO_DEFAULT,
     age_restricted,
@@ -1239,11 +1242,8 @@ class InfoExtractor(object):
         media_nodes = remove_encrypted_media(media_nodes)
         if not media_nodes:
             return formats
-        base_url = xpath_text(
-            manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
-            'base URL', default=None)
-        if base_url:
-            base_url = base_url.strip()
+
+        manifest_base_url = get_base_url(manifest)
 
         bootstrap_info = xpath_element(
             manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
@@ -1275,7 +1275,7 @@ class InfoExtractor(object):
                     continue
                 manifest_url = (
                     media_url if media_url.startswith('http://') or media_url.startswith('https://')
-                    else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
+                    else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
                 # If media_url is itself a f4m manifest do the recursive extraction
                 # since bitrates in parent manifest (this one) and media_url manifest
                 # may differ leading to inability to resolve the format by requested
-- 
cgit v1.2.3


From 187ee66c941d9c397a46ffa490375e2c405500e5 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sat, 4 Nov 2017 22:11:39 +0700
Subject: [extractor/common] Add protocol for f4m formats

---
 youtube_dl/extractor/common.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 64fb869aa..e2d9f52b0 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1310,6 +1310,7 @@ class InfoExtractor(object):
                 'url': manifest_url,
                 'manifest_url': manifest_url,
                 'ext': 'flv' if bootstrap_info is not None else None,
+                'protocol': 'f4m',
                 'tbr': tbr,
                 'width': width,
                 'height': height,
-- 
cgit v1.2.3


From ea2295842f79c9efff3a9abce1d0eee7de4953d6 Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Tue, 14 Nov 2017 17:41:30 +0100
Subject: [common] skip Apple FairPlay m3u8 manifests(closes #14741)

---
 youtube_dl/extractor/common.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index e2d9f52b0..a9d68fc0c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1356,6 +1356,9 @@ class InfoExtractor(object):
         if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
             return []
 
+        if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc):  # Apple FairPlay
+            return []
+
         formats = []
 
         format_url = lambda u: (
-- 
cgit v1.2.3


From f610dbb05f8d17cc95437958835a437c3777b38c Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sat, 18 Nov 2017 19:02:56 +0700
Subject: [extractor/common] Use final URL when dumping request (closes #14769)

---
 youtube_dl/extractor/common.py | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a9d68fc0c..8e4ee0deb 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -592,19 +592,11 @@ class InfoExtractor(object):
         if not encoding:
             encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
         if self._downloader.params.get('dump_intermediate_pages', False):
-            try:
-                url = url_or_request.get_full_url()
-            except AttributeError:
-                url = url_or_request
-            self.to_screen('Dumping request to ' + url)
+            self.to_screen('Dumping request to ' + urlh.geturl())
             dump = base64.b64encode(webpage_bytes).decode('ascii')
             self._downloader.to_screen(dump)
         if self._downloader.params.get('write_pages', False):
-            try:
-                url = url_or_request.get_full_url()
-            except AttributeError:
-                url = url_or_request
-            basen = '%s_%s' % (video_id, url)
+            basen = '%s_%s' % (video_id, urlh.geturl())
             if len(basen) > 240:
                 h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
                 basen = basen[:240 - len(h)] + h
-- 
cgit v1.2.3


From 41bf647e895faca42cdc2565ea034ed341593f8e Mon Sep 17 00:00:00 2001
From: Petr Novak <petr.novak@cosmoboy.cz>
Date: Sat, 25 Nov 2017 02:13:23 +0100
Subject: [extractor/common] Add support for DASH manifests with SegmentLists
 with bare SegmentURLs

---
 test/test_InfoExtractor.py      |  83 ++++++++++++++-
 test/testdata/mpd/urls_only.mpd | 218 ++++++++++++++++++++++++++++++++++++++++
 youtube_dl/extractor/common.py  |   9 ++
 3 files changed, 309 insertions(+), 1 deletion(-)
 create mode 100644 test/testdata/mpd/urls_only.mpd

(limited to 'youtube_dl/extractor/common.py')

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 7b95f883f..e58452ab5 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -562,7 +562,88 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'width': 1920,
                     'height': 1080,
                 }]
-            ),
+            ), (
+                'urls_only',
+                'http://unknown/manifest.mpd',
+                [{
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_144p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 200,
+                    'width': 256,
+                    'height': 144,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_240p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 400,
+                    'width': 424,
+                    'height': 240,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_360p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 800,
+                    'width': 640,
+                    'height': 360,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_480p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 1200,
+                    'width': 856,
+                    'height': 480,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_576p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 1600,
+                    'width': 1024,
+                    'height': 576,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_720p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 2400,
+                    'width': 1280,
+                    'height': 720,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'mp4',
+                    'format_id': 'h264_aac_1080p_m4s',
+                    'format_note': 'DASH video',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'avc3.42c01e',
+                    'tbr': 4400,
+                    'width': 1920,
+                    'height': 1080,
+                }]
+            )
         ]
 
         for mpd_file, mpd_url, expected_formats in _TEST_CASES:
diff --git a/test/testdata/mpd/urls_only.mpd b/test/testdata/mpd/urls_only.mpd
new file mode 100644
index 000000000..2b9d595d3
--- /dev/null
+++ b/test/testdata/mpd/urls_only.mpd
@@ -0,0 +1,218 @@
+<?xml version="1.0" ?>
+<MPD maxSegmentDuration="PT0H0M10.000S" mediaPresentationDuration="PT0H4M1.728S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-main:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011">
+  <Period duration="PT0H4M1.728S">
+    <AdaptationSet bitstreamSwitching="true" lang="und" maxHeight="1080" maxWidth="1920" par="16:9" segmentAlignment="true">
+      <ContentComponent contentType="video" id="1"/>
+      <Representation audioSamplingRate="44100" bandwidth="200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="144" id="h264_aac_144p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="256">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="240" id="h264_aac_240p_m4s" mimeType="video/mp4" sar="160:159" startWithSAP="1" width="424">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="800000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="360" id="h264_aac_360p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="640">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="1200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="480" id="h264_aac_480p_m4s" mimeType="video/mp4" sar="320:321" startWithSAP="1" width="856">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="1600000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="576" id="h264_aac_576p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1024">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="2400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="720" id="h264_aac_720p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1280">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+      <Representation audioSamplingRate="44100" bandwidth="4400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="1080" id="h264_aac_1080p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1920">
+        <SegmentList duration="10000" timescale="1000">
+          <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/init/432f65a0.mp4"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/0/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/1/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/2/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/3/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/4/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/5/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/6/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/7/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/8/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/9/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/10/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/11/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/12/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/13/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/14/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/15/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/16/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/17/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/18/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/19/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/20/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/21/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/22/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/23/432f65a0.m4s"/>
+          <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/24/432f65a0.m4s"/>
+        </SegmentList>
+      </Representation>
+    </AdaptationSet>
+  </Period>
+</MPD>
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 8e4ee0deb..15999411b 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1975,6 +1975,15 @@ class InfoExtractor(object):
                                     })
                                     segment_index += 1
                             representation_ms_info['fragments'] = fragments
+                        elif 'segment_urls' in representation_ms_info:
+                            # Segment URLs with no SegmentTimeline
+                            # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
+                            fragments = []
+                            for segment_url in representation_ms_info['segment_urls']:
+                                fragments.append({
+                                    location_key(segment_url): segment_url,
+                                })
+                            representation_ms_info['fragments'] = fragments
                         # NB: MPD manifest may contain direct URLs to unfragmented media.
                         # No fragments key is present in this case.
                         if 'fragments' in representation_ms_info:
-- 
cgit v1.2.3


From 603fc4e0ea472c7c2a78ff201d69686a9e3fe1f2 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sat, 2 Dec 2017 21:10:35 +0700
Subject: [extractor/common] Add durations for DASH fragments with bare
 SegmentURLs

---
 youtube_dl/extractor/common.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 15999411b..3baf683d8 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1979,10 +1979,16 @@ class InfoExtractor(object):
                             # Segment URLs with no SegmentTimeline
                             # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
                             fragments = []
+                            segment_duration = float_or_none(
+                                representation_ms_info['segment_duration'],
+                                representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
                             for segment_url in representation_ms_info['segment_urls']:
-                                fragments.append({
+                                fragment = {
                                     location_key(segment_url): segment_url,
-                                })
+                                }
+                                if segment_duration:
+                                    fragment['duration'] = segment_duration
+                                fragments.append(fragment)
                             representation_ms_info['fragments'] = fragments
                         # NB: MPD manifest may contain direct URLs to unfragmented media.
                         # No fragments key is present in this case.
-- 
cgit v1.2.3


From 78593e294cc4623108d17b1b0b2d26d507953006 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sat, 2 Dec 2017 21:22:43 +0700
Subject: Add references for #14844

---
 test/test_InfoExtractor.py     | 1 +
 youtube_dl/extractor/common.py | 1 +
 2 files changed, 2 insertions(+)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index e58452ab5..8a372d2c9 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -563,6 +563,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'height': 1080,
                 }]
             ), (
+                # https://github.com/rg3/youtube-dl/pull/14844
                 'urls_only',
                 'http://unknown/manifest.mpd',
                 [{
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 3baf683d8..80a9c982f 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1978,6 +1978,7 @@ class InfoExtractor(object):
                         elif 'segment_urls' in representation_ms_info:
                             # Segment URLs with no SegmentTimeline
                             # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
+                            # https://github.com/rg3/youtube-dl/pull/14844
                             fragments = []
                             segment_duration = float_or_none(
                                 representation_ms_info['segment_duration'],
-- 
cgit v1.2.3


From c10c93238e6db0df8746fc185ca316b9d8ccece5 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Tue, 19 Dec 2017 03:51:03 +0700
Subject: [extractor/common] Introduce uploader, uploader_id and uploader_url
 meta fields for playlists (#11427, #15018)

---
 youtube_dl/extractor/common.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 80a9c982f..e5ef5e490 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -301,8 +301,9 @@ class InfoExtractor(object):
     There must be a key "entries", which is a list, an iterable, or a PagedList
     object, each element of which is a valid dictionary by this specification.
 
-    Additionally, playlists can have "title", "description" and "id" attributes
-    with the same semantics as videos (see above).
+    Additionally, playlists can have "id", "title", "description", "uploader",
+    "uploader_id", "uploader_url" attributes with the same semantics as videos
+    (see above).
 
 
     _type "multi_video" indicates that there are multiple videos that
-- 
cgit v1.2.3


From 2132edaa03857085821b6a1214ce1410e0c2e463 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sat, 23 Dec 2017 20:57:35 +0700
Subject: [extractor/common] Move X-Forwarded-For setup code into
 _request_webpage

---
 youtube_dl/extractor/common.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index e5ef5e490..3b79b8cb4 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -495,6 +495,16 @@ class InfoExtractor(object):
                 self.to_screen('%s' % (note,))
             else:
                 self.to_screen('%s: %s' % (video_id, note))
+
+        # Some sites check X-Forwarded-For HTTP header in order to figure out
+        # the origin of the client behind proxy. This allows bypassing geo
+        # restriction by faking this header's value to IP that belongs to some
+        # geo unrestricted country. We will do so once we encounter any
+        # geo restriction error.
+        if self._x_forwarded_for_ip:
+            if 'X-Forwarded-For' not in headers:
+                headers['X-Forwarded-For'] = self._x_forwarded_for_ip
+
         if isinstance(url_or_request, compat_urllib_request.Request):
             url_or_request = update_Request(
                 url_or_request, data=data, headers=headers, query=query)
@@ -524,15 +534,6 @@ class InfoExtractor(object):
         if isinstance(url_or_request, (compat_str, str)):
             url_or_request = url_or_request.partition('#')[0]
 
-        # Some sites check X-Forwarded-For HTTP header in order to figure out
-        # the origin of the client behind proxy. This allows bypassing geo
-        # restriction by faking this header's value to IP that belongs to some
-        # geo unrestricted country. We will do so once we encounter any
-        # geo restriction error.
-        if self._x_forwarded_for_ip:
-            if 'X-Forwarded-For' not in headers:
-                headers['X-Forwarded-For'] = self._x_forwarded_for_ip
-
         urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
         if urlh is False:
             assert not fatal
-- 
cgit v1.2.3


From 9d6ac71c27b1dfb662c795ef598dbfd0286682da Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Fri, 29 Dec 2017 23:14:15 +0700
Subject: [extractor/common] Fix extraction of DASH formats with the same
 representation id (closes #15111)

---
 test/test_InfoExtractor.py     | 11 +++++++++++
 youtube_dl/extractor/common.py | 18 ++++++++----------
 2 files changed, 19 insertions(+), 10 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 8a372d2c9..7b31d5198 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -493,9 +493,20 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
         _TEST_CASES = [
             (
                 # https://github.com/rg3/youtube-dl/issues/13919
+                # Also tests duplicate representation ids, see
+                # https://github.com/rg3/youtube-dl/issues/15111
                 'float_duration',
                 'http://unknown/manifest.mpd',
                 [{
+                    'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'm4a',
+                    'format_id': '318597',
+                    'format_note': 'DASH audio',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'none',
+                    'tbr': 61.587,
+                }, {
                     'manifest_url': 'http://unknown/manifest.mpd',
                     'ext': 'mp4',
                     'format_id': '318597',
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 3b79b8cb4..35d427eec 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2007,16 +2007,14 @@ class InfoExtractor(object):
                                     f['url'] = initialization_url
                                 f['fragments'].append({location_key(initialization_url): initialization_url})
                             f['fragments'].extend(representation_ms_info['fragments'])
-                        try:
-                            existing_format = next(
-                                fo for fo in formats
-                                if fo['format_id'] == representation_id)
-                        except StopIteration:
-                            full_info = formats_dict.get(representation_id, {}).copy()
-                            full_info.update(f)
-                            formats.append(full_info)
-                        else:
-                            existing_format.update(f)
+                        # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
+                        # is not necessarily unique within a Period thus formats with
+                        # the same `format_id` are quite possible. There are numerous examples
+                        # of such manifests (see https://github.com/rg3/youtube-dl/issues/15111,
+                        # https://github.com/rg3/youtube-dl/issues/13919)
+                        full_info = formats_dict.get(representation_id, {}).copy()
+                        full_info.update(f)
+                        formats.append(full_info)
                     else:
                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
         return formats
-- 
cgit v1.2.3


From 2501d41ef4b9ed0349cf4f9838e12873350e60d5 Mon Sep 17 00:00:00 2001
From: felix <felix.von.s@posteo.de>
Date: Sat, 12 Nov 2016 22:15:51 +0100
Subject: [common] use AACL as the default fourcc when AudioTag is 255

---
 youtube_dl/extractor/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 35d427eec..5e7e7a3f7 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2054,7 +2054,7 @@ class InfoExtractor(object):
             stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
             stream_name = stream.get('Name')
             for track in stream.findall('QualityLevel'):
-                fourcc = track.get('FourCC')
+                fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
                 # TODO: add support for WVC1 and WMAP
                 if fourcc not in ('H264', 'AVC1', 'AACL'):
                     self.report_warning('%s is not a supported codec' % fourcc)
-- 
cgit v1.2.3


From 126f225bcffbdc55725fe2272daf22489abacf54 Mon Sep 17 00:00:00 2001
From: Ondřej Caletka <ondrej@caletka.cz>
Date: Sat, 30 Dec 2017 22:02:46 +0100
Subject: [extractor/common] Add container meta field for formats extracted in
 _parse_mpd_formats

---
 youtube_dl/extractor/common.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 5e7e7a3f7..5b6a09c0b 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1880,6 +1880,7 @@ class InfoExtractor(object):
                             'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
                             'format_note': 'DASH %s' % content_type,
                             'filesize': filesize,
+                            'container': mimetype2ext(mime_type) + '_dash',
                         }
                         f.update(parse_codecs(representation_attrib.get('codecs')))
                         representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
-- 
cgit v1.2.3