aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authordirkf <[email protected]>2024-02-21 00:09:48 +0000
committerdirkf <[email protected]>2024-03-08 13:03:42 +0000
commitf66372403fd9e1661199fea100ba2600fa9697b2 (patch)
tree4e99da07e86adb54cca8d33c0aa4f85b581c3b44
parent7216fa2ac4706e099ea2ad9a04fe7bf4300bc745 (diff)
downloadyoutube-dl-f66372403fd9e1661199fea100ba2600fa9697b2.tar.gz
youtube-dl-f66372403fd9e1661199fea100ba2600fa9697b2.zip
[InfoExtractor] Rework and improve JWPlayer extraction
* use traverse_obj() and _search_json() * support playlist `.load({**video1},{**video2}, ...)` * support transform_source=... for _extract_jwplayer_data()
-rw-r--r--youtube_dl/extractor/common.py55
1 files changed, 22 insertions, 33 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index b5e95a318..7fae9e57b 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -3021,25 +3021,22 @@ class InfoExtractor(object):
return formats
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
- mobj = re.search(
- r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
- webpage)
- if mobj:
- try:
- jwplayer_data = self._parse_json(mobj.group('options'),
- video_id=video_id,
- transform_source=transform_source)
- except ExtractorError:
- pass
- else:
- if isinstance(jwplayer_data, dict):
- return jwplayer_data
+ return self._search_json(
+ r'''(?<!-)\bjwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?:(?!</script>).)*?\.\s*(?:setup\s*\(|(?P<load>load)\s*\(\s*\[)''',
+ webpage, 'JWPlayer data', video_id,
+ # must be a {...} or sequence, ending
+ contains_pattern=r'\{[\s\S]*}(?(load)(?:\s*,\s*\{[\s\S]*})*)', end_pattern=r'(?(load)\]|\))',
+ transform_source=transform_source, default=None)
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
- jwplayer_data = self._find_jwplayer_data(
- webpage, video_id, transform_source=js_to_json)
- return self._parse_jwplayer_data(
- jwplayer_data, video_id, *args, **kwargs)
+
+ # allow passing `transform_source` through to _find_jwplayer_data()
+ transform_source = kwargs.pop('transform_source', None)
+ kwfind = compat_kwargs({'transform_source': transform_source}) if transform_source else {}
+
+ jwplayer_data = self._find_jwplayer_data(webpage, video_id, **kwfind)
+
+ return self._parse_jwplayer_data(jwplayer_data, video_id, *args, **kwargs)
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
@@ -3073,22 +3070,14 @@ class InfoExtractor(object):
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
subtitles = {}
- tracks = video_data.get('tracks')
- if tracks and isinstance(tracks, list):
- for track in tracks:
- if not isinstance(track, dict):
- continue
- track_kind = track.get('kind')
- if not track_kind or not isinstance(track_kind, compat_str):
- continue
- if track_kind.lower() not in ('captions', 'subtitles'):
- continue
- track_url = urljoin(base_url, track.get('file'))
- if not track_url:
- continue
- subtitles.setdefault(track.get('label') or 'en', []).append({
- 'url': self._proto_relative_url(track_url)
- })
+ for track in traverse_obj(video_data, (
+ 'tracks', lambda _, t: t.get('kind').lower() in ('captions', 'subtitles'))):
+ track_url = urljoin(base_url, track.get('file'))
+ if not track_url:
+ continue
+ subtitles.setdefault(track.get('label') or 'en', []).append({
+ 'url': self._proto_relative_url(track_url)
+ })
entry = {
'id': this_video_id,