aboutsummaryrefslogtreecommitdiffhomepage
path: root/youtube_dl/extractor/youtube.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
-rw-r--r--youtube_dl/extractor/youtube.py38
1 files changed, 25 insertions, 13 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 6fe520e9a..d633032ae 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1579,19 +1579,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
+ # Examples where `sig` is funcname:
+ # sig=function(a){a=a.split(""); ... ;return a.join("")};
+ # ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a};
+ # {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))}
+ # sig=function(J){J=J.split(""); ... ;return J.join("")};
+ # ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J};
+ # {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))}
funcname = self._search_regex(
- (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ (r'\b(?P<var>[a-zA-Z0-9$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\((?P=var)\)\)',
+ r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)',
+ r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
+ # Old patterns
+ r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
- r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
- r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
- r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
# Obsolete patterns
r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
jscode, 'Initial JS player signature function name', group='sig')
@@ -1658,6 +1665,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_name(self, jscode):
func_name, idx = self._search_regex(
+ # (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
# or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
# or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
@@ -1666,7 +1674,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# older: (b=a.get("n"))&&(b=nfunc(b)
r'''(?x)
\((?:[\w$()\s]+,)*?\s* # (
- (?P<b>[a-z])\s*=\s* # b=
+ (?P<b>[a-zA-Z])\s*=\s* # b=, R=
(?:
(?: # expect ,c=a.get(b) (etc)
String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
@@ -1679,7 +1687,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
(?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
| # ,c=a.get(b)
- ,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
+ ,\s*(?P<c>[a-zA-Z])\s*=\s*[a-zA-Z]\s*
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
(?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
)
@@ -1697,15 +1705,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'''(?xs)
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
(?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
- \s*\{(?:(?!};).)+?["']enhanced_except_
+ \s*\{(?:(?!};).)+?(?:
+ ["']enhanced_except_ |
+ return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[a-zA-Z0-9_$]+
+ )
''', jscode, 'Initial JS player n function name', group='name')
if not idx:
return func_name
- return self._parse_json(self._search_regex(
- r'var\s+{0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
- 'Initial JS player n function list ({0}.{1})'.format(func_name, idx)),
- func_name, transform_source=js_to_json)[int(idx)]
+ return self._search_json(
+ r'var\s+{0}\s*='.format(re.escape(func_name)), jscode,
+ 'Initial JS player n function list ({0}.{1})'.format(func_name, idx),
+ func_name, contains_pattern=r'\[[\s\S]+\]', end_pattern='[,;]',
+ transform_source=js_to_json)[int(idx)]
def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
@@ -1734,7 +1746,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
except Exception as e:
raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
- if ret.startswith('enhanced_except_'):
+ if ret.startswith('enhanced_except_') or ret.endswith(s):
raise JSInterpreter.Exception('Signature function returned an exception')
return ret