diff options
author | dirkf <[email protected]> | 2024-12-07 03:39:44 +0000 |
---|---|---|
committer | dirkf <[email protected]> | 2024-12-16 12:38:51 +0000 |
commit | b4469a0f652c450a81901795a8f522ae5457a1cf (patch) | |
tree | 76d5cafbe8039ebe54a03a1fda7db50e94adb7ce /youtube_dl | |
parent | ce1e556b8fde56eb84bec1580e9f07f45e721c4f (diff) | |
download | youtube-dl-b4469a0f652c450a81901795a8f522ae5457a1cf.tar.gz youtube-dl-b4469a0f652c450a81901795a8f522ae5457a1cf.zip |
[YouTube] Handle player `3bb1f723`
* fix signature code extraction
* raise if n function returns input value
* add new tests from yt-dlp
Co-authored-by: bashonly
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/youtube.py | 38 |
1 files changed, 25 insertions, 13 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6fe520e9a..d633032ae 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1579,19 +1579,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self.to_screen('Extracted signature function:\n' + code) def _parse_sig_js(self, jscode): + # Examples where `sig` is funcname: + # sig=function(a){a=a.split(""); ... ;return a.join("")}; + # ;c&&(c=sig(decodeURIComponent(c)),a.set(b,encodeURIComponent(c)));return a}; + # {var l=f,m=h.sp,n=sig(decodeURIComponent(h.s));l.set(m,encodeURIComponent(n))} + # sig=function(J){J=J.split(""); ... ;return J.join("")}; + # ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J}; + # {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))} funcname = self._search_regex( - (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', + (r'\b(?P<var>[a-zA-Z0-9$]+)&&\((?P=var)=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\((?P=var)\)\)', + r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*(?P<arg>[a-zA-Z0-9$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)', + r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?', + # Old patterns + r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)', - r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)', - r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?', - r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # Obsolete patterns r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(', r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', - r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') @@ -1658,6 +1665,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_n_function_name(self, jscode): func_name, idx = self._search_regex( + # (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}}; # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c) # or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c) # or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b) @@ -1666,7 +1674,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # older: (b=a.get("n"))&&(b=nfunc(b) r'''(?x) \((?:[\w$()\s]+,)*?\s* # ( - (?P<b>[a-z])\s*=\s* # b= + (?P<b>[a-zA-Z])\s*=\s* # b=, R= (?: (?: # expect ,c=a.get(b) (etc) String\s*\.\s*fromCharCode\s*\(\s*110\s*\)| @@ -1679,7 +1687,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*? (?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\)) | # ,c=a.get(b) - ,\s*(?P<c>[a-z])\s*=\s*[a-z]\s* + ,\s*(?P<c>[a-zA-Z])\s*=\s*[a-zA-Z]\s* (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*? (?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\)) ) @@ -1697,15 +1705,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'''(?xs) (?:(?<=[^\w$])|^) # instead of \b, which ignores $ (?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\) - \s*\{(?:(?!};).)+?["']enhanced_except_ + \s*\{(?:(?!};).)+?(?: + ["']enhanced_except_ | + return\s*(?P<q>"|')[a-zA-Z\d-]+_w8_(?P=q)\s*\+\s*[a-zA-Z0-9_$]+ + ) ''', jscode, 'Initial JS player n function name', group='name') if not idx: return func_name - return self._parse_json(self._search_regex( - r'var\s+{0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode, - 'Initial JS player n function list ({0}.{1})'.format(func_name, idx)), - func_name, transform_source=js_to_json)[int(idx)] + return self._search_json( + r'var\s+{0}\s*='.format(re.escape(func_name)), jscode, + 'Initial JS player n function list ({0}.{1})'.format(func_name, idx), + func_name, contains_pattern=r'\[[\s\S]+\]', end_pattern='[,;]', + transform_source=js_to_json)[int(idx)] def _extract_n_function_code(self, video_id, player_url): player_id = self._extract_player_info(player_url) @@ -1734,7 +1746,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): except Exception as e: raise JSInterpreter.Exception(traceback.format_exc(), cause=e) - if ret.startswith('enhanced_except_'): + if ret.startswith('enhanced_except_') or ret.endswith(s): raise JSInterpreter.Exception('Signature function returned an exception') return ret |