about | summary | refs | log | tree | commit | diff | homepage
path: root/youtube_dl/jsinterp.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/jsinterp.py')
-rw-r--r-- youtube_dl/jsinterp.py 92
1 files changed, 51 insertions, 41 deletions
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index a8456ec1c..08726e478 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -5,7 +5,6 @@ import json
import math
import operator
import re
-from collections import Counter
from .utils import (
error_to_compat_str,
@@ -15,6 +14,7 @@ from .utils import (
unified_timestamp,
)
from .compat import (
+ compat_basestring,
compat_collections_chain_map as ChainMap,
compat_itertools_zip_longest as zip_longest,
compat_str,
@@ -76,6 +76,10 @@ def _js_comp_op(op):
def wrapped(a, b):
if JS_Undefined in (a, b):
return False
+ if isinstance(a, compat_basestring):
+ b = compat_str(b or 0)
+ elif isinstance(b, compat_basestring):
+ a = compat_str(a or 0)
return op(a or 0, b or 0)
return wrapped
@@ -195,7 +199,6 @@ class JSInterpreter(object):
'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string
}
- _EXC_NAME = '__youtube_dl_exception__'
_OBJ_NAME = '__youtube_dl_jsinterp_obj'
OP_CHARS = None
@@ -242,9 +245,8 @@ class JSInterpreter(object):
def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
if not expr:
return
- # collections.Counter() is ~10% slower
+ # collections.Counter() is ~10% slower in both 2.7 and 3.9
counters = {k: 0 for k in _MATCHING_PARENS.values()}
- # counters = Counter()
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
in_quote, escaping, skipping = None, False, 0
after_op, in_regex_char_group, skip_re = True, False, 0
@@ -291,7 +293,9 @@ class JSInterpreter(object):
yield expr[start:]
@classmethod
- def _separate_at_paren(cls, expr, delim):
+ def _separate_at_paren(cls, expr, delim=None):
+ if delim is None:
+ delim = expr and _MATCHING_PARENS[expr[0]]
separated = list(cls._separate(expr, delim, 1))
if len(separated) < 2:
@@ -376,7 +380,7 @@ class JSInterpreter(object):
if expr.startswith('new '):
obj = expr[4:]
if obj.startswith('Date('):
- left, right = self._separate_at_paren(obj[4:], ')')
+ left, right = self._separate_at_paren(obj[4:])
expr = unified_timestamp(
self.interpret_expression(left, local_vars, allow_recursion), False)
if not expr:
@@ -390,7 +394,7 @@ class JSInterpreter(object):
return None, should_return
if expr.startswith('{'):
- inner, outer = self._separate_at_paren(expr, '}')
+ inner, outer = self._separate_at_paren(expr)
# try for object expression (Map)
sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)]
if all(len(sub_expr) == 2 for sub_expr in sub_expressions):
@@ -406,7 +410,7 @@ class JSInterpreter(object):
expr = self._dump(inner, local_vars) + outer
if expr.startswith('('):
- inner, outer = self._separate_at_paren(expr, ')')
+ inner, outer = self._separate_at_paren(expr)
inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
if not outer or should_abort:
return inner, should_abort or should_return
@@ -414,57 +418,63 @@ class JSInterpreter(object):
expr = self._dump(inner, local_vars) + outer
if expr.startswith('['):
- inner, outer = self._separate_at_paren(expr, ']')
+ inner, outer = self._separate_at_paren(expr)
name = self._named_object(local_vars, [
self.interpret_expression(item, local_vars, allow_recursion)
for item in self._separate(inner)])
expr = name + outer
m = re.match(r'''(?x)
- (?P<try>try|finally)\s*|
- (?P<catch>catch\s*(?P<err>\(\s*{_NAME_RE}\s*\)))|
- (?P<switch>switch)\s*\(|
- (?P<for>for)\s*\(|
- '''.format(**globals()), expr)
+ (?P<try>try)\s*\{|
+ (?P<switch>switch)\s*\(|
+ (?P<for>for)\s*\(
+ ''', expr)
md = m.groupdict() if m else {}
if md.get('try'):
- if expr[m.end()] == '{':
- try_expr, expr = self._separate_at_paren(expr[m.end():], '}')
- else:
- try_expr, expr = expr[m.end() - 1:], ''
+ try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+ err = None
try:
ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion)
if should_abort:
return ret, True
- except JS_Throw as e:
- local_vars[self._EXC_NAME] = e.error
except Exception as e:
# XXX: This works for now, but makes debugging future issues very hard
- local_vars[self._EXC_NAME] = e
- ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
- return ret, should_abort or should_return
-
- elif md.get('catch'):
-
- catch_expr, expr = self._separate_at_paren(expr[m.end():], '}')
- if self._EXC_NAME in local_vars:
- catch_vars = local_vars.new_child({m.group('err'): local_vars.pop(self._EXC_NAME)})
- ret, should_abort = self.interpret_statement(catch_expr, catch_vars, allow_recursion)
+ err = e
+
+ pending = (None, False)
+ m = re.match(r'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{'.format(**globals()), expr)
+ if m:
+ sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+ if err:
+ catch_vars = {}
+ if m.group('err'):
+ catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err
+ catch_vars = local_vars.new_child(m=catch_vars)
+ err = None
+ pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion)
+
+ m = re.match(r'finally\s*\{', expr)
+ if m:
+ sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+ ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
if should_abort:
return ret, True
- ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
+ ret, should_abort = pending
+ if should_abort:
+ return ret, True
- return ret, should_abort or should_return
+ if err:
+ raise err
elif md.get('for'):
- constructor, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
+ constructor, remaining = self._separate_at_paren(expr[m.end() - 1:])
if remaining.startswith('{'):
- body, expr = self._separate_at_paren(remaining, '}')
+ body, expr = self._separate_at_paren(remaining)
else:
switch_m = re.match(r'switch\s*\(', remaining) # FIXME
if switch_m:
- switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:], ')')
+ switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:])
body, expr = self._separate_at_paren(remaining, '}')
body = 'switch(%s){%s}' % (switch_val, body)
else:
@@ -483,11 +493,9 @@ class JSInterpreter(object):
except JS_Continue:
pass
self.interpret_expression(increment, local_vars, allow_recursion)
- ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
- return ret, should_abort or should_return
elif md.get('switch'):
- switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
+ switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:])
switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion)
body, expr = self._separate_at_paren(remaining, '}')
items = body.replace('default:', 'case default:').split('case ')[1:]
@@ -510,6 +518,8 @@ class JSInterpreter(object):
break
if matched:
break
+
+ if md:
ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
return ret, should_abort or should_return
@@ -618,7 +628,7 @@ class JSInterpreter(object):
member = self.interpret_expression(m.group('member2'), local_vars, allow_recursion)
arg_str = expr[m.end():]
if arg_str.startswith('('):
- arg_str, remaining = self._separate_at_paren(arg_str, ')')
+ arg_str, remaining = self._separate_at_paren(arg_str)
else:
arg_str, remaining = None, arg_str
@@ -795,7 +805,7 @@ class JSInterpreter(object):
\((?P<args>[^)]*)\)\s*
(?P<code>{.+})''' % {'name': re.escape(funcname)},
self.code)
- code, _ = self._separate_at_paren(func_m.group('code'), '}') # refine the match
+ code, _ = self._separate_at_paren(func_m.group('code')) # refine the match
if func_m is None:
raise self.Exception('Could not find JS function "{funcname}"'.format(**locals()))
return self.build_arglist(func_m.group('args')), code
@@ -810,7 +820,7 @@ class JSInterpreter(object):
if mobj is None:
break
start, body_start = mobj.span()
- body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
+ body, remaining = self._separate_at_paren(code[body_start - 1:])
name = self._named_object(
local_vars,
self.extract_function_from_code(