summaryrefslogtreecommitdiffhomepage
path: root/youtube_dl/jsinterp.py
diff options
context:
space:
mode:
authorPhilipp Hagemeister <[email protected]>2014-03-30 07:02:58 +0200
committerPhilipp Hagemeister <[email protected]>2014-03-30 07:02:58 +0200
commit2b25cb5d7693b62736d4cdfa656289cc429c4c81 (patch)
tree3604fbd6cf32550b33fc826c03d5d9af753bc5c0 /youtube_dl/jsinterp.py
parent62fec3b2fffd12949da6fe057ce08d5bab2b7db5 (diff)
downloadyoutube-dl-2b25cb5d7693b62736d4cdfa656289cc429c4c81.tar.gz
youtube-dl-2b25cb5d7693b62736d4cdfa656289cc429c4c81.zip
[youtube] Move JavaScript interpreter into its own module
Diffstat (limited to 'youtube_dl/jsinterp.py')
-rw-r--r--youtube_dl/jsinterp.py113
1 files changed, 113 insertions, 0 deletions
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
new file mode 100644
index 000000000..129a4027b
--- /dev/null
+++ b/youtube_dl/jsinterp.py
@@ -0,0 +1,113 @@
+from __future__ import unicode_literals
+
+import re
+
+from .utils import (
+ ExtractorError,
+)
+
+
+class JSInterpreter(object):
+ def __init__(self, code):
+ self.code = code
+ self._functions = {}
+
+ def interpret_statement(self, stmt, local_vars, allow_recursion=20):
+ if allow_recursion < 0:
+ raise ExtractorError('Recursion limit reached')
+
+ if stmt.startswith('var '):
+ stmt = stmt[len('var '):]
+ ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
+ r'=(?P<expr>.*)$', stmt)
+ if ass_m:
+ if ass_m.groupdict().get('index'):
+ def assign(val):
+ lvar = local_vars[ass_m.group('out')]
+ idx = self.interpret_expression(
+ ass_m.group('index'), local_vars, allow_recursion)
+ assert isinstance(idx, int)
+ lvar[idx] = val
+ return val
+ expr = ass_m.group('expr')
+ else:
+ def assign(val):
+ local_vars[ass_m.group('out')] = val
+ return val
+ expr = ass_m.group('expr')
+ elif stmt.startswith('return '):
+ assign = lambda v: v
+ expr = stmt[len('return '):]
+ else:
+ raise ExtractorError(
+ 'Cannot determine left side of statement in %r' % stmt)
+
+ v = self.interpret_expression(expr, local_vars, allow_recursion)
+ return assign(v)
+
+ def interpret_expression(self, expr, local_vars, allow_recursion):
+ if expr.isdigit():
+ return int(expr)
+
+ if expr.isalpha():
+ return local_vars[expr]
+
+ m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
+ if m:
+ member = m.group('member')
+ val = local_vars[m.group('in')]
+ if member == 'split("")':
+ return list(val)
+ if member == 'join("")':
+ return u''.join(val)
+ if member == 'length':
+ return len(val)
+ if member == 'reverse()':
+ return val[::-1]
+ slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
+ if slice_m:
+ idx = self.interpret_expression(
+ slice_m.group('idx'), local_vars, allow_recursion - 1)
+ return val[idx:]
+
+ m = re.match(
+ r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
+ if m:
+ val = local_vars[m.group('in')]
+ idx = self.interpret_expression(
+ m.group('idx'), local_vars, allow_recursion - 1)
+ return val[idx]
+
+ m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
+ if m:
+ a = self.interpret_expression(
+ m.group('a'), local_vars, allow_recursion)
+ b = self.interpret_expression(
+ m.group('b'), local_vars, allow_recursion)
+ return a % b
+
+ m = re.match(
+ r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
+ if m:
+ fname = m.group('func')
+ if fname not in self._functions:
+ self._functions[fname] = self.extract_function(fname)
+ argvals = [int(v) if v.isdigit() else local_vars[v]
+ for v in m.group('args').split(',')]
+ return self._functions[fname](argvals)
+ raise ExtractorError('Unsupported JS expression %r' % expr)
+
+ def extract_function(self, funcname):
+ func_m = re.search(
+ r'function ' + re.escape(funcname) +
+ r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
+ self.code)
+ argnames = func_m.group('args').split(',')
+
+ def resf(args):
+ local_vars = dict(zip(argnames, args))
+ for stmt in func_m.group('code').split(';'):
+ res = self.interpret_statement(stmt, local_vars)
+ return res
+ return resf
+