From c1a03b1ac3a453508b358b6d1bc5d158cc80a0ce Mon Sep 17 00:00:00 2001
From: dirkf
Date: Thu, 12 Dec 2024 00:00:32 +0000
Subject: [jsinterp] Fix and improve loose and strict equality operations

* reimplement loose equality according to MDN (eg, 1 == "1")
* improve strict equality (eg, "abc" === "abc" but 'abc' is not 'abc')
* add tests for above
---
Review notes (text between '---' and the diff is ignored by git-am):
* _js_eq(): set((a, b)) is reached for mixed operands such as a list and a
  number (they are neither the same container type nor ==), where set()
  raises TypeError (unhashable type) instead of producing a comparison.
* _js_id_op(): presumably bool passes the compat_numeric_types check (bool
  subclasses int), so `true === 1` would compare equal -- TODO confirm
  against the definition of compat_numeric_types.

 test/test_jsinterp.py  | 35 ++++++++++++++++++++--
 youtube_dl/jsinterp.py | 80 +++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 101 insertions(+), 14 deletions(-)

diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index d063bbd36..86137d8e5 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# coding: utf-8
 
 from __future__ import unicode_literals
 
@@ -11,7 +12,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import math
 import re
 
-from youtube_dl.compat import compat_str
+from youtube_dl.compat import compat_str as str
 from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
 
 NaN = object()
@@ -19,7 +20,7 @@ NaN = object()
 
 class TestJSInterpreter(unittest.TestCase):
     def _test(self, jsi_or_code, expected, func='f', args=()):
-        if isinstance(jsi_or_code, compat_str):
+        if isinstance(jsi_or_code, str):
             jsi_or_code = JSInterpreter(jsi_or_code)
         got = jsi_or_code.call_function(func, *args)
         if expected is NaN:
@@ -89,7 +90,35 @@ class TestJSInterpreter(unittest.TestCase):
         self._test('function f(){return 19 & 21;}', 17)
         self._test('function f(){return 11 >> 2;}', 2)
         self._test('function f(){return []? 2+3: 4;}', 5)
+        # equality
+        self._test('function f(){return 1 == 1}', True)
+        self._test('function f(){return 1 == 1.0}', True)
+        self._test('function f(){return 1 == "1"}', True)
         self._test('function f(){return 1 == 2}', False)
+        self._test('function f(){return 1 != "1"}', False)
+        self._test('function f(){return 1 != 2}', True)
+        self._test('function f(){var x = {a: 1}; var y = x; return x == y}', True)
+        self._test('function f(){var x = {a: 1}; return x == {a: 1}}', False)
+        self._test('function f(){return NaN == NaN}', False)
+        self._test('function f(){return null == undefined}', True)
+        self._test('function f(){return "spam, eggs" == "spam, eggs"}', True)
+        # strict equality
+        self._test('function f(){return 1 === 1}', True)
+        self._test('function f(){return 1 === 1.0}', True)
+        self._test('function f(){return 1 === "1"}', False)
+        self._test('function f(){return 1 === 2}', False)
+        self._test('function f(){var x = {a: 1}; var y = x; return x === y}', True)
+        self._test('function f(){var x = {a: 1}; return x === {a: 1}}', False)
+        self._test('function f(){return NaN === NaN}', False)
+        self._test('function f(){return null === undefined}', False)
+        self._test('function f(){return null === null}', True)
+        self._test('function f(){return undefined === undefined}', True)
+        self._test('function f(){return "uninterned" === "uninterned"}', True)
+        self._test('function f(){return 1 === 1}', True)
+        self._test('function f(){return 1 === "1"}', False)
+        self._test('function f(){return 1 !== 1}', False)
+        self._test('function f(){return 1 !== "1"}', True)
+        # expressions
         self._test('function f(){return 0 && 1 || 2;}', 2)
         self._test('function f(){return 0 ?? 42;}', 0)
         self._test('function f(){return "life, the universe and everything" < 42;}', False)
@@ -296,7 +325,7 @@ class TestJSInterpreter(unittest.TestCase):
     def test_undefined(self):
         self._test('function f() { return undefined === undefined; }', True)
         self._test('function f() { return undefined; }', JS_Undefined)
-        self._test('function f() {return undefined ?? 42; }', 42)
+        self._test('function f() { return undefined ?? 42; }', 42)
         self._test('function f() { let v; return v; }', JS_Undefined)
         self._test('function f() { let v; return v**0; }', 1)
         self._test('function f() { let v; return [v>42, v<=42, v&&42, 42&&v]; }',
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index acc68c533..b7a3ef69a 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import itertools
@@ -64,6 +65,10 @@ _NaN = float('nan')
 _Infinity = float('inf')
 
 
+class JS_Undefined(object):
+    pass
+
+
 def _js_bit_op(op):
 
     def zeroise(x):
@@ -107,12 +112,69 @@ def _js_exp(a, b):
     return (a or 0) ** b
 
 
-def _js_eq_op(op):
+def _js_to_primitive(v):
+    return (
+        ','.join(map(_js_toString, v)) if isinstance(v, list)
+        else '[object Object]' if isinstance(v, dict)
+        else compat_str(v) if not isinstance(v, (
+            compat_numeric_types, compat_basestring, bool))
+        else v
+    )
+
+
+def _js_toString(v):
+    return (
+        'undefined' if v is JS_Undefined
+        else 'Infinity' if v == _Infinity
+        else 'NaN' if v is _NaN
+        else 'null' if v is None
+        else compat_str(v) if isinstance(v, compat_numeric_types)
+        else _js_to_primitive(v))
+
+
+_nullish = frozenset((None, JS_Undefined))
+
+
+def _js_eq(a, b):
+    # NaN != any
+    if _NaN in (a, b):
+        return False
+    # Object is Object
+    if isinstance(a, type(b)) and isinstance(b, (dict, list)):
+        return operator.is_(a, b)
+    # general case
+    if a == b:
+        return True
+    # null == undefined
+    a_b = set((a, b))
+    if a_b & _nullish:
+        return a_b <= _nullish
+    a, b = _js_to_primitive(a), _js_to_primitive(b)
+    if not isinstance(a, compat_basestring):
+        a, b = b, a
+    # Number to String: convert the string to a number
+    # Conversion failure results in ... false
+    if isinstance(a, compat_basestring):
+        return float_or_none(a) == b
+    return a == b
+
+
+def _js_neq(a, b):
+    return not _js_eq(a, b)
+
+
+def _js_id_op(op):
 
     @wraps_op(op)
     def wrapped(a, b):
-        if set((a, b)) <= set((None, JS_Undefined)):
-            return op(a, a)
+        if _NaN in (a, b):
+            return op(_NaN, None)
+        if not isinstance(a, (compat_basestring, compat_numeric_types)):
+            a, b = b, a
+        # strings are === if ==
+        # why 'a' is not 'a': https://stackoverflow.com/a/1504848
+        if isinstance(a, (compat_basestring, compat_numeric_types)):
+            return a == b if op(0, 0) else a != b
         return op(a, b)
 
     return wrapped
@@ -187,10 +249,10 @@ _OPERATORS = (
 )
 
 _COMP_OPERATORS = (
-    ('===', operator.is_),
-    ('!==', operator.is_not),
-    ('==', _js_eq_op(operator.eq)),
-    ('!=', _js_eq_op(operator.ne)),
+    ('===', _js_id_op(operator.is_)),
+    ('!==', _js_id_op(operator.is_not)),
+    ('==', _js_eq),
+    ('!=', _js_neq),
     ('<=', _js_comp_op(operator.le)),
     ('>=', _js_comp_op(operator.ge)),
     ('<', _js_comp_op(operator.lt)),
@@ -222,10 +284,6 @@ _MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
 _QUOTES = '\'"/'
 
 
-class JS_Undefined(object):
-    pass
-
-
 class JS_Break(ExtractorError):
     def __init__(self):
         ExtractorError.__init__(self, 'Invalid break')
-- 
cgit v1.2.3