diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 5121c8cf8..7e231f670 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -103,8 +103,6 @@ class TestJSInterpreter(unittest.TestCase): self.assertEqual(jsi.call_function('f'), -11) def test_comments(self): - 'Skipping: Not yet fully implemented' - return jsi = JSInterpreter(''' function x() { var x = /* 1 + */ 2; @@ -124,6 +122,9 @@ class TestJSInterpreter(unittest.TestCase): ''') self.assertEqual(jsi.call_function('f'), 3) + jsi = JSInterpreter('function f(){var x = 20; +/**/+x; return x;}') + self.assertEqual(jsi.call_function('f'), 20) + def test_precedence(self): jsi = JSInterpreter(''' function x() { diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 530a705b4..4b8458ca6 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -242,20 +242,66 @@ class JSInterpreter(object): flags |= cls._RE_FLAGS[ch] return flags, expr[idx + 1:] + @classmethod + def _decomment(cls, expr, comment_spans): + def code_spans(): + start = 0 + for comment_start, comment_end in comment_spans: + yield expr[start:comment_start] + start = comment_end + 1 + yield expr[start:] + return ' '.join(code_spans()) + @classmethod def _separate(cls, expr, delim=',', max_split=None, skip_delims=None): if not expr: return # collections.Counter() is ~10% slower in both 2.7 and 3.9 counters = {k: 0 for k in _MATCHING_PARENS.values()} - start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1 + start, splits, pos, delim_len, skip = 0, 0, 0, len(delim) - 1, 0 in_quote, escaping, skipping = None, False, 0 + in_comment_block, in_comment_line, finishing_comment = False, False, False after_op, in_regex_char_group, skip_re = True, False, 0 + start_comment = None + comment_spans = [] for idx, char in enumerate(expr): + if skip > 0: + # If char is already handled in an earlier iteration, + # just skip it entirely this time round. 
+ skip -= 1 + continue if skip_re > 0: skip_re -= 1 continue + if not in_quote: + if expr[idx:].startswith('//'): + in_comment_line = True + skip = 1 + start_comment = idx - start + continue + if expr[idx:].startswith('/*'): + in_comment_block = True + skip = 1 + start_comment = idx - start + continue + if in_comment_block: + if expr[idx:].startswith('*/'): + in_comment_block = False + finishing_comment = True + continue + if in_comment_line: + if char == '\n': + in_comment_line = False + comment_spans.append((start_comment, idx)) + start_comment = None + continue + if finishing_comment: + # Eat the final '/' of a '/* ... */' comment. + finishing_comment = False + comment_spans.append((start_comment, idx)) + start_comment = None + continue if not in_quote: if char in _MATCHING_PARENS: counters[_MATCHING_PARENS[char]] += 1 @@ -287,12 +333,12 @@ class JSInterpreter(object): if pos < delim_len: pos += 1 continue - yield expr[start: idx - delim_len] + yield cls._decomment(expr[start: idx - delim_len], comment_spans) start, pos = idx + 1, 0 splits += 1 if max_split and splits >= max_split: break - yield expr[start:] + yield cls._decomment(expr[start:], comment_spans) @classmethod def _separate_at_paren(cls, expr, delim=None):