X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/a6abd8dc822fb70852437ab5a77ced5f633739f8..4231113a7be907539052ce59df44e23c54d21d38:/youtube_dl/jsinterp.py diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 453e273..7bda596 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -6,6 +6,7 @@ import re from .utils import ( ExtractorError, + remove_quotes, ) _OPERATORS = [ @@ -30,13 +31,10 @@ class JSInterpreter(object): def __init__(self, code, objects=None): if objects is None: objects = {} - self.code = self._remove_comments(code) + self.code = code self._functions = {} self._objects = objects - def _remove_comments(self, code): - return re.sub(r'(?s)/\*.*?\*/', '', code) - def interpret_statement(self, stmt, local_vars, allow_recursion=100): if allow_recursion < 0: raise ExtractorError('Recursion limit reached') @@ -60,7 +58,6 @@ class JSInterpreter(object): def interpret_expression(self, expr, local_vars, allow_recursion): expr = expr.strip() - if expr == '': # Empty expression return None @@ -124,11 +121,19 @@ class JSInterpreter(object): pass m = re.match( - r'(?P%s)\.(?P[^(]+)(?:\(+(?P[^()]*)\))?$' % _NAME_RE, + r'(?P%s)\[(?P.+)\]$' % _NAME_RE, expr) + if m: + val = local_vars[m.group('in')] + idx = self.interpret_expression( + m.group('idx'), local_vars, allow_recursion - 1) + return val[idx] + + m = re.match( + r'(?P%s)(?:\.(?P[^(]+)|\[(?P[^]]+)\])\s*(?:\(+(?P[^()]*)\))?$' % _NAME_RE, expr) if m: variable = m.group('var') - member = m.group('member') + member = remove_quotes(m.group('member') or m.group('member2')) arg_str = m.group('args') if variable in local_vars: @@ -176,14 +181,6 @@ class JSInterpreter(object): return obj[member](argvals) - m = re.match( - r'(?P%s)\[(?P.+)\]$' % _NAME_RE, expr) - if m: - val = local_vars[m.group('in')] - idx = self.interpret_expression( - m.group('idx'), local_vars, allow_recursion - 1) - return val[idx] - for op, opfunc in _OPERATORS: m = re.match(r'(?P.+?)%s(?P.+)' % re.escape(op), expr) if not m: @@ -201,12 +198,12 @@ class JSInterpreter(object): return opfunc(x, y) m = re.match( - r'^(?P%s)\((?P[a-zA-Z0-9_$,]+)\)$' % _NAME_RE, expr) + r'^(?P%s)\((?P[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr) if m: fname = m.group('func') argvals = tuple([ int(v) if v.isdigit() else local_vars[v] - for v in m.group('args').split(',')]) + for v in m.group('args').split(',')]) if len(m.group('args')) > 0 else tuple() if fname not in self._functions: self._functions[fname] = self.extract_function(fname) return self._functions[fname](argvals) @@ -214,31 +211,35 @@ class JSInterpreter(object): raise ExtractorError('Unsupported JS expression %r' % expr) def extract_object(self, objname): + _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' obj = {} obj_m = re.search( - (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) + - r'\s*(?P([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' + - r'\}\s*;', + r'''(?x) + (?(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*) + }\s*; + ''' % (re.escape(objname), _FUNC_NAME_RE), self.code) fields = obj_m.group('fields') # Currently, it only supports function definitions fields_m = re.finditer( - r'(?P[a-zA-Z$0-9]+)\s*:\s*function' - r'\((?P[a-z,]+)\){(?P[^}]+)}', + r'''(?x) + (?P%s)\s*:\s*function\s*\((?P[a-z,]+)\){(?P[^}]+)} + ''' % _FUNC_NAME_RE, fields) for f in fields_m: argnames = f.group('args').split(',') - obj[f.group('key')] = self.build_function(argnames, f.group('code')) + obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code')) return obj def extract_function(self, funcname): func_m = re.search( r'''(?x) - (?:function\s+%s|[{;]%s\s*=\s*function)\s* + (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s* \((?P[^)]*)\)\s* \{(?P[^}]+)\}''' % ( - re.escape(funcname), re.escape(funcname)), + re.escape(funcname), re.escape(funcname), re.escape(funcname)), self.code) if func_m is None: raise ExtractorError('Could not find JS function %r' % funcname)