]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/jsinterp.py
   1 from __future__ 
import unicode_literals
 
  16     ('>>', operator
.rshift
), 
  17     ('<<', operator
.lshift
), 
  21     ('/', operator
.truediv
), 
  24 _ASSIGN_OPERATORS 
= [(op 
+ '=', opfunc
) for op
, opfunc 
in _OPERATORS
] 
  25 _ASSIGN_OPERATORS
.append(('=', lambda cur
, right
: right
)) 
  27 _NAME_RE 
= r
'[a-zA-Z_$][a-zA-Z_$0-9]*' 
  30 class JSInterpreter(object): 
  31     def __init__(self
, code
, objects
=None): 
  36         self
._objects 
= objects
 
  38     def interpret_statement(self
, stmt
, local_vars
, allow_recursion
=100): 
  39         if allow_recursion 
< 0: 
  40             raise ExtractorError('Recursion limit reached') 
  44         stmt_m 
= re
.match(r
'var\s', stmt
) 
  46             expr 
= stmt
[len(stmt_m
.group(0)):] 
  48             return_m 
= re
.match(r
'return(?:\s+|$)', stmt
) 
  50                 expr 
= stmt
[len(return_m
.group(0)):] 
  53                 # Try interpreting it as an expression 
  56         v 
= self
.interpret_expression(expr
, local_vars
, allow_recursion
) 
  57         return v
, should_abort
 
  59     def interpret_expression(self
, expr
, local_vars
, allow_recursion
): 
  61         if expr 
== '':  # Empty expression 
  64         if expr
.startswith('('): 
  66             for m 
in re
.finditer(r
'[()]', expr
): 
  72                         sub_expr 
= expr
[1:m
.start()] 
  73                         sub_result 
= self
.interpret_expression( 
  74                             sub_expr
, local_vars
, allow_recursion
) 
  75                         remaining_expr 
= expr
[m
.end():].strip() 
  76                         if not remaining_expr
: 
  79                             expr 
= json
.dumps(sub_result
) + remaining_expr
 
  82                 raise ExtractorError('Premature end of parens in %r' % expr
) 
  84         for op
, opfunc 
in _ASSIGN_OPERATORS
: 
  86                 (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])? 
  88                 (?P<expr>.*)$''' % (_NAME_RE
, re
.escape(op
)), expr
) 
  91             right_val 
= self
.interpret_expression( 
  92                 m
.group('expr'), local_vars
, allow_recursion 
- 1) 
  94             if m
.groupdict().get('index'): 
  95                 lvar 
= local_vars
[m
.group('out')] 
  96                 idx 
= self
.interpret_expression( 
  97                     m
.group('index'), local_vars
, allow_recursion
) 
  98                 assert isinstance(idx
, int) 
 100                 val 
= opfunc(cur
, right_val
) 
 104                 cur 
= local_vars
.get(m
.group('out')) 
 105                 val 
= opfunc(cur
, right_val
) 
 106                 local_vars
[m
.group('out')] = val
 
 113             r
'(?!if|return|true|false)(?P<name>%s)$' % _NAME_RE
, 
 116             return local_vars
[var_m
.group('name')] 
 119             return json
.loads(expr
) 
 124             r
'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE
, expr
) 
 126             val 
= local_vars
[m
.group('in')] 
 127             idx 
= self
.interpret_expression( 
 128                 m
.group('idx'), local_vars
, allow_recursion 
- 1) 
 132             r
'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE
, 
 135             variable 
= m
.group('var') 
 136             member 
= remove_quotes(m
.group('member') or m
.group('member2')) 
 137             arg_str 
= m
.group('args') 
 139             if variable 
in local_vars
: 
 140                 obj 
= local_vars
[variable
] 
 142                 if variable 
not in self
._objects
: 
 143                     self
._objects
[variable
] = self
.extract_object(variable
) 
 144                 obj 
= self
._objects
[variable
] 
 148                 if member 
== 'length': 
 152             assert expr
.endswith(')') 
 158                     self
.interpret_expression(v
, local_vars
, allow_recursion
) 
 159                     for v 
in arg_str
.split(',')]) 
 161             if member 
== 'split': 
 162                 assert argvals 
== ('',) 
 165                 assert len(argvals
) == 1 
 166                 return argvals
[0].join(obj
) 
 167             if member 
== 'reverse': 
 168                 assert len(argvals
) == 0 
 171             if member 
== 'slice': 
 172                 assert len(argvals
) == 1 
 173                 return obj
[argvals
[0]:] 
 174             if member 
== 'splice': 
 175                 assert isinstance(obj
, list) 
 176                 index
, howMany 
= argvals
 
 178                 for i 
in range(index
, min(index 
+ howMany
, len(obj
))): 
 179                     res
.append(obj
.pop(index
)) 
 182             return obj
[member
](argvals
) 
 184         for op
, opfunc 
in _OPERATORS
: 
 185             m 
= re
.match(r
'(?P<x>.+?)%s(?P<y>.+)' % re
.escape(op
), expr
) 
 188             x
, abort 
= self
.interpret_statement( 
 189                 m
.group('x'), local_vars
, allow_recursion 
- 1) 
 191                 raise ExtractorError( 
 192                     'Premature left-side return of %s in %r' % (op
, expr
)) 
 193             y
, abort 
= self
.interpret_statement( 
 194                 m
.group('y'), local_vars
, allow_recursion 
- 1) 
 196                 raise ExtractorError( 
 197                     'Premature right-side return of %s in %r' % (op
, expr
)) 
 201             r
'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE
, expr
) 
 203             fname 
= m
.group('func') 
 205                 int(v
) if v
.isdigit() else local_vars
[v
] 
 206                 for v 
in m
.group('args').split(',')]) if len(m
.group('args')) > 0 else tuple() 
 207             if fname 
not in self
._functions
: 
 208                 self
._functions
[fname
] = self
.extract_function(fname
) 
 209             return self
._functions
[fname
](argvals
) 
 211         raise ExtractorError('Unsupported JS expression %r' % expr
) 
 213     def extract_object(self
, objname
): 
 214         _FUNC_NAME_RE 
= r
'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' 
 218                 (?<!this\.)%s\s*=\s*{\s* 
 219                     (?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*) 
 221             ''' % (re
.escape(objname
), _FUNC_NAME_RE
), 
 223         fields 
= obj_m
.group('fields') 
 224         # Currently, it only supports function definitions 
 225         fields_m 
= re
.finditer( 
 227                 (?P<key>%s)\s*:\s*function\s*\((?P<args>[a-z,]+)\){(?P<code>[^}]+)} 
 231             argnames 
= f
.group('args').split(',') 
 232             obj
[remove_quotes(f
.group('key'))] = self
.build_function(argnames
, f
.group('code')) 
 236     def extract_function(self
, funcname
): 
 239                 (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s* 
 240                 \((?P<args>[^)]*)\)\s* 
 241                 \{(?P<code>[^}]+)\}''' % ( 
 242                 re
.escape(funcname
), re
.escape(funcname
), re
.escape(funcname
)), 
 245             raise ExtractorError('Could not find JS function %r' % funcname
) 
 246         argnames 
= func_m
.group('args').split(',') 
 248         return self
.build_function(argnames
, func_m
.group('code')) 
 250     def call_function(self
, funcname
, *args
): 
 251         f 
= self
.extract_function(funcname
) 
 254     def build_function(self
, argnames
, code
): 
 256             local_vars 
= dict(zip(argnames
, args
)) 
 257             for stmt 
in code
.split(';'): 
 258                 res
, abort 
= self
.interpret_statement(stmt
, local_vars
)