]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/jsinterp.py
b4617fbad0fc40323a129ce1218f9f97590c89bb
   1 from __future__ 
import unicode_literals
 
  11 class JSInterpreter(object): 
  12     def __init__(self
, code
): 
  17     def interpret_statement(self
, stmt
, local_vars
, allow_recursion
=20): 
  18         if allow_recursion 
< 0: 
  19             raise ExtractorError('Recursion limit reached') 
  21         if stmt
.startswith('var '): 
  22             stmt 
= stmt
[len('var '):] 
  23         ass_m 
= re
.match(r
'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' + 
  24                          r
'=(?P<expr>.*)$', stmt
) 
  26             if ass_m
.groupdict().get('index'): 
  28                     lvar 
= local_vars
[ass_m
.group('out')] 
  29                     idx 
= self
.interpret_expression( 
  30                         ass_m
.group('index'), local_vars
, allow_recursion
) 
  31                     assert isinstance(idx
, int) 
  34                 expr 
= ass_m
.group('expr') 
  37                     local_vars
[ass_m
.group('out')] = val
 
  39                 expr 
= ass_m
.group('expr') 
  40         elif stmt
.startswith('return '): 
  42             expr 
= stmt
[len('return '):] 
  44             # Try interpreting it as an expression 
  48         v 
= self
.interpret_expression(expr
, local_vars
, allow_recursion
) 
  51     def interpret_expression(self
, expr
, local_vars
, allow_recursion
): 
  56             return local_vars
[expr
] 
  59             return json
.loads(expr
) 
  64             r
'^(?P<var>[$a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$', 
  67             variable 
= m
.group('var') 
  68             member 
= m
.group('member') 
  69             arg_str 
= m
.group('args') 
  71             if variable 
in local_vars
: 
  72                 obj 
= local_vars
[variable
] 
  74                 if variable 
not in self
._objects
: 
  75                     self
._objects
[variable
] = self
.extract_object(variable
) 
  76                 obj 
= self
._objects
[variable
] 
  80                 if member 
== 'length': 
  84             assert expr
.endswith(')') 
  90                     self
.interpret_expression(v
, local_vars
, allow_recursion
) 
  91                     for v 
in arg_str
.split(',')]) 
  94                 assert argvals 
== ('',) 
  97                 assert len(argvals
) == 1 
  98                 return argvals
[0].join(obj
) 
  99             if member 
== 'reverse': 
 100                 assert len(argvals
) == 0 
 103             if member 
== 'slice': 
 104                 assert len(argvals
) == 1 
 105                 return obj
[argvals
[0]:] 
 106             if member 
== 'splice': 
 107                 assert isinstance(obj
, list) 
 108                 index
, howMany 
= argvals
 
 110                 for i 
in range(index
, min(index 
+ howMany
, len(obj
))): 
 111                     res
.append(obj
.pop(index
)) 
 114             return obj
[member
](argvals
) 
 117             r
'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr
) 
 119             val 
= local_vars
[m
.group('in')] 
 120             idx 
= self
.interpret_expression( 
 121                 m
.group('idx'), local_vars
, allow_recursion 
- 1) 
 124         m 
= re
.match(r
'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr
) 
 126             a 
= self
.interpret_expression( 
 127                 m
.group('a'), local_vars
, allow_recursion
) 
 128             b 
= self
.interpret_expression( 
 129                 m
.group('b'), local_vars
, allow_recursion
) 
 133             r
'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr
) 
 135             fname 
= m
.group('func') 
 137                 int(v
) if v
.isdigit() else local_vars
[v
] 
 138                 for v 
in m
.group('args').split(',')]) 
 139             if fname 
not in self
._functions
: 
 140                 self
._functions
[fname
] = self
.extract_function(fname
) 
 141             return self
._functions
[fname
](argvals
) 
 142         raise ExtractorError('Unsupported JS expression %r' % expr
) 
 144     def extract_object(self
, objname
): 
 147             (r
'(?:var\s+)?%s\s*=\s*\{' % re
.escape(objname
)) + 
 148             r
'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' + 
 151         fields 
= obj_m
.group('fields') 
 152         # Currently, it only supports function definitions 
 153         fields_m 
= re
.finditer( 
 154             r
'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function' 
 155             r
'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}', 
 158             argnames 
= f
.group('args').split(',') 
 159             obj
[f
.group('key')] = self
.build_function(argnames
, f
.group('code')) 
 163     def extract_function(self
, funcname
): 
 165             (r
'(?:function %s|[{;]%s\s*=\s*function)' % ( 
 166                 re
.escape(funcname
), re
.escape(funcname
))) + 
 167             r
'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}', 
 170             raise ExtractorError('Could not find JS function %r' % funcname
) 
 171         argnames 
= func_m
.group('args').split(',') 
 173         return self
.build_function(argnames
, func_m
.group('code')) 
 175     def build_function(self
, argnames
, code
): 
 177             local_vars 
= dict(zip(argnames
, args
)) 
 178             for stmt 
in code
.split(';'): 
 179                 res 
= self
.interpret_statement(stmt
, local_vars
)