]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/jsinterp.py
1 from __future__
import unicode_literals
16 ('>>', operator
.rshift
),
17 ('<<', operator
.lshift
),
21 ('/', operator
.truediv
),
24 _ASSIGN_OPERATORS
= [(op
+ '=', opfunc
) for op
, opfunc
in _OPERATORS
]
25 _ASSIGN_OPERATORS
.append(('=', lambda cur
, right
: right
))
27 _NAME_RE
= r
'[a-zA-Z_$][a-zA-Z_$0-9]*'
30 class JSInterpreter(object):
31 def __init__(self
, code
, objects
=None):
36 self
._objects
= objects
38 def interpret_statement(self
, stmt
, local_vars
, allow_recursion
=100):
39 if allow_recursion
< 0:
40 raise ExtractorError('Recursion limit reached')
44 stmt_m
= re
.match(r
'var\s', stmt
)
46 expr
= stmt
[len(stmt_m
.group(0)):]
48 return_m
= re
.match(r
'return(?:\s+|$)', stmt
)
50 expr
= stmt
[len(return_m
.group(0)):]
53 # Try interpreting it as an expression
56 v
= self
.interpret_expression(expr
, local_vars
, allow_recursion
)
57 return v
, should_abort
59 def interpret_expression(self
, expr
, local_vars
, allow_recursion
):
61 if expr
== '': # Empty expression
64 if expr
.startswith('('):
66 for m
in re
.finditer(r
'[()]', expr
):
72 sub_expr
= expr
[1:m
.start()]
73 sub_result
= self
.interpret_expression(
74 sub_expr
, local_vars
, allow_recursion
)
75 remaining_expr
= expr
[m
.end():].strip()
76 if not remaining_expr
:
79 expr
= json
.dumps(sub_result
) + remaining_expr
82 raise ExtractorError('Premature end of parens in %r' % expr
)
84 for op
, opfunc
in _ASSIGN_OPERATORS
:
86 (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
88 (?P<expr>.*)$''' % (_NAME_RE
, re
.escape(op
)), expr
)
91 right_val
= self
.interpret_expression(
92 m
.group('expr'), local_vars
, allow_recursion
- 1)
94 if m
.groupdict().get('index'):
95 lvar
= local_vars
[m
.group('out')]
96 idx
= self
.interpret_expression(
97 m
.group('index'), local_vars
, allow_recursion
)
98 assert isinstance(idx
, int)
100 val
= opfunc(cur
, right_val
)
104 cur
= local_vars
.get(m
.group('out'))
105 val
= opfunc(cur
, right_val
)
106 local_vars
[m
.group('out')] = val
113 r
'(?!if|return|true|false)(?P<name>%s)$' % _NAME_RE
,
116 return local_vars
[var_m
.group('name')]
119 return json
.loads(expr
)
124 r
'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE
, expr
)
126 val
= local_vars
[m
.group('in')]
127 idx
= self
.interpret_expression(
128 m
.group('idx'), local_vars
, allow_recursion
- 1)
132 r
'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE
,
135 variable
= m
.group('var')
136 member
= remove_quotes(m
.group('member') or m
.group('member2'))
137 arg_str
= m
.group('args')
139 if variable
in local_vars
:
140 obj
= local_vars
[variable
]
142 if variable
not in self
._objects
:
143 self
._objects
[variable
] = self
.extract_object(variable
)
144 obj
= self
._objects
[variable
]
148 if member
== 'length':
152 assert expr
.endswith(')')
158 self
.interpret_expression(v
, local_vars
, allow_recursion
)
159 for v
in arg_str
.split(',')])
161 if member
== 'split':
162 assert argvals
== ('',)
165 assert len(argvals
) == 1
166 return argvals
[0].join(obj
)
167 if member
== 'reverse':
168 assert len(argvals
) == 0
171 if member
== 'slice':
172 assert len(argvals
) == 1
173 return obj
[argvals
[0]:]
174 if member
== 'splice':
175 assert isinstance(obj
, list)
176 index
, howMany
= argvals
178 for i
in range(index
, min(index
+ howMany
, len(obj
))):
179 res
.append(obj
.pop(index
))
182 return obj
[member
](argvals
)
184 for op
, opfunc
in _OPERATORS
:
185 m
= re
.match(r
'(?P<x>.+?)%s(?P<y>.+)' % re
.escape(op
), expr
)
188 x
, abort
= self
.interpret_statement(
189 m
.group('x'), local_vars
, allow_recursion
- 1)
191 raise ExtractorError(
192 'Premature left-side return of %s in %r' % (op
, expr
))
193 y
, abort
= self
.interpret_statement(
194 m
.group('y'), local_vars
, allow_recursion
- 1)
196 raise ExtractorError(
197 'Premature right-side return of %s in %r' % (op
, expr
))
201 r
'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE
, expr
)
203 fname
= m
.group('func')
205 int(v
) if v
.isdigit() else local_vars
[v
]
206 for v
in m
.group('args').split(',')]) if len(m
.group('args')) > 0 else tuple()
207 if fname
not in self
._functions
:
208 self
._functions
[fname
] = self
.extract_function(fname
)
209 return self
._functions
[fname
](argvals
)
211 raise ExtractorError('Unsupported JS expression %r' % expr
)
213 def extract_object(self
, objname
):
214 _FUNC_NAME_RE
= r
'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
218 (?<!this\.)%s\s*=\s*{\s*
219 (?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
221 ''' % (re
.escape(objname
), _FUNC_NAME_RE
),
223 fields
= obj_m
.group('fields')
224 # Currently, it only supports function definitions
225 fields_m
= re
.finditer(
227 (?P<key>%s)\s*:\s*function\s*\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}
231 argnames
= f
.group('args').split(',')
232 obj
[remove_quotes(f
.group('key'))] = self
.build_function(argnames
, f
.group('code'))
236 def extract_function(self
, funcname
):
239 (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
240 \((?P<args>[^)]*)\)\s*
241 \{(?P<code>[^}]+)\}''' % (
242 re
.escape(funcname
), re
.escape(funcname
), re
.escape(funcname
)),
245 raise ExtractorError('Could not find JS function %r' % funcname
)
246 argnames
= func_m
.group('args').split(',')
248 return self
.build_function(argnames
, func_m
.group('code'))
250 def call_function(self
, funcname
, *args
):
251 f
= self
.extract_function(funcname
)
254 def build_function(self
, argnames
, code
):
256 local_vars
= dict(zip(argnames
, args
))
257 for stmt
in code
.split(';'):
258 res
, abort
= self
.interpret_statement(stmt
, local_vars
)