]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/jsinterp.py
1 from __future__
import unicode_literals
11 class JSInterpreter(object):
12 def __init__(self
, code
):
17 def interpret_statement(self
, stmt
, local_vars
, allow_recursion
=20):
18 if allow_recursion
< 0:
19 raise ExtractorError('Recursion limit reached')
21 if stmt
.startswith('var '):
22 stmt
= stmt
[len('var '):]
23 ass_m
= re
.match(r
'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
24 r
'=(?P<expr>.*)$', stmt
)
26 if ass_m
.groupdict().get('index'):
28 lvar
= local_vars
[ass_m
.group('out')]
29 idx
= self
.interpret_expression(
30 ass_m
.group('index'), local_vars
, allow_recursion
)
31 assert isinstance(idx
, int)
34 expr
= ass_m
.group('expr')
37 local_vars
[ass_m
.group('out')] = val
39 expr
= ass_m
.group('expr')
40 elif stmt
.startswith('return '):
42 expr
= stmt
[len('return '):]
44 # Try interpreting it as an expression
48 v
= self
.interpret_expression(expr
, local_vars
, allow_recursion
)
51 def interpret_expression(self
, expr
, local_vars
, allow_recursion
):
56 return local_vars
[expr
]
59 return json
.loads(expr
)
64 r
'^(?P<var>[$a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
67 variable
= m
.group('var')
68 member
= m
.group('member')
69 arg_str
= m
.group('args')
71 if variable
in local_vars
:
72 obj
= local_vars
[variable
]
74 if variable
not in self
._objects
:
75 self
._objects
[variable
] = self
.extract_object(variable
)
76 obj
= self
._objects
[variable
]
80 if member
== 'length':
84 assert expr
.endswith(')')
90 self
.interpret_expression(v
, local_vars
, allow_recursion
)
91 for v
in arg_str
.split(',')])
94 assert argvals
== ('',)
97 assert len(argvals
) == 1
98 return argvals
[0].join(obj
)
99 if member
== 'reverse':
100 assert len(argvals
) == 0
103 if member
== 'slice':
104 assert len(argvals
) == 1
105 return obj
[argvals
[0]:]
106 if member
== 'splice':
107 assert isinstance(obj
, list)
108 index
, howMany
= argvals
110 for i
in range(index
, min(index
+ howMany
, len(obj
))):
111 res
.append(obj
.pop(index
))
114 return obj
[member
](argvals
)
117 r
'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr
)
119 val
= local_vars
[m
.group('in')]
120 idx
= self
.interpret_expression(
121 m
.group('idx'), local_vars
, allow_recursion
- 1)
124 m
= re
.match(r
'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr
)
126 a
= self
.interpret_expression(
127 m
.group('a'), local_vars
, allow_recursion
)
128 b
= self
.interpret_expression(
129 m
.group('b'), local_vars
, allow_recursion
)
133 r
'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr
)
135 fname
= m
.group('func')
137 int(v
) if v
.isdigit() else local_vars
[v
]
138 for v
in m
.group('args').split(',')])
139 if fname
not in self
._functions
:
140 self
._functions
[fname
] = self
.extract_function(fname
)
141 return self
._functions
[fname
](argvals
)
142 raise ExtractorError('Unsupported JS expression %r' % expr
)
144 def extract_object(self
, objname
):
147 (r
'(?:var\s+)?%s\s*=\s*\{' % re
.escape(objname
)) +
148 r
'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
151 fields
= obj_m
.group('fields')
152 # Currently, it only supports function definitions
153 fields_m
= re
.finditer(
154 r
'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function'
155 r
'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
158 argnames
= f
.group('args').split(',')
159 obj
[f
.group('key')] = self
.build_function(argnames
, f
.group('code'))
163 def extract_function(self
, funcname
):
165 (r
'(?:function %s|[{;]%s\s*=\s*function)' % (
166 re
.escape(funcname
), re
.escape(funcname
))) +
167 r
'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
170 raise ExtractorError('Could not find JS function %r' % funcname
)
171 argnames
= func_m
.group('args').split(',')
173 return self
.build_function(argnames
, func_m
.group('code'))
175 def build_function(self
, argnames
, code
):
177 local_vars
= dict(zip(argnames
, args
))
178 for stmt
in code
.split(';'):
179 res
= self
.interpret_statement(stmt
, local_vars
)