]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/jsinterp.py
ae5bca2e643f1ec4d719e8e895b0a655a98a151a
1 from __future__
import unicode_literals
10 class JSInterpreter(object):
11 def __init__(self
, code
):
16 def interpret_statement(self
, stmt
, local_vars
, allow_recursion
=20):
17 if allow_recursion
< 0:
18 raise ExtractorError('Recursion limit reached')
20 if stmt
.startswith('var '):
21 stmt
= stmt
[len('var '):]
22 ass_m
= re
.match(r
'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
23 r
'=(?P<expr>.*)$', stmt
)
25 if ass_m
.groupdict().get('index'):
27 lvar
= local_vars
[ass_m
.group('out')]
28 idx
= self
.interpret_expression(
29 ass_m
.group('index'), local_vars
, allow_recursion
)
30 assert isinstance(idx
, int)
33 expr
= ass_m
.group('expr')
36 local_vars
[ass_m
.group('out')] = val
38 expr
= ass_m
.group('expr')
39 elif stmt
.startswith('return '):
41 expr
= stmt
[len('return '):]
44 'Cannot determine left side of statement in %r' % stmt
)
46 v
= self
.interpret_expression(expr
, local_vars
, allow_recursion
)
49 def interpret_expression(self
, expr
, local_vars
, allow_recursion
):
54 return local_vars
[expr
]
56 m
= re
.match(r
'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr
)
58 member
= m
.group('member')
59 variable
= m
.group('in')
61 if variable
not in local_vars
:
62 if variable
not in self
._objects
:
63 self
._objects
[variable
] = self
.extract_object(variable
)
64 obj
= self
._objects
[variable
]
65 key
, args
= member
.split('(', 1)
66 args
= args
.strip(')')
67 argvals
= [int(v
) if v
.isdigit() else local_vars
[v
]
68 for v
in args
.split(',')]
69 return obj
[key
](argvals
)
71 val
= local_vars
[variable
]
72 if member
== 'split("")':
74 if member
== 'join("")':
76 if member
== 'length':
78 if member
== 'reverse()':
80 slice_m
= re
.match(r
'slice\((?P<idx>.*)\)', member
)
82 idx
= self
.interpret_expression(
83 slice_m
.group('idx'), local_vars
, allow_recursion
- 1)
87 r
'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr
)
89 val
= local_vars
[m
.group('in')]
90 idx
= self
.interpret_expression(
91 m
.group('idx'), local_vars
, allow_recursion
- 1)
94 m
= re
.match(r
'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr
)
96 a
= self
.interpret_expression(
97 m
.group('a'), local_vars
, allow_recursion
)
98 b
= self
.interpret_expression(
99 m
.group('b'), local_vars
, allow_recursion
)
103 r
'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr
)
105 fname
= m
.group('func')
106 if fname
not in self
._functions
:
107 self
._functions
[fname
] = self
.extract_function(fname
)
108 argvals
= [int(v
) if v
.isdigit() else local_vars
[v
]
109 for v
in m
.group('args').split(',')]
110 return self
._functions
[fname
](argvals
)
111 raise ExtractorError('Unsupported JS expression %r' % expr
)
113 def extract_object(self
, objname
):
116 (r
'(?:var\s+)?%s\s*=\s*\{' % re
.escape(objname
)) +
117 r
'\s*(?P<fields>([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
120 fields
= obj_m
.group('fields')
121 # Currently, it only supports function definitions
122 fields_m
= re
.finditer(
123 r
'(?P<key>[a-zA-Z$]+)\s*:\s*function'
124 r
'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
127 argnames
= f
.group('args').split(',')
128 obj
[f
.group('key')] = self
.build_function(argnames
, f
.group('code'))
132 def extract_function(self
, funcname
):
134 (r
'(?:function %s|[{;]%s\s*=\s*function)' % (
135 re
.escape(funcname
), re
.escape(funcname
))) +
136 r
'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
139 raise ExtractorError('Could not find JS function %r' % funcname
)
140 argnames
= func_m
.group('args').split(',')
142 return self
.build_function(argnames
, func_m
.group('code'))
144 def build_function(self
, argnames
, code
):
146 local_vars
= dict(zip(argnames
, args
))
147 for stmt
in code
.split(';'):
148 res
= self
.interpret_statement(stmt
, local_vars
)