]> Raphaƫl G. Git Repositories - youtubedl/blobdiff - youtube_dl/jsinterp.py
debian/changelog: Start new release.
[youtubedl] / youtube_dl / jsinterp.py
index 449482d3cf8dd147c62717ebc3332bbcbf6eb869..ae5bca2e643f1ec4d719e8e895b0a655a98a151a 100644 (file)
@@ -11,6 +11,7 @@ class JSInterpreter(object):
     def __init__(self, code):
         self.code = code
         self._functions = {}
+        self._objects = {}
 
     def interpret_statement(self, stmt, local_vars, allow_recursion=20):
         if allow_recursion < 0:
@@ -55,11 +56,23 @@ class JSInterpreter(object):
         m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
         if m:
             member = m.group('member')
-            val = local_vars[m.group('in')]
+            variable = m.group('in')
+
+            if variable not in local_vars:
+                if variable not in self._objects:
+                    self._objects[variable] = self.extract_object(variable)
+                obj = self._objects[variable]
+                key, args = member.split('(', 1)
+                args = args.strip(')')
+                argvals = [int(v) if v.isdigit() else local_vars[v]
+                           for v in args.split(',')]
+                return obj[key](argvals)
+
+            val = local_vars[variable]
             if member == 'split("")':
                 return list(val)
             if member == 'join("")':
-                return u''.join(val)
+                return ''.join(val)
             if member == 'length':
                 return len(val)
             if member == 'reverse()':
@@ -97,9 +110,28 @@ class JSInterpreter(object):
             return self._functions[fname](argvals)
         raise ExtractorError('Unsupported JS expression %r' % expr)
 
+    def extract_object(self, objname):
+        obj = {}
+        obj_m = re.search(
+            (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
+            r'\s*(?P<fields>([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
+            r'\}\s*;',
+            self.code)
+        fields = obj_m.group('fields')
+        # Currently, it only supports function definitions
+        fields_m = re.finditer(
+            r'(?P<key>[a-zA-Z$]+)\s*:\s*function'
+            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
+            fields)
+        for f in fields_m:
+            argnames = f.group('args').split(',')
+            obj[f.group('key')] = self.build_function(argnames, f.group('code'))
+
+        return obj
+
     def extract_function(self, funcname):
         func_m = re.search(
-            (r'(?:function %s|%s\s*=\s*function)' % (
+            (r'(?:function %s|[{;]%s\s*=\s*function)' % (
                 re.escape(funcname), re.escape(funcname))) +
             r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
             self.code)
@@ -107,10 +139,12 @@ class JSInterpreter(object):
             raise ExtractorError('Could not find JS function %r' % funcname)
         argnames = func_m.group('args').split(',')
 
+        return self.build_function(argnames, func_m.group('code'))
+
+    def build_function(self, argnames, code):
         def resf(args):
             local_vars = dict(zip(argnames, args))
-            for stmt in func_m.group('code').split(';'):
+            for stmt in code.split(';'):
                 res = self.interpret_statement(stmt, local_vars)
             return res
         return resf
-