]> Raphaƫl G. Git Repositories - youtubedl/blobdiff - youtube_dl/swfinterp.py
Annotate some highlights of the release.
[youtubedl] / youtube_dl / swfinterp.py
index b63c65b201ec2b34c6482e8526694431041f75fe..0c71585753134e93fba8d8de5cee003d31f050c9 100644 (file)
@@ -4,10 +4,12 @@ import collections
 import io
 import zlib
 
-from .utils import (
+from .compat import (
     compat_str,
+    compat_struct_unpack,
+)
+from .utils import (
     ExtractorError,
-    struct_unpack,
 )
 
 
@@ -23,17 +25,17 @@ def _extract_tags(file_contents):
             file_contents[:1])
 
     # Determine number of bits in framesize rectangle
-    framesize_nbits = struct_unpack('!B', content[:1])[0] >> 3
+    framesize_nbits = compat_struct_unpack('!B', content[:1])[0] >> 3
     framesize_len = (5 + 4 * framesize_nbits + 7) // 8
 
     pos = framesize_len + 2 + 2
     while pos < len(content):
-        header16 = struct_unpack('<H', content[pos:pos + 2])[0]
+        header16 = compat_struct_unpack('<H', content[pos:pos + 2])[0]
         pos += 2
         tag_code = header16 >> 6
         tag_len = header16 & 0x3f
         if tag_len == 0x3f:
-            tag_len = struct_unpack('<I', content[pos:pos + 4])[0]
+            tag_len = compat_struct_unpack('<I', content[pos:pos + 4])[0]
             pos += 4
         assert pos + tag_len <= len(content), \
             ('Tag %d ends at %d+%d - that\'s longer than the file (%d)'
@@ -62,15 +64,17 @@ class _ScopeDict(dict):
 
 
 class _AVMClass(object):
-    def __init__(self, name_idx, name):
+    def __init__(self, name_idx, name, static_properties=None):
         self.name_idx = name_idx
         self.name = name
         self.method_names = {}
         self.method_idxs = {}
         self.methods = {}
         self.method_pyfunctions = {}
+        self.static_properties = static_properties if static_properties else {}
 
         self.variables = _ScopeDict(self)
+        self.constants = {}
 
     def make_object(self):
         return _AVMClass_Object(self)
@@ -99,7 +103,7 @@ def _read_int(reader):
     for _ in range(5):
         buf = reader.read(1)
         assert len(buf) == 1
-        b = struct_unpack('<B', buf)[0]
+        b = compat_struct_unpack('<B', buf)[0]
         res = res | ((b & 0x7f) << shift)
         if b & 0x80 == 0:
             break
@@ -111,6 +115,8 @@ def _u30(reader):
     res = _read_int(reader)
     assert res & 0xf0000000 == 0
     return res
+
+
 _u32 = _read_int
 
 
@@ -125,7 +131,7 @@ def _s24(reader):
     bs = reader.read(3)
     assert len(bs) == 3
     last_byte = b'\xff' if (ord(bs[2:3]) >= 0x80) else b'\x00'
-    return struct_unpack('<i', bs + last_byte)[0]
+    return compat_struct_unpack('<i', bs + last_byte)[0]
 
 
 def _read_string(reader):
@@ -144,12 +150,43 @@ def _read_bytes(count, reader):
 
 def _read_byte(reader):
     resb = _read_bytes(1, reader=reader)
-    res = struct_unpack('<B', resb)[0]
+    res = compat_struct_unpack('<B', resb)[0]
     return res
 
 
+StringClass = _AVMClass('(no name idx)', 'String')
+ByteArrayClass = _AVMClass('(no name idx)', 'ByteArray')
+TimerClass = _AVMClass('(no name idx)', 'Timer')
+TimerEventClass = _AVMClass('(no name idx)', 'TimerEvent', {'TIMER': 'timer'})
+_builtin_classes = {
+    StringClass.name: StringClass,
+    ByteArrayClass.name: ByteArrayClass,
+    TimerClass.name: TimerClass,
+    TimerEventClass.name: TimerEventClass,
+}
+
+
+class _Undefined(object):
+    def __bool__(self):
+        return False
+    __nonzero__ = __bool__
+
+    def __hash__(self):
+        return 0
+
+    def __str__(self):
+        return 'undefined'
+    __repr__ = __str__
+
+
+undefined = _Undefined()
+
+
 class SWFInterpreter(object):
     def __init__(self, file_contents):
+        self._patched_functions = {
+            (TimerClass, 'addEventListener'): lambda params: undefined,
+        }
         code_tag = next(tag
                         for tag_code, tag in _extract_tags(file_contents)
                         if tag_code == 82)
@@ -170,11 +207,13 @@ class SWFInterpreter(object):
 
         # Constant pool
         int_count = u30()
+        self.constant_ints = [0]
         for _c in range(1, int_count):
-            s32()
+            self.constant_ints.append(s32())
+        self.constant_uints = [0]
         uint_count = u30()
         for _c in range(1, uint_count):
-            u32()
+            self.constant_uints.append(u32())
         double_count = u30()
         read_bytes(max(0, (double_count - 1)) * 8)
         string_count = u30()
@@ -212,6 +251,10 @@ class SWFInterpreter(object):
                 u30()  # namespace_idx
                 name_idx = u30()
                 self.multinames.append(self.constant_strings[name_idx])
+            elif kind == 0x09:
+                name_idx = u30()
+                u30()
+                self.multinames.append(self.constant_strings[name_idx])
             else:
                 self.multinames.append(_Multiname(kind))
                 for _c2 in range(MULTINAME_SIZES[kind]):
@@ -258,13 +301,28 @@ class SWFInterpreter(object):
             kind = kind_full & 0x0f
             attrs = kind_full >> 4
             methods = {}
-            if kind in [0x00, 0x06]:  # Slot or Const
+            constants = None
+            if kind == 0x00:  # Slot
                 u30()  # Slot id
                 u30()  # type_name_idx
                 vindex = u30()
                 if vindex != 0:
                     read_byte()  # vkind
-            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
+            elif kind == 0x06:  # Const
+                u30()  # Slot id
+                u30()  # type_name_idx
+                vindex = u30()
+                vkind = 'any'
+                if vindex != 0:
+                    vkind = read_byte()
+                if vkind == 0x03:  # Constant_Int
+                    value = self.constant_ints[vindex]
+                elif vkind == 0x04:  # Constant_UInt
+                    value = self.constant_uints[vindex]
+                else:
+                    return {}, None  # Ignore silently for now
+                constants = {self.multinames[trait_name_idx]: value}
+            elif kind in (0x01, 0x02, 0x03):  # Method / Getter / Setter
                 u30()  # disp_id
                 method_idx = u30()
                 methods[self.multinames[trait_name_idx]] = method_idx
@@ -283,7 +341,7 @@ class SWFInterpreter(object):
                 for _c3 in range(metadata_count):
                     u30()  # metadata index
 
-            return methods
+            return methods, constants
 
         # Classes
         class_count = u30()
@@ -305,18 +363,22 @@ class SWFInterpreter(object):
             u30()  # iinit
             trait_count = u30()
             for _c2 in range(trait_count):
-                trait_methods = parse_traits_info()
+                trait_methods, trait_constants = parse_traits_info()
                 avm_class.register_methods(trait_methods)
+                if trait_constants:
+                    avm_class.constants.update(trait_constants)
 
         assert len(classes) == class_count
         self._classes_by_name = dict((c.name, c) for c in classes)
 
         for avm_class in classes:
-            u30()  # cinit
+            avm_class.cinit_idx = u30()
             trait_count = u30()
             for _c2 in range(trait_count):
-                trait_methods = parse_traits_info()
+                trait_methods, trait_constants = parse_traits_info()
                 avm_class.register_methods(trait_methods)
+                if trait_constants:
+                    avm_class.constants.update(trait_constants)
 
         # Scripts
         script_count = u30()
@@ -329,6 +391,7 @@ class SWFInterpreter(object):
         # Method bodies
         method_body_count = u30()
         Method = collections.namedtuple('Method', ['code', 'local_count'])
+        self._all_methods = []
         for _c in range(method_body_count):
             method_idx = u30()
             u30()  # max_stack
@@ -337,9 +400,10 @@ class SWFInterpreter(object):
             u30()  # max_scope_depth
             code_length = u30()
             code = read_bytes(code_length)
+            m = Method(code, local_count)
+            self._all_methods.append(m)
             for avm_class in classes:
                 if method_idx in avm_class.method_idxs:
-                    m = Method(code, local_count)
                     avm_class.methods[avm_class.method_idxs[method_idx]] = m
             exception_count = u30()
             for _c2 in range(exception_count):
@@ -354,13 +418,27 @@ class SWFInterpreter(object):
 
         assert p + code_reader.tell() == len(code_tag)
 
-    def extract_class(self, class_name):
+    def patch_function(self, avm_class, func_name, f):
+        self._patched_functions[(avm_class, func_name)] = f
+
+    def extract_class(self, class_name, call_cinit=True):
         try:
-            return self._classes_by_name[class_name]
+            res = self._classes_by_name[class_name]
         except KeyError:
             raise ExtractorError('Class %r not found' % class_name)
 
+        if call_cinit and hasattr(res, 'cinit_idx'):
+            res.register_methods({'$cinit': res.cinit_idx})
+            res.methods['$cinit'] = self._all_methods[res.cinit_idx]
+            cinit = self.extract_function(res, '$cinit')
+            cinit([])
+
+        return res
+
     def extract_function(self, avm_class, func_name):
+        p = self._patched_functions.get((avm_class, func_name))
+        if p:
+            return p
         if func_name in avm_class.method_pyfunctions:
             return avm_class.method_pyfunctions[func_name]
         if func_name in self._classes_by_name:
@@ -379,10 +457,15 @@ class SWFInterpreter(object):
             registers = [avm_class.variables] + list(args) + [None] * m.local_count
             stack = []
             scopes = collections.deque([
-                self._classes_by_name, avm_class.variables])
+                self._classes_by_name, avm_class.constants, avm_class.variables])
             while True:
                 opcode = _read_byte(coder)
-                if opcode == 17:  # iftrue
+                if opcode == 9:  # label
+                    pass  # Spec says: "Do nothing."
+                elif opcode == 16:  # jump
+                    offset = s24()
+                    coder.seek(coder.tell() + offset)
+                elif opcode == 17:  # iftrue
                     offset = s24()
                     value = stack.pop()
                     if value:
@@ -392,9 +475,40 @@ class SWFInterpreter(object):
                     value = stack.pop()
                     if not value:
                         coder.seek(coder.tell() + offset)
+                elif opcode == 19:  # ifeq
+                    offset = s24()
+                    value2 = stack.pop()
+                    value1 = stack.pop()
+                    if value2 == value1:
+                        coder.seek(coder.tell() + offset)
+                elif opcode == 20:  # ifne
+                    offset = s24()
+                    value2 = stack.pop()
+                    value1 = stack.pop()
+                    if value2 != value1:
+                        coder.seek(coder.tell() + offset)
+                elif opcode == 21:  # iflt
+                    offset = s24()
+                    value2 = stack.pop()
+                    value1 = stack.pop()
+                    if value1 < value2:
+                        coder.seek(coder.tell() + offset)
+                elif opcode == 32:  # pushnull
+                    stack.append(None)
+                elif opcode == 33:  # pushundefined
+                    stack.append(undefined)
                 elif opcode == 36:  # pushbyte
                     v = _read_byte(coder)
                     stack.append(v)
+                elif opcode == 37:  # pushshort
+                    v = u30()
+                    stack.append(v)
+                elif opcode == 38:  # pushtrue
+                    stack.append(True)
+                elif opcode == 39:  # pushfalse
+                    stack.append(False)
+                elif opcode == 40:  # pushnan
+                    stack.append(float('NaN'))
                 elif opcode == 42:  # dup
                     value = stack[-1]
                     stack.append(value)
@@ -419,11 +533,31 @@ class SWFInterpreter(object):
                         [stack.pop() for _ in range(arg_count)]))
                     obj = stack.pop()
 
-                    if isinstance(obj, _AVMClass_Object):
+                    if obj == StringClass:
+                        if mname == 'String':
+                            assert len(args) == 1
+                            assert isinstance(args[0], (
+                                int, compat_str, _Undefined))
+                            if args[0] == undefined:
+                                res = 'undefined'
+                            else:
+                                res = compat_str(args[0])
+                            stack.append(res)
+                            continue
+                        else:
+                            raise NotImplementedError(
+                                'Function String.%s is not yet implemented'
+                                % mname)
+                    elif isinstance(obj, _AVMClass_Object):
                         func = self.extract_function(obj.avm_class, mname)
                         res = func(args)
                         stack.append(res)
                         continue
+                    elif isinstance(obj, _AVMClass):
+                        func = self.extract_function(obj, mname)
+                        res = func(args)
+                        stack.append(res)
+                        continue
                     elif isinstance(obj, _ScopeDict):
                         if mname in obj.avm_class.method_names:
                             func = self.extract_function(obj.avm_class, mname)
@@ -442,6 +576,13 @@ class SWFInterpreter(object):
                                 res = obj.split(args[0])
                             stack.append(res)
                             continue
+                        elif mname == 'charCodeAt':
+                            assert len(args) <= 1
+                            idx = 0 if len(args) == 0 else args[0]
+                            assert isinstance(idx, int)
+                            res = ord(obj[idx])
+                            stack.append(res)
+                            continue
                     elif isinstance(obj, list):
                         if mname == 'slice':
                             assert len(args) == 1
@@ -458,9 +599,18 @@ class SWFInterpreter(object):
                     raise NotImplementedError(
                         'Unsupported property %r on %r'
                         % (mname, obj))
+                elif opcode == 71:  # returnvoid
+                    res = undefined
+                    return res
                 elif opcode == 72:  # returnvalue
                     res = stack.pop()
                     return res
+                elif opcode == 73:  # constructsuper
+                    # Not yet implemented, just hope it works without it
+                    arg_count = u30()
+                    args = list(reversed(
+                        [stack.pop() for _ in range(arg_count)]))
+                    obj = stack.pop()
                 elif opcode == 74:  # constructproperty
                     index = u30()
                     arg_count = u30()
@@ -481,6 +631,17 @@ class SWFInterpreter(object):
                     args = list(reversed(
                         [stack.pop() for _ in range(arg_count)]))
                     obj = stack.pop()
+                    if isinstance(obj, _AVMClass_Object):
+                        func = self.extract_function(obj.avm_class, mname)
+                        res = func(args)
+                        assert res is undefined
+                        continue
+                    if isinstance(obj, _ScopeDict):
+                        assert mname in obj.avm_class.method_names
+                        func = self.extract_function(obj.avm_class, mname)
+                        res = func(args)
+                        assert res is undefined
+                        continue
                     if mname == 'reverse':
                         assert isinstance(obj, list)
                         obj.reverse()
@@ -504,7 +665,10 @@ class SWFInterpreter(object):
                             break
                     else:
                         res = scopes[0]
-                    stack.append(res[mname])
+                    if mname not in res and mname in _builtin_classes:
+                        stack.append(_builtin_classes[mname])
+                    else:
+                        stack.append(res[mname])
                 elif opcode == 94:  # findproperty
                     index = u30()
                     mname = self.multinames[index]
@@ -524,9 +688,15 @@ class SWFInterpreter(object):
                             break
                     else:
                         scope = avm_class.variables
-                    # I cannot find where static variables are initialized
-                    # so let's just return None
-                    res = scope.get(mname)
+
+                    if mname in scope:
+                        res = scope[mname]
+                    elif mname in _builtin_classes:
+                        res = _builtin_classes[mname]
+                    else:
+                        # Assume uninitialized
+                        # TODO warn here
+                        res = undefined
                     stack.append(res)
                 elif opcode == 97:  # setproperty
                     index = u30()
@@ -548,22 +718,57 @@ class SWFInterpreter(object):
                     pname = self.multinames[index]
                     if pname == 'length':
                         obj = stack.pop()
-                        assert isinstance(obj, list)
+                        assert isinstance(obj, (compat_str, list))
                         stack.append(len(obj))
+                    elif isinstance(pname, compat_str):  # Member access
+                        obj = stack.pop()
+                        if isinstance(obj, _AVMClass):
+                            res = obj.static_properties[pname]
+                            stack.append(res)
+                            continue
+
+                        assert isinstance(obj, (dict, _ScopeDict)),\
+                            'Accessing member %r on %r' % (pname, obj)
+                        res = obj.get(pname, undefined)
+                        stack.append(res)
                     else:  # Assume attribute access
                         idx = stack.pop()
                         assert isinstance(idx, int)
                         obj = stack.pop()
                         assert isinstance(obj, list)
                         stack.append(obj[idx])
+                elif opcode == 104:  # initproperty
+                    index = u30()
+                    value = stack.pop()
+                    idx = self.multinames[index]
+                    if isinstance(idx, _Multiname):
+                        idx = stack.pop()
+                    obj = stack.pop()
+                    obj[idx] = value
                 elif opcode == 115:  # convert_
                     value = stack.pop()
                     intvalue = int(value)
                     stack.append(intvalue)
                 elif opcode == 128:  # coerce
                     u30()
+                elif opcode == 130:  # coerce_a
+                    value = stack.pop()
+                    # um, yes, it's any value
+                    stack.append(value)
                 elif opcode == 133:  # coerce_s
                     assert isinstance(stack[-1], (type(None), compat_str))
+                elif opcode == 147:  # decrement
+                    value = stack.pop()
+                    assert isinstance(value, int)
+                    stack.append(value - 1)
+                elif opcode == 149:  # typeof
+                    value = stack.pop()
+                    return {
+                        _Undefined: 'undefined',
+                        compat_str: 'String',
+                        int: 'Number',
+                        float: 'Number',
+                    }[type(value)]
                 elif opcode == 160:  # add
                     value2 = stack.pop()
                     value1 = stack.pop()
@@ -574,16 +779,37 @@ class SWFInterpreter(object):
                     value1 = stack.pop()
                     res = value1 - value2
                     stack.append(res)
+                elif opcode == 162:  # multiply
+                    value2 = stack.pop()
+                    value1 = stack.pop()
+                    res = value1 * value2
+                    stack.append(res)
                 elif opcode == 164:  # modulo
                     value2 = stack.pop()
                     value1 = stack.pop()
                     res = value1 % value2
                     stack.append(res)
+                elif opcode == 168:  # bitand
+                    value2 = stack.pop()
+                    value1 = stack.pop()
+                    assert isinstance(value1, int)
+                    assert isinstance(value2, int)
+                    res = value1 & value2
+                    stack.append(res)
+                elif opcode == 171:  # equals
+                    value2 = stack.pop()
+                    value1 = stack.pop()
+                    result = value1 == value2
+                    stack.append(result)
                 elif opcode == 175:  # greaterequals
                     value2 = stack.pop()
                     value1 = stack.pop()
                     result = value1 >= value2
                     stack.append(result)
+                elif opcode == 192:  # increment_i
+                    value = stack.pop()
+                    assert isinstance(value, int)
+                    stack.append(value + 1)
                 elif opcode == 208:  # getlocal_0
                     stack.append(registers[0])
                 elif opcode == 209:  # getlocal_1
@@ -606,4 +832,3 @@ class SWFInterpreter(object):
 
         avm_class.method_pyfunctions[func_name] = resfunc
         return resfunc
-