]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/swfinterp.py
1 from __future__
import unicode_literals
16 def _extract_tags(file_contents
):
17 if file_contents
[1:3] != b
'WS':
19 'Not an SWF file; header is %r' % file_contents
[:3])
20 if file_contents
[:1] == b
'C':
21 content
= zlib
.decompress(file_contents
[8:])
23 raise NotImplementedError(
24 'Unsupported compression format %r' %
27 # Determine number of bits in framesize rectangle
28 framesize_nbits
= compat_struct_unpack('!B', content
[:1])[0] >> 3
29 framesize_len
= (5 + 4 * framesize_nbits
+ 7) // 8
31 pos
= framesize_len
+ 2 + 2
32 while pos
< len(content
):
33 header16
= compat_struct_unpack('<H', content
[pos
:pos
+ 2])[0]
35 tag_code
= header16
>> 6
36 tag_len
= header16
& 0x3f
38 tag_len
= compat_struct_unpack('<I', content
[pos
:pos
+ 4])[0]
40 assert pos
+ tag_len
<= len(content
), \
41 ('Tag %d ends at %d+%d - that\'s longer than the file (%d)'
42 % (tag_code
, pos
, tag_len
, len(content
)))
43 yield (tag_code
, content
[pos
:pos
+ tag_len
])
47 class _AVMClass_Object(object):
48 def __init__(self
, avm_class
):
49 self
.avm_class
= avm_class
52 return '%s#%x' % (self
.avm_class
.name
, id(self
))
55 class _ScopeDict(dict):
56 def __init__(self
, avm_class
):
57 super(_ScopeDict
, self
).__init
__()
58 self
.avm_class
= avm_class
61 return '%s__Scope(%s)' % (
63 super(_ScopeDict
, self
).__repr
__())
66 class _AVMClass(object):
67 def __init__(self
, name_idx
, name
, static_properties
=None):
68 self
.name_idx
= name_idx
70 self
.method_names
= {}
73 self
.method_pyfunctions
= {}
74 self
.static_properties
= static_properties
if static_properties
else {}
76 self
.variables
= _ScopeDict(self
)
79 def make_object(self
):
80 return _AVMClass_Object(self
)
83 return '_AVMClass(%s)' % (self
.name
)
85 def register_methods(self
, methods
):
86 self
.method_names
.update(methods
.items())
87 self
.method_idxs
.update(dict(
89 for name
, idx
in methods
.items()))
92 class _Multiname(object):
93 def __init__(self
, kind
):
97 return '[MULTINAME kind: 0x%x]' % self
.kind
100 def _read_int(reader
):
106 b
= compat_struct_unpack('<B', buf
)[0]
107 res
= res |
((b
& 0x7f) << shift
)
115 res
= _read_int(reader
)
116 assert res
& 0xf0000000 == 0
124 v
= _read_int(reader
)
125 if v
& 0x80000000 != 0:
126 v
= - ((v ^
0xffffffff) + 1)
133 last_byte
= b
'\xff' if (ord(bs
[2:3]) >= 0x80) else b
'\x00'
134 return compat_struct_unpack('<i', bs
+ last_byte
)[0]
137 def _read_string(reader
):
139 resb
= reader
.read(slen
)
140 assert len(resb
) == slen
141 return resb
.decode('utf-8')
144 def _read_bytes(count
, reader
):
146 resb
= reader
.read(count
)
147 assert len(resb
) == count
151 def _read_byte(reader
):
152 resb
= _read_bytes(1, reader
=reader
)
153 res
= compat_struct_unpack('<B', resb
)[0]
157 StringClass
= _AVMClass('(no name idx)', 'String')
158 ByteArrayClass
= _AVMClass('(no name idx)', 'ByteArray')
159 TimerClass
= _AVMClass('(no name idx)', 'Timer')
160 TimerEventClass
= _AVMClass('(no name idx)', 'TimerEvent', {'TIMER': 'timer'})
162 StringClass
.name
: StringClass
,
163 ByteArrayClass
.name
: ByteArrayClass
,
164 TimerClass
.name
: TimerClass
,
165 TimerEventClass
.name
: TimerEventClass
,
169 class _Undefined(object):
172 __nonzero__
= __bool__
182 undefined
= _Undefined()
185 class SWFInterpreter(object):
186 def __init__(self
, file_contents
):
187 self
._patched
_functions
= {
188 (TimerClass
, 'addEventListener'): lambda params
: undefined
,
191 for tag_code
, tag
in _extract_tags(file_contents
)
193 p
= code_tag
.index(b
'\0', 4) + 1
194 code_reader
= io
.BytesIO(code_tag
[p
:])
196 # Parse ABC (AVM2 ByteCode)
198 # Define a couple convenience methods
199 u30
= lambda *args
: _u30(*args
, reader
=code_reader
)
200 s32
= lambda *args
: _s32(*args
, reader
=code_reader
)
201 u32
= lambda *args
: _u32(*args
, reader
=code_reader
)
202 read_bytes
= lambda *args
: _read_bytes(*args
, reader
=code_reader
)
203 read_byte
= lambda *args
: _read_byte(*args
, reader
=code_reader
)
205 # minor_version + major_version
210 self
.constant_ints
= [0]
211 for _c
in range(1, int_count
):
212 self
.constant_ints
.append(s32())
213 self
.constant_uints
= [0]
215 for _c
in range(1, uint_count
):
216 self
.constant_uints
.append(u32())
218 read_bytes(max(0, (double_count
- 1)) * 8)
220 self
.constant_strings
= ['']
221 for _c
in range(1, string_count
):
222 s
= _read_string(code_reader
)
223 self
.constant_strings
.append(s
)
224 namespace_count
= u30()
225 for _c
in range(1, namespace_count
):
229 for _c
in range(1, ns_set_count
):
231 for _c2
in range(count
):
233 multiname_count
= u30()
242 0x0e: 2, # MultinameA
243 0x1b: 1, # MultinameL
244 0x1c: 1, # MultinameLA
246 self
.multinames
= ['']
247 for _c
in range(1, multiname_count
):
249 assert kind
in MULTINAME_SIZES
, 'Invalid multiname kind %r' % kind
251 u30() # namespace_idx
253 self
.multinames
.append(self
.constant_strings
[name_idx
])
257 self
.multinames
.append(self
.constant_strings
[name_idx
])
259 self
.multinames
.append(_Multiname(kind
))
260 for _c2
in range(MULTINAME_SIZES
[kind
]):
265 MethodInfo
= collections
.namedtuple(
267 ['NEED_ARGUMENTS', 'NEED_REST'])
269 for method_id
in range(method_count
):
272 for _
in range(param_count
):
274 u30() # name index (always 0 for youtube)
276 if flags
& 0x08 != 0:
279 for c
in range(option_count
):
282 if flags
& 0x80 != 0:
283 # Param names present
284 for _
in range(param_count
):
286 mi
= MethodInfo(flags
& 0x01 != 0, flags
& 0x04 != 0)
287 method_infos
.append(mi
)
290 metadata_count
= u30()
291 for _c
in range(metadata_count
):
294 for _c2
in range(item_count
):
298 def parse_traits_info():
299 trait_name_idx
= u30()
300 kind_full
= read_byte()
301 kind
= kind_full
& 0x0f
302 attrs
= kind_full
>> 4
305 if kind
== 0x00: # Slot
307 u30() # type_name_idx
311 elif kind
== 0x06: # Const
313 u30() # type_name_idx
318 if vkind
== 0x03: # Constant_Int
319 value
= self
.constant_ints
[vindex
]
320 elif vkind
== 0x04: # Constant_UInt
321 value
= self
.constant_uints
[vindex
]
323 return {}, None # Ignore silently for now
324 constants
= {self
.multinames
[trait_name_idx
]: value
}
325 elif kind
in (0x01, 0x02, 0x03): # Method / Getter / Setter
328 methods
[self
.multinames
[trait_name_idx
]] = method_idx
329 elif kind
== 0x04: # Class
332 elif kind
== 0x05: # Function
335 methods
[function_idx
] = self
.multinames
[trait_name_idx
]
337 raise ExtractorError('Unsupported trait kind %d' % kind
)
339 if attrs
& 0x4 != 0: # Metadata present
340 metadata_count
= u30()
341 for _c3
in range(metadata_count
):
342 u30() # metadata index
344 return methods
, constants
349 for class_id
in range(class_count
):
352 cname
= self
.multinames
[name_idx
]
353 avm_class
= _AVMClass(name_idx
, cname
)
354 classes
.append(avm_class
)
356 u30() # super_name idx
358 if flags
& 0x08 != 0: # Protected namespace is present
359 u30() # protected_ns_idx
361 for _c2
in range(intrf_count
):
365 for _c2
in range(trait_count
):
366 trait_methods
, trait_constants
= parse_traits_info()
367 avm_class
.register_methods(trait_methods
)
369 avm_class
.constants
.update(trait_constants
)
371 assert len(classes
) == class_count
372 self
._classes
_by
_name
= dict((c
.name
, c
) for c
in classes
)
374 for avm_class
in classes
:
375 avm_class
.cinit_idx
= u30()
377 for _c2
in range(trait_count
):
378 trait_methods
, trait_constants
= parse_traits_info()
379 avm_class
.register_methods(trait_methods
)
381 avm_class
.constants
.update(trait_constants
)
385 for _c
in range(script_count
):
388 for _c2
in range(trait_count
):
392 method_body_count
= u30()
393 Method
= collections
.namedtuple('Method', ['code', 'local_count'])
394 self
._all
_methods
= []
395 for _c
in range(method_body_count
):
399 u30() # init_scope_depth
400 u30() # max_scope_depth
402 code
= read_bytes(code_length
)
403 m
= Method(code
, local_count
)
404 self
._all
_methods
.append(m
)
405 for avm_class
in classes
:
406 if method_idx
in avm_class
.method_idxs
:
407 avm_class
.methods
[avm_class
.method_idxs
[method_idx
]] = m
408 exception_count
= u30()
409 for _c2
in range(exception_count
):
416 for _c2
in range(trait_count
):
419 assert p
+ code_reader
.tell() == len(code_tag
)
421 def patch_function(self
, avm_class
, func_name
, f
):
422 self
._patched
_functions
[(avm_class
, func_name
)] = f
424 def extract_class(self
, class_name
, call_cinit
=True):
426 res
= self
._classes
_by
_name
[class_name
]
428 raise ExtractorError('Class %r not found' % class_name
)
430 if call_cinit
and hasattr(res
, 'cinit_idx'):
431 res
.register_methods({'$cinit': res
.cinit_idx
})
432 res
.methods
['$cinit'] = self
._all
_methods
[res
.cinit_idx
]
433 cinit
= self
.extract_function(res
, '$cinit')
438 def extract_function(self
, avm_class
, func_name
):
439 p
= self
._patched
_functions
.get((avm_class
, func_name
))
442 if func_name
in avm_class
.method_pyfunctions
:
443 return avm_class
.method_pyfunctions
[func_name
]
444 if func_name
in self
._classes
_by
_name
:
445 return self
._classes
_by
_name
[func_name
].make_object()
446 if func_name
not in avm_class
.methods
:
447 raise ExtractorError('Cannot find function %s.%s' % (
448 avm_class
.name
, func_name
))
449 m
= avm_class
.methods
[func_name
]
453 coder
= io
.BytesIO(m
.code
)
454 s24
= lambda: _s24(coder
)
455 u30
= lambda: _u30(coder
)
457 registers
= [avm_class
.variables
] + list(args
) + [None] * m
.local_count
459 scopes
= collections
.deque([
460 self
._classes
_by
_name
, avm_class
.constants
, avm_class
.variables
])
462 opcode
= _read_byte(coder
)
463 if opcode
== 9: # label
464 pass # Spec says: "Do nothing."
465 elif opcode
== 16: # jump
467 coder
.seek(coder
.tell() + offset
)
468 elif opcode
== 17: # iftrue
472 coder
.seek(coder
.tell() + offset
)
473 elif opcode
== 18: # iffalse
477 coder
.seek(coder
.tell() + offset
)
478 elif opcode
== 19: # ifeq
483 coder
.seek(coder
.tell() + offset
)
484 elif opcode
== 20: # ifne
489 coder
.seek(coder
.tell() + offset
)
490 elif opcode
== 21: # iflt
495 coder
.seek(coder
.tell() + offset
)
496 elif opcode
== 32: # pushnull
498 elif opcode
== 33: # pushundefined
499 stack
.append(undefined
)
500 elif opcode
== 36: # pushbyte
501 v
= _read_byte(coder
)
503 elif opcode
== 37: # pushshort
506 elif opcode
== 38: # pushtrue
508 elif opcode
== 39: # pushfalse
510 elif opcode
== 40: # pushnan
511 stack
.append(float('NaN'))
512 elif opcode
== 42: # dup
515 elif opcode
== 44: # pushstring
517 stack
.append(self
.constant_strings
[idx
])
518 elif opcode
== 48: # pushscope
519 new_scope
= stack
.pop()
520 scopes
.append(new_scope
)
521 elif opcode
== 66: # construct
523 args
= list(reversed(
524 [stack
.pop() for _
in range(arg_count
)]))
526 res
= obj
.avm_class
.make_object()
528 elif opcode
== 70: # callproperty
530 mname
= self
.multinames
[index
]
532 args
= list(reversed(
533 [stack
.pop() for _
in range(arg_count
)]))
536 if obj
== StringClass
:
537 if mname
== 'String':
538 assert len(args
) == 1
539 assert isinstance(args
[0], (
540 int, compat_str
, _Undefined
))
541 if args
[0] == undefined
:
544 res
= compat_str(args
[0])
548 raise NotImplementedError(
549 'Function String.%s is not yet implemented'
551 elif isinstance(obj
, _AVMClass_Object
):
552 func
= self
.extract_function(obj
.avm_class
, mname
)
556 elif isinstance(obj
, _AVMClass
):
557 func
= self
.extract_function(obj
, mname
)
561 elif isinstance(obj
, _ScopeDict
):
562 if mname
in obj
.avm_class
.method_names
:
563 func
= self
.extract_function(obj
.avm_class
, mname
)
569 elif isinstance(obj
, compat_str
):
571 assert len(args
) == 1
572 assert isinstance(args
[0], compat_str
)
576 res
= obj
.split(args
[0])
579 elif mname
== 'charCodeAt':
580 assert len(args
) <= 1
581 idx
= 0 if len(args
) == 0 else args
[0]
582 assert isinstance(idx
, int)
586 elif isinstance(obj
, list):
588 assert len(args
) == 1
589 assert isinstance(args
[0], int)
593 elif mname
== 'join':
594 assert len(args
) == 1
595 assert isinstance(args
[0], compat_str
)
596 res
= args
[0].join(obj
)
599 raise NotImplementedError(
600 'Unsupported property %r on %r'
602 elif opcode
== 71: # returnvoid
605 elif opcode
== 72: # returnvalue
608 elif opcode
== 73: # constructsuper
609 # Not yet implemented, just hope it works without it
611 args
= list(reversed(
612 [stack
.pop() for _
in range(arg_count
)]))
614 elif opcode
== 74: # constructproperty
617 args
= list(reversed(
618 [stack
.pop() for _
in range(arg_count
)]))
621 mname
= self
.multinames
[index
]
622 assert isinstance(obj
, _AVMClass
)
624 # We do not actually call the constructor for now;
625 # we just pretend it does nothing
626 stack
.append(obj
.make_object())
627 elif opcode
== 79: # callpropvoid
629 mname
= self
.multinames
[index
]
631 args
= list(reversed(
632 [stack
.pop() for _
in range(arg_count
)]))
634 if isinstance(obj
, _AVMClass_Object
):
635 func
= self
.extract_function(obj
.avm_class
, mname
)
637 assert res
is undefined
639 if isinstance(obj
, _ScopeDict
):
640 assert mname
in obj
.avm_class
.method_names
641 func
= self
.extract_function(obj
.avm_class
, mname
)
643 assert res
is undefined
645 if mname
== 'reverse':
646 assert isinstance(obj
, list)
649 raise NotImplementedError(
650 'Unsupported (void) property %r on %r'
652 elif opcode
== 86: # newarray
655 for i
in range(arg_count
):
656 arr
.append(stack
.pop())
659 elif opcode
== 93: # findpropstrict
661 mname
= self
.multinames
[index
]
662 for s
in reversed(scopes
):
668 if mname
not in res
and mname
in _builtin_classes
:
669 stack
.append(_builtin_classes
[mname
])
671 stack
.append(res
[mname
])
672 elif opcode
== 94: # findproperty
674 mname
= self
.multinames
[index
]
675 for s
in reversed(scopes
):
680 res
= avm_class
.variables
682 elif opcode
== 96: # getlex
684 mname
= self
.multinames
[index
]
685 for s
in reversed(scopes
):
690 scope
= avm_class
.variables
694 elif mname
in _builtin_classes
:
695 res
= _builtin_classes
[mname
]
697 # Assume uninitialized
701 elif opcode
== 97: # setproperty
704 idx
= self
.multinames
[index
]
705 if isinstance(idx
, _Multiname
):
709 elif opcode
== 98: # getlocal
711 stack
.append(registers
[index
])
712 elif opcode
== 99: # setlocal
715 registers
[index
] = value
716 elif opcode
== 102: # getproperty
718 pname
= self
.multinames
[index
]
719 if pname
== 'length':
721 assert isinstance(obj
, (compat_str
, list))
722 stack
.append(len(obj
))
723 elif isinstance(pname
, compat_str
): # Member access
725 if isinstance(obj
, _AVMClass
):
726 res
= obj
.static_properties
[pname
]
730 assert isinstance(obj
, (dict, _ScopeDict
)),\
731 'Accessing member %r on %r' % (pname
, obj
)
732 res
= obj
.get(pname
, undefined
)
734 else: # Assume attribute access
736 assert isinstance(idx
, int)
738 assert isinstance(obj
, list)
739 stack
.append(obj
[idx
])
740 elif opcode
== 104: # initproperty
743 idx
= self
.multinames
[index
]
744 if isinstance(idx
, _Multiname
):
748 elif opcode
== 115: # convert_
750 intvalue
= int(value
)
751 stack
.append(intvalue
)
752 elif opcode
== 128: # coerce
754 elif opcode
== 130: # coerce_a
756 # um, yes, it's any value
758 elif opcode
== 133: # coerce_s
759 assert isinstance(stack
[-1], (type(None), compat_str
))
760 elif opcode
== 147: # decrement
762 assert isinstance(value
, int)
763 stack
.append(value
- 1)
764 elif opcode
== 149: # typeof
767 _Undefined
: 'undefined',
768 compat_str
: 'String',
772 elif opcode
== 160: # add
775 res
= value1
+ value2
777 elif opcode
== 161: # subtract
780 res
= value1
- value2
782 elif opcode
== 162: # multiply
785 res
= value1
* value2
787 elif opcode
== 164: # modulo
790 res
= value1
% value2
792 elif opcode
== 168: # bitand
795 assert isinstance(value1
, int)
796 assert isinstance(value2
, int)
797 res
= value1
& value2
799 elif opcode
== 171: # equals
802 result
= value1
== value2
804 elif opcode
== 175: # greaterequals
807 result
= value1
>= value2
809 elif opcode
== 192: # increment_i
811 assert isinstance(value
, int)
812 stack
.append(value
+ 1)
813 elif opcode
== 208: # getlocal_0
814 stack
.append(registers
[0])
815 elif opcode
== 209: # getlocal_1
816 stack
.append(registers
[1])
817 elif opcode
== 210: # getlocal_2
818 stack
.append(registers
[2])
819 elif opcode
== 211: # getlocal_3
820 stack
.append(registers
[3])
821 elif opcode
== 212: # setlocal_0
822 registers
[0] = stack
.pop()
823 elif opcode
== 213: # setlocal_1
824 registers
[1] = stack
.pop()
825 elif opcode
== 214: # setlocal_2
826 registers
[2] = stack
.pop()
827 elif opcode
== 215: # setlocal_3
828 registers
[3] = stack
.pop()
830 raise NotImplementedError(
831 'Unsupported opcode %d' % opcode
)
833 avm_class
.method_pyfunctions
[func_name
] = resfunc