]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/swfinterp.py
7cf490aa43a878b3c377bea0b173c7a2b170c2c7
1 from __future__
import unicode_literals
16 def _extract_tags(file_contents
):
17 if file_contents
[1:3] != b
'WS':
19 'Not an SWF file; header is %r' % file_contents
[:3])
20 if file_contents
[:1] == b
'C':
21 content
= zlib
.decompress(file_contents
[8:])
23 raise NotImplementedError(
24 'Unsupported compression format %r' %
27 # Determine number of bits in framesize rectangle
28 framesize_nbits
= compat_struct_unpack('!B', content
[:1])[0] >> 3
29 framesize_len
= (5 + 4 * framesize_nbits
+ 7) // 8
31 pos
= framesize_len
+ 2 + 2
32 while pos
< len(content
):
33 header16
= compat_struct_unpack('<H', content
[pos
:pos
+ 2])[0]
35 tag_code
= header16
>> 6
36 tag_len
= header16
& 0x3f
38 tag_len
= compat_struct_unpack('<I', content
[pos
:pos
+ 4])[0]
40 assert pos
+ tag_len
<= len(content
), \
41 ('Tag %d ends at %d+%d - that\'s longer than the file (%d)'
42 % (tag_code
, pos
, tag_len
, len(content
)))
43 yield (tag_code
, content
[pos
:pos
+ tag_len
])
47 class _AVMClass_Object(object):
48 def __init__(self
, avm_class
):
49 self
.avm_class
= avm_class
52 return '%s#%x' % (self
.avm_class
.name
, id(self
))
55 class _ScopeDict(dict):
56 def __init__(self
, avm_class
):
57 super(_ScopeDict
, self
).__init
__()
58 self
.avm_class
= avm_class
61 return '%s__Scope(%s)' % (
63 super(_ScopeDict
, self
).__repr
__())
66 class _AVMClass(object):
67 def __init__(self
, name_idx
, name
, static_properties
=None):
68 self
.name_idx
= name_idx
70 self
.method_names
= {}
73 self
.method_pyfunctions
= {}
74 self
.static_properties
= static_properties
if static_properties
else {}
76 self
.variables
= _ScopeDict(self
)
79 def make_object(self
):
80 return _AVMClass_Object(self
)
83 return '_AVMClass(%s)' % (self
.name
)
85 def register_methods(self
, methods
):
86 self
.method_names
.update(methods
.items())
87 self
.method_idxs
.update(dict(
89 for name
, idx
in methods
.items()))
92 class _Multiname(object):
93 def __init__(self
, kind
):
97 return '[MULTINAME kind: 0x%x]' % self
.kind
100 def _read_int(reader
):
106 b
= compat_struct_unpack('<B', buf
)[0]
107 res
= res |
((b
& 0x7f) << shift
)
115 res
= _read_int(reader
)
116 assert res
& 0xf0000000 == 0
122 v
= _read_int(reader
)
123 if v
& 0x80000000 != 0:
124 v
= - ((v ^
0xffffffff) + 1)
131 last_byte
= b
'\xff' if (ord(bs
[2:3]) >= 0x80) else b
'\x00'
132 return compat_struct_unpack('<i', bs
+ last_byte
)[0]
135 def _read_string(reader
):
137 resb
= reader
.read(slen
)
138 assert len(resb
) == slen
139 return resb
.decode('utf-8')
142 def _read_bytes(count
, reader
):
144 resb
= reader
.read(count
)
145 assert len(resb
) == count
149 def _read_byte(reader
):
150 resb
= _read_bytes(1, reader
=reader
)
151 res
= compat_struct_unpack('<B', resb
)[0]
155 StringClass
= _AVMClass('(no name idx)', 'String')
156 ByteArrayClass
= _AVMClass('(no name idx)', 'ByteArray')
157 TimerClass
= _AVMClass('(no name idx)', 'Timer')
158 TimerEventClass
= _AVMClass('(no name idx)', 'TimerEvent', {'TIMER': 'timer'})
160 StringClass
.name
: StringClass
,
161 ByteArrayClass
.name
: ByteArrayClass
,
162 TimerClass
.name
: TimerClass
,
163 TimerEventClass
.name
: TimerEventClass
,
167 class _Undefined(object):
170 __nonzero__
= __bool__
179 undefined
= _Undefined()
182 class SWFInterpreter(object):
183 def __init__(self
, file_contents
):
184 self
._patched
_functions
= {
185 (TimerClass
, 'addEventListener'): lambda params
: undefined
,
188 for tag_code
, tag
in _extract_tags(file_contents
)
190 p
= code_tag
.index(b
'\0', 4) + 1
191 code_reader
= io
.BytesIO(code_tag
[p
:])
193 # Parse ABC (AVM2 ByteCode)
195 # Define a couple convenience methods
196 u30
= lambda *args
: _u30(*args
, reader
=code_reader
)
197 s32
= lambda *args
: _s32(*args
, reader
=code_reader
)
198 u32
= lambda *args
: _u32(*args
, reader
=code_reader
)
199 read_bytes
= lambda *args
: _read_bytes(*args
, reader
=code_reader
)
200 read_byte
= lambda *args
: _read_byte(*args
, reader
=code_reader
)
202 # minor_version + major_version
207 self
.constant_ints
= [0]
208 for _c
in range(1, int_count
):
209 self
.constant_ints
.append(s32())
210 self
.constant_uints
= [0]
212 for _c
in range(1, uint_count
):
213 self
.constant_uints
.append(u32())
215 read_bytes(max(0, (double_count
- 1)) * 8)
217 self
.constant_strings
= ['']
218 for _c
in range(1, string_count
):
219 s
= _read_string(code_reader
)
220 self
.constant_strings
.append(s
)
221 namespace_count
= u30()
222 for _c
in range(1, namespace_count
):
226 for _c
in range(1, ns_set_count
):
228 for _c2
in range(count
):
230 multiname_count
= u30()
239 0x0e: 2, # MultinameA
240 0x1b: 1, # MultinameL
241 0x1c: 1, # MultinameLA
243 self
.multinames
= ['']
244 for _c
in range(1, multiname_count
):
246 assert kind
in MULTINAME_SIZES
, 'Invalid multiname kind %r' % kind
248 u30() # namespace_idx
250 self
.multinames
.append(self
.constant_strings
[name_idx
])
254 self
.multinames
.append(self
.constant_strings
[name_idx
])
256 self
.multinames
.append(_Multiname(kind
))
257 for _c2
in range(MULTINAME_SIZES
[kind
]):
262 MethodInfo
= collections
.namedtuple(
264 ['NEED_ARGUMENTS', 'NEED_REST'])
266 for method_id
in range(method_count
):
269 for _
in range(param_count
):
271 u30() # name index (always 0 for youtube)
273 if flags
& 0x08 != 0:
276 for c
in range(option_count
):
279 if flags
& 0x80 != 0:
280 # Param names present
281 for _
in range(param_count
):
283 mi
= MethodInfo(flags
& 0x01 != 0, flags
& 0x04 != 0)
284 method_infos
.append(mi
)
287 metadata_count
= u30()
288 for _c
in range(metadata_count
):
291 for _c2
in range(item_count
):
295 def parse_traits_info():
296 trait_name_idx
= u30()
297 kind_full
= read_byte()
298 kind
= kind_full
& 0x0f
299 attrs
= kind_full
>> 4
302 if kind
== 0x00: # Slot
304 u30() # type_name_idx
308 elif kind
== 0x06: # Const
310 u30() # type_name_idx
315 if vkind
== 0x03: # Constant_Int
316 value
= self
.constant_ints
[vindex
]
317 elif vkind
== 0x04: # Constant_UInt
318 value
= self
.constant_uints
[vindex
]
320 return {}, None # Ignore silently for now
321 constants
= {self
.multinames
[trait_name_idx
]: value
}
322 elif kind
in (0x01, 0x02, 0x03): # Method / Getter / Setter
325 methods
[self
.multinames
[trait_name_idx
]] = method_idx
326 elif kind
== 0x04: # Class
329 elif kind
== 0x05: # Function
332 methods
[function_idx
] = self
.multinames
[trait_name_idx
]
334 raise ExtractorError('Unsupported trait kind %d' % kind
)
336 if attrs
& 0x4 != 0: # Metadata present
337 metadata_count
= u30()
338 for _c3
in range(metadata_count
):
339 u30() # metadata index
341 return methods
, constants
346 for class_id
in range(class_count
):
349 cname
= self
.multinames
[name_idx
]
350 avm_class
= _AVMClass(name_idx
, cname
)
351 classes
.append(avm_class
)
353 u30() # super_name idx
355 if flags
& 0x08 != 0: # Protected namespace is present
356 u30() # protected_ns_idx
358 for _c2
in range(intrf_count
):
362 for _c2
in range(trait_count
):
363 trait_methods
, trait_constants
= parse_traits_info()
364 avm_class
.register_methods(trait_methods
)
366 avm_class
.constants
.update(trait_constants
)
368 assert len(classes
) == class_count
369 self
._classes
_by
_name
= dict((c
.name
, c
) for c
in classes
)
371 for avm_class
in classes
:
372 avm_class
.cinit_idx
= u30()
374 for _c2
in range(trait_count
):
375 trait_methods
, trait_constants
= parse_traits_info()
376 avm_class
.register_methods(trait_methods
)
378 avm_class
.constants
.update(trait_constants
)
382 for _c
in range(script_count
):
385 for _c2
in range(trait_count
):
389 method_body_count
= u30()
390 Method
= collections
.namedtuple('Method', ['code', 'local_count'])
391 self
._all
_methods
= []
392 for _c
in range(method_body_count
):
396 u30() # init_scope_depth
397 u30() # max_scope_depth
399 code
= read_bytes(code_length
)
400 m
= Method(code
, local_count
)
401 self
._all
_methods
.append(m
)
402 for avm_class
in classes
:
403 if method_idx
in avm_class
.method_idxs
:
404 avm_class
.methods
[avm_class
.method_idxs
[method_idx
]] = m
405 exception_count
= u30()
406 for _c2
in range(exception_count
):
413 for _c2
in range(trait_count
):
416 assert p
+ code_reader
.tell() == len(code_tag
)
418 def patch_function(self
, avm_class
, func_name
, f
):
419 self
._patched
_functions
[(avm_class
, func_name
)] = f
421 def extract_class(self
, class_name
, call_cinit
=True):
423 res
= self
._classes
_by
_name
[class_name
]
425 raise ExtractorError('Class %r not found' % class_name
)
427 if call_cinit
and hasattr(res
, 'cinit_idx'):
428 res
.register_methods({'$cinit': res
.cinit_idx
})
429 res
.methods
['$cinit'] = self
._all
_methods
[res
.cinit_idx
]
430 cinit
= self
.extract_function(res
, '$cinit')
435 def extract_function(self
, avm_class
, func_name
):
436 p
= self
._patched
_functions
.get((avm_class
, func_name
))
439 if func_name
in avm_class
.method_pyfunctions
:
440 return avm_class
.method_pyfunctions
[func_name
]
441 if func_name
in self
._classes
_by
_name
:
442 return self
._classes
_by
_name
[func_name
].make_object()
443 if func_name
not in avm_class
.methods
:
444 raise ExtractorError('Cannot find function %s.%s' % (
445 avm_class
.name
, func_name
))
446 m
= avm_class
.methods
[func_name
]
450 coder
= io
.BytesIO(m
.code
)
451 s24
= lambda: _s24(coder
)
452 u30
= lambda: _u30(coder
)
454 registers
= [avm_class
.variables
] + list(args
) + [None] * m
.local_count
456 scopes
= collections
.deque([
457 self
._classes
_by
_name
, avm_class
.constants
, avm_class
.variables
])
459 opcode
= _read_byte(coder
)
460 if opcode
== 9: # label
461 pass # Spec says: "Do nothing."
462 elif opcode
== 16: # jump
464 coder
.seek(coder
.tell() + offset
)
465 elif opcode
== 17: # iftrue
469 coder
.seek(coder
.tell() + offset
)
470 elif opcode
== 18: # iffalse
474 coder
.seek(coder
.tell() + offset
)
475 elif opcode
== 19: # ifeq
480 coder
.seek(coder
.tell() + offset
)
481 elif opcode
== 20: # ifne
486 coder
.seek(coder
.tell() + offset
)
487 elif opcode
== 21: # iflt
492 coder
.seek(coder
.tell() + offset
)
493 elif opcode
== 32: # pushnull
495 elif opcode
== 33: # pushundefined
496 stack
.append(undefined
)
497 elif opcode
== 36: # pushbyte
498 v
= _read_byte(coder
)
500 elif opcode
== 37: # pushshort
503 elif opcode
== 38: # pushtrue
505 elif opcode
== 39: # pushfalse
507 elif opcode
== 40: # pushnan
508 stack
.append(float('NaN'))
509 elif opcode
== 42: # dup
512 elif opcode
== 44: # pushstring
514 stack
.append(self
.constant_strings
[idx
])
515 elif opcode
== 48: # pushscope
516 new_scope
= stack
.pop()
517 scopes
.append(new_scope
)
518 elif opcode
== 66: # construct
520 args
= list(reversed(
521 [stack
.pop() for _
in range(arg_count
)]))
523 res
= obj
.avm_class
.make_object()
525 elif opcode
== 70: # callproperty
527 mname
= self
.multinames
[index
]
529 args
= list(reversed(
530 [stack
.pop() for _
in range(arg_count
)]))
533 if obj
== StringClass
:
534 if mname
== 'String':
535 assert len(args
) == 1
536 assert isinstance(args
[0], (
537 int, compat_str
, _Undefined
))
538 if args
[0] == undefined
:
541 res
= compat_str(args
[0])
545 raise NotImplementedError(
546 'Function String.%s is not yet implemented'
548 elif isinstance(obj
, _AVMClass_Object
):
549 func
= self
.extract_function(obj
.avm_class
, mname
)
553 elif isinstance(obj
, _AVMClass
):
554 func
= self
.extract_function(obj
, mname
)
558 elif isinstance(obj
, _ScopeDict
):
559 if mname
in obj
.avm_class
.method_names
:
560 func
= self
.extract_function(obj
.avm_class
, mname
)
566 elif isinstance(obj
, compat_str
):
568 assert len(args
) == 1
569 assert isinstance(args
[0], compat_str
)
573 res
= obj
.split(args
[0])
576 elif mname
== 'charCodeAt':
577 assert len(args
) <= 1
578 idx
= 0 if len(args
) == 0 else args
[0]
579 assert isinstance(idx
, int)
583 elif isinstance(obj
, list):
585 assert len(args
) == 1
586 assert isinstance(args
[0], int)
590 elif mname
== 'join':
591 assert len(args
) == 1
592 assert isinstance(args
[0], compat_str
)
593 res
= args
[0].join(obj
)
596 raise NotImplementedError(
597 'Unsupported property %r on %r'
599 elif opcode
== 71: # returnvoid
602 elif opcode
== 72: # returnvalue
605 elif opcode
== 73: # constructsuper
606 # Not yet implemented, just hope it works without it
608 args
= list(reversed(
609 [stack
.pop() for _
in range(arg_count
)]))
611 elif opcode
== 74: # constructproperty
614 args
= list(reversed(
615 [stack
.pop() for _
in range(arg_count
)]))
618 mname
= self
.multinames
[index
]
619 assert isinstance(obj
, _AVMClass
)
621 # We do not actually call the constructor for now;
622 # we just pretend it does nothing
623 stack
.append(obj
.make_object())
624 elif opcode
== 79: # callpropvoid
626 mname
= self
.multinames
[index
]
628 args
= list(reversed(
629 [stack
.pop() for _
in range(arg_count
)]))
631 if isinstance(obj
, _AVMClass_Object
):
632 func
= self
.extract_function(obj
.avm_class
, mname
)
634 assert res
is undefined
636 if isinstance(obj
, _ScopeDict
):
637 assert mname
in obj
.avm_class
.method_names
638 func
= self
.extract_function(obj
.avm_class
, mname
)
640 assert res
is undefined
642 if mname
== 'reverse':
643 assert isinstance(obj
, list)
646 raise NotImplementedError(
647 'Unsupported (void) property %r on %r'
649 elif opcode
== 86: # newarray
652 for i
in range(arg_count
):
653 arr
.append(stack
.pop())
656 elif opcode
== 93: # findpropstrict
658 mname
= self
.multinames
[index
]
659 for s
in reversed(scopes
):
665 if mname
not in res
and mname
in _builtin_classes
:
666 stack
.append(_builtin_classes
[mname
])
668 stack
.append(res
[mname
])
669 elif opcode
== 94: # findproperty
671 mname
= self
.multinames
[index
]
672 for s
in reversed(scopes
):
677 res
= avm_class
.variables
679 elif opcode
== 96: # getlex
681 mname
= self
.multinames
[index
]
682 for s
in reversed(scopes
):
687 scope
= avm_class
.variables
691 elif mname
in _builtin_classes
:
692 res
= _builtin_classes
[mname
]
694 # Assume uninitialized
698 elif opcode
== 97: # setproperty
701 idx
= self
.multinames
[index
]
702 if isinstance(idx
, _Multiname
):
706 elif opcode
== 98: # getlocal
708 stack
.append(registers
[index
])
709 elif opcode
== 99: # setlocal
712 registers
[index
] = value
713 elif opcode
== 102: # getproperty
715 pname
= self
.multinames
[index
]
716 if pname
== 'length':
718 assert isinstance(obj
, (compat_str
, list))
719 stack
.append(len(obj
))
720 elif isinstance(pname
, compat_str
): # Member access
722 if isinstance(obj
, _AVMClass
):
723 res
= obj
.static_properties
[pname
]
727 assert isinstance(obj
, (dict, _ScopeDict
)),\
728 'Accessing member %r on %r' % (pname
, obj
)
729 res
= obj
.get(pname
, undefined
)
731 else: # Assume attribute access
733 assert isinstance(idx
, int)
735 assert isinstance(obj
, list)
736 stack
.append(obj
[idx
])
737 elif opcode
== 104: # initproperty
740 idx
= self
.multinames
[index
]
741 if isinstance(idx
, _Multiname
):
745 elif opcode
== 115: # convert_
747 intvalue
= int(value
)
748 stack
.append(intvalue
)
749 elif opcode
== 128: # coerce
751 elif opcode
== 130: # coerce_a
753 # um, yes, it's any value
755 elif opcode
== 133: # coerce_s
756 assert isinstance(stack
[-1], (type(None), compat_str
))
757 elif opcode
== 147: # decrement
759 assert isinstance(value
, int)
760 stack
.append(value
- 1)
761 elif opcode
== 149: # typeof
764 _Undefined
: 'undefined',
765 compat_str
: 'String',
769 elif opcode
== 160: # add
772 res
= value1
+ value2
774 elif opcode
== 161: # subtract
777 res
= value1
- value2
779 elif opcode
== 162: # multiply
782 res
= value1
* value2
784 elif opcode
== 164: # modulo
787 res
= value1
% value2
789 elif opcode
== 168: # bitand
792 assert isinstance(value1
, int)
793 assert isinstance(value2
, int)
794 res
= value1
& value2
796 elif opcode
== 171: # equals
799 result
= value1
== value2
801 elif opcode
== 175: # greaterequals
804 result
= value1
>= value2
806 elif opcode
== 192: # increment_i
808 assert isinstance(value
, int)
809 stack
.append(value
+ 1)
810 elif opcode
== 208: # getlocal_0
811 stack
.append(registers
[0])
812 elif opcode
== 209: # getlocal_1
813 stack
.append(registers
[1])
814 elif opcode
== 210: # getlocal_2
815 stack
.append(registers
[2])
816 elif opcode
== 211: # getlocal_3
817 stack
.append(registers
[3])
818 elif opcode
== 212: # setlocal_0
819 registers
[0] = stack
.pop()
820 elif opcode
== 213: # setlocal_1
821 registers
[1] = stack
.pop()
822 elif opcode
== 214: # setlocal_2
823 registers
[2] = stack
.pop()
824 elif opcode
== 215: # setlocal_3
825 registers
[3] = stack
.pop()
827 raise NotImplementedError(
828 'Unsupported opcode %d' % opcode
)
830 avm_class
.method_pyfunctions
[func_name
] = resfunc