]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/swfinterp.py
06c1d6cc1755ef022aa78967d4b651e21fd66618
1 from __future__
import unicode_literals
7 from .compat
import compat_str
14 def _extract_tags(file_contents
):
15 if file_contents
[1:3] != b
'WS':
17 'Not an SWF file; header is %r' % file_contents
[:3])
18 if file_contents
[:1] == b
'C':
19 content
= zlib
.decompress(file_contents
[8:])
21 raise NotImplementedError(
22 'Unsupported compression format %r' %
25 # Determine number of bits in framesize rectangle
26 framesize_nbits
= struct_unpack('!B', content
[:1])[0] >> 3
27 framesize_len
= (5 + 4 * framesize_nbits
+ 7) // 8
29 pos
= framesize_len
+ 2 + 2
30 while pos
< len(content
):
31 header16
= struct_unpack('<H', content
[pos
:pos
+ 2])[0]
33 tag_code
= header16
>> 6
34 tag_len
= header16
& 0x3f
36 tag_len
= struct_unpack('<I', content
[pos
:pos
+ 4])[0]
38 assert pos
+ tag_len
<= len(content
), \
39 ('Tag %d ends at %d+%d - that\'s longer than the file (%d)'
40 % (tag_code
, pos
, tag_len
, len(content
)))
41 yield (tag_code
, content
[pos
:pos
+ tag_len
])
45 class _AVMClass_Object(object):
46 def __init__(self
, avm_class
):
47 self
.avm_class
= avm_class
50 return '%s#%x' % (self
.avm_class
.name
, id(self
))
53 class _ScopeDict(dict):
54 def __init__(self
, avm_class
):
55 super(_ScopeDict
, self
).__init
__()
56 self
.avm_class
= avm_class
59 return '%s__Scope(%s)' % (
61 super(_ScopeDict
, self
).__repr
__())
64 class _AVMClass(object):
65 def __init__(self
, name_idx
, name
, static_properties
=None):
66 self
.name_idx
= name_idx
68 self
.method_names
= {}
71 self
.method_pyfunctions
= {}
72 self
.static_properties
= static_properties
if static_properties
else {}
74 self
.variables
= _ScopeDict(self
)
77 def make_object(self
):
78 return _AVMClass_Object(self
)
81 return '_AVMClass(%s)' % (self
.name
)
83 def register_methods(self
, methods
):
84 self
.method_names
.update(methods
.items())
85 self
.method_idxs
.update(dict(
87 for name
, idx
in methods
.items()))
90 class _Multiname(object):
91 def __init__(self
, kind
):
95 return '[MULTINAME kind: 0x%x]' % self
.kind
98 def _read_int(reader
):
104 b
= struct_unpack('<B', buf
)[0]
105 res
= res |
((b
& 0x7f) << shift
)
113 res
= _read_int(reader
)
114 assert res
& 0xf0000000 == 0
120 v
= _read_int(reader
)
121 if v
& 0x80000000 != 0:
122 v
= - ((v ^
0xffffffff) + 1)
129 last_byte
= b
'\xff' if (ord(bs
[2:3]) >= 0x80) else b
'\x00'
130 return struct_unpack('<i', bs
+ last_byte
)[0]
133 def _read_string(reader
):
135 resb
= reader
.read(slen
)
136 assert len(resb
) == slen
137 return resb
.decode('utf-8')
140 def _read_bytes(count
, reader
):
142 resb
= reader
.read(count
)
143 assert len(resb
) == count
147 def _read_byte(reader
):
148 resb
= _read_bytes(1, reader
=reader
)
149 res
= struct_unpack('<B', resb
)[0]
153 StringClass
= _AVMClass('(no name idx)', 'String')
154 ByteArrayClass
= _AVMClass('(no name idx)', 'ByteArray')
155 TimerClass
= _AVMClass('(no name idx)', 'Timer')
156 TimerEventClass
= _AVMClass('(no name idx)', 'TimerEvent', {'TIMER': 'timer'})
158 StringClass
.name
: StringClass
,
159 ByteArrayClass
.name
: ByteArrayClass
,
160 TimerClass
.name
: TimerClass
,
161 TimerEventClass
.name
: TimerEventClass
,
165 class _Undefined(object):
168 __nonzero__
= __bool__
177 undefined
= _Undefined()
180 class SWFInterpreter(object):
181 def __init__(self
, file_contents
):
182 self
._patched
_functions
= {
183 (TimerClass
, 'addEventListener'): lambda params
: undefined
,
186 for tag_code
, tag
in _extract_tags(file_contents
)
188 p
= code_tag
.index(b
'\0', 4) + 1
189 code_reader
= io
.BytesIO(code_tag
[p
:])
191 # Parse ABC (AVM2 ByteCode)
193 # Define a couple convenience methods
194 u30
= lambda *args
: _u30(*args
, reader
=code_reader
)
195 s32
= lambda *args
: _s32(*args
, reader
=code_reader
)
196 u32
= lambda *args
: _u32(*args
, reader
=code_reader
)
197 read_bytes
= lambda *args
: _read_bytes(*args
, reader
=code_reader
)
198 read_byte
= lambda *args
: _read_byte(*args
, reader
=code_reader
)
200 # minor_version + major_version
205 self
.constant_ints
= [0]
206 for _c
in range(1, int_count
):
207 self
.constant_ints
.append(s32())
208 self
.constant_uints
= [0]
210 for _c
in range(1, uint_count
):
211 self
.constant_uints
.append(u32())
213 read_bytes(max(0, (double_count
- 1)) * 8)
215 self
.constant_strings
= ['']
216 for _c
in range(1, string_count
):
217 s
= _read_string(code_reader
)
218 self
.constant_strings
.append(s
)
219 namespace_count
= u30()
220 for _c
in range(1, namespace_count
):
224 for _c
in range(1, ns_set_count
):
226 for _c2
in range(count
):
228 multiname_count
= u30()
237 0x0e: 2, # MultinameA
238 0x1b: 1, # MultinameL
239 0x1c: 1, # MultinameLA
241 self
.multinames
= ['']
242 for _c
in range(1, multiname_count
):
244 assert kind
in MULTINAME_SIZES
, 'Invalid multiname kind %r' % kind
246 u30() # namespace_idx
248 self
.multinames
.append(self
.constant_strings
[name_idx
])
252 self
.multinames
.append(self
.constant_strings
[name_idx
])
254 self
.multinames
.append(_Multiname(kind
))
255 for _c2
in range(MULTINAME_SIZES
[kind
]):
260 MethodInfo
= collections
.namedtuple(
262 ['NEED_ARGUMENTS', 'NEED_REST'])
264 for method_id
in range(method_count
):
267 for _
in range(param_count
):
269 u30() # name index (always 0 for youtube)
271 if flags
& 0x08 != 0:
274 for c
in range(option_count
):
277 if flags
& 0x80 != 0:
278 # Param names present
279 for _
in range(param_count
):
281 mi
= MethodInfo(flags
& 0x01 != 0, flags
& 0x04 != 0)
282 method_infos
.append(mi
)
285 metadata_count
= u30()
286 for _c
in range(metadata_count
):
289 for _c2
in range(item_count
):
293 def parse_traits_info():
294 trait_name_idx
= u30()
295 kind_full
= read_byte()
296 kind
= kind_full
& 0x0f
297 attrs
= kind_full
>> 4
300 if kind
== 0x00: # Slot
302 u30() # type_name_idx
306 elif kind
== 0x06: # Const
308 u30() # type_name_idx
313 if vkind
== 0x03: # Constant_Int
314 value
= self
.constant_ints
[vindex
]
315 elif vkind
== 0x04: # Constant_UInt
316 value
= self
.constant_uints
[vindex
]
318 return {}, None # Ignore silently for now
319 constants
= {self
.multinames
[trait_name_idx
]: value
}
320 elif kind
in (0x01, 0x02, 0x03): # Method / Getter / Setter
323 methods
[self
.multinames
[trait_name_idx
]] = method_idx
324 elif kind
== 0x04: # Class
327 elif kind
== 0x05: # Function
330 methods
[function_idx
] = self
.multinames
[trait_name_idx
]
332 raise ExtractorError('Unsupported trait kind %d' % kind
)
334 if attrs
& 0x4 != 0: # Metadata present
335 metadata_count
= u30()
336 for _c3
in range(metadata_count
):
337 u30() # metadata index
339 return methods
, constants
344 for class_id
in range(class_count
):
347 cname
= self
.multinames
[name_idx
]
348 avm_class
= _AVMClass(name_idx
, cname
)
349 classes
.append(avm_class
)
351 u30() # super_name idx
353 if flags
& 0x08 != 0: # Protected namespace is present
354 u30() # protected_ns_idx
356 for _c2
in range(intrf_count
):
360 for _c2
in range(trait_count
):
361 trait_methods
, trait_constants
= parse_traits_info()
362 avm_class
.register_methods(trait_methods
)
364 avm_class
.constants
.update(trait_constants
)
366 assert len(classes
) == class_count
367 self
._classes
_by
_name
= dict((c
.name
, c
) for c
in classes
)
369 for avm_class
in classes
:
370 avm_class
.cinit_idx
= u30()
372 for _c2
in range(trait_count
):
373 trait_methods
, trait_constants
= parse_traits_info()
374 avm_class
.register_methods(trait_methods
)
376 avm_class
.constants
.update(trait_constants
)
380 for _c
in range(script_count
):
383 for _c2
in range(trait_count
):
387 method_body_count
= u30()
388 Method
= collections
.namedtuple('Method', ['code', 'local_count'])
389 self
._all
_methods
= []
390 for _c
in range(method_body_count
):
394 u30() # init_scope_depth
395 u30() # max_scope_depth
397 code
= read_bytes(code_length
)
398 m
= Method(code
, local_count
)
399 self
._all
_methods
.append(m
)
400 for avm_class
in classes
:
401 if method_idx
in avm_class
.method_idxs
:
402 avm_class
.methods
[avm_class
.method_idxs
[method_idx
]] = m
403 exception_count
= u30()
404 for _c2
in range(exception_count
):
411 for _c2
in range(trait_count
):
414 assert p
+ code_reader
.tell() == len(code_tag
)
416 def patch_function(self
, avm_class
, func_name
, f
):
417 self
._patched
_functions
[(avm_class
, func_name
)] = f
419 def extract_class(self
, class_name
, call_cinit
=True):
421 res
= self
._classes
_by
_name
[class_name
]
423 raise ExtractorError('Class %r not found' % class_name
)
425 if call_cinit
and hasattr(res
, 'cinit_idx'):
426 res
.register_methods({'$cinit': res
.cinit_idx
})
427 res
.methods
['$cinit'] = self
._all
_methods
[res
.cinit_idx
]
428 cinit
= self
.extract_function(res
, '$cinit')
433 def extract_function(self
, avm_class
, func_name
):
434 p
= self
._patched
_functions
.get((avm_class
, func_name
))
437 if func_name
in avm_class
.method_pyfunctions
:
438 return avm_class
.method_pyfunctions
[func_name
]
439 if func_name
in self
._classes
_by
_name
:
440 return self
._classes
_by
_name
[func_name
].make_object()
441 if func_name
not in avm_class
.methods
:
442 raise ExtractorError('Cannot find function %s.%s' % (
443 avm_class
.name
, func_name
))
444 m
= avm_class
.methods
[func_name
]
448 coder
= io
.BytesIO(m
.code
)
449 s24
= lambda: _s24(coder
)
450 u30
= lambda: _u30(coder
)
452 registers
= [avm_class
.variables
] + list(args
) + [None] * m
.local_count
454 scopes
= collections
.deque([
455 self
._classes
_by
_name
, avm_class
.constants
, avm_class
.variables
])
457 opcode
= _read_byte(coder
)
458 if opcode
== 9: # label
459 pass # Spec says: "Do nothing."
460 elif opcode
== 16: # jump
462 coder
.seek(coder
.tell() + offset
)
463 elif opcode
== 17: # iftrue
467 coder
.seek(coder
.tell() + offset
)
468 elif opcode
== 18: # iffalse
472 coder
.seek(coder
.tell() + offset
)
473 elif opcode
== 19: # ifeq
478 coder
.seek(coder
.tell() + offset
)
479 elif opcode
== 20: # ifne
484 coder
.seek(coder
.tell() + offset
)
485 elif opcode
== 21: # iflt
490 coder
.seek(coder
.tell() + offset
)
491 elif opcode
== 32: # pushnull
493 elif opcode
== 33: # pushundefined
494 stack
.append(undefined
)
495 elif opcode
== 36: # pushbyte
496 v
= _read_byte(coder
)
498 elif opcode
== 37: # pushshort
501 elif opcode
== 38: # pushtrue
503 elif opcode
== 39: # pushfalse
505 elif opcode
== 40: # pushnan
506 stack
.append(float('NaN'))
507 elif opcode
== 42: # dup
510 elif opcode
== 44: # pushstring
512 stack
.append(self
.constant_strings
[idx
])
513 elif opcode
== 48: # pushscope
514 new_scope
= stack
.pop()
515 scopes
.append(new_scope
)
516 elif opcode
== 66: # construct
518 args
= list(reversed(
519 [stack
.pop() for _
in range(arg_count
)]))
521 res
= obj
.avm_class
.make_object()
523 elif opcode
== 70: # callproperty
525 mname
= self
.multinames
[index
]
527 args
= list(reversed(
528 [stack
.pop() for _
in range(arg_count
)]))
531 if obj
== StringClass
:
532 if mname
== 'String':
533 assert len(args
) == 1
534 assert isinstance(args
[0], (
535 int, compat_str
, _Undefined
))
536 if args
[0] == undefined
:
539 res
= compat_str(args
[0])
543 raise NotImplementedError(
544 'Function String.%s is not yet implemented'
546 elif isinstance(obj
, _AVMClass_Object
):
547 func
= self
.extract_function(obj
.avm_class
, mname
)
551 elif isinstance(obj
, _AVMClass
):
552 func
= self
.extract_function(obj
, mname
)
556 elif isinstance(obj
, _ScopeDict
):
557 if mname
in obj
.avm_class
.method_names
:
558 func
= self
.extract_function(obj
.avm_class
, mname
)
564 elif isinstance(obj
, compat_str
):
566 assert len(args
) == 1
567 assert isinstance(args
[0], compat_str
)
571 res
= obj
.split(args
[0])
574 elif mname
== 'charCodeAt':
575 assert len(args
) <= 1
576 idx
= 0 if len(args
) == 0 else args
[0]
577 assert isinstance(idx
, int)
581 elif isinstance(obj
, list):
583 assert len(args
) == 1
584 assert isinstance(args
[0], int)
588 elif mname
== 'join':
589 assert len(args
) == 1
590 assert isinstance(args
[0], compat_str
)
591 res
= args
[0].join(obj
)
594 raise NotImplementedError(
595 'Unsupported property %r on %r'
597 elif opcode
== 71: # returnvoid
600 elif opcode
== 72: # returnvalue
603 elif opcode
== 73: # constructsuper
604 # Not yet implemented, just hope it works without it
606 args
= list(reversed(
607 [stack
.pop() for _
in range(arg_count
)]))
609 elif opcode
== 74: # constructproperty
612 args
= list(reversed(
613 [stack
.pop() for _
in range(arg_count
)]))
616 mname
= self
.multinames
[index
]
617 assert isinstance(obj
, _AVMClass
)
619 # We do not actually call the constructor for now;
620 # we just pretend it does nothing
621 stack
.append(obj
.make_object())
622 elif opcode
== 79: # callpropvoid
624 mname
= self
.multinames
[index
]
626 args
= list(reversed(
627 [stack
.pop() for _
in range(arg_count
)]))
629 if isinstance(obj
, _AVMClass_Object
):
630 func
= self
.extract_function(obj
.avm_class
, mname
)
632 assert res
is undefined
634 if isinstance(obj
, _ScopeDict
):
635 assert mname
in obj
.avm_class
.method_names
636 func
= self
.extract_function(obj
.avm_class
, mname
)
638 assert res
is undefined
640 if mname
== 'reverse':
641 assert isinstance(obj
, list)
644 raise NotImplementedError(
645 'Unsupported (void) property %r on %r'
647 elif opcode
== 86: # newarray
650 for i
in range(arg_count
):
651 arr
.append(stack
.pop())
654 elif opcode
== 93: # findpropstrict
656 mname
= self
.multinames
[index
]
657 for s
in reversed(scopes
):
663 if mname
not in res
and mname
in _builtin_classes
:
664 stack
.append(_builtin_classes
[mname
])
666 stack
.append(res
[mname
])
667 elif opcode
== 94: # findproperty
669 mname
= self
.multinames
[index
]
670 for s
in reversed(scopes
):
675 res
= avm_class
.variables
677 elif opcode
== 96: # getlex
679 mname
= self
.multinames
[index
]
680 for s
in reversed(scopes
):
685 scope
= avm_class
.variables
689 elif mname
in _builtin_classes
:
690 res
= _builtin_classes
[mname
]
692 # Assume uninitialized
696 elif opcode
== 97: # setproperty
699 idx
= self
.multinames
[index
]
700 if isinstance(idx
, _Multiname
):
704 elif opcode
== 98: # getlocal
706 stack
.append(registers
[index
])
707 elif opcode
== 99: # setlocal
710 registers
[index
] = value
711 elif opcode
== 102: # getproperty
713 pname
= self
.multinames
[index
]
714 if pname
== 'length':
716 assert isinstance(obj
, (compat_str
, list))
717 stack
.append(len(obj
))
718 elif isinstance(pname
, compat_str
): # Member access
720 if isinstance(obj
, _AVMClass
):
721 res
= obj
.static_properties
[pname
]
725 assert isinstance(obj
, (dict, _ScopeDict
)),\
726 'Accessing member %r on %r' % (pname
, obj
)
727 res
= obj
.get(pname
, undefined
)
729 else: # Assume attribute access
731 assert isinstance(idx
, int)
733 assert isinstance(obj
, list)
734 stack
.append(obj
[idx
])
735 elif opcode
== 104: # initproperty
738 idx
= self
.multinames
[index
]
739 if isinstance(idx
, _Multiname
):
743 elif opcode
== 115: # convert_
745 intvalue
= int(value
)
746 stack
.append(intvalue
)
747 elif opcode
== 128: # coerce
749 elif opcode
== 130: # coerce_a
751 # um, yes, it's any value
753 elif opcode
== 133: # coerce_s
754 assert isinstance(stack
[-1], (type(None), compat_str
))
755 elif opcode
== 147: # decrement
757 assert isinstance(value
, int)
758 stack
.append(value
- 1)
759 elif opcode
== 149: # typeof
762 _Undefined
: 'undefined',
763 compat_str
: 'String',
767 elif opcode
== 160: # add
770 res
= value1
+ value2
772 elif opcode
== 161: # subtract
775 res
= value1
- value2
777 elif opcode
== 162: # multiply
780 res
= value1
* value2
782 elif opcode
== 164: # modulo
785 res
= value1
% value2
787 elif opcode
== 168: # bitand
790 assert isinstance(value1
, int)
791 assert isinstance(value2
, int)
792 res
= value1
& value2
794 elif opcode
== 171: # equals
797 result
= value1
== value2
799 elif opcode
== 175: # greaterequals
802 result
= value1
>= value2
804 elif opcode
== 192: # increment_i
806 assert isinstance(value
, int)
807 stack
.append(value
+ 1)
808 elif opcode
== 208: # getlocal_0
809 stack
.append(registers
[0])
810 elif opcode
== 209: # getlocal_1
811 stack
.append(registers
[1])
812 elif opcode
== 210: # getlocal_2
813 stack
.append(registers
[2])
814 elif opcode
== 211: # getlocal_3
815 stack
.append(registers
[3])
816 elif opcode
== 212: # setlocal_0
817 registers
[0] = stack
.pop()
818 elif opcode
== 213: # setlocal_1
819 registers
[1] = stack
.pop()
820 elif opcode
== 214: # setlocal_2
821 registers
[2] = stack
.pop()
822 elif opcode
== 215: # setlocal_3
823 registers
[3] = stack
.pop()
825 raise NotImplementedError(
826 'Unsupported opcode %d' % opcode
)
828 avm_class
.method_pyfunctions
[func_name
] = resfunc