]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/swfinterp.py
   1 from __future__ 
import unicode_literals
 
  16 def _extract_tags(file_contents
): 
  17     if file_contents
[1:3] != b
'WS': 
  19             'Not an SWF file; header is %r' % file_contents
[:3]) 
  20     if file_contents
[:1] == b
'C': 
  21         content 
= zlib
.decompress(file_contents
[8:]) 
  23         raise NotImplementedError( 
  24             'Unsupported compression format %r' % 
  27     # Determine number of bits in framesize rectangle 
  28     framesize_nbits 
= compat_struct_unpack('!B', content
[:1])[0] >> 3 
  29     framesize_len 
= (5 + 4 * framesize_nbits 
+ 7) // 8 
  31     pos 
= framesize_len 
+ 2 + 2 
  32     while pos 
< len(content
): 
  33         header16 
= compat_struct_unpack('<H', content
[pos
:pos 
+ 2])[0] 
  35         tag_code 
= header16 
>> 6 
  36         tag_len 
= header16 
& 0x3f 
  38             tag_len 
= compat_struct_unpack('<I', content
[pos
:pos 
+ 4])[0] 
  40         assert pos 
+ tag_len 
<= len(content
), \
 
  41             ('Tag %d ends at %d+%d - that\'s longer than the file (%d)' 
  42                 % (tag_code
, pos
, tag_len
, len(content
))) 
  43         yield (tag_code
, content
[pos
:pos 
+ tag_len
]) 
  47 class _AVMClass_Object(object): 
  48     def __init__(self
, avm_class
): 
  49         self
.avm_class 
= avm_class
 
  52         return '%s#%x' % (self
.avm_class
.name
, id(self
)) 
  55 class _ScopeDict(dict): 
  56     def __init__(self
, avm_class
): 
  57         super(_ScopeDict
, self
).__init
__() 
  58         self
.avm_class 
= avm_class
 
  61         return '%s__Scope(%s)' % ( 
  63             super(_ScopeDict
, self
).__repr
__()) 
  66 class _AVMClass(object): 
  67     def __init__(self
, name_idx
, name
, static_properties
=None): 
  68         self
.name_idx 
= name_idx
 
  70         self
.method_names 
= {} 
  73         self
.method_pyfunctions 
= {} 
  74         self
.static_properties 
= static_properties 
if static_properties 
else {} 
  76         self
.variables 
= _ScopeDict(self
) 
  79     def make_object(self
): 
  80         return _AVMClass_Object(self
) 
  83         return '_AVMClass(%s)' % (self
.name
) 
  85     def register_methods(self
, methods
): 
  86         self
.method_names
.update(methods
.items()) 
  87         self
.method_idxs
.update(dict( 
  89             for name
, idx 
in methods
.items())) 
  92 class _Multiname(object): 
  93     def __init__(self
, kind
): 
  97         return '[MULTINAME kind: 0x%x]' % self
.kind
 
 100 def _read_int(reader
): 
 106         b 
= compat_struct_unpack('<B', buf
)[0] 
 107         res 
= res | 
((b 
& 0x7f) << shift
) 
 115     res 
= _read_int(reader
) 
 116     assert res 
& 0xf0000000 == 0 
 124     v 
= _read_int(reader
) 
 125     if v 
& 0x80000000 != 0: 
 126         v 
= - ((v ^ 
0xffffffff) + 1) 
 133     last_byte 
= b
'\xff' if (ord(bs
[2:3]) >= 0x80) else b
'\x00' 
 134     return compat_struct_unpack('<i', bs 
+ last_byte
)[0] 
 137 def _read_string(reader
): 
 139     resb 
= reader
.read(slen
) 
 140     assert len(resb
) == slen
 
 141     return resb
.decode('utf-8') 
 144 def _read_bytes(count
, reader
): 
 146     resb 
= reader
.read(count
) 
 147     assert len(resb
) == count
 
 151 def _read_byte(reader
): 
 152     resb 
= _read_bytes(1, reader
=reader
) 
 153     res 
= compat_struct_unpack('<B', resb
)[0] 
 157 StringClass 
= _AVMClass('(no name idx)', 'String') 
 158 ByteArrayClass 
= _AVMClass('(no name idx)', 'ByteArray') 
 159 TimerClass 
= _AVMClass('(no name idx)', 'Timer') 
 160 TimerEventClass 
= _AVMClass('(no name idx)', 'TimerEvent', {'TIMER': 'timer'}) 
 162     StringClass
.name
: StringClass
, 
 163     ByteArrayClass
.name
: ByteArrayClass
, 
 164     TimerClass
.name
: TimerClass
, 
 165     TimerEventClass
.name
: TimerEventClass
, 
 169 class _Undefined(object): 
 172     __nonzero__ 
= __bool__
 
 182 undefined 
= _Undefined() 
 185 class SWFInterpreter(object): 
 186     def __init__(self
, file_contents
): 
 187         self
._patched
_functions 
= { 
 188             (TimerClass
, 'addEventListener'): lambda params
: undefined
, 
 191                         for tag_code
, tag 
in _extract_tags(file_contents
) 
 193         p 
= code_tag
.index(b
'\0', 4) + 1 
 194         code_reader 
= io
.BytesIO(code_tag
[p
:]) 
 196         # Parse ABC (AVM2 ByteCode) 
 198         # Define a couple convenience methods 
 199         u30 
= lambda *args
: _u30(*args
, reader
=code_reader
) 
 200         s32 
= lambda *args
: _s32(*args
, reader
=code_reader
) 
 201         u32 
= lambda *args
: _u32(*args
, reader
=code_reader
) 
 202         read_bytes 
= lambda *args
: _read_bytes(*args
, reader
=code_reader
) 
 203         read_byte 
= lambda *args
: _read_byte(*args
, reader
=code_reader
) 
 205         # minor_version + major_version 
 210         self
.constant_ints 
= [0] 
 211         for _c 
in range(1, int_count
): 
 212             self
.constant_ints
.append(s32()) 
 213         self
.constant_uints 
= [0] 
 215         for _c 
in range(1, uint_count
): 
 216             self
.constant_uints
.append(u32()) 
 218         read_bytes(max(0, (double_count 
- 1)) * 8) 
 220         self
.constant_strings 
= [''] 
 221         for _c 
in range(1, string_count
): 
 222             s 
= _read_string(code_reader
) 
 223             self
.constant_strings
.append(s
) 
 224         namespace_count 
= u30() 
 225         for _c 
in range(1, namespace_count
): 
 229         for _c 
in range(1, ns_set_count
): 
 231             for _c2 
in range(count
): 
 233         multiname_count 
= u30() 
 242             0x0e: 2,  # MultinameA 
 243             0x1b: 1,  # MultinameL 
 244             0x1c: 1,  # MultinameLA 
 246         self
.multinames 
= [''] 
 247         for _c 
in range(1, multiname_count
): 
 249             assert kind 
in MULTINAME_SIZES
, 'Invalid multiname kind %r' % kind
 
 251                 u30()  # namespace_idx 
 253                 self
.multinames
.append(self
.constant_strings
[name_idx
]) 
 257                 self
.multinames
.append(self
.constant_strings
[name_idx
]) 
 259                 self
.multinames
.append(_Multiname(kind
)) 
 260                 for _c2 
in range(MULTINAME_SIZES
[kind
]): 
 265         MethodInfo 
= collections
.namedtuple( 
 267             ['NEED_ARGUMENTS', 'NEED_REST']) 
 269         for method_id 
in range(method_count
): 
 272             for _ 
in range(param_count
): 
 274             u30()  # name index (always 0 for youtube) 
 276             if flags 
& 0x08 != 0: 
 279                 for c 
in range(option_count
): 
 282             if flags 
& 0x80 != 0: 
 283                 # Param names present 
 284                 for _ 
in range(param_count
): 
 286             mi 
= MethodInfo(flags 
& 0x01 != 0, flags 
& 0x04 != 0) 
 287             method_infos
.append(mi
) 
 290         metadata_count 
= u30() 
 291         for _c 
in range(metadata_count
): 
 294             for _c2 
in range(item_count
): 
 298         def parse_traits_info(): 
 299             trait_name_idx 
= u30() 
 300             kind_full 
= read_byte() 
 301             kind 
= kind_full 
& 0x0f 
 302             attrs 
= kind_full 
>> 4 
 305             if kind 
== 0x00:  # Slot 
 307                 u30()  # type_name_idx 
 311             elif kind 
== 0x06:  # Const 
 313                 u30()  # type_name_idx 
 318                 if vkind 
== 0x03:  # Constant_Int 
 319                     value 
= self
.constant_ints
[vindex
] 
 320                 elif vkind 
== 0x04:  # Constant_UInt 
 321                     value 
= self
.constant_uints
[vindex
] 
 323                     return {}, None  # Ignore silently for now 
 324                 constants 
= {self
.multinames
[trait_name_idx
]: value
} 
 325             elif kind 
in (0x01, 0x02, 0x03):  # Method / Getter / Setter 
 328                 methods
[self
.multinames
[trait_name_idx
]] = method_idx
 
 329             elif kind 
== 0x04:  # Class 
 332             elif kind 
== 0x05:  # Function 
 335                 methods
[function_idx
] = self
.multinames
[trait_name_idx
] 
 337                 raise ExtractorError('Unsupported trait kind %d' % kind
) 
 339             if attrs 
& 0x4 != 0:  # Metadata present 
 340                 metadata_count 
= u30() 
 341                 for _c3 
in range(metadata_count
): 
 342                     u30()  # metadata index 
 344             return methods
, constants
 
 349         for class_id 
in range(class_count
): 
 352             cname 
= self
.multinames
[name_idx
] 
 353             avm_class 
= _AVMClass(name_idx
, cname
) 
 354             classes
.append(avm_class
) 
 356             u30()  # super_name idx 
 358             if flags 
& 0x08 != 0:  # Protected namespace is present 
 359                 u30()  # protected_ns_idx 
 361             for _c2 
in range(intrf_count
): 
 365             for _c2 
in range(trait_count
): 
 366                 trait_methods
, trait_constants 
= parse_traits_info() 
 367                 avm_class
.register_methods(trait_methods
) 
 369                     avm_class
.constants
.update(trait_constants
) 
 371         assert len(classes
) == class_count
 
 372         self
._classes
_by
_name 
= dict((c
.name
, c
) for c 
in classes
) 
 374         for avm_class 
in classes
: 
 375             avm_class
.cinit_idx 
= u30() 
 377             for _c2 
in range(trait_count
): 
 378                 trait_methods
, trait_constants 
= parse_traits_info() 
 379                 avm_class
.register_methods(trait_methods
) 
 381                     avm_class
.constants
.update(trait_constants
) 
 385         for _c 
in range(script_count
): 
 388             for _c2 
in range(trait_count
): 
 392         method_body_count 
= u30() 
 393         Method 
= collections
.namedtuple('Method', ['code', 'local_count']) 
 394         self
._all
_methods 
= [] 
 395         for _c 
in range(method_body_count
): 
 399             u30()  # init_scope_depth 
 400             u30()  # max_scope_depth 
 402             code 
= read_bytes(code_length
) 
 403             m 
= Method(code
, local_count
) 
 404             self
._all
_methods
.append(m
) 
 405             for avm_class 
in classes
: 
 406                 if method_idx 
in avm_class
.method_idxs
: 
 407                     avm_class
.methods
[avm_class
.method_idxs
[method_idx
]] = m
 
 408             exception_count 
= u30() 
 409             for _c2 
in range(exception_count
): 
 416             for _c2 
in range(trait_count
): 
 419         assert p 
+ code_reader
.tell() == len(code_tag
) 
 421     def patch_function(self
, avm_class
, func_name
, f
): 
 422         self
._patched
_functions
[(avm_class
, func_name
)] = f
 
 424     def extract_class(self
, class_name
, call_cinit
=True): 
 426             res 
= self
._classes
_by
_name
[class_name
] 
 428             raise ExtractorError('Class %r not found' % class_name
) 
 430         if call_cinit 
and hasattr(res
, 'cinit_idx'): 
 431             res
.register_methods({'$cinit': res
.cinit_idx
}) 
 432             res
.methods
['$cinit'] = self
._all
_methods
[res
.cinit_idx
] 
 433             cinit 
= self
.extract_function(res
, '$cinit') 
 438     def extract_function(self
, avm_class
, func_name
): 
 439         p 
= self
._patched
_functions
.get((avm_class
, func_name
)) 
 442         if func_name 
in avm_class
.method_pyfunctions
: 
 443             return avm_class
.method_pyfunctions
[func_name
] 
 444         if func_name 
in self
._classes
_by
_name
: 
 445             return self
._classes
_by
_name
[func_name
].make_object() 
 446         if func_name 
not in avm_class
.methods
: 
 447             raise ExtractorError('Cannot find function %s.%s' % ( 
 448                 avm_class
.name
, func_name
)) 
 449         m 
= avm_class
.methods
[func_name
] 
 453             coder 
= io
.BytesIO(m
.code
) 
 454             s24 
= lambda: _s24(coder
) 
 455             u30 
= lambda: _u30(coder
) 
 457             registers 
= [avm_class
.variables
] + list(args
) + [None] * m
.local_count
 
 459             scopes 
= collections
.deque([ 
 460                 self
._classes
_by
_name
, avm_class
.constants
, avm_class
.variables
]) 
 462                 opcode 
= _read_byte(coder
) 
 463                 if opcode 
== 9:  # label 
 464                     pass  # Spec says: "Do nothing." 
 465                 elif opcode 
== 16:  # jump 
 467                     coder
.seek(coder
.tell() + offset
) 
 468                 elif opcode 
== 17:  # iftrue 
 472                         coder
.seek(coder
.tell() + offset
) 
 473                 elif opcode 
== 18:  # iffalse 
 477                         coder
.seek(coder
.tell() + offset
) 
 478                 elif opcode 
== 19:  # ifeq 
 483                         coder
.seek(coder
.tell() + offset
) 
 484                 elif opcode 
== 20:  # ifne 
 489                         coder
.seek(coder
.tell() + offset
) 
 490                 elif opcode 
== 21:  # iflt 
 495                         coder
.seek(coder
.tell() + offset
) 
 496                 elif opcode 
== 32:  # pushnull 
 498                 elif opcode 
== 33:  # pushundefined 
 499                     stack
.append(undefined
) 
 500                 elif opcode 
== 36:  # pushbyte 
 501                     v 
= _read_byte(coder
) 
 503                 elif opcode 
== 37:  # pushshort 
 506                 elif opcode 
== 38:  # pushtrue 
 508                 elif opcode 
== 39:  # pushfalse 
 510                 elif opcode 
== 40:  # pushnan 
 511                     stack
.append(float('NaN')) 
 512                 elif opcode 
== 42:  # dup 
 515                 elif opcode 
== 44:  # pushstring 
 517                     stack
.append(self
.constant_strings
[idx
]) 
 518                 elif opcode 
== 48:  # pushscope 
 519                     new_scope 
= stack
.pop() 
 520                     scopes
.append(new_scope
) 
 521                 elif opcode 
== 66:  # construct 
 523                     args 
= list(reversed( 
 524                         [stack
.pop() for _ 
in range(arg_count
)])) 
 526                     res 
= obj
.avm_class
.make_object() 
 528                 elif opcode 
== 70:  # callproperty 
 530                     mname 
= self
.multinames
[index
] 
 532                     args 
= list(reversed( 
 533                         [stack
.pop() for _ 
in range(arg_count
)])) 
 536                     if obj 
== StringClass
: 
 537                         if mname 
== 'String': 
 538                             assert len(args
) == 1 
 539                             assert isinstance(args
[0], ( 
 540                                 int, compat_str
, _Undefined
)) 
 541                             if args
[0] == undefined
: 
 544                                 res 
= compat_str(args
[0]) 
 548                             raise NotImplementedError( 
 549                                 'Function String.%s is not yet implemented' 
 551                     elif isinstance(obj
, _AVMClass_Object
): 
 552                         func 
= self
.extract_function(obj
.avm_class
, mname
) 
 556                     elif isinstance(obj
, _AVMClass
): 
 557                         func 
= self
.extract_function(obj
, mname
) 
 561                     elif isinstance(obj
, _ScopeDict
): 
 562                         if mname 
in obj
.avm_class
.method_names
: 
 563                             func 
= self
.extract_function(obj
.avm_class
, mname
) 
 569                     elif isinstance(obj
, compat_str
): 
 571                             assert len(args
) == 1 
 572                             assert isinstance(args
[0], compat_str
) 
 576                                 res 
= obj
.split(args
[0]) 
 579                         elif mname 
== 'charCodeAt': 
 580                             assert len(args
) <= 1 
 581                             idx 
= 0 if len(args
) == 0 else args
[0] 
 582                             assert isinstance(idx
, int) 
 586                     elif isinstance(obj
, list): 
 588                             assert len(args
) == 1 
 589                             assert isinstance(args
[0], int) 
 593                         elif mname 
== 'join': 
 594                             assert len(args
) == 1 
 595                             assert isinstance(args
[0], compat_str
) 
 596                             res 
= args
[0].join(obj
) 
 599                     raise NotImplementedError( 
 600                         'Unsupported property %r on %r' 
 602                 elif opcode 
== 71:  # returnvoid 
 605                 elif opcode 
== 72:  # returnvalue 
 608                 elif opcode 
== 73:  # constructsuper 
 609                     # Not yet implemented, just hope it works without it 
 611                     args 
= list(reversed( 
 612                         [stack
.pop() for _ 
in range(arg_count
)])) 
 614                 elif opcode 
== 74:  # constructproperty 
 617                     args 
= list(reversed( 
 618                         [stack
.pop() for _ 
in range(arg_count
)])) 
 621                     mname 
= self
.multinames
[index
] 
 622                     assert isinstance(obj
, _AVMClass
) 
 624                     # We do not actually call the constructor for now; 
 625                     # we just pretend it does nothing 
 626                     stack
.append(obj
.make_object()) 
 627                 elif opcode 
== 79:  # callpropvoid 
 629                     mname 
= self
.multinames
[index
] 
 631                     args 
= list(reversed( 
 632                         [stack
.pop() for _ 
in range(arg_count
)])) 
 634                     if isinstance(obj
, _AVMClass_Object
): 
 635                         func 
= self
.extract_function(obj
.avm_class
, mname
) 
 637                         assert res 
is undefined
 
 639                     if isinstance(obj
, _ScopeDict
): 
 640                         assert mname 
in obj
.avm_class
.method_names
 
 641                         func 
= self
.extract_function(obj
.avm_class
, mname
) 
 643                         assert res 
is undefined
 
 645                     if mname 
== 'reverse': 
 646                         assert isinstance(obj
, list) 
 649                         raise NotImplementedError( 
 650                             'Unsupported (void) property %r on %r' 
 652                 elif opcode 
== 86:  # newarray 
 655                     for i 
in range(arg_count
): 
 656                         arr
.append(stack
.pop()) 
 659                 elif opcode 
== 93:  # findpropstrict 
 661                     mname 
= self
.multinames
[index
] 
 662                     for s 
in reversed(scopes
): 
 668                     if mname 
not in res 
and mname 
in _builtin_classes
: 
 669                         stack
.append(_builtin_classes
[mname
]) 
 671                         stack
.append(res
[mname
]) 
 672                 elif opcode 
== 94:  # findproperty 
 674                     mname 
= self
.multinames
[index
] 
 675                     for s 
in reversed(scopes
): 
 680                         res 
= avm_class
.variables
 
 682                 elif opcode 
== 96:  # getlex 
 684                     mname 
= self
.multinames
[index
] 
 685                     for s 
in reversed(scopes
): 
 690                         scope 
= avm_class
.variables
 
 694                     elif mname 
in _builtin_classes
: 
 695                         res 
= _builtin_classes
[mname
] 
 697                         # Assume uninitialized 
 701                 elif opcode 
== 97:  # setproperty 
 704                     idx 
= self
.multinames
[index
] 
 705                     if isinstance(idx
, _Multiname
): 
 709                 elif opcode 
== 98:  # getlocal 
 711                     stack
.append(registers
[index
]) 
 712                 elif opcode 
== 99:  # setlocal 
 715                     registers
[index
] = value
 
 716                 elif opcode 
== 102:  # getproperty 
 718                     pname 
= self
.multinames
[index
] 
 719                     if pname 
== 'length': 
 721                         assert isinstance(obj
, (compat_str
, list)) 
 722                         stack
.append(len(obj
)) 
 723                     elif isinstance(pname
, compat_str
):  # Member access 
 725                         if isinstance(obj
, _AVMClass
): 
 726                             res 
= obj
.static_properties
[pname
] 
 730                         assert isinstance(obj
, (dict, _ScopeDict
)),\
 
 731                             'Accessing member %r on %r' % (pname
, obj
) 
 732                         res 
= obj
.get(pname
, undefined
) 
 734                     else:  # Assume attribute access 
 736                         assert isinstance(idx
, int) 
 738                         assert isinstance(obj
, list) 
 739                         stack
.append(obj
[idx
]) 
 740                 elif opcode 
== 104:  # initproperty 
 743                     idx 
= self
.multinames
[index
] 
 744                     if isinstance(idx
, _Multiname
): 
 748                 elif opcode 
== 115:  # convert_ 
 750                     intvalue 
= int(value
) 
 751                     stack
.append(intvalue
) 
 752                 elif opcode 
== 128:  # coerce 
 754                 elif opcode 
== 130:  # coerce_a 
 756                     # um, yes, it's any value 
 758                 elif opcode 
== 133:  # coerce_s 
 759                     assert isinstance(stack
[-1], (type(None), compat_str
)) 
 760                 elif opcode 
== 147:  # decrement 
 762                     assert isinstance(value
, int) 
 763                     stack
.append(value 
- 1) 
 764                 elif opcode 
== 149:  # typeof 
 767                         _Undefined
: 'undefined', 
 768                         compat_str
: 'String', 
 772                 elif opcode 
== 160:  # add 
 775                     res 
= value1 
+ value2
 
 777                 elif opcode 
== 161:  # subtract 
 780                     res 
= value1 
- value2
 
 782                 elif opcode 
== 162:  # multiply 
 785                     res 
= value1 
* value2
 
 787                 elif opcode 
== 164:  # modulo 
 790                     res 
= value1 
% value2
 
 792                 elif opcode 
== 168:  # bitand 
 795                     assert isinstance(value1
, int) 
 796                     assert isinstance(value2
, int) 
 797                     res 
= value1 
& value2
 
 799                 elif opcode 
== 171:  # equals 
 802                     result 
= value1 
== value2
 
 804                 elif opcode 
== 175:  # greaterequals 
 807                     result 
= value1 
>= value2
 
 809                 elif opcode 
== 192:  # increment_i 
 811                     assert isinstance(value
, int) 
 812                     stack
.append(value 
+ 1) 
 813                 elif opcode 
== 208:  # getlocal_0 
 814                     stack
.append(registers
[0]) 
 815                 elif opcode 
== 209:  # getlocal_1 
 816                     stack
.append(registers
[1]) 
 817                 elif opcode 
== 210:  # getlocal_2 
 818                     stack
.append(registers
[2]) 
 819                 elif opcode 
== 211:  # getlocal_3 
 820                     stack
.append(registers
[3]) 
 821                 elif opcode 
== 212:  # setlocal_0 
 822                     registers
[0] = stack
.pop() 
 823                 elif opcode 
== 213:  # setlocal_1 
 824                     registers
[1] = stack
.pop() 
 825                 elif opcode 
== 214:  # setlocal_2 
 826                     registers
[2] = stack
.pop() 
 827                 elif opcode 
== 215:  # setlocal_3 
 828                     registers
[3] = stack
.pop() 
 830                     raise NotImplementedError( 
 831                         'Unsupported opcode %d' % opcode
) 
 833         avm_class
.method_pyfunctions
[func_name
] = resfunc